@clickzetta/cz-cli-darwin-x64 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1243)
  1. package/bin/cz-cli +0 -0
  2. package/bin/skills/cz-cli/SKILL.md +58 -0
  3. package/bin/skills/cz-cli/references/profile-setup.md +88 -0
  4. package/bin/skills/cz-cli-inner/SKILL.md +96 -0
  5. package/bin/skills/dt-creator/SKILL.md +15 -0
  6. package/bin/skills/dt-creator/references/dt-declaration-strategy.md +185 -0
  7. package/bin/skills/dt-creator/references/incremental-config-reference.md +429 -0
  8. package/bin/skills/dt-creator/references/refresh-history-guide.md +268 -0
  9. package/bin/skills/dt-creator/references/sql-limitations.md +80 -0
  10. package/bin/skills/dynamic-table-alter/SKILL.md +190 -0
  11. package/bin/skills/lakehouse-doc/SKILL.md +107 -0
  12. package/bin/skills/lakehouse-doc/references/.gitattributes +1 -0
  13. package/bin/skills/lakehouse-doc/references/.gitlab-ci.yml +11 -0
  14. package/bin/skills/lakehouse-doc/references/702bc8f656.md +387 -0
  15. package/bin/skills/lakehouse-doc/references/774c65e217.md +136 -0
  16. package/bin/skills/lakehouse-doc/references/9164fed65a.md +1 -0
  17. package/bin/skills/lakehouse-doc/references/AIGateway.md +3 -0
  18. package/bin/skills/lakehouse-doc/references/AI_COMPLETE.md +157 -0
  19. package/bin/skills/lakehouse-doc/references/AI_EMBEDDING.md +70 -0
  20. package/bin/skills/lakehouse-doc/references/AI_Gateway.md +255 -0
  21. package/bin/skills/lakehouse-doc/references/AI_eco.md +1 -0
  22. package/bin/skills/lakehouse-doc/references/AI_function_in_SQL.md +25 -0
  23. package/bin/skills/lakehouse-doc/references/AI_function_overview.md +25 -0
  24. package/bin/skills/lakehouse-doc/references/ALTER-EXTERNAL-TABLE.md +78 -0
  25. package/bin/skills/lakehouse-doc/references/ALTER-SCHEMA.md +85 -0
  26. package/bin/skills/lakehouse-doc/references/ALTER-TABLE-COLUMN.md +223 -0
  27. package/bin/skills/lakehouse-doc/references/ALTERTABLE.md +92 -0
  28. package/bin/skills/lakehouse-doc/references/ARRAY.md +99 -0
  29. package/bin/skills/lakehouse-doc/references/Account.md +2 -0
  30. package/bin/skills/lakehouse-doc/references/Analysis.md +1 -0
  31. package/bin/skills/lakehouse-doc/references/AnalyticsModernDataStack.md +390 -0
  32. package/bin/skills/lakehouse-doc/references/Application_list.md +26 -0
  33. package/bin/skills/lakehouse-doc/references/Approval.md +1 -0
  34. package/bin/skills/lakehouse-doc/references/Approval_list.md +61 -0
  35. package/bin/skills/lakehouse-doc/references/BIGINT.md +49 -0
  36. package/bin/skills/lakehouse-doc/references/BINARY.md +104 -0
  37. package/bin/skills/lakehouse-doc/references/BOOLEAN.md +47 -0
  38. package/bin/skills/lakehouse-doc/references/BP_AI_Function_Image2text.md +203 -0
  39. package/bin/skills/lakehouse-doc/references/BluepipeOracleLakehouse_DataSync.md +244 -0
  40. package/bin/skills/lakehouse-doc/references/CHAR.md +38 -0
  41. package/bin/skills/lakehouse-doc/references/CONNECTION.md +1 -0
  42. package/bin/skills/lakehouse-doc/references/COPY-INTO-Location.md +404 -0
  43. package/bin/skills/lakehouse-doc/references/COPY_INTO_Location.md +371 -0
  44. package/bin/skills/lakehouse-doc/references/CREAREUSER.md +42 -0
  45. package/bin/skills/lakehouse-doc/references/CREATE-BLOOMFILTER-INDEX.md +138 -0
  46. package/bin/skills/lakehouse-doc/references/CREATECONNECTION.md +11 -0
  47. package/bin/skills/lakehouse-doc/references/CREATEEXTERNAlLSCHEMA.md +290 -0
  48. package/bin/skills/lakehouse-doc/references/CREATEMATERIALIZEDVIEW.md +518 -0
  49. package/bin/skills/lakehouse-doc/references/CREATEROLE.md +71 -0
  50. package/bin/skills/lakehouse-doc/references/CREATESCHEMA.md +40 -0
  51. package/bin/skills/lakehouse-doc/references/CREATEVIEW.md +63 -0
  52. package/bin/skills/lakehouse-doc/references/CREATE_EXTERNATL_FUNCTION.md +219 -0
  53. package/bin/skills/lakehouse-doc/references/CTERevenueCohort.md +275 -0
  54. package/bin/skills/lakehouse-doc/references/ComputeResourceDDL.md +1 -0
  55. package/bin/skills/lakehouse-doc/references/Concepts.md +1 -0
  56. package/bin/skills/lakehouse-doc/references/Create_Embeding_Function.md +236 -0
  57. package/bin/skills/lakehouse-doc/references/Create_LLM_Function.md +242 -0
  58. package/bin/skills/lakehouse-doc/references/CreditScoringwithZettaparkandPythonMLlibraryNew.md +873 -0
  59. package/bin/skills/lakehouse-doc/references/DATE.md +50 -0
  60. package/bin/skills/lakehouse-doc/references/DDL.md +1 -0
  61. package/bin/skills/lakehouse-doc/references/DECIMAL.md +27 -0
  62. package/bin/skills/lakehouse-doc/references/DELETE.md +48 -0
  63. package/bin/skills/lakehouse-doc/references/DESC-INDEX.md +41 -0
  64. package/bin/skills/lakehouse-doc/references/DESC-JOB.md +31 -0
  65. package/bin/skills/lakehouse-doc/references/DESCCONNECTION.md +39 -0
  66. package/bin/skills/lakehouse-doc/references/DESCMATERIALIZEDVIEW.md +31 -0
  67. package/bin/skills/lakehouse-doc/references/DESCSCHEMAS.md +59 -0
  68. package/bin/skills/lakehouse-doc/references/DESCTABLE.md +105 -0
  69. package/bin/skills/lakehouse-doc/references/DESCVIEW.md +66 -0
  70. package/bin/skills/lakehouse-doc/references/DOUBLE.md +58 -0
  71. package/bin/skills/lakehouse-doc/references/DQL.md +1 -0
  72. package/bin/skills/lakehouse-doc/references/DROP-INDEX.md +29 -0
  73. package/bin/skills/lakehouse-doc/references/DROPCONNECTION.md +32 -0
  74. package/bin/skills/lakehouse-doc/references/DROPMATERIALIZEDVIEW.md +42 -0
  75. package/bin/skills/lakehouse-doc/references/DROPROLE.md +56 -0
  76. package/bin/skills/lakehouse-doc/references/DROPSCHEMA.md +37 -0
  77. package/bin/skills/lakehouse-doc/references/DROPTABLE.md +46 -0
  78. package/bin/skills/lakehouse-doc/references/DROPUSER.md +34 -0
  79. package/bin/skills/lakehouse-doc/references/DROPVIEW.md +33 -0
  80. package/bin/skills/lakehouse-doc/references/DataQuality.md +99 -0
  81. package/bin/skills/lakehouse-doc/references/DataSourceConfigGuide.md +1 -0
  82. package/bin/skills/lakehouse-doc/references/DataSource_ADBMySQL.md +35 -0
  83. package/bin/skills/lakehouse-doc/references/DataSource_ADB_PostgreSQL.md +36 -0
  84. package/bin/skills/lakehouse-doc/references/DataSource_AMQP.md +37 -0
  85. package/bin/skills/lakehouse-doc/references/DataSource_Amazon_DocumentDB.md +115 -0
  86. package/bin/skills/lakehouse-doc/references/DataSource_Amazon_OpenSearch.md +42 -0
  87. package/bin/skills/lakehouse-doc/references/DataSource_Aurora_MySQL.md +35 -0
  88. package/bin/skills/lakehouse-doc/references/DataSource_Aurora_PostgreSQL.md +36 -0
  89. package/bin/skills/lakehouse-doc/references/DataSource_AutoMQ.md +124 -0
  90. package/bin/skills/lakehouse-doc/references/DataSource_COS.md +31 -0
  91. package/bin/skills/lakehouse-doc/references/DataSource_ClickHouse.md +36 -0
  92. package/bin/skills/lakehouse-doc/references/DataSource_DB2.md +36 -0
  93. package/bin/skills/lakehouse-doc/references/DataSource_DM.md +35 -0
  94. package/bin/skills/lakehouse-doc/references/DataSource_Databricks.md +38 -0
  95. package/bin/skills/lakehouse-doc/references/DataSource_Doris.md +34 -0
  96. package/bin/skills/lakehouse-doc/references/DataSource_DynamoDB.md +37 -0
  97. package/bin/skills/lakehouse-doc/references/DataSource_ElasticSearch.md +30 -0
  98. package/bin/skills/lakehouse-doc/references/DataSource_Greenplum.md +36 -0
  99. package/bin/skills/lakehouse-doc/references/DataSource_HANA.md +53 -0
  100. package/bin/skills/lakehouse-doc/references/DataSource_HBase.md +29 -0
  101. package/bin/skills/lakehouse-doc/references/DataSource_Hive.md +52 -0
  102. package/bin/skills/lakehouse-doc/references/DataSource_Hologres.md +36 -0
  103. package/bin/skills/lakehouse-doc/references/DataSource_Kafka.md +32 -0
  104. package/bin/skills/lakehouse-doc/references/DataSource_MariaDB.md +36 -0
  105. package/bin/skills/lakehouse-doc/references/DataSource_MaxCompute.md +32 -0
  106. package/bin/skills/lakehouse-doc/references/DataSource_MongoDB.md +36 -0
  107. package/bin/skills/lakehouse-doc/references/DataSource_MySQL.md +36 -0
  108. package/bin/skills/lakehouse-doc/references/DataSource_OSS.md +31 -0
  109. package/bin/skills/lakehouse-doc/references/DataSource_Oracle.md +35 -0
  110. package/bin/skills/lakehouse-doc/references/DataSource_PorarDB.md +37 -0
  111. package/bin/skills/lakehouse-doc/references/DataSource_PostgreSQL.md +36 -0
  112. package/bin/skills/lakehouse-doc/references/DataSource_Redis.md +55 -0
  113. package/bin/skills/lakehouse-doc/references/DataSource_Redshift.md +49 -0
  114. package/bin/skills/lakehouse-doc/references/DataSource_RestApi.md +30 -0
  115. package/bin/skills/lakehouse-doc/references/DataSource_S3.md +25 -0
  116. package/bin/skills/lakehouse-doc/references/DataSource_SLS.md +31 -0
  117. package/bin/skills/lakehouse-doc/references/DataSource_StarRocks.md +35 -0
  118. package/bin/skills/lakehouse-doc/references/DataSource_TiDB.md +36 -0
  119. package/bin/skills/lakehouse-doc/references/Datalake_StorageConnection.md +12 -0
  120. package/bin/skills/lakehouse-doc/references/Datasource_SQLServer.md +36 -0
  121. package/bin/skills/lakehouse-doc/references/Datus_Lakehouse_Integrated_Guide.md +3 -0
  122. package/bin/skills/lakehouse-doc/references/Datus_Lakehouse_MCPServer.md +111 -0
  123. package/bin/skills/lakehouse-doc/references/Dify_Integreated_with_LakehouseMCPServer.md +71 -0
  124. package/bin/skills/lakehouse-doc/references/ELTModernDataStack.md +497 -0
  125. package/bin/skills/lakehouse-doc/references/ELT_practice.md +1 -0
  126. package/bin/skills/lakehouse-doc/references/EXPLAIN.md +92 -0
  127. package/bin/skills/lakehouse-doc/references/EXTERNALFUNCITON.md +1 -0
  128. package/bin/skills/lakehouse-doc/references/EXTERNALFUNCTION.md +0 -0
  129. package/bin/skills/lakehouse-doc/references/EXTERNALFUNCTION/345/274/200/345/217/221/346/214/207/345/215/227.md +142 -0
  130. package/bin/skills/lakehouse-doc/references/EXTERNALSCHEMA.md +1 -0
  131. package/bin/skills/lakehouse-doc/references/EXTERNALSCHMEA.md +94 -0
  132. package/bin/skills/lakehouse-doc/references/ExternalFunctionDevGuideJava.md +556 -0
  133. package/bin/skills/lakehouse-doc/references/FLOAT.md +33 -0
  134. package/bin/skills/lakehouse-doc/references/FeatureEngineeringForExpandingCustomerFeatureswithZettapark.md +427 -0
  135. package/bin/skills/lakehouse-doc/references/FileCommand.md +1 -0
  136. package/bin/skills/lakehouse-doc/references/FileFunction.md +1 -0
  137. package/bin/skills/lakehouse-doc/references/FineBI.md +195 -0
  138. package/bin/skills/lakehouse-doc/references/Full_Text_Search.md +1 -0
  139. package/bin/skills/lakehouse-doc/references/GET.md +63 -0
  140. package/bin/skills/lakehouse-doc/references/GET_PRESIGNED_URL.md +91 -0
  141. package/bin/skills/lakehouse-doc/references/GrantPriveleges.md +113 -0
  142. package/bin/skills/lakehouse-doc/references/Hive_connection.md +50 -0
  143. package/bin/skills/lakehouse-doc/references/IDENTITY-Column.md +74 -0
  144. package/bin/skills/lakehouse-doc/references/INSERT.md +186 -0
  145. package/bin/skills/lakehouse-doc/references/INT.md +36 -0
  146. package/bin/skills/lakehouse-doc/references/INTERVAL.md +143 -0
  147. package/bin/skills/lakehouse-doc/references/Ingesting_Data_from_Alibaba_Cloud_Data_Lake_into_Lakehouse.md +976 -0
  148. package/bin/skills/lakehouse-doc/references/Ingestion.md +1 -0
  149. package/bin/skills/lakehouse-doc/references/JDBC-Driver.md +67 -0
  150. package/bin/skills/lakehouse-doc/references/JDBC_MindsDB_ML_LLM.md +237 -0
  151. package/bin/skills/lakehouse-doc/references/JOIN.md +204 -0
  152. package/bin/skills/lakehouse-doc/references/JSON.md +423 -0
  153. package/bin/skills/lakehouse-doc/references/JSON_DataType.md +49 -0
  154. package/bin/skills/lakehouse-doc/references/KAFKA_Storage_connection.md +1 -0
  155. package/bin/skills/lakehouse-doc/references/Kafka_connection.md +36 -0
  156. package/bin/skills/lakehouse-doc/references/Key_Concepts.md +112 -0
  157. package/bin/skills/lakehouse-doc/references/LATERALVIEW.md +78 -0
  158. package/bin/skills/lakehouse-doc/references/Lakehouse-client-repository.md +11 -0
  159. package/bin/skills/lakehouse-doc/references/LakehouseAI.md +0 -0
  160. package/bin/skills/lakehouse-doc/references/LakehouseAI_overview.md +16 -0
  161. package/bin/skills/lakehouse-doc/references/LakehouseAI/346/246/202/350/277/260.md +0 -0
  162. package/bin/skills/lakehouse-doc/references/LakehouseDataGPTTour.md +64 -0
  163. package/bin/skills/lakehouse-doc/references/LakehouseMCPServer.md +1 -0
  164. package/bin/skills/lakehouse-doc/references/LakehouseMCPServer_intro.md +493 -0
  165. package/bin/skills/lakehouse-doc/references/LakehousePythonZettapark.md +1 -0
  166. package/bin/skills/lakehouse-doc/references/LakehouseStudioTour.md +185 -0
  167. package/bin/skills/lakehouse-doc/references/Lakehouse_Index_Best_Practice.md +681 -0
  168. package/bin/skills/lakehouse-doc/references/Lakehouse_Insight.md +104 -0
  169. package/bin/skills/lakehouse-doc/references/Lakehouse_Platform_Release_Note.md +1 -0
  170. package/bin/skills/lakehouse-doc/references/Lakehouse_Studio_101.md +1 -0
  171. package/bin/skills/lakehouse-doc/references/Lakehouse_Studio_Release_Note.md +1 -0
  172. package/bin/skills/lakehouse-doc/references/Lakehouse_Zilliz_MakeDataReadyforBIandAI.md +228 -0
  173. package/bin/skills/lakehouse-doc/references/Langchain_plug_installation.md +244 -0
  174. package/bin/skills/lakehouse-doc/references/Langchain_plug_quick_start.md +225 -0
  175. package/bin/skills/lakehouse-doc/references/Langchain_plugins_overview.md +406 -0
  176. package/bin/skills/lakehouse-doc/references/Limitation.md +8 -0
  177. package/bin/skills/lakehouse-doc/references/LoggingIn.md +67 -0
  178. package/bin/skills/lakehouse-doc/references/Logstash.md +172 -0
  179. package/bin/skills/lakehouse-doc/references/MAP.md +42 -0
  180. package/bin/skills/lakehouse-doc/references/MATERIALIZEDVIEW.md +112 -0
  181. package/bin/skills/lakehouse-doc/references/MCPServers.md +267 -0
  182. package/bin/skills/lakehouse-doc/references/MERGE.md +498 -0
  183. package/bin/skills/lakehouse-doc/references/ManageAccounts.md +184 -0
  184. package/bin/skills/lakehouse-doc/references/ManagingFilesonDatalakeVolumewithZettapark.md +145 -0
  185. package/bin/skills/lakehouse-doc/references/MigrateSnowflakeRealtimeETLPipelinetoClickzettaLakehouse.md +865 -0
  186. package/bin/skills/lakehouse-doc/references/Migrate_Spark_DataEngineeringBestPractices_Project_to_Lakehouse.md +292 -0
  187. package/bin/skills/lakehouse-doc/references/ModernDataStackWithEcosystemTools.md +1 -0
  188. package/bin/skills/lakehouse-doc/references/N8N_AI_Workflow_Integration.md +1 -0
  189. package/bin/skills/lakehouse-doc/references/N8N_Integrated_with_LakehouseMCPServer.md +128 -0
  190. package/bin/skills/lakehouse-doc/references/Notebook.md +109 -0
  191. package/bin/skills/lakehouse-doc/references/NotesandGuidelinesforUsingPartitionTables.md +1627 -0
  192. package/bin/skills/lakehouse-doc/references/OPTIMIZE.md +80 -0
  193. package/bin/skills/lakehouse-doc/references/OptimizingComputingResources.md +1 -0
  194. package/bin/skills/lakehouse-doc/references/Overview.md +1 -0
  195. package/bin/skills/lakehouse-doc/references/PUT.md +70 -0
  196. package/bin/skills/lakehouse-doc/references/PerformingVectorandScalarRetrievalinheSameTableinLakehouse.md +84 -0
  197. package/bin/skills/lakehouse-doc/references/Permission_application.md +43 -0
  198. package/bin/skills/lakehouse-doc/references/PowerBI.md +113 -0
  199. package/bin/skills/lakehouse-doc/references/PythonSDKVersionHistory.md +24 -0
  200. package/bin/skills/lakehouse-doc/references/PythonSample_put_gharchive2oss.md +153 -0
  201. package/bin/skills/lakehouse-doc/references/PythonSample_put_github_rt_events.md +336 -0
  202. package/bin/skills/lakehouse-doc/references/PythonSqlAlchemyVersionHistory.md +23 -0
  203. package/bin/skills/lakehouse-doc/references/PythonTaskDev.md +1 -0
  204. package/bin/skills/lakehouse-doc/references/Python_Task.md +28 -0
  205. package/bin/skills/lakehouse-doc/references/Query_SnowflakeOpenCatalog_Icebergtable.md +114 -0
  206. package/bin/skills/lakehouse-doc/references/QuickStartwithCopycommand.md +79 -0
  207. package/bin/skills/lakehouse-doc/references/README.md +1 -0
  208. package/bin/skills/lakehouse-doc/references/REFRESH.md +37 -0
  209. package/bin/skills/lakehouse-doc/references/REMOTEFUNCTION.md +1 -0
  210. package/bin/skills/lakehouse-doc/references/RN_2023-08-07.md +67 -0
  211. package/bin/skills/lakehouse-doc/references/RN_2023-09-05.md +75 -0
  212. package/bin/skills/lakehouse-doc/references/RN_2023-09-18.md +50 -0
  213. package/bin/skills/lakehouse-doc/references/RN_2023-09-20.md +55 -0
  214. package/bin/skills/lakehouse-doc/references/RN_2023-10-25.md +102 -0
  215. package/bin/skills/lakehouse-doc/references/RN_2023-11-09.md +84 -0
  216. package/bin/skills/lakehouse-doc/references/RN_2023-12-25.md +84 -0
  217. package/bin/skills/lakehouse-doc/references/RN_2024-01-05.md +78 -0
  218. package/bin/skills/lakehouse-doc/references/RN_2024-02-05.md +87 -0
  219. package/bin/skills/lakehouse-doc/references/RN_2024-03-22.md +76 -0
  220. package/bin/skills/lakehouse-doc/references/RN_2024-04-10.md +61 -0
  221. package/bin/skills/lakehouse-doc/references/RN_2024-04-16.md +38 -0
  222. package/bin/skills/lakehouse-doc/references/RN_2024-05-10.md +47 -0
  223. package/bin/skills/lakehouse-doc/references/RN_2024-05-15.md +43 -0
  224. package/bin/skills/lakehouse-doc/references/RN_2024-05-24.md +143 -0
  225. package/bin/skills/lakehouse-doc/references/RN_2024-06-06.md +59 -0
  226. package/bin/skills/lakehouse-doc/references/RN_2024-06-07.md +71 -0
  227. package/bin/skills/lakehouse-doc/references/RN_2024-06-27.md +55 -0
  228. package/bin/skills/lakehouse-doc/references/RN_2024-07-22.md +121 -0
  229. package/bin/skills/lakehouse-doc/references/RN_2024-07-24.md +58 -0
  230. package/bin/skills/lakehouse-doc/references/RN_2024-08-07.md +42 -0
  231. package/bin/skills/lakehouse-doc/references/RN_2024-09-26.md +106 -0
  232. package/bin/skills/lakehouse-doc/references/RN_2024-10-15.md +49 -0
  233. package/bin/skills/lakehouse-doc/references/RN_2024_11_11.md +50 -0
  234. package/bin/skills/lakehouse-doc/references/RN_2024_12_12.md +52 -0
  235. package/bin/skills/lakehouse-doc/references/RN_2024_12_25.md +40 -0
  236. package/bin/skills/lakehouse-doc/references/RN_2025-03-05.md +68 -0
  237. package/bin/skills/lakehouse-doc/references/RN_2025-04-01.md +74 -0
  238. package/bin/skills/lakehouse-doc/references/RN_2025-05-20.md +90 -0
  239. package/bin/skills/lakehouse-doc/references/RN_2025-07-03.md +100 -0
  240. package/bin/skills/lakehouse-doc/references/RN_2025-08-25.md +106 -0
  241. package/bin/skills/lakehouse-doc/references/RN_2025_03_03.md +141 -0
  242. package/bin/skills/lakehouse-doc/references/RN_2025_04_22.md +110 -0
  243. package/bin/skills/lakehouse-doc/references/RN_2025_07_15.md +60 -0
  244. package/bin/skills/lakehouse-doc/references/RN_2025_10_23.md +85 -0
  245. package/bin/skills/lakehouse-doc/references/RN_2025_10_30.md +68 -0
  246. package/bin/skills/lakehouse-doc/references/RN_2025_12_17.md +71 -0
  247. package/bin/skills/lakehouse-doc/references/RN_2026_1_30-2.0.0.md +43 -0
  248. package/bin/skills/lakehouse-doc/references/RN_LH_2025_12_30.md +73 -0
  249. package/bin/skills/lakehouse-doc/references/RN_LH_2026_03_13.md +47 -0
  250. package/bin/skills/lakehouse-doc/references/Refactor_ELT_practice.md +241 -0
  251. package/bin/skills/lakehouse-doc/references/RemoteFunctionAsUDF.md +1 -0
  252. package/bin/skills/lakehouse-doc/references/RemoteFunctionBestPractice.md +350 -0
  253. package/bin/skills/lakehouse-doc/references/RemoteFunctionDevGuidePython3.md +571 -0
  254. package/bin/skills/lakehouse-doc/references/RemoteFunctionOnACR.md +249 -0
  255. package/bin/skills/lakehouse-doc/references/RemoteFunctionintro.md +54 -0
  256. package/bin/skills/lakehouse-doc/references/RemoteFunction/344/273/213/347/273/215.md +1 -0
  257. package/bin/skills/lakehouse-doc/references/RemoteFunction/345/274/200/345/217/221/346/214/207/345/215/227Python3.md +1 -0
  258. package/bin/skills/lakehouse-doc/references/RemoteFunction/346/234/200/344/275/263/345/256/236/350/267/265.md +1 -0
  259. package/bin/skills/lakehouse-doc/references/RevokePriveleges.md +98 -0
  260. package/bin/skills/lakehouse-doc/references/SCHEMA.md +48 -0
  261. package/bin/skills/lakehouse-doc/references/SCHEMADDL.md +0 -0
  262. package/bin/skills/lakehouse-doc/references/SHOW-INDEX.md +22 -0
  263. package/bin/skills/lakehouse-doc/references/SHOWCONNECTIONS.md +44 -0
  264. package/bin/skills/lakehouse-doc/references/SHOWFUNCTIONS.md +38 -0
  265. package/bin/skills/lakehouse-doc/references/SHOWGRANTS.md +62 -0
  266. package/bin/skills/lakehouse-doc/references/SHOWROLES.md +59 -0
  267. package/bin/skills/lakehouse-doc/references/SHOWTABLES.md +46 -0
  268. package/bin/skills/lakehouse-doc/references/SHOWUSERS.md +26 -0
  269. package/bin/skills/lakehouse-doc/references/SMALLINT.md +48 -0
  270. package/bin/skills/lakehouse-doc/references/SQL_CREATE_TABLE_GUIDE.md +1210 -0
  271. package/bin/skills/lakehouse-doc/references/SQL_DML_Considerations.md +601 -0
  272. package/bin/skills/lakehouse-doc/references/SQL_Join_Guide.md +655 -0
  273. package/bin/skills/lakehouse-doc/references/SQL_SELECT_Considerations.md +1818 -0
  274. package/bin/skills/lakehouse-doc/references/SQL_With_CTE_Guide.md +1510 -0
  275. package/bin/skills/lakehouse-doc/references/SQL_customers.md +74 -0
  276. package/bin/skills/lakehouse-doc/references/SQL_revenue.md +31 -0
  277. package/bin/skills/lakehouse-doc/references/STRING.md +80 -0
  278. package/bin/skills/lakehouse-doc/references/STRUCT.md +33 -0
  279. package/bin/skills/lakehouse-doc/references/SUMMARY.md +1279 -0
  280. package/bin/skills/lakehouse-doc/references/Security_system_inventory_and_optimization_based_Information_Schema.md +412 -0
  281. package/bin/skills/lakehouse-doc/references/Server_data_for_AI.md +15 -0
  282. package/bin/skills/lakehouse-doc/references/SlowlyChangingDimensionsInLakehouseUsingStreamsandTasks.md +616 -0
  283. package/bin/skills/lakehouse-doc/references/Spark_Lakehouse_iceberg_REST.md +151 -0
  284. package/bin/skills/lakehouse-doc/references/StudioDI_PrivateLinkVPC_fromRDS.md +105 -0
  285. package/bin/skills/lakehouse-doc/references/Supported_Cloud_Platforms.md +40 -0
  286. package/bin/skills/lakehouse-doc/references/TABLE.md +49 -0
  287. package/bin/skills/lakehouse-doc/references/TIMESTAMP.md +56 -0
  288. package/bin/skills/lakehouse-doc/references/TIMETRAVEL.md +207 -0
  289. package/bin/skills/lakehouse-doc/references/TINYINT.md +63 -0
  290. package/bin/skills/lakehouse-doc/references/TPC-H100G_experience.md +49 -0
  291. package/bin/skills/lakehouse-doc/references/TRUNCATE.md +144 -0
  292. package/bin/skills/lakehouse-doc/references/TableDesign.md +270 -0
  293. package/bin/skills/lakehouse-doc/references/TableauConnectToLakehouse.md +64 -0
  294. package/bin/skills/lakehouse-doc/references/Tutorials.md +1 -0
  295. package/bin/skills/lakehouse-doc/references/UNDROP-TABLE.md +163 -0
  296. package/bin/skills/lakehouse-doc/references/UPDATE.md +70 -0
  297. package/bin/skills/lakehouse-doc/references/USESCHEMA.md +53 -0
  298. package/bin/skills/lakehouse-doc/references/UnifiedWorkflowIntro.md +31 -0
  299. package/bin/skills/lakehouse-doc/references/UnifiedWorkflow_demo.md +175 -0
  300. package/bin/skills/lakehouse-doc/references/Unstructured_io.md +735 -0
  301. package/bin/skills/lakehouse-doc/references/VARCHARleghth.md +42 -0
  302. package/bin/skills/lakehouse-doc/references/VIEW.md +47 -0
  303. package/bin/skills/lakehouse-doc/references/Volume_LIST.md +52 -0
  304. package/bin/skills/lakehouse-doc/references/WINDOWFUNCTION.md +561 -0
  305. package/bin/skills/lakehouse-doc/references/WITH.md +41 -0
  306. package/bin/skills/lakehouse-doc/references/ZettaparkQuickStart.md +453 -0
  307. package/bin/skills/lakehouse-doc/references/Zettapark_Data_Engineering_Demo.md +348 -0
  308. package/bin/skills/lakehouse-doc/references/a_comprehensive_guide_to_ingesting_data_into_clickzetta_lakehouse.md +66 -0
  309. package/bin/skills/lakehouse-doc/references/access-control-configration.md +249 -0
  310. package/bin/skills/lakehouse-doc/references/access-control-general.md +82 -0
  311. package/bin/skills/lakehouse-doc/references/access-control.md +240 -0
  312. package/bin/skills/lakehouse-doc/references/account_user_management.md +105 -0
  313. package/bin/skills/lakehouse-doc/references/accountfunds.md +87 -0
  314. package/bin/skills/lakehouse-doc/references/agg_function.md +1 -0
  315. package/bin/skills/lakehouse-doc/references/ai_ready_data_overview.md +13 -0
  316. package/bin/skills/lakehouse-doc/references/airbyte.md +95 -0
  317. package/bin/skills/lakehouse-doc/references/alert.md +143 -0
  318. package/bin/skills/lakehouse-doc/references/alicloud-arn-externalid.md +51 -0
  319. package/bin/skills/lakehouse-doc/references/alicloud_byos_configration.md +129 -0
  320. package/bin/skills/lakehouse-doc/references/aliyun_storage_connection.md +135 -0
  321. package/bin/skills/lakehouse-doc/references/alter-dynamic-table.md +375 -0
  322. package/bin/skills/lakehouse-doc/references/alter-external-schema.md +20 -0
  323. package/bin/skills/lakehouse-doc/references/alter-materialzied-view.md +238 -0
  324. package/bin/skills/lakehouse-doc/references/alter-share.md +43 -0
  325. package/bin/skills/lakehouse-doc/references/alter-user.md +13 -0
  326. package/bin/skills/lakehouse-doc/references/alter-vcluster.md +134 -0
  327. package/bin/skills/lakehouse-doc/references/alter-worksapce.md +55 -0
  328. package/bin/skills/lakehouse-doc/references/alter.md +35 -0
  329. package/bin/skills/lakehouse-doc/references/analysis_internet_data_nyc_green_data.md +449 -0
  330. package/bin/skills/lakehouse-doc/references/analytics_cluster_best_practices.md +377 -0
  331. package/bin/skills/lakehouse-doc/references/analyze-table.md +58 -0
  332. package/bin/skills/lakehouse-doc/references/array_size.md +34 -0
  333. package/bin/skills/lakehouse-doc/references/authentication.md +53 -0
  334. package/bin/skills/lakehouse-doc/references/authoritymanagement.md +1 -0
  335. package/bin/skills/lakehouse-doc/references/auto-index.md +57 -0
  336. package/bin/skills/lakehouse-doc/references/aws_storage_connection.md +114 -0
  337. package/bin/skills/lakehouse-doc/references/backfilling_data.md +60 -0
  338. package/bin/skills/lakehouse-doc/references/batch_sync.md +54 -0
  339. package/bin/skills/lakehouse-doc/references/batch_sync_Sop.md +135 -0
  340. package/bin/skills/lakehouse-doc/references/batchloadparquertfileintoLakehouse.md +79 -0
  341. package/bin/skills/lakehouse-doc/references/bestpractice_bazhuanyu.md +1 -0
  342. package/bin/skills/lakehouse-doc/references/billing.md +62 -0
  343. package/bin/skills/lakehouse-doc/references/bitmap-type.md +524 -0
  344. package/bin/skills/lakehouse-doc/references/bitmap_uba_guide.md +1190 -0
  345. package/bin/skills/lakehouse-doc/references/bloomfilter-summary.md +164 -0
  346. package/bin/skills/lakehouse-doc/references/book.json +17 -0
  347. package/bin/skills/lakehouse-doc/references/bring_your_own_storage.md +1 -0
  348. package/bin/skills/lakehouse-doc/references/build-inverted-index.md +27 -0
  349. package/bin/skills/lakehouse-doc/references/build_rag_with_langchain.md +616 -0
  350. package/bin/skills/lakehouse-doc/references/bulkload-summary.md +37 -0
  351. package/bin/skills/lakehouse-doc/references/bulkloadv1-java-sdk.md +178 -0
  352. package/bin/skills/lakehouse-doc/references/bulkloadv1-python-sdk.md +169 -0
  353. package/bin/skills/lakehouse-doc/references/byos_general.md +165 -0
  354. package/bin/skills/lakehouse-doc/references/byos_tencentcloud_configration.md +138 -0
  355. package/bin/skills/lakehouse-doc/references/cache-command.md +39 -0
  356. package/bin/skills/lakehouse-doc/references/cancel-job.md +51 -0
  357. package/bin/skills/lakehouse-doc/references/cardinality_array.md +45 -0
  358. package/bin/skills/lakehouse-doc/references/charge_analysis_with_lakehouse_mcp_server.md +393 -0
  359. package/bin/skills/lakehouse-doc/references/clone-doc.md +111 -0
  360. package/bin/skills/lakehouse-doc/references/cloud_object_storage.md +1 -0
  361. package/bin/skills/lakehouse-doc/references/cluster-table-guide.md +68 -0
  362. package/bin/skills/lakehouse-doc/references/cluster-table.md +64 -0
  363. package/bin/skills/lakehouse-doc/references/composite_task.md +178 -0
  364. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_3rd_tools.md +11 -0
  365. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_dbv_sql_put.md +48 -0
  366. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_environment_and_data_generate.md +627 -0
  367. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_javasdk_buckload_realtime.md +740 -0
  368. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_kafka_realtime_sync.md +71 -0
  369. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_local_file_into_table_by_studio.md +64 -0
  370. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_overview.md +66 -0
  371. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_pipe_kafka.md +7 -0
  372. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_pipe_oss.md +7 -0
  373. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_studio_batchload_public_network.md +52 -0
  374. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_studio_python_node.md +111 -0
  375. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_studio_realtime_cdc_public_network.md +249 -0
  376. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_studio_sql_insert.md +180 -0
  377. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_zettapark_put_file_to_lake.md +206 -0
  378. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_zettapark_save_as_table.md +79 -0
  379. package/bin/skills/lakehouse-doc/references/comprehensive_guide_to_ingesting_zettapark_sql_insert.md +113 -0
  380. package/bin/skills/lakehouse-doc/references/computation.md +6 -0
  381. package/bin/skills/lakehouse-doc/references/concurrency_scaling.md +57 -0
  382. package/bin/skills/lakehouse-doc/references/config-datasource.md +79 -0
  383. package/bin/skills/lakehouse-doc/references/config_volume_dify_storage.md +254 -0
  384. package/bin/skills/lakehouse-doc/references/connect-with-cli.md +155 -0
  385. package/bin/skills/lakehouse-doc/references/connect_to_Lakehouse.md +1 -0
  386. package/bin/skills/lakehouse-doc/references/connection-guide.md +61 -0
  387. package/bin/skills/lakehouse-doc/references/continue-job.md +260 -0
  388. package/bin/skills/lakehouse-doc/references/conversational_analytics_datagpt.md +1 -0
  389. package/bin/skills/lakehouse-doc/references/copy-into-table.md +398 -0
  390. package/bin/skills/lakehouse-doc/references/cos_storage_connection.md +73 -0
  391. package/bin/skills/lakehouse-doc/references/cos_volume_creation.md +39 -0
  392. package/bin/skills/lakehouse-doc/references/cost_management.md +1 -0
  393. package/bin/skills/lakehouse-doc/references/create-api-connection.md +363 -0
  394. package/bin/skills/lakehouse-doc/references/create-catalog-connection.md +220 -0
  395. package/bin/skills/lakehouse-doc/references/create-dynamic-table.md +910 -0
  396. package/bin/skills/lakehouse-doc/references/create-external-catalog.md +305 -0
  397. package/bin/skills/lakehouse-doc/references/create-external-table.md +78 -0
  398. package/bin/skills/lakehouse-doc/references/create-hive-catalog.md +77 -0
  399. package/bin/skills/lakehouse-doc/references/create-inverted-index.md +163 -0
  400. package/bin/skills/lakehouse-doc/references/create-kafka-external.md +300 -0
  401. package/bin/skills/lakehouse-doc/references/create-schema-from-share.md +45 -0
  402. package/bin/skills/lakehouse-doc/references/create-share.md +46 -0
  403. package/bin/skills/lakehouse-doc/references/create-sql-function.md +120 -0
  404. package/bin/skills/lakehouse-doc/references/create-storage-connection.md +346 -0
  405. package/bin/skills/lakehouse-doc/references/create-synonym.md +75 -0
  406. package/bin/skills/lakehouse-doc/references/create-table-ddl.md +405 -0
  407. package/bin/skills/lakehouse-doc/references/create-table-stream.md +226 -0
  408. package/bin/skills/lakehouse-doc/references/create-vector-index.md +115 -0
  409. package/bin/skills/lakehouse-doc/references/create.md +46 -0
  410. package/bin/skills/lakehouse-doc/references/create_cluster.md +121 -0
  411. package/bin/skills/lakehouse-doc/references/creating_alicloud_privatelinkendpoint.md +37 -0
  412. package/bin/skills/lakehouse-doc/references/creating_alicloud_privatelinkservice.md +31 -0
  413. package/bin/skills/lakehouse-doc/references/creating_tencentcloud_privatelinkendpoint.md +33 -0
  414. package/bin/skills/lakehouse-doc/references/creating_tencentcloud_privatelinkservice.md +19 -0
  415. package/bin/skills/lakehouse-doc/references/czguide-intro-to-cdc-using-clickzetta-rtsync-dynamic-tables.md +717 -0
  416. package/bin/skills/lakehouse-doc/references/data-catalog.md +1 -0
  417. package/bin/skills/lakehouse-doc/references/data-integration-intro.md +60 -0
  418. package/bin/skills/lakehouse-doc/references/data-integration.md +10 -0
  419. package/bin/skills/lakehouse-doc/references/data-lifecycle.md +46 -0
  420. package/bin/skills/lakehouse-doc/references/data-load-summary.md +71 -0
  421. package/bin/skills/lakehouse-doc/references/data-mamager-tool.md +1 -0
  422. package/bin/skills/lakehouse-doc/references/data-recover.md +52 -0
  423. package/bin/skills/lakehouse-doc/references/data-type.md +1 -0
  424. package/bin/skills/lakehouse-doc/references/data-types-timestamp-ntz.md +139 -0
  425. package/bin/skills/lakehouse-doc/references/data.md +1 -0
  426. package/bin/skills/lakehouse-doc/references/data_catalog.md +106 -0
  427. package/bin/skills/lakehouse-doc/references/data_clean_with_sql.md +406 -0
  428. package/bin/skills/lakehouse-doc/references/data_ecosystem.md +1 -0
  429. package/bin/skills/lakehouse-doc/references/data_ops.md +7 -0
  430. package/bin/skills/lakehouse-doc/references/data_org.md +1 -0
  431. package/bin/skills/lakehouse-doc/references/data_privacy.md +50 -0
  432. package/bin/skills/lakehouse-doc/references/data_result_profile.md +22 -0
  433. package/bin/skills/lakehouse-doc/references/data_security.md +1 -0
  434. package/bin/skills/lakehouse-doc/references/data_sharing_between_accounts_guide.md +331 -0
  435. package/bin/skills/lakehouse-doc/references/data_transfer_datalake.md +1 -0
  436. package/bin/skills/lakehouse-doc/references/data_visualization.md +96 -0
  437. package/bin/skills/lakehouse-doc/references/databricks_yunqi_integration_guide_v2.md +811 -0
  438. package/bin/skills/lakehouse-doc/references/datagpt_bestpractice.md +1 -0
  439. package/bin/skills/lakehouse-doc/references/datagpt_data_source.md +58 -0
  440. package/bin/skills/lakehouse-doc/references/datagpt_get_accurate_answers.md +34 -0
  441. package/bin/skills/lakehouse-doc/references/datagpt_intro.md +1 -0
  442. package/bin/skills/lakehouse-doc/references/datagpt_quickstart.md +94 -0
  443. package/bin/skills/lakehouse-doc/references/datagpt_tutorial.md +1 -0
  444. package/bin/skills/lakehouse-doc/references/datalake_FAQ.md +54 -0
  445. package/bin/skills/lakehouse-doc/references/datalake_overview.md +17 -0
  446. package/bin/skills/lakehouse-doc/references/datalake_privilege.md +55 -0
  447. package/bin/skills/lakehouse-doc/references/datalake_query_ingest.md +18 -0
  448. package/bin/skills/lakehouse-doc/references/datalake_unstructure_data.md +3 -0
  449. package/bin/skills/lakehouse-doc/references/datalake_volume.md +92 -0
  450. package/bin/skills/lakehouse-doc/references/datalake_volume_anlytics.md +1 -0
  451. package/bin/skills/lakehouse-doc/references/datalake_volume_object.md +1 -0
  452. package/bin/skills/lakehouse-doc/references/dataops_practice.md +105 -0
  453. package/bin/skills/lakehouse-doc/references/datasharing.md +322 -0
  454. package/bin/skills/lakehouse-doc/references/datasharing_catalog.md +1 -0
  455. package/bin/skills/lakehouse-doc/references/datasource_ip_whitelist.md +93 -0
  456. package/bin/skills/lakehouse-doc/references/datasources.md +62 -0
  457. package/bin/skills/lakehouse-doc/references/datatype-cast.md +105 -0
  458. package/bin/skills/lakehouse-doc/references/datatype-conversion.md +85 -0
  459. package/bin/skills/lakehouse-doc/references/datetime_patterns.md +61 -0
  460. package/bin/skills/lakehouse-doc/references/datus_lakehouse_installation.md +376 -0
  461. package/bin/skills/lakehouse-doc/references/datus_lakehouse_solution_overview.md +148 -0
  462. package/bin/skills/lakehouse-doc/references/db_dw_connection.md +1 -0
  463. package/bin/skills/lakehouse-doc/references/default-value.md +89 -0
  464. package/bin/skills/lakehouse-doc/references/delta-lake.md +185 -0
  465. package/bin/skills/lakehouse-doc/references/desc-catalog-table.md +33 -0
  466. package/bin/skills/lakehouse-doc/references/desc-catalog.md +31 -0
  467. package/bin/skills/lakehouse-doc/references/desc-dynamic-table.md +62 -0
  468. package/bin/skills/lakehouse-doc/references/desc-external-schemas.md +66 -0
  469. package/bin/skills/lakehouse-doc/references/desc-external-table.md +70 -0
  470. package/bin/skills/lakehouse-doc/references/desc-function.md +16 -0
  471. package/bin/skills/lakehouse-doc/references/desc-history-dynamic-table.md +50 -0
  472. package/bin/skills/lakehouse-doc/references/desc-history-table.md +73 -0
  473. package/bin/skills/lakehouse-doc/references/desc-history.md +73 -0
  474. package/bin/skills/lakehouse-doc/references/desc-share.md +59 -0
  475. package/bin/skills/lakehouse-doc/references/desc-table-stream.md +44 -0
  476. package/bin/skills/lakehouse-doc/references/desc-vcluster.md +42 -0
  477. package/bin/skills/lakehouse-doc/references/describe.md +38 -0
  478. package/bin/skills/lakehouse-doc/references/dify_config_lakehouse_as_vectordb.md +286 -0
  479. package/bin/skills/lakehouse-doc/references/dify_yunqilakehouse_integration_overview.md +188 -0
  480. package/bin/skills/lakehouse-doc/references/discovery_analysis_data_in_json_file_on_external_volume.md +625 -0
  481. package/bin/skills/lakehouse-doc/references/discovery_analysis_data_in_parquert_file_on_external_volume.md +599 -0
  482. package/bin/skills/lakehouse-doc/references/download-data.md +1 -0
  483. package/bin/skills/lakehouse-doc/references/drop-dynamic-table.md +46 -0
  484. package/bin/skills/lakehouse-doc/references/drop-external-schema.md +44 -0
  485. package/bin/skills/lakehouse-doc/references/drop-external-table.md +43 -0
  486. package/bin/skills/lakehouse-doc/references/drop-function.md +36 -0
  487. package/bin/skills/lakehouse-doc/references/drop-share.md +43 -0
  488. package/bin/skills/lakehouse-doc/references/drop-synonym.md +35 -0
  489. package/bin/skills/lakehouse-doc/references/drop-table-stream.md +42 -0
  490. package/bin/skills/lakehouse-doc/references/drop-vcluster.md +52 -0
  491. package/bin/skills/lakehouse-doc/references/drop.md +32 -0
  492. package/bin/skills/lakehouse-doc/references/dynamic-mask.md +201 -0
  493. package/bin/skills/lakehouse-doc/references/dynamic-table-incre.md +62 -0
  494. package/bin/skills/lakehouse-doc/references/dynamic-table-introduce.md +339 -0
  495. package/bin/skills/lakehouse-doc/references/dynamicTable-DML-sql.md +52 -0
  496. package/bin/skills/lakehouse-doc/references/dynamicTable-dml.md +48 -0
  497. package/bin/skills/lakehouse-doc/references/dynamicTable-parmaters.md +425 -0
  498. package/bin/skills/lakehouse-doc/references/dynamic_table_summary.md +366 -0
  499. package/bin/skills/lakehouse-doc/references/dynamic_table_task.md +73 -0
  500. package/bin/skills/lakehouse-doc/references/dynamic_table_using_studio.md +159 -0
  501. package/bin/skills/lakehouse-doc/references/dynamictable.md +56 -0
  502. package/bin/skills/lakehouse-doc/references/eco_integration/Zeppelin.md +84 -0
  503. package/bin/skills/lakehouse-doc/references/eco_integration/airbyte.md +75 -0
  504. package/bin/skills/lakehouse-doc/references/eco_integration/datagrip-lakehouse.md +56 -0
  505. package/bin/skills/lakehouse-doc/references/eco_integration/datax.md +154 -0
  506. package/bin/skills/lakehouse-doc/references/eco_integration/dbeaver-lakehouse.md +67 -0
  507. package/bin/skills/lakehouse-doc/references/eco_integration/dbt.md +139 -0
  508. package/bin/skills/lakehouse-doc/references/eco_integration/rath.md +87 -0
  509. package/bin/skills/lakehouse-doc/references/eco_integration/sqlline.md +82 -0
  510. package/bin/skills/lakehouse-doc/references/eco_integration/sqlworkbench-j-lakehouse.md +54 -0
  511. package/bin/skills/lakehouse-doc/references/eco_integration/streamlit.md +117 -0
  512. package/bin/skills/lakehouse-doc/references/eco_integration/superset.md +109 -0
  513. package/bin/skills/lakehouse-doc/references/eco_integration/trino.md +75 -0
  514. package/bin/skills/lakehouse-doc/references/ecosystem-all.md +24 -0
  515. package/bin/skills/lakehouse-doc/references/export_data_with_data-integration.md +3 -0
  516. package/bin/skills/lakehouse-doc/references/external-Volume.md +10 -0
  517. package/bin/skills/lakehouse-doc/references/external-catalog-summary.md +42 -0
  518. package/bin/skills/lakehouse-doc/references/external-function-summary.md +0 -0
  519. package/bin/skills/lakehouse-doc/references/external-hudi-table.md +187 -0
  520. package/bin/skills/lakehouse-doc/references/external-table-guide.md +96 -0
  521. package/bin/skills/lakehouse-doc/references/external_object_user_guide.md +298 -0
  522. package/bin/skills/lakehouse-doc/references/external_volume.md +1 -0
  523. package/bin/skills/lakehouse-doc/references/f6fc6447ee.md +151 -0
  524. package/bin/skills/lakehouse-doc/references/federation-query.md +1 -0
  525. package/bin/skills/lakehouse-doc/references/finebi-mysql.md +104 -0
  526. package/bin/skills/lakehouse-doc/references/flink-write-connector.md +695 -0
  527. package/bin/skills/lakehouse-doc/references/from_lakehouse_to_volume.md +98 -0
  528. package/bin/skills/lakehouse-doc/references/from_volume_to_table.md +45 -0
  529. package/bin/skills/lakehouse-doc/references/fulltext_indexes_guide.md +1180 -0
  530. package/bin/skills/lakehouse-doc/references/generated-column.md +113 -0
  531. package/bin/skills/lakehouse-doc/references/generated_columns_guide.md +847 -0
  532. package/bin/skills/lakehouse-doc/references/geospatial_analysis.md +558 -0
  533. package/bin/skills/lakehouse-doc/references/get-started-with-sample-data.md +83 -0
  534. package/bin/skills/lakehouse-doc/references/getting_started_with_vcluster_for_processing_analytics.md +471 -0
  535. package/bin/skills/lakehouse-doc/references/grant-to-share.md +55 -0
  536. package/bin/skills/lakehouse-doc/references/grant-user-privileges.md +100 -0
  537. package/bin/skills/lakehouse-doc/references/groupby.md +1260 -0
  538. package/bin/skills/lakehouse-doc/references/guides-overview-connecting.md +46 -0
  539. package/bin/skills/lakehouse-doc/references/ide.md +2 -0
  540. package/bin/skills/lakehouse-doc/references/ifnull.md +72 -0
  541. package/bin/skills/lakehouse-doc/references/ilike.md +89 -0
  542. package/bin/skills/lakehouse-doc/references/import_data_with_data-integration.md +3 -0
  543. package/bin/skills/lakehouse-doc/references/importdatabypythonintoLakehouse.md +134 -0
  544. package/bin/skills/lakehouse-doc/references/index2.md +1 -0
  545. package/bin/skills/lakehouse-doc/references/instance-informaiton-schema-summary.md +51 -0
  546. package/bin/skills/lakehouse-doc/references/instance-informaiton-schema.md +278 -0
  547. package/bin/skills/lakehouse-doc/references/instance-information_schema.md +1 -0
  548. package/bin/skills/lakehouse-doc/references/internal_volume.md +271 -0
  549. package/bin/skills/lakehouse-doc/references/intro-supported-features.md +48 -0
  550. package/bin/skills/lakehouse-doc/references/inverted-index.md +445 -0
  551. package/bin/skills/lakehouse-doc/references/inverted_idx_bm25_param.md +251 -0
  552. package/bin/skills/lakehouse-doc/references/inverted_idx_multi-match.md +89 -0
  553. package/bin/skills/lakehouse-doc/references/is-null.md +59 -0
  554. package/bin/skills/lakehouse-doc/references/it-operation-management.md +1 -0
  555. package/bin/skills/lakehouse-doc/references/java_reference/client.md +49 -0
  556. package/bin/skills/lakehouse-doc/references/java_reference/java-sdk-release-notes.md +32 -0
  557. package/bin/skills/lakehouse-doc/references/java_reference/java-sdk-summary.md +138 -0
  558. package/bin/skills/lakehouse-doc/references/java_reference/jdbc.md +211 -0
  559. package/bin/skills/lakehouse-doc/references/java_reference/realtime-upload.md +295 -0
  560. package/bin/skills/lakehouse-doc/references/jdbc_task.md +37 -0
  561. package/bin/skills/lakehouse-doc/references/job-manage.md +1 -0
  562. package/bin/skills/lakehouse-doc/references/job_history_analysis_with_information_schema.md +597 -0
  563. package/bin/skills/lakehouse-doc/references/jobprofile-bestpractices.md +104 -0
  564. package/bin/skills/lakehouse-doc/references/json_analyze.md +422 -0
  565. package/bin/skills/lakehouse-doc/references/json_data_process_guide.md +881 -0
  566. package/bin/skills/lakehouse-doc/references/json_guide_for_complex_biz_cases.md +1899 -0
  567. package/bin/skills/lakehouse-doc/references/kafka-external-table.md +103 -0
  568. package/bin/skills/lakehouse-doc/references/lakehouse-ai.md +1 -0
  569. package/bin/skills/lakehouse-doc/references/lakehouse-quick-experience_guide.md +964 -0
  570. package/bin/skills/lakehouse-doc/references/lakehouse-table-stream-best-practices.md +500 -0
  571. package/bin/skills/lakehouse-doc/references/lakehouse_billing_anomaly_alert_configuration_guide.md +226 -0
  572. package/bin/skills/lakehouse-doc/references/lakehouse_instance_overview.md +39 -0
  573. package/bin/skills/lakehouse-doc/references/lakehouse_table_design_guide.md +2676 -0
  574. package/bin/skills/lakehouse-doc/references/langchain.md +71 -0
  575. package/bin/skills/lakehouse-doc/references/langchain_basic_samples.md +606 -0
  576. package/bin/skills/lakehouse-doc/references/langchain_integration.md +1 -0
  577. package/bin/skills/lakehouse-doc/references/left.md +51 -0
  578. package/bin/skills/lakehouse-doc/references/like.md +115 -0
  579. package/bin/skills/lakehouse-doc/references/list-partition.md +121 -0
  580. package/bin/skills/lakehouse-doc/references/llama-index.md +57 -0
  581. package/bin/skills/lakehouse-doc/references/llms-full.txt +1286 -0
  582. package/bin/skills/lakehouse-doc/references/llms.txt +71 -0
  583. package/bin/skills/lakehouse-doc/references/load-data-local.md +82 -0
  584. package/bin/skills/lakehouse-doc/references/load-data-oss.md +174 -0
  585. package/bin/skills/lakehouse-doc/references/management.md +5 -0
  586. package/bin/skills/lakehouse-doc/references/managing-instance.md +67 -0
  587. package/bin/skills/lakehouse-doc/references/mapjoin.md +62 -0
  588. package/bin/skills/lakehouse-doc/references/materialized_ddl.md +1 -0
  589. package/bin/skills/lakehouse-doc/references/meta-objects-and-privileges.md +271 -0
  590. package/bin/skills/lakehouse-doc/references/metabase.md +73 -0
  591. package/bin/skills/lakehouse-doc/references/metadata_show_desc_command_guide.md +711 -0
  592. package/bin/skills/lakehouse-doc/references/metrics_answer_build.md +46 -0
  593. package/bin/skills/lakehouse-doc/references/mindsdb.md +269 -0
  594. package/bin/skills/lakehouse-doc/references/monitoring_and_alerting.md +177 -0
  595. package/bin/skills/lakehouse-doc/references/monitoring_item_specification.md +44 -0
  596. package/bin/skills/lakehouse-doc/references/multi_cloud_instance_manage_with_mcp_server.md +281 -0
  597. package/bin/skills/lakehouse-doc/references/multitable_batch_sync.md +463 -0
  598. package/bin/skills/lakehouse-doc/references/multitable_realtime_sync.md +412 -0
  599. package/bin/skills/lakehouse-doc/references/multitable_realtime_sync_sop.md +593 -0
  600. package/bin/skills/lakehouse-doc/references/n8n_Integreated_with_lakehouse_mcp_server.md +494 -0
  601. package/bin/skills/lakehouse-doc/references/navicat-mysql.md +65 -0
  602. package/bin/skills/lakehouse-doc/references/network_policy.md +281 -0
  603. package/bin/skills/lakehouse-doc/references/nyc_green_taxi_data_clean_transform_with_mcp_server.md +315 -0
  604. package/bin/skills/lakehouse-doc/references/object-model-overview.md +70 -0
  605. package/bin/skills/lakehouse-doc/references/object_identifier.md +259 -0
  606. package/bin/skills/lakehouse-doc/references/object_model_design.md +1 -0
  607. package/bin/skills/lakehouse-doc/references/opensource/travel.md +134 -0
  608. package/bin/skills/lakehouse-doc/references/operation-maintenance.md +172 -0
  609. package/bin/skills/lakehouse-doc/references/oss_volume_creation.md +39 -0
  610. package/bin/skills/lakehouse-doc/references/partition_table.md +344 -0
  611. package/bin/skills/lakehouse-doc/references/partition_table_guide.md +340 -0
  612. package/bin/skills/lakehouse-doc/references/performance_optimization.md +1 -0
  613. package/bin/skills/lakehouse-doc/references/performence_test.md +1 -0
  614. package/bin/skills/lakehouse-doc/references/permissions-of-built-in-workspace-level-roles.md +131 -0
  615. package/bin/skills/lakehouse-doc/references/pipe-kafka-bestpractice-1.md +431 -0
  616. package/bin/skills/lakehouse-doc/references/pipe-kafka-table-stream.md +180 -0
  617. package/bin/skills/lakehouse-doc/references/pipe-kafka.md +210 -0
  618. package/bin/skills/lakehouse-doc/references/pipe-storage-object.md +247 -0
  619. package/bin/skills/lakehouse-doc/references/pipe-summary.md +114 -0
  620. package/bin/skills/lakehouse-doc/references/pipe-syntax.md +200 -0
  621. package/bin/skills/lakehouse-doc/references/practice_data_analysis.md +1 -0
  622. package/bin/skills/lakehouse-doc/references/practice_data_import_and_export.md +1 -0
  623. package/bin/skills/lakehouse-doc/references/practice_python_task.md +157 -0
  624. package/bin/skills/lakehouse-doc/references/pricing.md +225 -0
  625. package/bin/skills/lakehouse-doc/references/primary key.md +86 -0
  626. package/bin/skills/lakehouse-doc/references/primary-key.md +187 -0
  627. package/bin/skills/lakehouse-doc/references/privacy-policy.md +364 -0
  628. package/bin/skills/lakehouse-doc/references/private-link-general.md +68 -0
  629. package/bin/skills/lakehouse-doc/references/private_link.md +1 -0
  630. package/bin/skills/lakehouse-doc/references/product-trial-agreement.md +99 -0
  631. package/bin/skills/lakehouse-doc/references/product_concept.md +1 -0
  632. package/bin/skills/lakehouse-doc/references/put-get.md +1 -0
  633. package/bin/skills/lakehouse-doc/references/put_get_volume.md +3 -0
  634. package/bin/skills/lakehouse-doc/references/python-igs.md +297 -0
  635. package/bin/skills/lakehouse-doc/references/python_package_install_import_guide.md +53 -0
  636. package/bin/skills/lakehouse-doc/references/python_reference/connector.md +281 -0
  637. package/bin/skills/lakehouse-doc/references/python_reference/python-sdk-summary.md +13 -0
  638. package/bin/skills/lakehouse-doc/references/python_reference/sqlalchemy.md +77 -0
  639. package/bin/skills/lakehouse-doc/references/python_shell_datasource.md +334 -0
  640. package/bin/skills/lakehouse-doc/references/query-json-sy.md +47 -0
  641. package/bin/skills/lakehouse-doc/references/query-syntax.md +234 -0
  642. package/bin/skills/lakehouse-doc/references/quick_start_batch_sync_data.md +116 -0
  643. package/bin/skills/lakehouse-doc/references/quick_start_bi_analysis.md +589 -0
  644. package/bin/skills/lakehouse-doc/references/quick_start_create_workspace.md +58 -0
  645. package/bin/skills/lakehouse-doc/references/quick_start_data_quality.md +75 -0
  646. package/bin/skills/lakehouse-doc/references/quick_start_etl.md +131 -0
  647. package/bin/skills/lakehouse-doc/references/quick_start_monitoring_and_alerting.md +93 -0
  648. package/bin/skills/lakehouse-doc/references/quick_start_sql_query.md +93 -0
  649. package/bin/skills/lakehouse-doc/references/quick_start_upload_data.md +69 -0
  650. package/bin/skills/lakehouse-doc/references/quick_start_user_management.md +73 -0
  651. package/bin/skills/lakehouse-doc/references/quick_start_workspace.md +72 -0
  652. package/bin/skills/lakehouse-doc/references/quick_start_workspace_user.md +67 -0
  653. package/bin/skills/lakehouse-doc/references/quickstart_datashare_between_companies.md +249 -0
  654. package/bin/skills/lakehouse-doc/references/quickstart_envirment_for_team.md +271 -0
  655. package/bin/skills/lakehouse-doc/references/quickstart_local_csv.md +99 -0
  656. package/bin/skills/lakehouse-doc/references/realtime_sync.md +36 -0
  657. package/bin/skills/lakehouse-doc/references/realtime_sync_and_analysis_practice.md +187 -0
  658. package/bin/skills/lakehouse-doc/references/realtimesync_m.md +190 -0
  659. package/bin/skills/lakehouse-doc/references/refresh-history.md +63 -0
  660. package/bin/skills/lakehouse-doc/references/regexp-statement.md +80 -0
  661. package/bin/skills/lakehouse-doc/references/releasenotes.md +1 -0
  662. package/bin/skills/lakehouse-doc/references/releasenotesupdata.md +1 -0
  663. package/bin/skills/lakehouse-doc/references/remove-volume.md +32 -0
  664. package/bin/skills/lakehouse-doc/references/restore-dynamic-table.md +126 -0
  665. package/bin/skills/lakehouse-doc/references/restore.md +127 -0
  666. package/bin/skills/lakehouse-doc/references/result_cache.md +102 -0
  667. package/bin/skills/lakehouse-doc/references/revoke-from-share.md +48 -0
  668. package/bin/skills/lakehouse-doc/references/revoke-user-privileges.md +91 -0
  669. package/bin/skills/lakehouse-doc/references/right.md +84 -0
  670. package/bin/skills/lakehouse-doc/references/rlike.md +78 -0
  671. package/bin/skills/lakehouse-doc/references/rn_2024_11_12.md +63 -0
  672. package/bin/skills/lakehouse-doc/references/role-privlilige-manage.md +1 -0
  673. package/bin/skills/lakehouse-doc/references/roles.md +123 -0
  674. package/bin/skills/lakehouse-doc/references/rom_lakehouse_to_volume.md +1 -0
  675. package/bin/skills/lakehouse-doc/references/s3_volume_creation.md +37 -0
  676. package/bin/skills/lakehouse-doc/references/sample-data-using.md +768 -0
  677. package/bin/skills/lakehouse-doc/references/security_compliance_audit_guide.md +1 -0
  678. package/bin/skills/lakehouse-doc/references/security_overview.md +41 -0
  679. package/bin/skills/lakehouse-doc/references/select-catalog-table.md +26 -0
  680. package/bin/skills/lakehouse-doc/references/semantic_view.md +711 -0
  681. package/bin/skills/lakehouse-doc/references/setup.md +33 -0
  682. package/bin/skills/lakehouse-doc/references/share-ddl.md +1 -0
  683. package/bin/skills/lakehouse-doc/references/show-cached-status.md +34 -0
  684. package/bin/skills/lakehouse-doc/references/show-catalog-schema.md +48 -0
  685. package/bin/skills/lakehouse-doc/references/show-catalog-table.md +22 -0
  686. package/bin/skills/lakehouse-doc/references/show-catalog.md +26 -0
  687. package/bin/skills/lakehouse-doc/references/show-columns.md +75 -0
  688. package/bin/skills/lakehouse-doc/references/show-create-dynamic-table.md +44 -0
  689. package/bin/skills/lakehouse-doc/references/show-create-external-table.md +45 -0
  690. package/bin/skills/lakehouse-doc/references/show-create-materialized-view.md +43 -0
  691. package/bin/skills/lakehouse-doc/references/show-create-table.md +107 -0
  692. package/bin/skills/lakehouse-doc/references/show-dynamic-table.md +68 -0
  693. package/bin/skills/lakehouse-doc/references/show-external-schemas.md +35 -0
  694. package/bin/skills/lakehouse-doc/references/show-external-table.md +38 -0
  695. package/bin/skills/lakehouse-doc/references/show-finctions.md +25 -0
  696. package/bin/skills/lakehouse-doc/references/show-functions.md +44 -0
  697. package/bin/skills/lakehouse-doc/references/show-grants-user.md +41 -0
  698. package/bin/skills/lakehouse-doc/references/show-jobs.md +48 -0
  699. package/bin/skills/lakehouse-doc/references/show-materialized-view.md +42 -0
  700. package/bin/skills/lakehouse-doc/references/show-schemas.md +41 -0
  701. package/bin/skills/lakehouse-doc/references/show-shares.md +94 -0
  702. package/bin/skills/lakehouse-doc/references/show-synonyms.md +29 -0
  703. package/bin/skills/lakehouse-doc/references/show-table-streams.md +53 -0
  704. package/bin/skills/lakehouse-doc/references/show-tables-history.md +91 -0
  705. package/bin/skills/lakehouse-doc/references/show-tables.md +50 -0
  706. package/bin/skills/lakehouse-doc/references/show-users.md +46 -0
  707. package/bin/skills/lakehouse-doc/references/show-vclusters.md +59 -0
  708. package/bin/skills/lakehouse-doc/references/show-views.md +31 -0
  709. package/bin/skills/lakehouse-doc/references/show-volume.md +57 -0
  710. package/bin/skills/lakehouse-doc/references/show.md +168 -0
  711. package/bin/skills/lakehouse-doc/references/simpletosimple_bazhuayu_datagpt.md +152 -0
  712. package/bin/skills/lakehouse-doc/references/small_file_optimization.md +101 -0
  713. package/bin/skills/lakehouse-doc/references/spark-connector-summary.md +329 -0
  714. package/bin/skills/lakehouse-doc/references/spark-connector-use.md +233 -0
  715. package/bin/skills/lakehouse-doc/references/sql-parmaters.md +594 -0
  716. package/bin/skills/lakehouse-doc/references/sql-qualify.md +236 -0
  717. package/bin/skills/lakehouse-doc/references/sql-reference.md +1 -0
  718. package/bin/skills/lakehouse-doc/references/sql_data_transfom_NestedDataTypes.md +451 -0
  719. package/bin/skills/lakehouse-doc/references/sql_data_transform.md +1 -0
  720. package/bin/skills/lakehouse-doc/references/sql_data_transform_basic.md +576 -0
  721. package/bin/skills/lakehouse-doc/references/sql_data_transform_cte.md +177 -0
  722. package/bin/skills/lakehouse-doc/references/sql_data_transform_tips.md +407 -0
  723. package/bin/skills/lakehouse-doc/references/sql_data_transform_windows.md +430 -0
  724. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/any_value.md +64 -0
  725. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/approx_count_distinct.md +45 -0
  726. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/approx_histogram.md +56 -0
  727. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/approx_percentile.md +65 -0
  728. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/approx_top_k.md +62 -0
  729. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/avg.md +68 -0
  730. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/bit_and.md +81 -0
  731. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/bit_or.md +78 -0
  732. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/bit_xor.md +79 -0
  733. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/bool_and.md +85 -0
  734. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/bool_or.md +86 -0
  735. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/collect_list.md +95 -0
  736. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/collect_list_on_array.md +64 -0
  737. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/collect_set.md +85 -0
  738. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/collect_set_on_array.md +80 -0
  739. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/corr.md +103 -0
  740. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/count.md +96 -0
  741. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/count_distinct.md +96 -0
  742. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/count_if.md +62 -0
  743. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/covar_pop.md +89 -0
  744. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/covar_samp.md +113 -0
  745. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/first_value.md +122 -0
  746. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap.md +42 -0
  747. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap_and.md +46 -0
  748. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap_and_state.md +47 -0
  749. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap_merge.md +37 -0
  750. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap_merge_state.md +29 -0
  751. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap_or.md +44 -0
  752. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap_or_state.md +64 -0
  753. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap_state.md +27 -0
  754. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap_xor.md +45 -0
  755. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_bitmap_xor_state.md +99 -0
  756. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/group_concat.md +53 -0
  757. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/last_value.md +100 -0
  758. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/map_agg.md +79 -0
  759. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/max.md +102 -0
  760. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/max_by.md +67 -0
  761. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/median.md +60 -0
  762. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/min.md +89 -0
  763. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/min_by.md +99 -0
  764. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/percentile.md +69 -0
  765. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/percentile_approx.md +33 -0
  766. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/percentile_rank.md +36 -0
  767. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/std.md +47 -0
  768. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/stddev.md +68 -0
  769. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/stddev_pop.md +67 -0
  770. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/stddev_samp.md +67 -0
  771. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/sum.md +67 -0
  772. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/var_pop.md +65 -0
  773. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/var_samp.md +65 -0
  774. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/variance.md +47 -0
  775. package/bin/skills/lakehouse-doc/references/sql_functions/aggregate_functions/wm_concat.md +79 -0
  776. package/bin/skills/lakehouse-doc/references/sql_functions/context_functions/current_instance_id.md +16 -0
  777. package/bin/skills/lakehouse-doc/references/sql_functions/context_functions/current_schema.md +29 -0
  778. package/bin/skills/lakehouse-doc/references/sql_functions/context_functions/current_session_id.md +30 -0
  779. package/bin/skills/lakehouse-doc/references/sql_functions/context_functions/current_user.md +29 -0
  780. package/bin/skills/lakehouse-doc/references/sql_functions/context_functions/current_user_id.md +21 -0
  781. package/bin/skills/lakehouse-doc/references/sql_functions/context_functions/current_vcluster.md +40 -0
  782. package/bin/skills/lakehouse-doc/references/sql_functions/context_functions/current_workspace.md +19 -0
  783. package/bin/skills/lakehouse-doc/references/sql_functions/context_functions/current_workspace_id.md +24 -0
  784. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/binary_to_bitmap.md +29 -0
  785. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_and.md +39 -0
  786. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_and_cardinality.md +48 -0
  787. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_andnot.md +61 -0
  788. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_andnot_cardinality.md +60 -0
  789. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_build.md +58 -0
  790. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_cardinality.md +43 -0
  791. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_contains.md +50 -0
  792. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_count.md +45 -0
  793. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_empty.md +52 -0
  794. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_has_all.md +49 -0
  795. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_has_any.md +65 -0
  796. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_hash.md +52 -0
  797. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_max.md +26 -0
  798. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_min.md +33 -0
  799. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_or.md +61 -0
  800. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_or_cardinality.md +27 -0
  801. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_remove.md +42 -0
  802. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_subset_in_range.md +60 -0
  803. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_subset_limit.md +36 -0
  804. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_to_array.md +24 -0
  805. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_to_binary.md +28 -0
  806. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_to_string.md +41 -0
  807. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_transform.md +42 -0
  808. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_xor.md +59 -0
  809. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/bitmap_xor_cardinality.md +30 -0
  810. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/string_to_bitmap.md +56 -0
  811. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/sub_bitmap.md +34 -0
  812. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitmap_functions/to_bitmap.md +78 -0
  813. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitwise_functions/bit_count.md +43 -0
  814. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/bitwise_functions/bitnot.md +34 -0
  815. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/assert_true.md +44 -0
  816. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/between.md +52 -0
  817. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/case_when.md +99 -0
  818. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/coalesce.md +51 -0
  819. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/decode.md +45 -0
  820. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/if.md +56 -0
  821. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/in.md +33 -0
  822. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/is_false.md +63 -0
  823. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/is_not_null.md +40 -0
  824. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/is_null.md +44 -0
  825. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/is_true.md +63 -0
  826. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/multiif.md +54 -0
  827. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/nvl.md +39 -0
  828. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/conditional_functions/raise_error.md +52 -0
  829. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/add_days.md +65 -0
  830. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/add_months.md +38 -0
  831. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/add_years.md +61 -0
  832. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/adddate.md +53 -0
  833. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/convert_timezone.md +46 -0
  834. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/current_date.md +56 -0
  835. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/current_timestamp.md +46 -0
  836. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/date.md +42 -0
  837. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/date_add.md +35 -0
  838. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/date_format.md +48 -0
  839. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/date_format_mysql.md +58 -0
  840. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/date_format_pg.md +54 -0
  841. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/date_sub.md +58 -0
  842. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/date_trunc.md +61 -0
  843. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/dateadd.md +60 -0
  844. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/datediff.md +50 -0
  845. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/datetime_patterns.md +61 -0
  846. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/day.md +53 -0
  847. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/dayofmonth.md +36 -0
  848. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/dayofweek.md +31 -0
  849. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/dayofweek_iso.md +56 -0
  850. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/dayofyear.md +40 -0
  851. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/days.md +21 -0
  852. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/extract.md +49 -0
  853. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/from_unixtime.md +43 -0
  854. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/from_utc_timestamp.md +45 -0
  855. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/hour.md +49 -0
  856. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/hours.md +21 -0
  857. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/last_day.md +35 -0
  858. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/localtimestamp.md +19 -0
  859. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/makde_date.md +20 -0
  860. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/make_date.md +20 -0
  861. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/make_dt_interval.md +66 -0
  862. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/make_ym_interval.md +53 -0
  863. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/minute.md +49 -0
  864. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/month.md +39 -0
  865. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/months.md +21 -0
  866. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/months_between.md +18 -0
  867. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/next_day.md +16 -0
  868. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/now.md +53 -0
  869. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/quarter.md +52 -0
  870. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/second.md +34 -0
  871. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/str_to_date_mysql.md +42 -0
  872. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/sub_days.md +66 -0
  873. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/timestamp_micros.md +34 -0
  874. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/timestamp_millis.md +54 -0
  875. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/timestamp_seconds.md +62 -0
  876. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/timestampadd.md +59 -0
  877. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/timestampdiff.md +78 -0
  878. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/to_date.md +43 -0
  879. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/to_start_of_interval.md +25 -0
  880. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/to_timestamp.md +66 -0
  881. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/to_timestamp_ntz.md +52 -0
  882. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/to_unix_timestamp.md +59 -0
  883. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/to_unix_timestamp_ms.md +45 -0
  884. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/to_unix_timestamp_us.md +40 -0
  885. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/to_utc_timestamp.md +62 -0
  886. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/toyyyymmdd.md +69 -0
  887. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/trunc.md +38 -0
  888. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/unix_timestamp.md +96 -0
  889. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/week.md +31 -0
  890. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/weekday.md +51 -0
  891. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/weekofyear.md +29 -0
  892. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/year.md +46 -0
  893. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/yearofweek.md +64 -0
  894. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/datetime_functions/years.md +21 -0
  895. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/geo_functions/st_geohash.md +34 -0
  896. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/geo_functions/st_latfromgeohash.md +59 -0
  897. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/geo_functions/st_longfromgeohash.md +53 -0
  898. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/hash_functions/bucket.md +33 -0
  899. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/hash_functions/general_hash.md +49 -0
  900. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/hash_functions/hash_combine.md +37 -0
  901. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/hash_functions/hash_combine_commutative.md +54 -0
  902. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/hash_functions/murmurhash.md +36 -0
  903. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/array_sort_by_key.md +42 -0
  904. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/element_at.md +59 -0
  905. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/exists.md +48 -0
  906. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/filter.md +37 -0
  907. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/forall.md +68 -0
  908. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/high_order_functions.md +34 -0
  909. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/map_filter.md +25 -0
  910. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/map_zip_with.md +46 -0
  911. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/transform.md +48 -0
  912. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/transform_keys.md +48 -0
  913. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/transform_values.md +36 -0
  914. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/high_order_functions/zip_with.md +38 -0
  915. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/ip_functions/get_ip_info.md +199 -0
  916. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/ip_functions/ipv4_num_to_string.md +29 -0
  917. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/ip_functions/ipv4_string_to_num.md +34 -0
  918. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/ip_functions/ipv6_num_to_string.md +35 -0
  919. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/ip_functions/ipv6_string_to_num.md +46 -0
  920. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/ip_functions/is_ip_address_in_range.md +42 -0
  921. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/from_json.md +86 -0
  922. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/get_json_object.md +83 -0
  923. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_array.md +30 -0
  924. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_contains.md +86 -0
  925. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_extract.md +64 -0
  926. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_minify.md +28 -0
  927. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_normalize.md +36 -0
  928. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_object.md +37 -0
  929. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_parse.md +59 -0
  930. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_remove.md +36 -0
  931. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_type.md +44 -0
  932. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/json_valid.md +46 -0
  933. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/schema_of_json.md +79 -0
  934. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/json_functions/to_json.md +68 -0
  935. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/abs.md +46 -0
  936. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/acos.md +30 -0
  937. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/acosh.md +39 -0
  938. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/asin.md +39 -0
  939. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/asinh.md +46 -0
  940. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/atan.md +27 -0
  941. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/atan2.md +43 -0
  942. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/atanh.md +24 -0
  943. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/bround.md +43 -0
  944. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/cbrt.md +56 -0
  945. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/ceil.md +45 -0
  946. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/ceilling.md +47 -0
  947. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/cos.md +41 -0
  948. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/cosh.md +41 -0
  949. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/cot.md +49 -0
  950. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/csc.md +34 -0
  951. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/degrees.md +42 -0
  952. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/div.md +28 -0
  953. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/e.md +34 -0
  954. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/exp.md +32 -0
  955. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/exp2.md +36 -0
  956. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/expm1.md +54 -0
  957. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/floor.md +41 -0
  958. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/greatest.md +36 -0
  959. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/hypot.md +37 -0
  960. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/isnan.md +27 -0
  961. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/least.md +38 -0
  962. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/ln.md +57 -0
  963. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/log.md +39 -0
  964. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/log10.md +47 -0
  965. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/log1p.md +48 -0
  966. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/log2.md +28 -0
  967. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/median.md +25 -0
  968. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/mod.md +35 -0
  969. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/monotonically_increasing_id.md +81 -0
  970. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/negative.md +15 -0
  971. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/operators.md +427 -0
  972. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/pi.md +41 -0
  973. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/pmod.md +31 -0
  974. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/positive.md +15 -0
  975. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/pow.md +39 -0
  976. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/radians.md +30 -0
  977. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/rand.md +39 -0
  978. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/randn.md +26 -0
  979. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/random.md +67 -0
  980. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/round.md +48 -0
  981. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/shiftleft.md +67 -0
  982. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/shiftright.md +70 -0
  983. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/shiftrightunsigned.md +70 -0
  984. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/sign.md +46 -0
  985. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/sin.md +67 -0
  986. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/sinh.md +46 -0
  987. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/sqrt.md +40 -0
  988. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/tan.md +42 -0
  989. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/math_functions/tanh.md +38 -0
  990. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array.md +46 -0
  991. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_append.md +26 -0
  992. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_compact.md +29 -0
  993. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_contains.md +38 -0
  994. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_distinct.md +43 -0
  995. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_except.md +35 -0
  996. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_intersect.md +39 -0
  997. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_join.md +51 -0
  998. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_max.md +57 -0
  999. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_min.md +60 -0
  1000. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_position.md +50 -0
  1001. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_prepend.md +26 -0
  1002. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_remove.md +34 -0
  1003. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_repeat.md +51 -0
  1004. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_size.md +32 -0
  1005. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_sort.md +46 -0
  1006. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_sort_reverse.md +31 -0
  1007. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/array_union.md +36 -0
  1008. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/arrays_overlap.md +41 -0
  1009. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/arrays_zip.md +50 -0
  1010. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/cardinality.md +45 -0
  1011. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/concat.md +49 -0
  1012. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/concat_ws.md +73 -0
  1013. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/element_at.md +34 -0
  1014. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/flatten.md +62 -0
  1015. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map.md +44 -0
  1016. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map_concat.md +38 -0
  1017. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map_contains_key.md +34 -0
  1018. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map_entries.md +44 -0
  1019. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map_equal.md +33 -0
  1020. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map_except.md +34 -0
  1021. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map_from_arrays.md +28 -0
  1022. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map_from_entries.md +52 -0
  1023. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map_keys.md +37 -0
  1024. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/map_values.md +46 -0
  1025. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/multimap_from_entries.md +47 -0
  1026. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/named_struct.md +55 -0
  1027. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/sequence.md +35 -0
  1028. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/size.md +34 -0
  1029. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/slice.md +43 -0
  1030. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/sort_array.md +22 -0
  1031. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/struct.md +35 -0
  1032. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/struct_insert.md +54 -0
  1033. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/struct_update.md +42 -0
  1034. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/nested_functions/trans_array.md +147 -0
  1035. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/partition/max_pt.md +41 -0
  1036. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/search_functions/match_all.md +56 -0
  1037. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/search_functions/match_any.md +50 -0
  1038. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/search_functions/match_phrase.md +65 -0
  1039. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/search_functions/match_phrase_prefix.md +36 -0
  1040. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/search_functions/match_regexp.md +30 -0
  1041. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/search_functions/tokenize.md +57 -0
  1042. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/aes_decrypt.md +40 -0
  1043. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/aes_decrypt_mysql.md +30 -0
  1044. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/aes_encrypt.md +34 -0
  1045. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/aes_encrypt_mysql.md +53 -0
  1046. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/ascii.md +36 -0
  1047. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/base64.md +27 -0
  1048. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/binary.md +46 -0
  1049. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/btrim.md +38 -0
  1050. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/char.md +55 -0
  1051. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/char_length.md +52 -0
  1052. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/character_length.md +52 -0
  1053. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/chr.md +26 -0
  1054. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/collation_sort_key.md +58 -0
  1055. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/concat.md +75 -0
  1056. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/concat_ws.md +72 -0
  1057. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/contains.md +31 -0
  1058. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/conv.md +50 -0
  1059. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/endswith.md +70 -0
  1060. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/find_in_set.md +19 -0
  1061. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/format_string.md +53 -0
  1062. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/hex.md +45 -0
  1063. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/instr.md +41 -0
  1064. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/is_ascii.md +27 -0
  1065. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/is_utf8.md +27 -0
  1066. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/lcase.md +45 -0
  1067. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/left.md +35 -0
  1068. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/length.md +33 -0
  1069. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/lengthb.md +49 -0
  1070. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/like.md +63 -0
  1071. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/locate.md +42 -0
  1072. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/lower.md +61 -0
  1073. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/lpad.md +45 -0
  1074. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/ltrim.md +48 -0
  1075. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/mask.md +37 -0
  1076. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/md5.md +71 -0
  1077. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/octet_length.md +47 -0
  1078. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/parse_url.md +78 -0
  1079. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/position.md +35 -0
  1080. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/regexp_count.md +115 -0
  1081. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/regexp_extract.md +37 -0
  1082. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/regexp_extract_all.md +31 -0
  1083. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/regexp_instr.md +152 -0
  1084. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/regexp_replace.md +34 -0
  1085. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/repeat.md +44 -0
  1086. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/replace.md +37 -0
  1087. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/reverse.md +70 -0
  1088. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/right.md +34 -0
  1089. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/rlike.md +64 -0
  1090. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/rpad.md +53 -0
  1091. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/rtrim.md +37 -0
  1092. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/sha1.md +67 -0
  1093. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/space.md +34 -0
  1094. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/split.md +61 -0
  1095. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/split_part.md +42 -0
  1096. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/startswith.md +70 -0
  1097. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/str_to_map.md +60 -0
  1098. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/strpos.md +60 -0
  1099. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/substr.md +43 -0
  1100. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/substring.md +53 -0
  1101. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/substring_index.md +46 -0
  1102. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/translate.md +43 -0
  1103. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/trim.md +54 -0
  1104. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/typeof.md +54 -0
  1105. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/ucase.md +46 -0
  1106. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/unbase64.md +54 -0
  1107. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/unhex.md +46 -0
  1108. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/upper.md +51 -0
  1109. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/url_decode.md +32 -0
  1110. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/url_encode.md +33 -0
  1111. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/string_functions/uuid.md +33 -0
  1112. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/binary_quantize.md +51 -0
  1113. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/cosine_distance.md +53 -0
  1114. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/dot_product.md +54 -0
  1115. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/hamming_distance.md +57 -0
  1116. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/jaccard_distance.md +55 -0
  1117. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/l2_distance.md +53 -0
  1118. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/l2_norm.md +52 -0
  1119. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/l2_normalize.md +54 -0
  1120. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/print_vector_bits.md +56 -0
  1121. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/vector.md +70 -0
  1122. package/bin/skills/lakehouse-doc/references/sql_functions/scalar_functions/vector_functions/vector_add_scalar.md +57 -0
  1123. package/bin/skills/lakehouse-doc/references/sql_functions/table_functions/explode.md +71 -0
  1124. package/bin/skills/lakehouse-doc/references/sql_functions/table_functions/inline.md +48 -0
  1125. package/bin/skills/lakehouse-doc/references/sql_functions/table_functions/json_tuple.md +44 -0
  1126. package/bin/skills/lakehouse-doc/references/sql_functions/table_functions/load_history.md +17 -0
  1127. package/bin/skills/lakehouse-doc/references/sql_functions/table_functions/posexplode.md +89 -0
  1128. package/bin/skills/lakehouse-doc/references/sql_functions/table_functions/read_kafka.md +85 -0
  1129. package/bin/skills/lakehouse-doc/references/sql_functions/table_functions/stack.md +69 -0
  1130. package/bin/skills/lakehouse-doc/references/sql_functions/table_functions/table_changes.md +136 -0
  1131. package/bin/skills/lakehouse-doc/references/sql_functions/table_functions/unnset.md +106 -0
  1132. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/avg.md +91 -0
  1133. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/count.md +88 -0
  1134. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/cume_dist.md +104 -0
  1135. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/dense_rank.md +119 -0
  1136. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/first.md +159 -0
  1137. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/first_value.md +133 -0
  1138. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/lag.md +72 -0
  1139. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/last.md +166 -0
  1140. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/last_value.md +160 -0
  1141. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/lead.md +49 -0
  1142. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/max.md +167 -0
  1143. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/min.md +138 -0
  1144. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/nth_value.md +58 -0
  1145. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/ntile.md +57 -0
  1146. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/percent_rank.md +58 -0
  1147. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/rank.md +139 -0
  1148. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/row_number.md +141 -0
  1149. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/sum.md +105 -0
  1150. package/bin/skills/lakehouse-doc/references/sql_functions/window_functions/window_clause.md +91 -0
  1151. package/bin/skills/lakehouse-doc/references/sql_practice.md +1 -0
  1152. package/bin/skills/lakehouse-doc/references/sql_rfm.md +181 -0
  1153. package/bin/skills/lakehouse-doc/references/sqlalchemy.md +82 -0
  1154. package/bin/skills/lakehouse-doc/references/ssb-benchmark.md +224 -0
  1155. package/bin/skills/lakehouse-doc/references/sso-configuration.md +157 -0
  1156. package/bin/skills/lakehouse-doc/references/storage_encryption.md +81 -0
  1157. package/bin/skills/lakehouse-doc/references/streaming_data_pipeline_overview.md +20 -0
  1158. package/bin/skills/lakehouse-doc/references/streaming_data_pipeline_overview1.md +1 -0
  1159. package/bin/skills/lakehouse-doc/references/streaming_pipeline_with_dynamic_table.md +66 -0
  1160. package/bin/skills/lakehouse-doc/references/structure_data_analysis.md +229 -0
  1161. package/bin/skills/lakehouse-doc/references/studio_manual.md +1 -0
  1162. package/bin/skills/lakehouse-doc/references/studio_overview.md +71 -0
  1163. package/bin/skills/lakehouse-doc/references/synonym.md +139 -0
  1164. package/bin/skills/lakehouse-doc/references/table-stream-title.md +1 -0
  1165. package/bin/skills/lakehouse-doc/references/table-stream.md +1 -0
  1166. package/bin/skills/lakehouse-doc/references/table-summary.md +1 -0
  1167. package/bin/skills/lakehouse-doc/references/table_funciotn.md +1 -0
  1168. package/bin/skills/lakehouse-doc/references/table_stream.md +118 -0
  1169. package/bin/skills/lakehouse-doc/references/tablesample.md +474 -0
  1170. package/bin/skills/lakehouse-doc/references/tablestream_summary.md +515 -0
  1171. package/bin/skills/lakehouse-doc/references/task-instance-maintenance.md +207 -0
  1172. package/bin/skills/lakehouse-doc/references/task_development.md +56 -0
  1173. package/bin/skills/lakehouse-doc/references/task_group.md +151 -0
  1174. package/bin/skills/lakehouse-doc/references/task_param.md +978 -0
  1175. package/bin/skills/lakehouse-doc/references/task_scheduling.md +1 -0
  1176. package/bin/skills/lakehouse-doc/references/task_scheduling_dependency.md +74 -0
  1177. package/bin/skills/lakehouse-doc/references/taskdevelop.md +268 -0
  1178. package/bin/skills/lakehouse-doc/references/tencentcloud_arn_and_externalid.md +29 -0
  1179. package/bin/skills/lakehouse-doc/references/time-function.md +67 -0
  1180. package/bin/skills/lakehouse-doc/references/timetravel-summary.md +47 -0
  1181. package/bin/skills/lakehouse-doc/references/tools_AI.md +1 -0
  1182. package/bin/skills/lakehouse-doc/references/tools_BI.md +1 -0
  1183. package/bin/skills/lakehouse-doc/references/tpcds-benchmark.md +754 -0
  1184. package/bin/skills/lakehouse-doc/references/tpch-benchmark.md +887 -0
  1185. package/bin/skills/lakehouse-doc/references/transformt-dt.md +291 -0
  1186. package/bin/skills/lakehouse-doc/references/trial-account-quotas-and-limits.md +81 -0
  1187. package/bin/skills/lakehouse-doc/references/tutorial_DataGPT.md +1 -0
  1188. package/bin/skills/lakehouse-doc/references/tutorial_connect_to_lakehouse.md +1 -0
  1189. package/bin/skills/lakehouse-doc/references/tutorial_data_transformation.md +1 -0
  1190. package/bin/skills/lakehouse-doc/references/tutorial_migration.md +1 -0
  1191. package/bin/skills/lakehouse-doc/references/tutorial_virtual_cluster.md +1 -0
  1192. package/bin/skills/lakehouse-doc/references/tutorial_work_with_workspace.md +1 -0
  1193. package/bin/skills/lakehouse-doc/references/tutorial_zettapark.md +1 -0
  1194. package/bin/skills/lakehouse-doc/references/tutorials-streaming-data-pipeline-with_dynamic-table.md +124 -0
  1195. package/bin/skills/lakehouse-doc/references/undrop-dynamic-table.md +79 -0
  1196. package/bin/skills/lakehouse-doc/references/undrop-materialized-view.md +80 -0
  1197. package/bin/skills/lakehouse-doc/references/unifiedWorkflow.md +1 -0
  1198. package/bin/skills/lakehouse-doc/references/unloa-data-summary.md +17 -0
  1199. package/bin/skills/lakehouse-doc/references/unload-data-local.md +72 -0
  1200. package/bin/skills/lakehouse-doc/references/unstructure_data_analysis.md +1 -0
  1201. package/bin/skills/lakehouse-doc/references/unstructured_etl_pipeline_notebook.md +12 -0
  1202. package/bin/skills/lakehouse-doc/references/unstructured_etl_pipeline_user_guide.md +949 -0
  1203. package/bin/skills/lakehouse-doc/references/unstructured_etl_python_api.md +896 -0
  1204. package/bin/skills/lakehouse-doc/references/upload-data.md +1 -0
  1205. package/bin/skills/lakehouse-doc/references/upload_data.md +123 -0
  1206. package/bin/skills/lakehouse-doc/references/use-dbt-dev.md +441 -0
  1207. package/bin/skills/lakehouse-doc/references/use-external-schema.md +42 -0
  1208. package/bin/skills/lakehouse-doc/references/use-java-sdk-releatime-uploaddata.md +168 -0
  1209. package/bin/skills/lakehouse-doc/references/use-java-sdk-upload-dta-local.md +140 -0
  1210. package/bin/skills/lakehouse-doc/references/use-mysql-client.md +189 -0
  1211. package/bin/skills/lakehouse-doc/references/use-python-sdk-upload-data.md +99 -0
  1212. package/bin/skills/lakehouse-doc/references/use-schema.md +49 -0
  1213. package/bin/skills/lakehouse-doc/references/use-vcluster.md +38 -0
  1214. package/bin/skills/lakehouse-doc/references/user-aggrement.md +229 -0
  1215. package/bin/skills/lakehouse-doc/references/user-external-funciton.md +1 -0
  1216. package/bin/skills/lakehouse-doc/references/user-identification.md +58 -0
  1217. package/bin/skills/lakehouse-doc/references/user_permission_grand_guide.md +322 -0
  1218. package/bin/skills/lakehouse-doc/references/using-google-authenticator.md +48 -0
  1219. package/bin/skills/lakehouse-doc/references/using-udf-in-dynamic-table.md +162 -0
  1220. package/bin/skills/lakehouse-doc/references/using_mcp_solute_data_pipeline_issue.md +343 -0
  1221. package/bin/skills/lakehouse-doc/references/uuid.md +47 -0
  1222. package/bin/skills/lakehouse-doc/references/validate_schema_evolution.md +167 -0
  1223. package/bin/skills/lakehouse-doc/references/vc-job.md +1 -0
  1224. package/bin/skills/lakehouse-doc/references/vc_cache.md +71 -0
  1225. package/bin/skills/lakehouse-doc/references/vcluster_size_description.md +98 -0
  1226. package/bin/skills/lakehouse-doc/references/vector-search.md +144 -0
  1227. package/bin/skills/lakehouse-doc/references/vector-type.md +52 -0
  1228. package/bin/skills/lakehouse-doc/references/vector_data_process_guide.md +952 -0
  1229. package/bin/skills/lakehouse-doc/references/vector_search_ai.md +423 -0
  1230. package/bin/skills/lakehouse-doc/references/version-update.md +21 -0
  1231. package/bin/skills/lakehouse-doc/references/virtual-cluster.md +221 -0
  1232. package/bin/skills/lakehouse-doc/references/volume_best_practices.md +1141 -0
  1233. package/bin/skills/lakehouse-doc/references/web-job-history.md +163 -0
  1234. package/bin/skills/lakehouse-doc/references/what_is_clickzetta_lakehouse.md +92 -0
  1235. package/bin/skills/lakehouse-doc/references/window-function-summary.md +134 -0
  1236. package/bin/skills/lakehouse-doc/references/windowframe.md +139 -0
  1237. package/bin/skills/lakehouse-doc/references/working_with_Vclusters.md +171 -0
  1238. package/bin/skills/lakehouse-doc/references/working_with_cache.md +102 -0
  1239. package/bin/skills/lakehouse-doc/references/worksapce-informaiton_schema-views.md +207 -0
  1240. package/bin/skills/lakehouse-doc/references/worksheet.md +15 -0
  1241. package/bin/skills/lakehouse-doc/references/workspace-introduction.md +41 -0
  1242. package/bin/skills/lakehouse-doc/references/worskapce-infroamtionschema-summary.md +56 -0
  1243. package/package.json +13 -0
@@ -0,0 +1,2676 @@
1
+ # 云器Lakehouse表设计最佳实践指南
2
+
3
+ ## 🧭 内容介绍
4
+
5
+ ### 文档概述
6
+
7
+ 本指南是云器Lakehouse平台上表设计的全面参考手册,涵盖从基础数据类型选择到复杂企业级架构模式的各个方面。
8
+
9
+ ### 如何使用本指南
10
+
11
+ 根据您的角色和需求,我们建议以下阅读路径:
12
+
13
+ * **数据架构师**: 重点关注设计理念(第1章)、分区架构(第4章)和企业级设计模式(第10章)
14
+ * **数据工程师**: 详细了解数据类型设计(第2章)、索引架构(第6章)和性能优化(第7章)
15
+ * **后端开发者**: 集中阅读表结构设计(第3章)、复杂数据类型(第2.4节)和常见设计陷阱与解决方案(第8章)
16
+ * **快速应用**: 直接参考设计评审检查清单(第9章)作为项目指导框架
17
+
18
+ ### 核心章节导览
19
+
20
+ 1. **设计理念与原则** - 基础设计哲学和决策框架
21
+ 2. **数据类型设计策略** - 详细的类型选择指南和应用场景
22
+ 3. **表结构设计模式** - 约束、默认值和生成列的有效应用
23
+ 4. **分区架构设计** - 分区类型选择和优化策略
24
+ 5. **分桶与排序优化** - 数据物理组织的最佳实践
25
+ 6. **索引架构设计** - 向量、倒排、布隆过滤器索引详解
26
+ 7. **性能优化策略** - 查询性能和存储成本优化技巧
27
+ 8. **常见设计陷阱与解决方案** - 避免常见错误和优化建议
28
+ 9. **设计评审检查清单** - 全面的设计验证流程
29
+ 10. **企业级设计模式实战** - 四种高级应用架构详解
30
+ 11. **实验环境清理指南** - 资源管理最佳实践
31
+ 12. **总结** - 内容总结
32
+
33
+ 首次阅读时,建议先通读设计理念部分,了解核心原则,然后根据您当前的具体需求选择相关章节深入研究。每个代码示例都可以直接复制使用,帮助您快速应用到实际工作中。
34
+
35
+ ***
36
+
37
+ ## 🎯 设计理念与原则
38
+
39
+ ### 核心设计思想
40
+
41
+ 在云器Lakehouse中,优秀的表设计应当平衡**性能、可维护性和业务需求**。本指南遵循以下验证过的核心原则:
42
+
43
+ 1. **业务驱动设计** - 表结构应当反映业务模型和查询模式
44
+ 2. **性能优先考虑** - 合理的分区、分桶和索引策略至关重要
45
+ 3. **面向未来扩展** - 设计时考虑数据增长和业务演进
46
+ 4. **运维友好性** - 简化日常维护和问题排查的复杂度
47
+
48
+ ### 设计决策框架
49
+
50
+ 每个设计决策都应当考虑以下维度:
51
+
52
+ * **查询模式**: 主要的数据访问方式和频率
53
+ * **数据特征**: 数据量级、增长速度、分布特点
54
+ * **业务需求**: 实时性要求、一致性需求、扩展性需求
55
+ * **资源约束**: 存储成本、计算资源、运维复杂度
56
+
57
+ ***
58
+
59
+ ## 📊 数据类型设计策略
60
+
61
+ ### 数值类型选择指南
62
+
63
+ #### 自增主键设计
64
+
65
+ **关键限制**: IDENTITY列仅支持BIGINT类型
66
+
67
+ ```sql
68
+ -- 正确的IDENTITY使用(唯一支持的语法)
69
+ CREATE TABLE business_events (
70
+ event_id BIGINT IDENTITY, -- 仅支持BIGINT类型
71
+ event_data JSON,
72
+ created_at TIMESTAMP DEFAULT current_timestamp()
73
+ );
74
+
75
+ -- 带种子值的IDENTITY
76
+ CREATE TABLE user_accounts (
77
+ user_id BIGINT IDENTITY(1000), -- 从1000开始自增
78
+ username VARCHAR(50) NOT NULL
79
+ );
80
+ ```
81
+
82
+ **不支持的IDENTITY语法(测试确认会失败)**:
83
+
84
+ ```sql
85
+ -- 这些都会导致错误:invalid identity column type int, currently only BIGINT is supported
86
+ CREATE TABLE wrong_examples (
87
+ id INT IDENTITY, -- 失败
88
+ small_id SMALLINT IDENTITY, -- 失败
89
+ str_id VARCHAR(50) IDENTITY -- 失败
90
+ );
91
+ ```
92
+
93
+ #### 业务数值字段选择
94
+
95
+ | 数据类型 | 存储空间 | 数值范围 | 推荐场景 | 实际应用示例 |
96
+ | -------------- | ---- | ---------------- | -------- | -------------------------- |
97
+ | `TINYINT` | 1字节 | -128 到 127 | 状态码、等级 | `status TINYINT DEFAULT 1` |
98
+ | `SMALLINT` | 2字节 | -32,768 到 32,767 | 年份、计数器 | `birth_year SMALLINT` |
99
+ | `INT` | 4字节 | ±21亿 | 业务ID、大计数 | `user_id INT NOT NULL` |
100
+ | `BIGINT` | 8字节 | ±922万万亿 | 自增主键、大数值 | `id BIGINT IDENTITY` |
101
+ | `DECIMAL(p,s)` | 变长 | 最高精度38位 | 金融计算 | `amount DECIMAL(15,2)` |
102
+ | `FLOAT` | 4字节 | 单精度浮点 | 科学计算、坐标 | `temperature FLOAT` |
103
+ | `DOUBLE` | 8字节 | 双精度浮点 | 高精度计算 | `coordinate DOUBLE` |
104
+
105
+ ### 字符串类型策略
106
+
107
+ #### 长度规划原则(基于实际业务需求)
108
+
109
+ | 业务场景 | 推荐类型 | 长度设置 | 实际覆盖率 | 设计考量 |
110
+ | ----- | --------------- | --------- | ----- | ----------------- |
111
+ | 邮箱地址 | `VARCHAR(320)` | RFC5321标准 | 99.9% | 国际标准长度 |
112
+ | 用户名 | `VARCHAR(50)` | 实际调研 | 99.5% | 平衡存储和使用 |
113
+ | 手机号 | `VARCHAR(20)` | 国际格式 | 100% | 支持+86-138\*\*\*\* |
114
+ | URL地址 | `VARCHAR(2048)` | 实际测量 | 98% | 含复杂查询参数 |
115
+ | 文章标题 | `VARCHAR(200)` | SEO优化 | 95% | 搜索引擎友好 |
116
+ | 商品描述 | `VARCHAR(2000)` | 电商需求 | 90% | 详情页展示 |
117
+ | 长文本内容 | `STRING` | 不限长度 | 100% | 博客、评论等 |
118
+
119
+ ```sql
120
+ -- 字符串类型最佳实践
121
+ CREATE TABLE user_profiles (
122
+ user_id BIGINT IDENTITY,
123
+
124
+ -- 固定格式使用CHAR
125
+ country_code CHAR(2), -- CN, US, JP
126
+ currency_code CHAR(3), -- USD, CNY, EUR
127
+
128
+ -- 业务字段使用合理的VARCHAR长度
129
+ username VARCHAR(50) NOT NULL,
130
+ email VARCHAR(320) NOT NULL,
131
+ mobile_phone VARCHAR(20),
132
+
133
+ -- 描述性内容
134
+ nickname VARCHAR(100),
135
+ bio VARCHAR(500), -- 个人简介
136
+ full_description STRING, -- 详细描述,长度不定
137
+
138
+ -- 结构化数据
139
+ preferences JSON DEFAULT '{}'
140
+ );
141
+ ```
142
+
143
+ ### 向量类型应用场景
144
+
145
+ #### 向量类型语法和应用
146
+
147
+ **标准语法**: `VECTOR(scalar_type, dimension)` 或 `VECTOR(dimension)`
148
+
149
+ | 标量类型 | 存储开销 | 适用场景 | 维度推荐 | 应用示例 |
150
+ | --------- | ----- | --------- | -------- | ---------------------- |
151
+ | `FLOAT` | 4字节/维 | 语义向量、通用AI | 128-2048 | `VECTOR(FLOAT, 768)` |
152
+ | `INT` | 4字节/维 | 离散特征、计数向量 | 64-1024 | `VECTOR(INT, 256)` |
153
+ | `TINYINT` | 1字节/维 | 压缩向量、移动端 | 64-512 | `VECTOR(TINYINT, 128)` |
154
+
155
+ **实际应用案例**:
156
+
157
+ ```sql
158
+ CREATE TABLE ai_content_vectors (
159
+ content_id BIGINT IDENTITY,
160
+ content_type VARCHAR(50),
161
+
162
+ -- 不同业务场景的向量配置
163
+ text_embedding VECTOR(FLOAT, 768), -- BERT/RoBERTa输出
164
+ image_features VECTOR(FLOAT, 512), -- ResNet/CNN特征
165
+ user_preference VECTOR(INT, 256), -- 推荐系统用户画像
166
+ mobile_compact VECTOR(TINYINT, 128), -- 移动端轻量化
167
+ general_vector VECTOR(512) -- 默认FLOAT类型
168
+ );
169
+
170
+ -- 向量数据插入语法(注意:维度必须严格匹配)
171
+ INSERT INTO ai_content_vectors (content_type, text_embedding) VALUES (
172
+ 'document',
173
+ cast(concat('[', repeat('0.1,', 767), '0.1]') as VECTOR(FLOAT, 768))
174
+ );
175
+ ```
176
+
177
+ ### 复杂数据类型使用指南
178
+
179
+ #### STRUCT类型正确使用
180
+
181
+ **正确的STRUCT数据插入语法**:
182
+
183
+ ```sql
184
+ CREATE TABLE user_complex_data (
185
+ user_id BIGINT IDENTITY,
186
+
187
+ -- 简单结构体
188
+ basic_info STRUCT<id:INT, name:STRING, age:INT>,
189
+
190
+ -- 复杂嵌套结构体
191
+ detailed_profile STRUCT<
192
+ personal:STRUCT<name:STRING, email:STRING>,
193
+ address:STRUCT<city:STRING, country:STRING>,
194
+ preferences:MAP<STRING, STRING>
195
+ >
196
+ );
197
+
198
+ -- 方法1:使用struct函数(按位置传参)
199
+ INSERT INTO user_complex_data (basic_info) VALUES (
200
+ struct(123, 'Alice', 25)
201
+ );
202
+
203
+ -- 方法2:使用named_struct函数(推荐,明确字段名)
204
+ INSERT INTO user_complex_data (basic_info) VALUES (
205
+ named_struct('id', 123, 'name', 'Alice', 'age', 25)
206
+ );
207
+
208
+ -- 复杂嵌套结构的插入
209
+ INSERT INTO user_complex_data (detailed_profile) VALUES (
210
+ named_struct(
211
+ 'personal', named_struct('name', 'Bob', 'email', 'bob@test.com'),
212
+ 'address', named_struct('city', 'Shanghai', 'country', 'China'),
213
+ 'preferences', map('lang', 'zh', 'theme', 'dark')
214
+ )
215
+ );
216
+ ```
217
+
218
+ #### ARRAY和MAP类型使用
219
+
220
+ ```sql
221
+ CREATE TABLE collection_types_demo (
222
+ record_id BIGINT IDENTITY,
223
+
224
+ -- 数组类型
225
+ tags ARRAY<STRING>,
226
+ scores ARRAY<INT>,
227
+ nested_arrays ARRAY<ARRAY<STRING>>,
228
+
229
+ -- 映射类型
230
+ config MAP<STRING, STRING>,
231
+ metrics MAP<STRING, DOUBLE>,
232
+ complex_map MAP<STRING, ARRAY<INT>>
233
+ );
234
+
235
+ -- 正确的插入语法
236
+ INSERT INTO collection_types_demo (
237
+ tags, scores, nested_arrays, config, metrics, complex_map
238
+ ) VALUES (
239
+ array('tech', 'AI', 'database'), -- 字符串数组
240
+ array(85, 92, 78), -- 整数数组
241
+ array(array('group1', 'item1'), array('group2', 'item2')), -- 嵌套数组
242
+ map('env', 'prod', 'version', 'v2.2'), -- 字符串映射
243
+ map('cpu_usage', 0.75, 'memory_usage', 0.60), -- 数值映射
244
+ map('feature1', array(1, 2, 3), 'feature2', array(4, 5, 6)) -- 复杂映射
245
+ );
246
+ ```
247
+
248
+ ***
249
+
250
+ ## 🏗️ 表结构设计模式
251
+
252
+ ### 约束设计策略
253
+
254
+ #### NOT NULL约束的合理应用
255
+
256
+ NOT NULL约束不仅保障数据完整性,更是查询优化器的重要提示:
257
+
258
+ ```sql
259
+ CREATE TABLE order_management (
260
+ order_id BIGINT IDENTITY,
261
+
262
+ -- 业务核心字段:必须非空
263
+ customer_id INT NOT NULL, -- 核心业务关联
264
+ order_time TIMESTAMP NOT NULL, -- 核心时间维度
265
+ order_status TINYINT NOT NULL DEFAULT 0, -- 业务状态
266
+ total_amount DECIMAL(12,2) NOT NULL, -- 核心金额字段
267
+
268
+ -- 可选业务字段:允许为空
269
+ coupon_code VARCHAR(20), -- 优惠券(可选)
270
+ customer_notes VARCHAR(500), -- 客户备注(可选)
271
+ gift_message VARCHAR(200), -- 礼品留言(可选)
272
+
273
+ -- 系统字段:非空且有默认值
274
+ created_at TIMESTAMP NOT NULL DEFAULT current_timestamp(),
275
+ updated_at TIMESTAMP, -- 更新时间(首次为NULL)
276
+
277
+ -- 分区字段(生成列)
278
+ date_partition STRING GENERATED ALWAYS AS (
279
+ date_format(order_time, 'yyyy-MM-dd')
280
+ )
281
+ )
282
+ PARTITIONED BY (date_partition);
283
+ ```
284
+
285
+ #### 默认值的使用
286
+
287
+ 默认值设计应当反映业务逻辑和系统行为:
288
+
289
+ ```sql
290
+ CREATE TABLE user_account_enhanced (
291
+ user_id BIGINT IDENTITY,
292
+ username VARCHAR(50) NOT NULL,
293
+
294
+ -- 业务状态的合理默认值
295
+ account_status TINYINT DEFAULT 1, -- 1=正常, 0=禁用, 2=锁定
296
+ email_verified BOOLEAN DEFAULT false, -- 默认未验证
297
+ phone_verified BOOLEAN DEFAULT false, -- 默认未验证
298
+
299
+ -- 数值字段的业务默认值
300
+ credit_balance DECIMAL(10,2) DEFAULT 0.00, -- 默认余额为0
301
+ loyalty_points INT DEFAULT 0, -- 默认积分为0
302
+ login_attempts TINYINT DEFAULT 0, -- 默认登录尝试次数
303
+
304
+ -- 时间字段的系统默认值
305
+ registration_time TIMESTAMP DEFAULT current_timestamp(),
306
+ last_login_time TIMESTAMP, -- 首次登录前为NULL
307
+ password_changed_at TIMESTAMP DEFAULT current_timestamp(),
308
+
309
+ -- JSON字段的默认值
310
+ user_preferences JSON DEFAULT '{}', -- 默认空对象
311
+ security_settings JSON DEFAULT '{"two_factor": false, "login_notifications": true}'
312
+ );
313
+ ```
314
+
315
+ ### 生成列函数详细清单
316
+
317
+ 生成列仅支持**确定性标量函数**,以下是经过测试验证的完整函数列表:
318
+
319
+ #### 时间日期函数
320
+
321
+ | 函数名 | 功能描述 | 输入类型 | 返回类型 | 使用示例 | 验证状态 |
322
+ | --------------- | -------- | -------------- | ------ | ------------------------------- | ---- |
323
+ | `year()` | 提取年份 | DATE/TIMESTAMP | INT | `year(order_date)` | 验证通过 |
324
+ | `month()` | 提取月份 | DATE/TIMESTAMP | INT | `month(order_date)` | 验证通过 |
325
+ | `day()` | 提取日 | DATE/TIMESTAMP | INT | `day(order_date)` | 验证通过 |
326
+ | `hour()` | 提取小时 | TIMESTAMP | INT | `hour(event_time)` | 验证通过 |
327
+ | `minute()` | 提取分钟 | TIMESTAMP | INT | `minute(event_time)` | 验证通过 |
328
+ | `second()` | 提取秒 | TIMESTAMP | INT | `second(event_time)` | 验证通过 |
329
+ | `dayofweek()` | 星期几(1-7) | DATE/TIMESTAMP | INT | `dayofweek(order_date)` | 验证通过 |
330
+ | `dayofyear()` | 年中第几天 | DATE/TIMESTAMP | INT | `dayofyear(order_date)` | 验证通过 |
331
+ | `quarter()` | 季度(1-4) | DATE/TIMESTAMP | INT | `quarter(order_date)` | 验证通过 |
332
+ | `date_format()` | 格式化日期 | DATE/TIMESTAMP | STRING | `date_format(dt, 'yyyy-MM-dd')` | 验证通过 |
333
+
334
+ #### 数学函数
335
+
336
+ | 函数名 | 功能描述 | 使用示例 | 验证状态 |
337
+ | --------- | ---- | ------------------ | ---- |
338
+ | `abs()` | 绝对值 | `abs(profit_loss)` | 验证通过 |
339
+ | `round()` | 四舍五入 | `round(amount, 2)` | 验证通过 |
340
+ | `ceil()` | 向上取整 | `ceil(price)` | 验证通过 |
341
+ | `floor()` | 向下取整 | `floor(score)` | 验证通过 |
342
+ | `power()` | 幂运算 | `power(base, 2)` | 验证通过 |
343
+ | `sqrt()` | 平方根 | `sqrt(area)` | 验证通过 |
344
+ | `mod()` | 取模运算 | `mod(id, 10)` | 验证通过 |
345
+
346
+ #### 字符串函数
347
+
348
+ | 函数名 | 功能描述 | 使用示例 | 返回类型 | 验证状态 |
349
+ | ----------- | ------ | ------------------------------------ | ------ | ---- |
350
+ | `concat()` | 字符串连接 | `concat(first_name, ' ', last_name)` | STRING | 验证通过 |
351
+ | `length()` | 字符串长度 | `length(username)` | INT | 验证通过 |
352
+ | `upper()` | 转大写 | `upper(code)` | STRING | 验证通过 |
353
+ | `lower()` | 转小写 | `lower(email)` | STRING | 验证通过 |
354
+ | `trim()` | 去除首尾空格 | `trim(input_text)` | STRING | 验证通过 |
355
+ | `substr()` | 截取子串 | `substr(phone, 1, 3)` | STRING | 验证通过 |
356
+ | `replace()` | 字符串替换 | `replace(text, 'old', 'new')` | STRING | 验证通过 |
357
+
358
+ #### 类型转换和条件函数
359
+
360
+ | 函数名 | 功能描述 | 使用示例 | 验证状态 |
361
+ | ------------ | ------ | ------------------------------------------- | ---- |
362
+ | `cast()` | 类型转换 | `cast(amount AS STRING)` | 验证通过 |
363
+ | `string()` | 转字符串 | `string(user_id)` | 验证通过 |
364
+ | `int()` | 转整数 | `int(price_str)` | 验证通过 |
365
+ | `if()` | 简单条件判断 | `if(amount > 0, 'positive', 'negative')` | 验证通过 |
366
+ | `coalesce()` | 空值处理 | `coalesce(nickname, username, 'anonymous')` | 验证通过 |
367
+ | `nullif()` | 空值转换 | `nullif(status, '')` | 验证通过 |
368
+
369
+ #### 不支持的非确定性函数(测试确认)
370
+
371
+ 以下函数在生成列中不被支持,会导致语法错误:
372
+
373
+ * `current_timestamp()` - 当前时间戳
374
+ * `current_date()` - 当前日期
375
+ * `random()` - 随机数生成
376
+ * `uuid()` - UUID生成
377
+ * `current_user()` - 当前用户
378
+
379
+ **生成列综合应用示例**:
380
+
381
+ ```sql
382
+ CREATE TABLE comprehensive_generated_columns (
383
+ order_id BIGINT IDENTITY,
384
+ customer_name VARCHAR(100),
385
+ order_time TIMESTAMP NOT NULL,
386
+ total_amount DECIMAL(12,2),
387
+ discount_rate DECIMAL(5,4) DEFAULT 0,
388
+
389
+ -- 时间维度生成列(用于分区和分析)
390
+ order_year INT GENERATED ALWAYS AS (year(order_time)),
391
+ order_month INT GENERATED ALWAYS AS (month(order_time)),
392
+ order_date STRING GENERATED ALWAYS AS (date_format(order_time, 'yyyy-MM-dd')),
393
+ order_hour INT GENERATED ALWAYS AS (hour(order_time)),
394
+ quarter_label STRING GENERATED ALWAYS AS (concat('Q', string(quarter(order_time)))),
395
+ weekday INT GENERATED ALWAYS AS (dayofweek(order_time)),
396
+
397
+ -- 业务计算生成列
398
+ final_amount DECIMAL(12,2) GENERATED ALWAYS AS (round(total_amount * (1 - discount_rate), 2)),
399
+ amount_category STRING GENERATED ALWAYS AS (
400
+ if(total_amount < 100, 'small',
401
+ if(total_amount < 1000, 'medium', 'large'))
402
+ ),
403
+
404
+ -- 字符串处理生成列
405
+ customer_initial STRING GENERATED ALWAYS AS (upper(substr(trim(customer_name), 1, 1))),
406
+ name_length INT GENERATED ALWAYS AS (length(trim(customer_name))),
407
+ display_name STRING GENERATED ALWAYS AS (concat('[', string(order_id), '] ', customer_name)),
408
+ normalized_name STRING GENERATED ALWAYS AS (lower(trim(customer_name)))
409
+ )
410
+ PARTITIONED BY (order_date) -- 使用生成列作为分区键
411
+ COMMENT '订单表 - 展示生成列的各种实际应用场景';
412
+ ```
413
+
414
+ ***
415
+
416
+ ## 🗂️ 分区架构设计
417
+
418
+ ### 分区策略选择框架
419
+
420
+ #### 支持的分区数据类型(测试确认)
421
+
422
+ | 类型 | 支持状态 | 使用建议 | 实际应用示例 | 测试状态 |
423
+ | -------------- | ---- | ----------- | ----------------------- | ---- |
424
+ | `TINYINT` | 支持 | 状态、等级分区 | `status TINYINT` | 已验证 |
425
+ | `SMALLINT` | 支持 | 年份、月份分区 | `year_part SMALLINT` | 已验证 |
426
+ | `INT` | 支持 | 常用分区类型 | `user_id INT` | 已验证 |
427
+ | `BIGINT` | 支持 | 大数值分区 | `account_id BIGINT` | 已验证 |
428
+ | `STRING` | 支持 | **最常用分区类型** | `date_partition STRING` | 已验证 |
429
+ | `VARCHAR(n)` | 支持 | 变长字符串分区 | `region VARCHAR(50)` | 已验证 |
430
+ | `CHAR(n)` | 支持 | 固定长度分区 | `country CHAR(2)` | 已验证 |
431
+ | `BOOLEAN` | 支持 | 二值分区 | `is_active BOOLEAN` | 已验证 |
432
+ | `DATE` | 支持 | 日期分区 | `order_date DATE` | 已验证 |
433
+ | `TIMESTAMP` | 不支持 | 需要转换为其他类型 | 使用生成列转换 | 确认限制 |
434
+ | `FLOAT/DOUBLE` | 不支持 | 精度问题不推荐 | 避免使用 | 确认限制 |
435
+ | `DECIMAL` | 不支持 | 精度和性能考虑 | 避免使用 | 确认限制 |
436
+
437
+ #### 时间序列分区模式
438
+
439
+ **模式1: 按日分区(推荐,最常用)**
440
+
441
+ ```sql
442
+ CREATE TABLE daily_business_logs (
443
+ log_id BIGINT IDENTITY,
444
+ application VARCHAR(50) NOT NULL,
445
+ log_level VARCHAR(10) NOT NULL,
446
+ message STRING,
447
+ user_id INT,
448
+ log_timestamp TIMESTAMP NOT NULL,
449
+
450
+ -- 使用生成列创建日期分区键
451
+ date_partition STRING GENERATED ALWAYS AS (
452
+ date_format(log_timestamp, 'yyyy-MM-dd')
453
+ )
454
+ )
455
+ PARTITIONED BY (date_partition)
456
+ HASH CLUSTERED BY (application)
457
+ SORTED BY (log_timestamp DESC)
458
+ INTO 128 BUCKETS
459
+ COMMENT '业务日志表 - 按日期分区,便于日志管理和查询';
460
+ ```
461
+
462
+ **模式2: 按小时分区(高频数据)**
463
+
464
+ ```sql
465
+ CREATE TABLE realtime_metrics (
466
+ metric_id BIGINT IDENTITY,
467
+ sensor_id VARCHAR(100) NOT NULL,
468
+ metric_value DOUBLE,
469
+ collect_time TIMESTAMP NOT NULL,
470
+
471
+ -- 按小时分区,适合实时监控
472
+ hour_partition STRING GENERATED ALWAYS AS (
473
+ date_format(collect_time, 'yyyy-MM-dd-HH')
474
+ )
475
+ )
476
+ PARTITIONED BY (hour_partition)
477
+ HASH CLUSTERED BY (sensor_id)
478
+ SORTED BY (collect_time DESC)
479
+ INTO 512 BUCKETS
480
+ COMMENT '实时指标表 - 按小时分区,支持高频数据写入';
481
+ ```
482
+
483
+ **模式3: 按月分区(历史归档)**
484
+
485
+ ```sql
486
+ CREATE TABLE monthly_report_data (
487
+ report_id BIGINT IDENTITY,
488
+ business_data JSON,
489
+ created_time TIMESTAMP NOT NULL,
490
+
491
+ -- 按月分区,减少分区数量
492
+ month_partition STRING GENERATED ALWAYS AS (
493
+ date_format(created_time, 'yyyy-MM')
494
+ )
495
+ )
496
+ PARTITIONED BY (month_partition)
497
+ COMMENT '月度报表数据 - 按月分区,优化长期存储';
498
+ ```
499
+
500
+ #### 业务维度分区模式
501
+
502
+ **多租户分区模式**:
503
+
504
+ ```sql
505
+ CREATE TABLE saas_tenant_data (
506
+ record_id BIGINT IDENTITY,
507
+ tenant_id VARCHAR(50) NOT NULL,
508
+ entity_type VARCHAR(50) NOT NULL,
509
+ entity_data JSON,
510
+ created_time TIMESTAMP DEFAULT current_timestamp(),
511
+
512
+ -- 按租户分区,实现数据隔离
513
+ tenant_partition STRING GENERATED ALWAYS AS (tenant_id)
514
+ )
515
+ PARTITIONED BY (tenant_partition)
516
+ HASH CLUSTERED BY (entity_type)
517
+ SORTED BY (created_time DESC)
518
+ INTO 64 BUCKETS
519
+ COMMENT '多租户数据表 - 按租户ID分区,实现完全数据隔离';
520
+ ```
521
+
522
+ **地理区域分区模式**:
523
+
524
+ ```sql
525
+ CREATE TABLE global_order_data (
526
+ order_id BIGINT IDENTITY,
527
+ customer_id INT NOT NULL,
528
+ region VARCHAR(50) NOT NULL, -- 地理区域
529
+ country VARCHAR(50) NOT NULL,
530
+ order_data JSON,
531
+ order_time TIMESTAMP
532
+ )
533
+ PARTITIONED BY (region) -- 按区域分区
534
+ HASH CLUSTERED BY (customer_id)
535
+ SORTED BY (order_time DESC)
536
+ INTO 128 BUCKETS
537
+ COMMENT '全球订单数据 - 按地理区域分区,支持区域化查询';
538
+ ```
539
+
540
+ #### 复合分区策略(高级应用)
541
+
542
+ **时间+业务维度双重分区**:
543
+
544
+ ```sql
545
+ CREATE TABLE advanced_partitioning_example (
546
+ event_id BIGINT IDENTITY,
547
+ user_id INT NOT NULL,
548
+ business_type VARCHAR(50) NOT NULL,
549
+ event_time TIMESTAMP NOT NULL,
550
+ event_data JSON,
551
+
552
+ -- 复合分区键
553
+ date_partition STRING GENERATED ALWAYS AS (date_format(event_time, 'yyyy-MM-dd')),
554
+ business_partition STRING GENERATED ALWAYS AS (business_type)
555
+ )
556
+ PARTITIONED BY (date_partition, business_partition) -- 双重分区
557
+ HASH CLUSTERED BY (user_id)
558
+ SORTED BY (event_time DESC)
559
+ INTO 256 BUCKETS
560
+ COMMENT '高级分区示例 - 时间和业务维度双重分区';
561
+ ```
562
+
563
+ ### 分区管理和优化
564
+
565
+ #### 动态分区限制
566
+
567
+ **关键限制**: 单个插入任务最多创建2048个动态分区
568
+
569
+ ```sql
570
+ -- 可能超出限制的操作
571
+ INSERT INTO large_partition_table
572
+ SELECT * FROM source_table_with_many_partitions; -- 如果source表分区数>2048会失败
573
+
574
+ -- 解决方案1: 分批插入
575
+ INSERT INTO large_partition_table
576
+ SELECT * FROM source_table_with_many_partitions
577
+ WHERE date_column BETWEEN '2024-01-01' AND '2024-01-10'; -- 限制分区范围
578
+
579
+ -- 解决方案2: 循环插入(应用层实现)
580
+ -- 在应用程序中按日期/区域等维度分批插入,每批控制在2000个分区以内
581
+ ```
582
+
583
+ #### 数据生命周期管理
584
+
585
+ ```sql
586
+ -- 设置表级数据生命周期
587
+ CREATE TABLE lifecycle_managed_table (
588
+ record_id BIGINT IDENTITY,
589
+ business_data JSON,
590
+ created_time TIMESTAMP,
591
+
592
+ date_partition STRING GENERATED ALWAYS AS (date_format(created_time, 'yyyy-MM-dd'))
593
+ )
594
+ PARTITIONED BY (date_partition)
595
+ PROPERTIES ('data_lifecycle' = '90') -- 90天后自动清理
596
+ COMMENT '生命周期管理表 - 90天数据保留策略';
597
+ ```
598
+
599
+ ***
600
+
601
+ ## 🪣 分桶与排序优化
602
+
603
+ ### 分桶策略设计
604
+
605
+ #### 分桶数量规划指南
606
+
607
+ 基于实际测试验证的分桶配置建议:
608
+
609
+ | 数据规模 | 建议桶数 | 单桶目标大小 | 适用场景 | 测试验证结果 |
610
+ | -------- | -------- | ------- | --------- | ------ |
611
+ | < 10GB | 16-32 | \~512MB | 小型业务表、维度表 | 测试通过 |
612
+ | 10GB-1TB | 64-256 | \~1GB | 主要业务表、事实表 | 测试通过 |
613
+ | 1TB-10TB | 256-1024 | \~2GB | 大型分析表、历史表 | 推荐配置 |
614
+ | > 10TB | 1024+ | \~4GB | 超大数据仓库表 | 架构支持 |
615
+
616
+ #### 分桶列选择原则
617
+
618
+ 1. **高基数原则**: 选择值分布均匀、基数高的列
619
+ 2. **查询亲和性**: 优先选择JOIN和GROUP BY中的关键列
620
+ 3. **写入均衡**: 避免数据倾斜和写入热点
621
+
622
+ ```sql
623
+ -- 最佳实践:用户行为分析表
624
+ CREATE TABLE user_behavior_optimized (
625
+ behavior_id BIGINT IDENTITY,
626
+ user_id INT NOT NULL, -- 高基数,分布均匀
627
+ session_id VARCHAR(100) NOT NULL,
628
+ behavior_type VARCHAR(50), -- 浏览、点击、购买等
629
+ behavior_time TIMESTAMP NOT NULL,
630
+ product_id INT,
631
+
632
+ -- 分区策略
633
+ date_partition STRING GENERATED ALWAYS AS (date_format(behavior_time, 'yyyy-MM-dd'))
634
+ )
635
+ PARTITIONED BY (date_partition)
636
+ HASH CLUSTERED BY (user_id) -- 用户维度分桶,支持用户行为分析
637
+ SORTED BY (behavior_time DESC, behavior_type ASC) -- 时间倒序+行为类型排序
638
+ INTO 256 BUCKETS; -- 适合中大型数据量
639
+
640
+ -- 索引优化
641
+ CREATE BLOOMFILTER INDEX user_lookup_idx ON TABLE user_behavior_optimized(user_id);
642
+ CREATE BLOOMFILTER INDEX product_filter_idx ON TABLE user_behavior_optimized(product_id);
643
+ CREATE INVERTED INDEX behavior_type_idx ON TABLE user_behavior_optimized(behavior_type);
644
+ ```
645
+
646
+ ### 排序策略优化
647
+
648
+ 排序字段的选择直接影响查询性能,特别是范围查询和TOP-N查询:
649
+
650
+ ```sql
651
+ -- 金融交易表的排序优化
652
+ CREATE TABLE financial_transactions_optimized (
653
+ transaction_id BIGINT IDENTITY,
654
+ account_id INT NOT NULL,
655
+ transaction_time TIMESTAMP NOT NULL,
656
+ amount DECIMAL(15,2) NOT NULL,
657
+ transaction_type VARCHAR(20) NOT NULL,
658
+ risk_score DECIMAL(5,3),
659
+
660
+ date_partition STRING GENERATED ALWAYS AS (date_format(transaction_time, 'yyyy-MM-dd'))
661
+ )
662
+ PARTITIONED BY (date_partition)
663
+ HASH CLUSTERED BY (account_id) -- 按账户分桶
664
+ SORTED BY (
665
+ transaction_time DESC, -- 时间倒序:最新交易优先
666
+ amount DESC, -- 金额倒序:大额交易优先
667
+ risk_score DESC -- 风险评分倒序:高风险优先
668
+ )
669
+ INTO 512 BUCKETS
670
+ COMMENT '金融交易表 - 优化时间、金额、风险维度的查询性能';
671
+ ```
672
+
673
+ ***
674
+
675
+ ## 🔍 索引架构设计
676
+
677
+ ### 向量索引详细配置
678
+
679
+ #### 距离函数完整支持列表(全部验证通过)
680
+
681
+ **完整测试验证**: 以下所有距离函数已通过详尽测试,确认在云器Lakehouse当前版本中完全可用
682
+
683
+ | 距离函数 | 适用场景 | 数学特性 | 性能特点 | 验证状态 |
684
+ | ------------------ | ------------ | ------------- | ------- | -------- |
685
+ | `cosine_distance` | 文本语义相似度、推荐系统 | 角度距离,归一化无关 | 中等性能 | **完全验证** |
686
+ | `l2_distance` | 图像特征匹配、欧式空间 | 欧几里得距离 | 较高性能 | **完全验证** |
687
+ | `dot_product` | 点积相似度、已归一化向量 | 点积(优化最小化/最大化) | **高性能** | **完全验证** |
688
+ | `jaccard_distance` | 集合相似度、稀疏向量 | 交集/并集比例 | 中等性能 | **完全验证** |
689
+ | `hamming_distance` | 二进制特征、哈希码 | 位差异计数 | 高性能 | **完全验证** |
690
+
691
+ #### 向量索引标量类型配置
692
+
693
+ | 标量类型 | 存储精度 | 支持的向量列类型 | 性能影响 | 适用场景 |
694
+ | ----- | ----- | ------------------- | ----------- | ---------- |
695
+ | `f32` | 32位浮点 | INT, FLOAT | 标准性能,平衡精度 | 通用推荐,生产级应用 |
696
+ | `f16` | 16位浮点 | INT, FLOAT | 更高性能,轻微精度损失 | 移动端、快速检索 |
697
+ | `i8` | 8位整数 | TINYINT, INT, FLOAT | 高性能,量化精度 | 极致性能要求 |
698
+ | `b1` | 1位二进制 | TINYINT, INT, FLOAT | 最高性能,最小存储 | 二进制向量、布隆过滤 |
699
+
700
+ #### HNSW算法参数详解
701
+
702
+ | 参数名 | 默认值 | 推荐范围 | 功能说明 | 性能影响 |
703
+ | ----------------- | ---- | ------- | --------- | ----------- |
704
+ | `m` | 16 | 8-64 | 每个节点最大连接数 | 提高→精度↑内存↑ |
705
+ | `ef.construction` | 128 | 64-1000 | 构建时候选集大小 | 提高→质量↑构建时间↑ |
706
+ | `max.elements` | auto | 根据数据量 | 最大向量数量预估 | 合理设置避免重建 |
707
+
708
+ #### 完整的向量索引配置示例
709
+
710
+ ```sql
711
+ -- 创建包含多种向量类型的表
712
+ CREATE TABLE comprehensive_vector_demo (
713
+ doc_id INT,
714
+ title VARCHAR(200),
715
+
716
+ -- 不同场景的向量配置
717
+ semantic_vector VECTOR(FLOAT, 768), -- 语义搜索向量
718
+ image_vector VECTOR(FLOAT, 512), -- 图像特征向量
719
+ user_vector VECTOR(INT, 256), -- 用户画像向量
720
+ binary_vector VECTOR(TINYINT, 128) -- 二进制特征向量
721
+ );
722
+
723
+ -- 高质量语义搜索索引
724
+ CREATE VECTOR INDEX semantic_search_idx
725
+ ON TABLE comprehensive_vector_demo(semantic_vector)
726
+ PROPERTIES (
727
+ "distance.function" = "cosine_distance", -- 语义相似度首选
728
+ "scalar.type" = "f32", -- 标准精度
729
+ "m" = "32", -- 提高连接数增强精度
730
+ "ef.construction" = "400", -- 高质量构建
731
+ "reuse.vector.column" = "false", -- 独立存储最高性能
732
+ "compress.codec" = "uncompressed" -- 不压缩保证性能
733
+ );
734
+
735
+ -- 快速图像检索索引
736
+ CREATE VECTOR INDEX image_search_idx
737
+ ON TABLE comprehensive_vector_demo(image_vector)
738
+ PROPERTIES (
739
+ "distance.function" = "l2_distance", -- 图像特征适合L2距离
740
+ "scalar.type" = "f16", -- 半精度提升速度
741
+ "m" = "16", -- 标准连接数
742
+ "ef.construction" = "128", -- 平衡质量和速度
743
+ "reuse.vector.column" = "true", -- 复用数据节省空间
744
+ "compress.codec" = "lz4" -- 轻量压缩
745
+ );
746
+
747
+ -- 极致性能二进制索引
748
+ CREATE VECTOR INDEX binary_search_idx
749
+ ON TABLE comprehensive_vector_demo(binary_vector)
750
+ PROPERTIES (
751
+ "distance.function" = "hamming_distance", -- 二进制向量专用
752
+ "scalar.type" = "b1", -- 1位存储最小化
753
+ "m" = "16",
754
+ "ef.construction" = "128",
755
+ "conversion.rule" = "as_bits", -- 按位处理
756
+ "compress.codec" = "zstd", -- 高压缩比
757
+ "compress.level" = "best" -- 最高压缩
758
+ );
759
+
760
+ -- 推荐系统用户画像索引
761
+ CREATE VECTOR INDEX user_profile_idx
762
+ ON TABLE comprehensive_vector_demo(user_vector)
763
+ PROPERTIES (
764
+ "distance.function" = "dot_product", -- 点积距离函数
765
+ "scalar.type" = "i8", -- 8位整数适合离散特征
766
+ "m" = "24", -- 适中连接数
767
+ "ef.construction" = "200" -- 平衡构建质量
768
+ );
769
+ ```
770
+
771
+ ### 全文检索索引配置(倒排索引)
772
+
773
+ #### 分词器选择指南
774
+
775
+ | 分词器 | 语言支持 | 分词规则 | 大小写处理 | 适用场景 | 性能特点 |
776
+ | --------- | ---- | ----------- | ----- | --------- | -------- |
777
+ | `keyword` | 通用 | 不分词,精确匹配 | 保持原样 | 状态码、标签、ID | **最高性能** |
778
+ | `english` | 英文 | ASCII字母数字边界 | 转小写 | 英文文档、产品描述 | 较高性能 |
779
+ | `chinese` | 中英混合 | 中文分词+英文单词 | 英文转小写 | 中文内容、混合文本 | 中等性能 |
780
+ | `unicode` | 多语言 | Unicode文本边界 | 转小写 | 国际化内容、多语言 | 较低性能 |
781
+
782
+ #### 数据类型倒排索引支持
783
+
784
+ | 数据类型 | 索引支持 | 分词器要求 | 使用场景 | 注意事项 |
785
+ | ---------------- | -------- | --------------- | --------- | ----------------------- |
786
+ | `STRING` | 支持 | **建议指定** | 长文本全文搜索 | 字符串类型建议指定analyzer |
787
+ | `VARCHAR(n)` | 支持 | **建议指定** | 标题、描述字段搜索 | 同STRING要求 |
788
+ | `CHAR(n)` | 支持 | **建议指定** | 固定长度文本 | 较少使用场景 |
789
+ | `INT/BIGINT` | 支持 | 不需要 | 数值范围查询优化 | 自动处理,高效 |
790
+ | `DECIMAL` | 支持 | 不需要 | 精确数值查询 | 金融场景常用 |
791
+ | `DATE/TIMESTAMP` | 支持 | 不需要 | 时间范围查询优化 | 时序数据必备 |
792
+ | `BOOLEAN` | 支持 | 不需要 | 布尔值快速过滤 | 状态筛选优化 |
793
+ | `ARRAY<T>` | **部分支持** | **不支持analyzer** | 标签列表等 | ARRAY类型列不支持指定analyzer参数 |
794
+
795
+ #### 完整的倒排索引应用示例
796
+
797
+ ```sql
798
+ -- 综合搜索场景的表设计
799
+ CREATE TABLE comprehensive_search_demo (
800
+ record_id BIGINT IDENTITY,
801
+
802
+ -- 文本搜索字段
803
+ title VARCHAR(200) NOT NULL,
804
+ content STRING,
805
+ tags ARRAY<STRING>,
806
+ author VARCHAR(100),
807
+ category VARCHAR(50),
808
+
809
+ -- 数值和时间字段
810
+ price DECIMAL(10,2),
811
+ view_count INT,
812
+ rating TINYINT,
813
+ created_date DATE,
814
+ updated_time TIMESTAMP,
815
+ is_featured BOOLEAN DEFAULT false
816
+ );
817
+
818
+ -- 中文标题搜索索引
819
+ CREATE INVERTED INDEX title_chinese_idx
820
+ ON TABLE comprehensive_search_demo(title)
821
+ PROPERTIES ('analyzer' = 'chinese');
822
+
823
+ -- 内容全文搜索索引(多语言)
824
+ CREATE INVERTED INDEX content_unicode_idx
825
+ ON TABLE comprehensive_search_demo(content)
826
+ PROPERTIES ('analyzer' = 'unicode');
827
+
828
+ -- 标签数组索引(不能指定analyzer)
829
+ CREATE INVERTED INDEX tags_idx
830
+ ON TABLE comprehensive_search_demo(tags);
831
+
832
+ -- 作者姓名搜索索引
833
+ CREATE INVERTED INDEX author_keyword_idx
834
+ ON TABLE comprehensive_search_demo(author)
835
+ PROPERTIES ('analyzer' = 'keyword');
836
+
837
+ -- 数值字段范围查询优化
838
+ CREATE INVERTED INDEX price_range_idx
839
+ ON TABLE comprehensive_search_demo(price);
840
+
841
+ CREATE INVERTED INDEX view_count_idx
842
+ ON TABLE comprehensive_search_demo(view_count);
843
+
844
+ CREATE INVERTED INDEX rating_idx
845
+ ON TABLE comprehensive_search_demo(rating);
846
+
847
+ -- 时间字段查询优化
848
+ CREATE INVERTED INDEX created_date_idx
849
+ ON TABLE comprehensive_search_demo(created_date);
850
+
851
+ CREATE INVERTED INDEX updated_time_idx
852
+ ON TABLE comprehensive_search_demo(updated_time);
853
+
854
+ -- 布尔字段快速过滤
855
+ CREATE INVERTED INDEX featured_filter_idx
856
+ ON TABLE comprehensive_search_demo(is_featured);
857
+ ```
858
+
859
+ ### 布隆过滤器索引应用(高基数列优化)
860
+
861
+ #### 适用场景分析
862
+
863
+ | 使用场景 | 基数特征 | 查询模式 | 优化效果 | 实际应用 |
864
+ | ------- | --------- | ------- | ----- | ------- |
865
+ | 用户ID查找 | 极高基数(百万+) | = 精确匹配 | 显著提升 | 用户行为分析 |
866
+ | 邮箱地址验证 | 高基数,唯一性强 | = 存在性检查 | 快速过滤 | 注册去重验证 |
867
+ | 商品SKU检索 | 高基数,业务唯一 | = 库存查询 | 快速定位 | 电商库存系统 |
868
+ | 订单号查询 | 极高基数,唯一 | = 订单查找 | 毫秒级响应 | 订单管理系统 |
869
+ | 设备ID监控 | 高基数,设备唯一 | = 设备状态 | 高效过滤 | IoT监控平台 |
870
+
871
+ #### 布隆过滤器最佳实践
872
+
873
+ ```sql
874
+ -- 高基数用户管理表
875
+ CREATE TABLE user_management_optimized (
876
+ user_id BIGINT IDENTITY,
877
+ username VARCHAR(50) NOT NULL,
878
+ email VARCHAR(320) NOT NULL,
879
+ mobile_phone VARCHAR(20),
880
+ id_card_hash VARCHAR(64), -- 身份证号哈希
881
+ device_fingerprint VARCHAR(200), -- 设备指纹
882
+
883
+ -- 核心业务字段
884
+ registration_date DATE,
885
+ last_login_time TIMESTAMP,
886
+ account_status TINYINT DEFAULT 1, -- 1=正常, 0=禁用, 2=锁定
887
+ verification_level TINYINT DEFAULT 0 -- 0=未验证, 1=邮箱, 2=手机, 3=实名
888
+ );
889
+
890
+ -- 高基数字段的布隆过滤器索引
891
+ CREATE BLOOMFILTER INDEX username_bloom_idx
892
+ ON TABLE user_management_optimized(username);
893
+
894
+ CREATE BLOOMFILTER INDEX email_bloom_idx
895
+ ON TABLE user_management_optimized(email);
896
+
897
+ CREATE BLOOMFILTER INDEX phone_bloom_idx
898
+ ON TABLE user_management_optimized(mobile_phone);
899
+
900
+ CREATE BLOOMFILTER INDEX idcard_bloom_idx
901
+ ON TABLE user_management_optimized(id_card_hash);
902
+
903
+ CREATE BLOOMFILTER INDEX device_bloom_idx
904
+ ON TABLE user_management_optimized(device_fingerprint);
905
+
906
+ -- 实际查询应用示例
907
+ -- 1. 用户注册时的快速查重
908
+ SELECT COUNT(*) FROM user_management_optimized
909
+ WHERE email = 'newuser@example.com'; -- 布隆过滤器快速过滤
910
+
911
+ -- 2. 用户登录时的快速定位
912
+ SELECT user_id, account_status, verification_level
913
+ FROM user_management_optimized
914
+ WHERE username = 'target_username'; -- 布隆过滤器加速查找
915
+
916
+ -- 3. 设备风控检查
917
+ SELECT user_id, COUNT(*) as device_usage_count
918
+ FROM user_management_optimized
919
+ WHERE device_fingerprint = 'specific_device_fp' -- 布隆过滤器快速匹配
920
+ GROUP BY user_id;
921
+ ```
922
+
923
+ ### 索引命名和管理规范
924
+
925
+ #### 索引命名最佳实践
926
+
927
+ **重要更新**: 经过实际测试验证,当前版本的云器Lakehouse在索引命名方面**严格强制schema级唯一性**。
928
+
929
+ #### 推荐的索引命名规范
930
+
931
+ **命名格式**: `{table_prefix}_{index_type}_{column_name}_idx`(`table_prefix` 为表名或其简写前缀,需保证在schema内唯一)
932
+
933
+ **索引类型缩写**:
934
+
935
+ * `vec` - 向量索引 (VECTOR INDEX)
936
+ * `inv` - 倒排索引 (INVERTED INDEX)
937
+ * `bloom` - 布隆过滤器索引 (BLOOMFILTER INDEX)
938
+
939
+ ```sql
940
+ -- 正确的索引命名实践
941
+ CREATE TABLE product_catalog (
942
+ product_id INT,
943
+ product_name VARCHAR(200),
944
+ description STRING,
945
+ category VARCHAR(100),
946
+ price DECIMAL(10,2),
947
+ features_vector VECTOR(FLOAT, 512)
948
+ );
949
+
950
+ -- 唯一且描述性的索引名称
951
+ CREATE VECTOR INDEX products_vec_features_idx
952
+ ON TABLE product_catalog(features_vector)
953
+ PROPERTIES ("distance.function" = "cosine_distance");
954
+
955
+ CREATE INVERTED INDEX products_inv_name_idx
956
+ ON TABLE product_catalog(product_name)
957
+ PROPERTIES ('analyzer' = 'chinese');
958
+
959
+ CREATE INVERTED INDEX products_inv_desc_idx
960
+ ON TABLE product_catalog(description)
961
+ PROPERTIES ('analyzer' = 'unicode');
962
+
963
+ CREATE BLOOMFILTER INDEX products_bloom_category_idx
964
+ ON TABLE product_catalog(category);
965
+
966
+ -- 另一个表使用不同的索引名称前缀
967
+ CREATE TABLE user_content (
968
+ content_id BIGINT IDENTITY,
969
+ content_text STRING,
970
+ content_vector VECTOR(FLOAT, 768)
971
+ );
972
+
973
+ CREATE VECTOR INDEX users_vec_content_idx -- 不同的表名前缀
974
+ ON TABLE user_content(content_vector)
975
+ PROPERTIES ("distance.function" = "cosine_distance");
976
+
977
+ CREATE INVERTED INDEX users_inv_text_idx -- 不同的表名前缀
978
+ ON TABLE user_content(content_text)
979
+ PROPERTIES ('analyzer' = 'chinese');
980
+ ```
981
+
982
+ ### 索引功能限制说明
983
+
984
+ #### IF NOT EXISTS语法当前状态
985
+
986
+ 根据最新测试验证,索引创建语法当前**不支持** IF NOT EXISTS选项:
987
+
988
+ ```sql
989
+ -- 不支持的索引IF NOT EXISTS语法(会导致语法错误)
990
+ CREATE VECTOR INDEX IF NOT EXISTS vec_idx
991
+ ON TABLE example_table(embedding)
992
+ PROPERTIES ("distance.function" = "cosine_distance");
993
+
994
+ CREATE INVERTED INDEX IF NOT EXISTS text_idx
995
+ ON TABLE example_table(content)
996
+ PROPERTIES ('analyzer'='chinese');
997
+
998
+ CREATE BLOOMFILTER INDEX IF NOT EXISTS bloom_idx
999
+ ON TABLE example_table(user_id);
1000
+ ```
1001
+
1002
+ 在创建索引前,建议先检查索引是否存在,避免错误:
1003
+
1004
+ ```sql
1005
+ -- 推荐做法:先检查索引是否存在
1006
+ -- 然后再创建
1007
+ CREATE VECTOR INDEX vec_idx ON TABLE example_table(embedding)
1008
+ PROPERTIES ("distance.function" = "cosine_distance");
1009
+ ```
1010
+
1011
+ #### ARRAY类型列上的索引限制
1012
+
1013
+ 通过测试确认,在ARRAY类型列上创建倒排索引时存在以下限制:
1014
+
1015
+ ```sql
1016
+ -- ARRAY类型列不支持指定analyzer参数
1017
+ CREATE TABLE array_column_table (
1018
+ id INT,
1019
+ tags ARRAY<STRING>
1020
+ );
1021
+
1022
+ -- 错误:ARRAY类型列指定analyzer
1023
+ CREATE INVERTED INDEX tags_analyzer_idx
1024
+ ON TABLE array_column_table(tags)
1025
+ PROPERTIES ('analyzer' = 'keyword'); -- 失败!
1026
+
1027
+ -- 正确:ARRAY类型列不指定analyzer
1028
+ CREATE INVERTED INDEX tags_idx
1029
+ ON TABLE array_column_table(tags); -- 成功
1030
+
1031
+ -- 替代方案:使用STRING类型存储标签
1032
+ CREATE TABLE string_tags_table (
1033
+ id INT,
1034
+ tags_str STRING -- 使用逗号分隔的标签字符串
1035
+ );
1036
+
1037
+ CREATE INVERTED INDEX tags_str_idx
1038
+ ON TABLE string_tags_table(tags_str)
1039
+ PROPERTIES ('analyzer' = 'keyword'); -- 成功
1040
+ ```
1041
+
1042
+ ***
1043
+
1044
+ ## ⚡ 性能优化策略
1045
+
1046
+ ### 查询性能优化技巧
1047
+
1048
+ #### 分区剪枝优化
1049
+
1050
+ 确保查询条件能够有效利用分区剪枝:
1051
+
1052
+ ```sql
1053
+ -- 优秀的查询模式:充分利用分区剪枝
1054
+ SELECT user_id, COUNT(*) as activity_count,
1055
+ AVG(session_duration) as avg_duration
1056
+ FROM user_activity_logs
1057
+ WHERE date_partition BETWEEN '2024-01-01' AND '2024-01-31' -- 分区剪枝
1058
+ AND user_id IN (12345, 67890, 54321) -- 分桶定位
1059
+ AND activity_type = 'purchase' -- 索引过滤
1060
+ GROUP BY user_id
1061
+ ORDER BY activity_count DESC;
1062
+
1063
+ -- 避免的查询模式:无法利用分区剪枝
1064
+ SELECT user_id, COUNT(*) as activity_count
1065
+ FROM user_activity_logs
1066
+ WHERE activity_time >= '2024-01-01 00:00:00' -- 直接使用时间列,无法分区剪枝
1067
+ AND activity_time <= '2024-01-31 23:59:59'
1068
+ GROUP BY user_id;
1069
+ ```
1070
+
1071
+ #### 多维度索引协同优化
1072
+
1073
+ ```sql
1074
+ -- 为复杂业务查询设计的表结构
1075
+ CREATE TABLE business_analytics_optimized (
1076
+ record_id BIGINT IDENTITY,
1077
+ user_id INT NOT NULL,
1078
+ product_category VARCHAR(50) NOT NULL,
1079
+ event_type VARCHAR(50) NOT NULL,
1080
+ channel VARCHAR(30) NOT NULL,
1081
+ event_data JSON,
1082
+ revenue_amount DECIMAL(12,2),
1083
+ event_timestamp TIMESTAMP NOT NULL,
1084
+
1085
+ -- 分区键
1086
+ date_partition STRING GENERATED ALWAYS AS (date_format(event_timestamp, 'yyyy-MM-dd'))
1087
+ )
1088
+ PARTITIONED BY (date_partition) -- 时间维度分区剪枝
1089
+ HASH CLUSTERED BY (user_id) -- 用户维度分桶定位
1090
+ SORTED BY (event_timestamp DESC, revenue_amount DESC) -- 时间和收入双重排序
1091
+ INTO 512 BUCKETS;
1092
+
1093
+ -- 多维度索引策略
1094
+ CREATE BLOOMFILTER INDEX analytics_user_idx ON TABLE business_analytics_optimized(user_id);
1095
+ CREATE BLOOMFILTER INDEX analytics_category_idx ON TABLE business_analytics_optimized(product_category);
1096
+ CREATE BLOOMFILTER INDEX analytics_event_idx ON TABLE business_analytics_optimized(event_type);
1097
+ CREATE BLOOMFILTER INDEX analytics_channel_idx ON TABLE business_analytics_optimized(channel);
1098
+ CREATE INVERTED INDEX analytics_revenue_idx ON TABLE business_analytics_optimized(revenue_amount);
1099
+ CREATE INVERTED INDEX analytics_data_search_idx ON TABLE business_analytics_optimized(event_data)
1100
+ PROPERTIES ('analyzer' = 'unicode');
1101
+
1102
+ -- 高效的多维度业务查询
1103
+ SELECT
1104
+ product_category,
1105
+ event_type,
1106
+ COUNT(*) as event_count,
1107
+ SUM(revenue_amount) as total_revenue,
1108
+ AVG(revenue_amount) as avg_revenue
1109
+ FROM business_analytics_optimized
1110
+ WHERE date_partition = '2024-01-15' -- 分区剪枝
1111
+ AND user_id IN (SELECT user_id FROM vip_users) -- 分桶定位 + 布隆过滤器
1112
+ AND product_category = 'electronics' -- 布隆过滤器
1113
+ AND event_type = 'purchase' -- 布隆过滤器
1114
+ AND channel = 'mobile_app' -- 布隆过滤器
1115
+ AND revenue_amount > 100 -- 倒排索引范围查询
1116
+ GROUP BY product_category, event_type
1117
+ ORDER BY total_revenue DESC;
1118
+ ```
1119
+
1120
+ #### 向量相似度查询优化
1121
+
1122
+ ```sql
1123
+ -- 向量搜索性能优化实例
1124
+ CREATE TABLE vector_search_performance (
1125
+ doc_id INT,
1126
+ doc_title VARCHAR(200),
1127
+ doc_category VARCHAR(50),
1128
+ content_embedding VECTOR(FLOAT, 768),
1129
+ summary_embedding VECTOR(FLOAT, 256), -- 较小维度的快速预筛选向量
1130
+ created_date DATE,
1131
+
1132
+ date_partition STRING GENERATED ALWAYS AS (date_format(created_date, 'yyyy-MM-dd'))
1133
+ )
1134
+ PARTITIONED BY (date_partition);
1135
+
1136
+ -- 高性能向量索引
1137
+ CREATE VECTOR INDEX content_semantic_idx
1138
+ ON TABLE vector_search_performance(content_embedding)
1139
+ PROPERTIES (
1140
+ "distance.function" = "cosine_distance",
1141
+ "scalar.type" = "f32",
1142
+ "m" = "32", -- 提高连接数增强召回
1143
+ "ef.construction" = "400", -- 高质量构建
1144
+ "reuse.vector.column" = "false" -- 独立存储保证最优性能
1145
+ );
1146
+
1147
+ -- 快速预筛选向量索引
1148
+ CREATE VECTOR INDEX summary_fast_idx
1149
+ ON TABLE vector_search_performance(summary_embedding)
1150
+ PROPERTIES (
1151
+ "distance.function" = "dot_product", -- 点积距离函数
1152
+ "scalar.type" = "f16", -- 半精度提升速度
1153
+ "m" = "16",
1154
+ "ef.construction" = "128"
1155
+ );
1156
+
1157
+ -- 传统索引辅助过滤
1158
+ CREATE BLOOMFILTER INDEX doc_category_idx ON TABLE vector_search_performance(doc_category);
1159
+
1160
+ -- 多层次向量搜索策略示例
1161
+ -- 1. 粗筛:使用小向量快速预筛选
1162
+ -- 2. 精排:使用大向量精确计算
1163
+ -- 3. 过滤:结合传统索引进一步筛选
1164
+ ```
1165
+
1166
+ ### 存储成本优化策略
1167
+
1168
+ #### 数据类型精确选择(存储优化)
1169
+
1170
+ ```sql
1171
+ -- 存储成本优化的表设计实例
1172
+ CREATE TABLE storage_cost_optimized (
1173
+ -- 主键字段:必要的存储开销
1174
+ record_id BIGINT IDENTITY, -- 8字节,必需的自增主键
1175
+
1176
+ -- 业务ID字段:根据实际需求选择类型
1177
+ user_id INT NOT NULL, -- 4字节,有符号INT上限约21亿用户
1178
+ product_id INT NOT NULL, -- 4字节,有符号INT上限约21亿商品
1179
+ order_id BIGINT NOT NULL, -- 8字节,支持超大订单量
1180
+
1181
+ -- 状态枚举字段:使用最小类型
1182
+ order_status TINYINT DEFAULT 1, -- 1字节 vs VARCHAR(20) 20字节,节省95%
1183
+ priority_level TINYINT DEFAULT 0, -- 1字节,0-127级别充足(TINYINT为有符号类型)
1184
+ user_level TINYINT DEFAULT 1, -- 1字节,VIP等级枚举
1185
+
1186
+ -- 布尔字段:明确语义
1187
+ is_paid BOOLEAN DEFAULT false, -- 1字节 vs VARCHAR(10) 10字节,节省90%
1188
+ is_shipped BOOLEAN DEFAULT false, -- 1字节,清晰的布尔语义
1189
+ is_gift BOOLEAN DEFAULT false, -- 1字节,礼品标识
1190
+
1191
+ -- 时间字段:根据精度需求选择
1192
+ order_date DATE, -- 4字节,不需要时分秒的场景
1193
+ created_timestamp TIMESTAMP, -- 8字节,需要精确时间的场景
1194
+ shipped_date DATE, -- 4字节,发货日期够用
1195
+
1196
+ -- 金额字段:精确计算
1197
+ item_price DECIMAL(10,2), -- 精确金额 vs DOUBLE精度风险
1198
+ total_amount DECIMAL(12,2), -- 支持更大金额
1199
+ discount_amount DECIMAL(8,2), -- 折扣金额范围较小
1200
+
1201
+ -- 字符串字段:精确长度设置
1202
+ customer_name VARCHAR(100), -- 100字符覆盖99.5%的实际情况
1203
+ email VARCHAR(320), -- RFC5321标准长度
1204
+ phone VARCHAR(20), -- 支持国际格式+86-13812345678
1205
+ address VARCHAR(500), -- 地址信息合理长度
1206
+
1207
+ -- 复杂数据:合理使用
1208
+ order_metadata JSON, -- 扩展属性 vs 大量稀疏列
1209
+
1210
+ -- 分类ID:使用整数代替字符串
1211
+ category_id SMALLINT, -- 2字节ID vs VARCHAR(50) 50字节,节省96%
1212
+ subcategory_id SMALLINT, -- 2字节,最多支持约32K分类(有符号上限32767)
1213
+ brand_id SMALLINT -- 2字节,品牌ID
1214
+ )
1215
+ COMMENT '存储成本优化设计 - 在功能需求和存储成本间达到最佳平衡';
1216
+
1217
+ -- 存储节省效果分析:
1218
+ -- 状态字段:从VARCHAR(20)改为TINYINT,每行节省19字节
1219
+ -- 布尔字段:从VARCHAR(10)改为BOOLEAN,每行节省9字节
1220
+ -- 分类字段:从VARCHAR(50)改为SMALLINT,每行节省48字节
1221
+ -- 总体节省:每行约76字节,千万级数据可节省约760MB存储
1222
+ ```
1223
+
1224
+ #### 分桶数量优化策略
1225
+
1226
+ ```sql
1227
+ -- 基于数据规模的分桶优化实例
1228
+
1229
+ -- 小表优化(< 10GB):避免过度分桶
1230
+ CREATE TABLE small_table_optimized (
1231
+ id BIGINT IDENTITY,
1232
+ name VARCHAR(100),
1233
+ category VARCHAR(50),
1234
+ data JSON
1235
+ )
1236
+ HASH CLUSTERED BY (category) -- 按业务维度分桶
1237
+ SORTED BY (id ASC) -- 简单排序
1238
+ INTO 16 BUCKETS -- 适中的分桶数,避免小文件问题
1239
+ COMMENT '小表优化 - 16桶平衡性能和管理复杂度';
1240
+
1241
+ -- 中表优化(10GB-1TB):标准配置
1242
+ CREATE TABLE medium_table_optimized (
1243
+ record_id BIGINT IDENTITY,
1244
+ user_id INT NOT NULL,
1245
+ business_data JSON,
1246
+ created_time TIMESTAMP,
1247
+
1248
+ date_partition STRING GENERATED ALWAYS AS (date_format(created_time, 'yyyy-MM-dd'))
1249
+ )
1250
+ PARTITIONED BY (date_partition)
1251
+ HASH CLUSTERED BY (user_id) -- 高基数列分桶
1252
+ SORTED BY (created_time DESC) -- 时间排序
1253
+ INTO 128 BUCKETS -- 标准分桶数,平衡并发和文件大小
1254
+ COMMENT '中表优化 - 128桶适合主流业务场景';
1255
+
1256
+ -- 大表优化(> 1TB):高并发配置
1257
+ CREATE TABLE large_table_optimized (
1258
+ event_id BIGINT IDENTITY,
1259
+ user_id INT NOT NULL,
1260
+ session_id VARCHAR(100),
1261
+ event_data JSON,
1262
+ event_time TIMESTAMP,
1263
+
1264
+ date_partition STRING GENERATED ALWAYS AS (date_format(event_time, 'yyyy-MM-dd'))
1265
+ )
1266
+ PARTITIONED BY (date_partition)
1267
+ HASH CLUSTERED BY (user_id, session_id) -- 组合分桶提高分布均匀性
1268
+ SORTED BY (event_time DESC)
1269
+ INTO 512 BUCKETS -- 高分桶数支持高并发写入和查询
1270
+ COMMENT '大表优化 - 512桶支持大规模并发处理';
1271
+ ```
1272
+
1273
+ ***
1274
+
1275
+ ## ⚠️ 常见设计陷阱与解决方案
1276
+
1277
+ ### 数据类型设计陷阱
1278
+
1279
+ #### 陷阱1:IDENTITY列类型错误
1280
+
1281
+ **错误场景**:
1282
+
1283
+ ```sql
1284
+ -- 以下所有IDENTITY声明都会失败
1285
+ CREATE TABLE identity_type_errors (
1286
+ id INT IDENTITY, -- 失败:不支持INT类型
1287
+ small_id SMALLINT IDENTITY, -- 失败:不支持SMALLINT类型
1288
+ char_id CHAR(10) IDENTITY, -- 失败:不支持字符类型
1289
+ decimal_id DECIMAL(10,0) IDENTITY -- 失败:不支持DECIMAL类型
1290
+ );
1291
+
1292
+ -- 错误信息:invalid identity column type int, currently only BIGINT is supported
1293
+ ```
1294
+
1295
+ **正确解决方案**:
1296
+
1297
+ ```sql
1298
+ -- 正确:统一使用BIGINT IDENTITY
1299
+ CREATE TABLE identity_correct_usage (
1300
+ id BIGINT IDENTITY, -- 唯一支持的IDENTITY类型
1301
+ user_id INT NOT NULL, -- 业务ID使用其他合适类型
1302
+ order_code VARCHAR(50) NOT NULL, -- 业务编码使用字符串
1303
+ sequence_num INT DEFAULT 1 -- 序列号使用普通INT
1304
+ ) COMMENT 'IDENTITY列正确使用示例';
1305
+ ```
1306
+
1307
+ #### 陷阱2:VARCHAR长度设置不当
1308
+
1309
+ **问题分析**:
1310
+
1311
+ ```sql
1312
+ -- 常见的长度设置错误
1313
+ CREATE TABLE varchar_length_problems (
1314
+ name VARCHAR(10000), -- 过度分配:浪费存储空间
1315
+ email VARCHAR(50), -- 长度不足:邮箱标准长度320字符
1316
+ phone VARCHAR(255), -- 过度分配:手机号20字符已足够
1317
+ title VARCHAR(100), -- 长度不足:文章标题通常需要200字符
1318
+ description VARCHAR(500000) -- 超大分配:应该使用STRING类型
1319
+ );
1320
+ ```
1321
+
1322
+ **优化解决方案**:
1323
+
1324
+ ```sql
1325
+ -- 基于实际业务需求的合理长度设置
1326
+ CREATE TABLE varchar_length_optimized (
1327
+ name VARCHAR(100), -- 姓名:覆盖99.5%的实际情况
1328
+ email VARCHAR(320), -- 邮箱:RFC5321国际标准长度
1329
+ phone VARCHAR(20), -- 手机:支持国际格式+86-13812345678
1330
+ title VARCHAR(200), -- 标题:平衡SEO需求和存储效率
1331
+ summary VARCHAR(500), -- 摘要:合理的摘要长度
1332
+ description STRING -- 长描述:不确定长度使用STRING
1333
+ ) COMMENT 'VARCHAR长度优化 - 基于实际业务调研的合理设置';
1334
+ ```
1335
+
1336
+ #### 陷阱3:金融计算使用浮点类型
1337
+
1338
+ **风险演示**:
1339
+
1340
+ ```sql
1341
+ -- 浮点类型在金融计算中的精度问题
1342
+ CREATE TABLE financial_precision_risks (
1343
+ account_id INT,
1344
+ balance DOUBLE, -- 风险:浮点精度问题
1345
+ interest_rate FLOAT, -- 风险:复合计算累积误差
1346
+ transaction_amount DOUBLE -- 风险:交易金额计算误差
1347
+ );
1348
+
1349
+ -- 精度问题演示
1350
+ INSERT INTO financial_precision_risks VALUES
1351
+ (1, 0.1 + 0.2, 0.001, 1.0);
1352
+ -- 期望结果:balance = 0.3
1353
+ -- 实际结果:balance = 0.30000000000000004 (精度误差)
1354
+
1355
+ -- 复合计算误差演示
1356
+ SELECT
1357
+ balance * interest_rate as calculated_interest, -- 可能产生精度误差
1358
+ (balance * interest_rate * 12) as annual_interest -- 误差被放大
1359
+ FROM financial_precision_risks;
1360
+ ```
1361
+
1362
+ **正确解决方案**:
1363
+
1364
+ ```sql
1365
+ -- 金融计算使用精确的DECIMAL类型
1366
+ CREATE TABLE financial_precision_correct (
1367
+ account_id INT,
1368
+ balance DECIMAL(15,2), -- 精确:整数部分最多13位(万亿级金额),2位小数
1369
+ interest_rate DECIMAL(8,6), -- 精确:支持利率计算,6位小数精度
1370
+ transaction_amount DECIMAL(15,2), -- 精确:交易金额无精度损失
1371
+
1372
+ -- 不同业务场景的DECIMAL配置
1373
+ daily_limit DECIMAL(10,2), -- 日限额:万级金额
1374
+ annual_fee DECIMAL(8,2), -- 年费:千级金额
1375
+ exchange_rate DECIMAL(10,8) -- 汇率:高精度小数
1376
+ ) COMMENT '金融数据精确计算 - 使用DECIMAL保证计算准确性';
1377
+
1378
+ -- 精确计算验证
1379
+ INSERT INTO financial_precision_correct VALUES
1380
+ (1, 0.30, 0.001000, 1.00, 5000.00, 200.00, 6.78901234);
1381
+
1382
+ -- 精确的复合计算
1383
+ SELECT
1384
+ balance * interest_rate as precise_interest, -- 精确计算
1385
+ balance * interest_rate * 12 as precise_annual, -- 精确的年化计算
1386
+ transaction_amount * exchange_rate as precise_conversion -- 精确的汇率转换
1387
+ FROM financial_precision_correct;
1388
+ ```
1389
+
1390
+ ### 分区设计陷阱
1391
+
1392
+ #### 陷阱4:不支持的分区列类型
1393
+
1394
+ **错误场景**:
1395
+
1396
+ ```sql
1397
+ -- 不支持的分区列类型(测试确认会失败)
1398
+ CREATE TABLE partition_type_errors (
1399
+ id INT,
1400
+ amount DECIMAL(10,2), -- DECIMAL不支持直接分区
1401
+ price DOUBLE, -- DOUBLE不支持分区
1402
+ created_time TIMESTAMP, -- TIMESTAMP不能直接分区
1403
+ location_point STRUCT<lat:DOUBLE,lng:DOUBLE> -- 复杂类型不支持分区
1404
+ )
1405
+ PARTITIONED BY (created_time); -- 失败!
1406
+
1407
+ -- 错误信息示例:
1408
+ -- Unsupported data type for partition transform: timestamp_ltz
1409
+ ```
1410
+
1411
+ **正确解决方案**:
1412
+
1413
+ ```sql
1414
+ -- 使用生成列转换为支持的分区类型
1415
+ CREATE TABLE partition_type_solutions (
1416
+ id INT,
1417
+ amount DECIMAL(10,2),
1418
+ price DOUBLE,
1419
+ created_time TIMESTAMP,
1420
+ location_point STRUCT<lat:DOUBLE,lng:DOUBLE>,
1421
+
1422
+ -- 使用生成列转换TIMESTAMP为STRING(支持分区)
1423
+ date_partition STRING GENERATED ALWAYS AS (
1424
+ date_format(created_time, 'yyyy-MM-dd')
1425
+ ),
1426
+
1427
+ -- 使用生成列转换DECIMAL为分类(支持分区)
1428
+ amount_range STRING GENERATED ALWAYS AS (
1429
+ if(amount < 100, 'small',
1430
+ if(amount < 1000, 'medium', 'large'))
1431
+ ),
1432
+
1433
+ -- 使用生成列提取复杂类型的字段(支持分区)
1434
+ location_region STRING GENERATED ALWAYS AS (
1435
+ if(location_point.lat > 35, 'north', 'south')
1436
+ )
1437
+ )
1438
+ PARTITIONED BY (date_partition) -- 成功:STRING类型支持分区
1439
+ COMMENT '分区类型解决方案 - 使用生成列转换不支持的类型';
1440
+ ```
1441
+
1442
+ #### 陷阱5:动态分区数量超限
1443
+
1444
+ **问题场景**:
1445
+
1446
+ ```sql
1447
+ -- 可能导致动态分区超限的操作
1448
+ INSERT INTO large_partition_table
1449
+ SELECT * FROM source_table_with_many_dates; -- 如果源表包含>2048个不同日期会失败
1450
+
1451
+ -- 错误信息:
1452
+ -- The count of dynamic partitions exceeds the maximum number 2048
1453
+ ```
1454
+
1455
+ **解决方案策略**:
1456
+
1457
+ ```sql
1458
+ -- 策略1:分批按时间范围插入
1459
+ INSERT INTO large_partition_table
1460
+ SELECT * FROM source_table_with_many_dates
1461
+ WHERE event_date BETWEEN '2024-01-01' AND '2024-01-31'; -- 第一批:31个分区
1462
+
1463
+ INSERT INTO large_partition_table
1464
+ SELECT * FROM source_table_with_many_dates
1465
+ WHERE event_date BETWEEN '2024-02-01' AND '2024-02-29'; -- 第二批:29个分区
1466
+
1467
+ -- 策略2:按分区值分批插入
1468
+ INSERT INTO large_partition_table
1469
+ SELECT * FROM source_table_with_many_dates
1470
+ WHERE region IN ('north', 'south', 'east', 'west'); -- 限制为4个分区
1471
+
1472
+ -- 策略3:预先过滤数据
1473
+ WITH filtered_source AS (
1474
+ SELECT *,
1475
+ date_format(event_timestamp, 'yyyy-MM-dd') as date_part
1476
+ FROM source_table_with_many_dates
1477
+ WHERE event_timestamp >= '2024-01-01' -- 预过滤减少分区数
1478
+ AND event_timestamp < '2024-02-01'
1479
+ )
1480
+ INSERT INTO large_partition_table
1481
+ SELECT * FROM filtered_source;
1482
+
1483
+ -- 策略4:应用层循环控制(伪代码)
1484
+ -- for month in ['2024-01', '2024-02', ...]:
1485
+ -- INSERT INTO table SELECT * FROM source WHERE month_partition = month
1486
+ ```
1487
+
1488
+ ### 索引设计陷阱
1489
+
1490
+ #### 陷阱6:索引命名管理
1491
+
1492
+ **重要更新**: 经测试验证,当前版本的云器Lakehouse会强制schema级索引名称唯一性;但不同版本的行为可能不一致(本文"故障排查指南"的"错误2:索引命名管理"记录了同名索引创建成功的情况),因此无论是否强制,都应在schema内使用唯一的索引名称。
1493
+
1494
+ **推荐的命名实践**:
1495
+
1496
+ ```sql
1497
+ -- 使用表名前缀的唯一索引命名
1498
+ CREATE TABLE orders (
1499
+ order_id INT,
1500
+ customer_id INT,
1501
+ order_content STRING
1502
+ );
1503
+ CREATE INVERTED INDEX orders_inv_customer_idx ON TABLE orders(customer_id);
1504
+ CREATE INVERTED INDEX orders_inv_content_idx ON TABLE orders(order_content)
1505
+ PROPERTIES('analyzer'='keyword');
1506
+
1507
+ CREATE TABLE products (
1508
+ product_id INT,
1509
+ customer_id INT,
1510
+ product_description STRING
1511
+ );
1512
+ CREATE INVERTED INDEX products_inv_customer_idx ON TABLE products(customer_id);
1513
+ CREATE INVERTED INDEX products_inv_desc_idx ON TABLE products(product_description)
1514
+ PROPERTIES('analyzer'='chinese');
1515
+
1516
+ -- 推荐的索引命名规范
1517
+ -- 格式:{table_name}_{index_type}_{column_name}_idx
1518
+ -- 示例:users_bloom_email_idx, orders_vec_features_idx
1519
+ ```
1520
+
1521
+ #### 陷阱7:PRIMARY KEY约束与HASH CLUSTERED BY冲突
1522
+
1523
+ **问题场景**:
1524
+
1525
+ ```sql
1526
+ -- PRIMARY KEY约束与HASH CLUSTERED BY冲突
1527
+ CREATE TABLE table_with_conflict (
1528
+ tenant_id VARCHAR(50) PRIMARY KEY,
1529
+ tenant_name VARCHAR(200) NOT NULL,
1530
+ tenant_status TINYINT DEFAULT 1
1531
+ )
1532
+ HASH CLUSTERED BY (tenant_id) -- 与PRIMARY KEY冲突
1533
+ INTO 32 BUCKETS;
1534
+
1535
+ -- 错误信息:CLUSTERED BY definition conflicts with enforced PRIMARY KEY
1536
+ -- or UNIQUE constraints defined at :[31,2], must HASH CLUSTERED BY ... SORTED BY ... ASC
1537
+ -- with all PRIMARY KEY or UNIQUE columns
1538
+ ```
1539
+
1540
+ **解决方案**:
1541
+
1542
+ ```sql
1543
+ -- 方案1:移除PRIMARY KEY约束,使用普通非空列
1544
+ CREATE TABLE solution_remove_pk (
1545
+ tenant_id VARCHAR(50) NOT NULL, -- 移除PRIMARY KEY
1546
+ tenant_name VARCHAR(200) NOT NULL,
1547
+ tenant_status TINYINT DEFAULT 1
1548
+ )
1549
+ HASH CLUSTERED BY (tenant_id)
1550
+ INTO 32 BUCKETS;
1551
+
1552
+ -- 方案2:调整HASH CLUSTERED BY与SORTED BY以符合要求
1553
+ CREATE TABLE solution_adjust_cluster (
1554
+ tenant_id VARCHAR(50) PRIMARY KEY,
1555
+ tenant_name VARCHAR(200) NOT NULL,
1556
+ tenant_status TINYINT DEFAULT 1
1557
+ )
1558
+ HASH CLUSTERED BY (tenant_id) -- 保持与PRIMARY KEY一致
1559
+ SORTED BY (tenant_id ASC) -- 添加排序且为ASC
1560
+ INTO 32 BUCKETS;
1561
+ ```
1562
+
1563
+ #### PRIMARY KEY与分桶策略最佳实践
1564
+
1565
+ 基于测试验证的结果,我们建议遵循以下设计指导:
1566
+
1567
+ 1. **避免同时使用**:在大多数场景下,建议避免同时使用PRIMARY KEY约束和HASH CLUSTERED BY,而是选择其中一种:
1568
+ * 对于需要唯一性约束的场景,使用PRIMARY KEY
1569
+ * 对于需要性能优化的大表,使用HASH CLUSTERED BY和布隆过滤器索引
1570
+
1571
+ 2. **必须同时使用时的规则**:如果业务需要同时使用,必须满足以下全部条件:
1572
+ * HASH CLUSTERED BY的列必须包含PRIMARY KEY的全部列
1573
+ * 必须添加SORTED BY子句
1574
+ * SORTED BY子句必须包含PRIMARY KEY的全部列
1575
+ * SORTED BY的所有PRIMARY KEY列都必须使用ASC排序方向
1576
+
1577
+ 3. **示例参考**:
1578
+
1579
+ ```sql
1580
+ -- 最佳实践1:仅使用PRIMARY KEY(小表推荐)
1581
+ CREATE TABLE customer_profiles (
1582
+ customer_id INT PRIMARY KEY,
1583
+ customer_name VARCHAR(100) NOT NULL,
1584
+ customer_email VARCHAR(200)
1585
+ );
1586
+
1587
+ -- 最佳实践2:仅使用HASH CLUSTERED BY(大表推荐)
1588
+ CREATE TABLE customer_events (
1589
+ event_id BIGINT IDENTITY,
1590
+ customer_id INT NOT NULL,
1591
+ event_type VARCHAR(50),
1592
+ event_time TIMESTAMP
1593
+ )
1594
+ HASH CLUSTERED BY (customer_id)
1595
+ SORTED BY (event_time DESC)
1596
+ INTO 128 BUCKETS;
1597
+
1598
+ -- 创建布隆过滤器索引实现高效查找
1599
+ CREATE BLOOMFILTER INDEX customer_lookup_idx
1600
+ ON TABLE customer_events(customer_id);
1601
+
1602
+ -- 最佳实践3:必须同时使用时的正确配置
1603
+ CREATE TABLE order_items (
1604
+ order_id INT,
1605
+ item_id INT,
1606
+ product_id INT,
1607
+ quantity INT,
1608
+ PRIMARY KEY (order_id, item_id)
1609
+ )
1610
+ HASH CLUSTERED BY (order_id, item_id) -- 包含所有PRIMARY KEY列
1611
+ SORTED BY (order_id ASC, item_id ASC) -- 包含所有PRIMARY KEY列且都是ASC
1612
+ INTO 64 BUCKETS;
1613
+ ```
1614
+
1615
+ #### 陷阱8:ARRAY类型列上错误使用analyzer
1616
+
1617
+ **错误场景**:
1618
+
1619
+ ```sql
1620
+ -- 在ARRAY类型列上使用analyzer会导致错误
1621
+ CREATE TABLE array_column_table (
1622
+ id INT,
1623
+ tags ARRAY<STRING>
1624
+ );
1625
+
1626
+ CREATE INVERTED INDEX tags_analyzer_idx
1627
+ ON TABLE array_column_table(tags)
1628
+ PROPERTIES ('analyzer' = 'keyword'); -- 失败!ARRAY类型不支持analyzer参数
1629
+
1630
+ -- 错误信息示例:
1631
+ -- invalid.inverted.index.analyzer.type, array<string>
1632
+ ```
1633
+
1634
+ **正确解决方案**:
1635
+
1636
+ ```sql
1637
+ -- 正确:ARRAY类型列上创建倒排索引时不指定analyzer
1638
+ CREATE INVERTED INDEX tags_idx
1639
+ ON TABLE array_column_table(tags); -- 成功:不指定analyzer
1640
+
1641
+ -- 或者使用STRING类型存储并使用分隔符
1642
+ CREATE TABLE string_tags_table (
1643
+ id INT,
1644
+ tags_str STRING -- 使用逗号分隔的标签字符串
1645
+ );
1646
+
1647
+ CREATE INVERTED INDEX tags_str_idx
1648
+ ON TABLE string_tags_table(tags_str)
1649
+ PROPERTIES ('analyzer' = 'keyword'); -- 成功:STRING类型支持analyzer
1650
+ ```
1651
+
1652
+ ### 生成列设计陷阱
1653
+
1654
+ #### 陷阱9:生成列使用非确定性函数
1655
+
1656
+ **错误场景**:
1657
+
1658
+ ```sql
1659
+ -- 生成列中使用非确定性函数(测试确认会失败)
1660
+ CREATE TABLE generated_column_errors (
1661
+ id INT,
1662
+ event_data VARCHAR(1000),
1663
+
1664
+ -- 以下生成列都会导致创建失败
1665
+ auto_timestamp TIMESTAMP GENERATED ALWAYS AS (current_timestamp()), -- 失败
1666
+ random_id DOUBLE GENERATED ALWAYS AS (random()), -- 失败
1667
+ current_user_name STRING GENERATED ALWAYS AS (current_user()), -- 失败
1668
+ uuid_value STRING GENERATED ALWAYS AS (uuid()) -- 失败
1669
+ );
1670
+
1671
+ -- 错误信息:Generated column auto_timestamp only contains built-in/scalar/deterministic function
1672
+ ```
1673
+
1674
+ **正确解决方案**:
1675
+
1676
+ ```sql
1677
+ -- 区分生成列和默认值的正确使用
1678
+ CREATE TABLE generated_column_solutions (
1679
+ id INT,
1680
+ event_time TIMESTAMP,
1681
+ event_data VARCHAR(1000),
1682
+ amount DECIMAL(10,2),
1683
+
1684
+ -- 使用DEFAULT值代替生成列(适用于非确定性函数)
1685
+ created_timestamp TIMESTAMP DEFAULT current_timestamp(),
1686
+ random_seed DOUBLE DEFAULT random(),
1687
+ creator_name STRING DEFAULT current_user(),
1688
+
1689
+ -- 生成列使用确定性函数(从其他列计算得出)
1690
+ event_year INT GENERATED ALWAYS AS (year(event_time)),
1691
+ event_date STRING GENERATED ALWAYS AS (date_format(event_time, 'yyyy-MM-dd')),
1692
+ data_length INT GENERATED ALWAYS AS (length(event_data)),
1693
+ amount_category STRING GENERATED ALWAYS AS (
1694
+ if(amount < 100, 'small',
1695
+ if(amount < 1000, 'medium', 'large'))
1696
+ ),
1697
+ display_info STRING GENERATED ALWAYS AS (
1698
+ concat('[', string(id), '] ', substr(event_data, 1, 50))
1699
+ )
1700
+ ) COMMENT '生成列正确使用 - 区分确定性计算和默认值设置';
1701
+ ```
1702
+
1703
+ ***
1704
+
1705
+ ## 🔧 故障排查指南
1706
+
1707
+ ### 常见错误诊断和解决方案
1708
+
1709
+ #### 错误1:IDENTITY列类型错误
1710
+
1711
+ **错误信息**:
1712
+
1713
+ ```
1714
+ invalid identity column type int, currently only BIGINT is supported
1715
+ ```
1716
+
1717
+ **原因分析**: 尝试在非BIGINT列上使用IDENTITY约束
1718
+
1719
+ **诊断步骤**:
1720
+
1721
+ 1. 检查CREATE TABLE语句中的IDENTITY列定义
1722
+ 2. 确认IDENTITY列的数据类型是否为BIGINT
1723
+ 3. 检查是否误用了INT、SMALLINT等其他数值类型
1724
+
1725
+ **解决方案**:
1726
+
1727
+ ```sql
1728
+ -- 错误用法
1729
+ CREATE TABLE wrong_table (id INT IDENTITY, name VARCHAR(50));
1730
+
1731
+ -- 正确用法
1732
+ CREATE TABLE correct_table (id BIGINT IDENTITY, name VARCHAR(50));
1733
+ ```
1734
+
1735
+ #### 错误2:索引命名管理
1736
+
1737
+ **重要更新**: 经实际测试验证,当前版本的云器Lakehouse在索引命名方面**可能不严格强制schema级唯一性**。虽然测试中相同名称的索引可以创建成功,但为了代码的可维护性和未来版本兼容性,仍建议使用唯一的索引命名。
1738
+
1739
+ **最佳实践**:
1740
+
1741
+ ```sql
1742
+ -- 推荐的唯一索引命名
1743
+ CREATE INVERTED INDEX table1_inv_content_idx ON TABLE table1(content);
1744
+ CREATE INVERTED INDEX table2_inv_content_idx ON TABLE table2(content);
1745
+
1746
+ -- 命名规范:{table_name}_{index_type}_{column_name}_idx
1747
+ ```
1748
+
1749
+ #### 错误3:生成列函数不支持
1750
+
1751
+ **错误信息**:
1752
+
1753
+ ```
1754
+ Generated column auto_timestamp only contains built-in/scalar/deterministic function
1755
+ ```
1756
+
1757
+ **原因分析**: 生成列中使用了非确定性函数
1758
+
1759
+ **诊断步骤**:
1760
+
1761
+ 1. 检查生成列表达式中使用的函数
1762
+ 2. 对照确定性函数支持列表
1763
+ 3. 区分默认值和生成列的使用场景
1764
+
1765
+ **解决方案**:
1766
+
1767
+ ```sql
1768
+ -- 错误:在生成列中使用非确定性函数
1769
+ created_at TIMESTAMP GENERATED ALWAYS AS (current_timestamp())
1770
+
1771
+ -- 正确:使用默认值
1772
+ created_at TIMESTAMP DEFAULT current_timestamp()
1773
+
1774
+ -- 正确:生成列使用确定性函数
1775
+ date_part STRING GENERATED ALWAYS AS (date_format(some_timestamp, 'yyyy-MM-dd'))
1776
+ ```
1777
+
1778
+ #### 错误4:分区类型不支持
1779
+
1780
+ **错误信息**:
1781
+
1782
+ ```
1783
+ Unsupported data type for partition transform: timestamp_ltz
1784
+ ```
1785
+
1786
+ **原因分析**: 使用了不支持分区的数据类型
1787
+
1788
+ **诊断步骤**:
1789
+
1790
+ 1. 检查分区列的数据类型
1791
+ 2. 对照支持分区的数据类型列表
1792
+ 3. 评估是否可以使用生成列转换
1793
+
1794
+ **解决方案**:
1795
+
1796
+ ```sql
1797
+ -- 错误:直接使用TIMESTAMP分区
1798
+ PARTITIONED BY (created_time)
1799
+
1800
+ -- 正确:使用生成列转换
1801
+ CREATE TABLE correct_partition (
1802
+ created_time TIMESTAMP,
1803
+ date_part STRING GENERATED ALWAYS AS (date_format(created_time, 'yyyy-MM-dd'))
1804
+ ) PARTITIONED BY (date_part);
1805
+ ```
1806
+
1807
+ #### 错误5:动态分区数量超限
1808
+
1809
+ **错误信息**:
1810
+
1811
+ ```
1812
+ The count of dynamic partitions exceeds the maximum number 2048
1813
+ ```
1814
+
1815
+ **原因分析**: 单次插入操作涉及的动态分区数量超过2048个
1816
+
1817
+ **诊断步骤**:
1818
+
1819
+ 1. 分析源数据的分区键分布
1820
+ 2. 统计涉及的不同分区值数量
1821
+ 3. 评估数据插入策略
1822
+
1823
+ **解决方案**:
1824
+
1825
+ ```sql
1826
+ -- 查询源数据的分区分布
1827
+ SELECT partition_column, COUNT(*)
1828
+ FROM source_table
1829
+ GROUP BY partition_column
1830
+ ORDER BY COUNT(*) DESC;
1831
+
1832
+ -- 分批插入数据
1833
+ INSERT INTO target_table
1834
+ SELECT * FROM source_table
1835
+ WHERE date_column BETWEEN '2024-01-01' AND '2024-01-31';
1836
+ ```
1837
+
1838
+ #### 错误6:ARRAY类型列索引指定analyzer
1839
+
1840
+ **错误信息**:
1841
+
1842
+ ```
1843
+ invalid.inverted.index.analyzer.type, array<string>
1844
+ ```
1845
+
1846
+ **原因分析**: 在ARRAY类型列上创建倒排索引时指定了analyzer参数
1847
+
1848
+ **诊断步骤**:
1849
+
1850
+ 1. 检查CREATE INVERTED INDEX语句
1851
+ 2. 确认索引列是否为ARRAY类型
1852
+ 3. 检查是否包含analyzer参数
1853
+
1854
+ **解决方案**:
1855
+
1856
+ ```sql
1857
+ -- 错误:ARRAY类型指定analyzer
1858
+ CREATE INVERTED INDEX tags_analyzer_idx
1859
+ ON TABLE array_column_table(tags)
1860
+ PROPERTIES ('analyzer' = 'keyword');
1861
+
1862
+ -- 正确:不指定analyzer
1863
+ CREATE INVERTED INDEX tags_idx
1864
+ ON TABLE array_column_table(tags);
1865
+ ```
1866
+
1867
+ ### 性能问题诊断
1868
+
1869
+ #### 查询性能慢
1870
+
1871
+ **可能原因和解决方案**:
1872
+
1873
+ 1. **分区剪枝未生效**
1874
+ ```sql
1875
+ -- 检查查询是否使用分区列
1876
+ EXPLAIN SELECT * FROM table_name WHERE partition_column = 'value';
1877
+
1878
+ -- 确保WHERE条件包含分区列
1879
+ WHERE date_partition = '2024-01-15' -- 而不是 WHERE original_date = '2024-01-15'
1880
+ ```
1881
+
1882
+ 2. **缺少合适的索引**
1883
+ ```sql
1884
+ -- 为高频查询列创建索引
1885
+ CREATE BLOOMFILTER INDEX table_column_idx ON TABLE table_name(column_name);
1886
+ ```
1887
+
1888
+ 3. **分桶策略不当**
1889
+ ```sql
1890
+ -- 检查分桶列的基数分布
1891
+ SELECT bucket_column, COUNT(*)
1892
+ FROM table_name
1893
+ GROUP BY bucket_column
1894
+ ORDER BY COUNT(*) DESC;
1895
+
1896
+ -- 选择高基数、分布均匀的列作为分桶键
1897
+ ```
1898
+
1899
+ #### 写入性能差
1900
+
1901
+ **可能原因和解决方案**:
1902
+
1903
+ 1. **分桶数量设置不当**
1904
+ ```sql
1905
+ -- 小表使用过多分桶 → 减少分桶数
1906
+ -- 大表使用过少分桶 → 增加分桶数
1907
+ ```
1908
+
1909
+ 2. **数据倾斜问题**
1910
+ ```sql
1911
+ -- 选择更均匀分布的分桶键
1912
+ HASH CLUSTERED BY (more_uniform_column)
1913
+ ```
1914
+
1915
+ 3. **过多索引维护开销**
1916
+ ```sql
1917
+ -- 删除不必要的索引
1918
+ DROP INDEX unnecessary_index_name;
1919
+ ```
1920
+
1921
+ ### 错误预防检查清单
1922
+
1923
+ #### 表创建前检查
1924
+
1925
+ * [ ] IDENTITY列使用BIGINT类型
1926
+ * [ ] 分区列类型在支持列表中
1927
+ * [ ] 生成列仅使用确定性函数
1928
+ * [ ] VARCHAR长度设置合理
1929
+ * [ ] 金融字段使用DECIMAL类型
1930
+
1931
+ #### 索引创建前检查
1932
+
1933
+ * [ ] 索引名称具有唯一性和描述性
1934
+ * [ ] 字符串类型列的倒排索引指定了合适的分词器
1935
+ * [ ] ARRAY类型列不指定analyzer
1936
+ * [ ] 向量索引参数配置正确
1937
+ * [ ] PRIMARY KEY与HASH CLUSTERED BY配置兼容
1938
+
1939
+ #### 数据插入前检查
1940
+
1941
+ * [ ] 评估动态分区数量是否超限
1942
+ * [ ] 检查复杂类型数据的插入语法
1943
+ * [ ] 验证数据类型匹配
1944
+ * [ ] 确认约束条件满足
1945
+
1946
+ ***
1947
+
1948
+ ## 📋 设计评审检查清单
1949
+
1950
+ ### 表结构设计检查
1951
+
1952
+ #### 数据类型设计
1953
+
1954
+ * [ ] **IDENTITY列类型**: 统一使用BIGINT IDENTITY(产品限制)
1955
+ * [ ] **金融数据类型**: 使用DECIMAL而非FLOAT/DOUBLE(精度保证)
1956
+ * [ ] **字符串长度**: 根据实际业务需求设置合理长度(存储优化)
1957
+ * [ ] **向量类型语法**: 使用正确的VECTOR(scalar\_type, dimension)格式
1958
+ * [ ] **复杂类型插入**: STRUCT使用struct()或named\_struct()函数(语法正确)
1959
+
1960
+ #### 约束和默认值
1961
+
1962
+ * [ ] **NOT NULL约束**: 核心业务字段添加NOT NULL约束
1963
+ * [ ] **默认值设置**: 系统字段设置合理默认值
1964
+ * [ ] **生成列函数**: 仅使用确定性标量函数(已验证支持列表)
1965
+ * [ ] **主键设计**: 避免使用主键(除非特殊需求)
1966
+
1967
+ #### 分区策略
1968
+
1969
+ * [ ] **分区列类型**: 使用支持分区的数据类型(已确认支持列表)
1970
+ * [ ] **分区粒度**: 选择合适的分区粒度避免过多小分区
1971
+ * [ ] **生成列分区**: TIMESTAMP等不支持类型使用生成列转换
1972
+ * [ ] **动态分区限制**: 单次操作控制在2048个分区内
1973
+
1974
+ ### 性能优化检查
1975
+
1976
+ #### 分桶设计
1977
+
1978
+ * [ ] **分桶列选择**: 选择高基数、分布均匀的列(基于测试验证)
1979
+ * [ ] **分桶数量**: 根据数据规模设置合理分桶数(已提供测试验证的建议)
1980
+ * [ ] **排序策略**: 选择支持主要查询场景的排序列
1981
+ * [ ] **组合分桶**: 大表考虑使用多列组合分桶
1982
+
1983
+ #### 索引策略
1984
+
1985
+ * [ ] **索引命名**: 遵循唯一命名规范(建议仍然遵循)
1986
+ * [ ] **向量索引**: 距离函数和参数针对业务场景优化(距离函数支持确认)
1987
+ * [ ] **倒排索引**: 字符串类型指定合适的分词器(已验证)
1988
+ * [ ] **ARRAY索引**: ARRAY类型不指定analyzer(已验证限制)
1989
+ * [ ] **布隆过滤器**: 用于高基数列的快速过滤(已验证)
1990
+ * [ ] **PRIMARY KEY与分桶**: 确保配置兼容(已确认不兼容的配置会导致建表失败)
1991
+
1992
+ #### 查询优化
1993
+
1994
+ * [ ] **分区剪枝**: 主要查询能够利用分区剪枝
1995
+ * [ ] **分桶定位**: JOIN键与分桶列对齐
1996
+ * [ ] **索引利用**: 常用过滤条件有对应索引支持
1997
+ * [ ] **多维查询**: 复杂查询设计多层次索引策略
1998
+
1999
+ ### 运维和扩展性检查
2000
+
2001
+ #### 可维护性
2002
+
2003
+ * [ ] **命名规范**: 表名、字段名、索引名遵循一致规范
2004
+ * [ ] **注释完整**: 表和关键字段有清晰的业务注释
2005
+ * [ ] **生命周期**: 设置合理的数据保留期策略
2006
+ * [ ] **版本管理**: 重要设计决策有文档记录
2007
+
2008
+ #### 扩展性
2009
+
2010
+ * [ ] **数据增长**: 设计考虑未来数据量增长
2011
+ * [ ] **业务扩展**: 预留扩展字段空间(如JSON列)
2012
+ * [ ] **索引扩展**: 索引策略支持新增查询模式
2013
+ * [ ] **分桶预留**: 分桶数量预留扩展余量
2014
+
2015
+ #### 故障处理
2016
+
2017
+ * [ ] **错误预防**: 遵循常见陷阱的避免策略
2018
+ * [ ] **监控设置**: 建立性能和容量监控
2019
+ * [ ] **备份策略**: 制定数据备份和恢复方案
2020
+ * [ ] **应急预案**: 准备常见问题的处理方案
2021
+
2022
+ ### 成本优化检查
2023
+
2024
+ #### 存储成本
2025
+
2026
+ * [ ] **类型优化**: 使用存储空间最小的合适类型
2027
+ * [ ] **长度控制**: VARCHAR长度基于实际需求设置
2028
+ * [ ] **压缩策略**: 合理使用向量索引压缩参数
2029
+ * [ ] **生命周期**: 设置自动数据清理策略
2030
+
2031
+ #### 计算成本
2032
+
2033
+ * [ ] **索引数量**: 避免创建过多不必要的索引
2034
+ * [ ] **查询优化**: 确保查询能够高效执行
2035
+ * [ ] **分区策略**: 避免过多小分区增加元数据开销
2036
+ * [ ] **资源配置**: 分桶数量与集群资源匹配
2037
+
2038
+ ***
2039
+
2040
+ ## 🏗️ 企业级设计模式实战
2041
+
2042
+ ### 模式1:事件溯源架构(完整实现)
2043
+
2044
+ **适用场景**: 金融交易、审计合规、用户行为分析等需要完整历史记录的业务
2045
+
2046
+ ```sql
2047
+ -- 事件存储主表
2048
+ CREATE TABLE event_store_transactions (
2049
+ event_id BIGINT IDENTITY,
2050
+
2051
+ -- 事件标识信息
2052
+ aggregate_id VARCHAR(100) NOT NULL, -- 聚合根ID(用户ID、订单ID等)
2053
+ aggregate_type VARCHAR(50) NOT NULL, -- 聚合类型(User、Order、Payment等)
2054
+ event_type VARCHAR(50) NOT NULL, -- 事件类型(Created、Updated、Deleted等)
2055
+ event_version INT NOT NULL DEFAULT 1, -- 事件版本,支持模式演进
2056
+
2057
+ -- 事件时间信息
2058
+ event_timestamp TIMESTAMP NOT NULL, -- 业务事件发生时间
2059
+ ingestion_timestamp TIMESTAMP DEFAULT current_timestamp(), -- 系统摄入时间
2060
+
2061
+ -- 事件数据和元数据
2062
+ event_data JSON NOT NULL, -- 事件详细数据
2063
+ event_metadata JSON DEFAULT '{}', -- 事件元数据(IP、设备等)
2064
+
2065
+ -- 链路追踪信息
2066
+ causation_id VARCHAR(100), -- 因果关系ID
2067
+ correlation_id VARCHAR(100), -- 关联ID,用于业务流程追踪
2068
+ session_id VARCHAR(100), -- 会话ID
2069
+
2070
+ -- 业务上下文
2071
+ tenant_id VARCHAR(50), -- 多租户场景的租户ID
2072
+ user_id VARCHAR(100), -- 操作用户ID
2073
+ source_system VARCHAR(50), -- 来源系统标识
2074
+
2075
+ -- 分区和性能优化
2076
+ date_partition STRING GENERATED ALWAYS AS (date_format(event_timestamp, 'yyyy-MM-dd')),
2077
+ hour_partition INT GENERATED ALWAYS AS (hour(event_timestamp))
2078
+ )
2079
+ PARTITIONED BY (date_partition)
2080
+ HASH CLUSTERED BY (aggregate_id) -- 按聚合根分桶,支持实体重建
2081
+ SORTED BY (event_timestamp ASC, event_version ASC) -- 保证事件顺序
2082
+ INTO 512 BUCKETS
2083
+ COMMENT '事件溯源存储表 - 记录所有业务事件,支持完整的审计追踪';
2084
+
2085
+ -- 事件查询优化索引
2086
+ CREATE BLOOMFILTER INDEX events_aggregate_idx ON TABLE event_store_transactions(aggregate_id);
2087
+ CREATE BLOOMFILTER INDEX events_type_idx ON TABLE event_store_transactions(event_type);
2088
+ CREATE BLOOMFILTER INDEX events_tenant_idx ON TABLE event_store_transactions(tenant_id);
2089
+ CREATE INVERTED INDEX events_data_search_idx ON TABLE event_store_transactions(event_data)
2090
+ PROPERTIES ('analyzer' = 'unicode');
2091
+
2092
+ -- 快照表(性能优化)
2093
+ CREATE TABLE aggregate_snapshots (
2094
+ snapshot_id BIGINT IDENTITY,
2095
+ aggregate_id VARCHAR(100) NOT NULL,
2096
+ aggregate_type VARCHAR(50) NOT NULL,
2097
+ snapshot_version INT NOT NULL,
2098
+
2099
+ -- 快照数据
2100
+ snapshot_data JSON NOT NULL, -- 聚合根的完整状态快照
2101
+
2102
+ -- 快照元信息
2103
+ snapshot_timestamp TIMESTAMP NOT NULL,
2104
+ last_event_id BIGINT NOT NULL, -- 快照包含的最后事件ID
2105
+ last_event_version INT NOT NULL, -- 快照包含的最后事件版本
2106
+
2107
+ -- 性能优化
2108
+ created_at TIMESTAMP DEFAULT current_timestamp(),
2109
+
2110
+ date_partition STRING GENERATED ALWAYS AS (date_format(snapshot_timestamp, 'yyyy-MM-dd'))
2111
+ )
2112
+ PARTITIONED BY (date_partition)
2113
+ HASH CLUSTERED BY (aggregate_id)
2114
+ SORTED BY (snapshot_timestamp DESC)
2115
+ INTO 128 BUCKETS
2116
+ COMMENT '聚合快照表 - 定期保存聚合状态,优化重建性能';
2117
+
2118
+ -- 设置数据生命周期
2119
+ ALTER TABLE event_store_transactions SET TBLPROPERTIES ('data_lifecycle' = '2555'); -- 7年保留
2120
+ ALTER TABLE aggregate_snapshots SET TBLPROPERTIES ('data_lifecycle' = '365'); -- 1年保留
2121
+ ```
2122
+
2123
+ ### 模式2:实时数据湖架构(Lambda改进版)
2124
+
2125
+ **适用场景**: 实时分析、大数据处理、机器学习特征工程
2126
+
2127
+ ```sql
2128
+ -- 实时数据流层(Speed Layer)
2129
+ CREATE TABLE realtime_data_stream (
2130
+ stream_id BIGINT IDENTITY,
2131
+
2132
+ -- 数据源标识
2133
+ source_system VARCHAR(50) NOT NULL,
2134
+ data_type VARCHAR(50) NOT NULL, -- metrics, events, logs等
2135
+
2136
+ -- 业务标识
2137
+ user_id INT,
2138
+ session_id VARCHAR(100),
2139
+ entity_id VARCHAR(100),
2140
+
2141
+ -- 实时数据
2142
+ raw_data JSON NOT NULL, -- 原始数据
2143
+ processed_data JSON, -- 预处理后数据
2144
+
2145
+ -- 时间信息
2146
+ event_timestamp TIMESTAMP NOT NULL, -- 业务时间
2147
+ ingestion_timestamp TIMESTAMP DEFAULT current_timestamp(), -- 摄入时间
2148
+ processing_timestamp TIMESTAMP, -- 处理时间
2149
+
2150
+ -- 数据质量
2151
+ data_quality_score DECIMAL(3,2), -- 数据质量评分
2152
+ validation_errors ARRAY<STRING>, -- 验证错误列表
2153
+
2154
+ -- 实时分区(按小时)
2155
+ hour_partition STRING GENERATED ALWAYS AS (
2156
+ date_format(event_timestamp, 'yyyy-MM-dd-HH')
2157
+ )
2158
+ )
2159
+ PARTITIONED BY (hour_partition)
2160
+ HASH CLUSTERED BY (user_id)
2161
+ SORTED BY (event_timestamp DESC)
2162
+ INTO 1024 BUCKETS
2163
+ COMMENT '实时数据流表 - Lambda架构速度层,处理流式数据';
2164
+
2165
+ -- 实时查询优化
2166
+ CREATE BLOOMFILTER INDEX realtime_user_idx ON TABLE realtime_data_stream(user_id);
2167
+ CREATE BLOOMFILTER INDEX realtime_source_idx ON TABLE realtime_data_stream(source_system);
2168
+ CREATE INVERTED INDEX realtime_data_search_idx ON TABLE realtime_data_stream(raw_data)
2169
+ PROPERTIES ('analyzer' = 'unicode');
2170
+
2171
+ -- 批处理聚合层(Batch Layer)
2172
+ CREATE TABLE batch_aggregated_analytics (
2173
+ agg_id BIGINT IDENTITY,
2174
+
2175
+ -- 聚合维度
2176
+ user_id INT NOT NULL,
2177
+ data_type VARCHAR(50) NOT NULL,
2178
+ source_system VARCHAR(50) NOT NULL,
2179
+
2180
+ -- 时间窗口
2181
+ window_start TIMESTAMP NOT NULL,
2182
+ window_end TIMESTAMP NOT NULL,
2183
+ window_type VARCHAR(20) NOT NULL, -- HOUR, DAY, WEEK, MONTH
2184
+
2185
+ -- 聚合指标
2186
+ event_count INT,
2187
+ unique_sessions INT,
2188
+ total_duration BIGINT, -- 毫秒
2189
+ avg_quality_score DECIMAL(5,3),
2190
+
2191
+ -- 统计指标
2192
+ min_value DOUBLE,
2193
+ max_value DOUBLE,
2194
+ avg_value DOUBLE,
2195
+ std_deviation DOUBLE,
2196
+ percentile_50 DOUBLE,
2197
+ percentile_95 DOUBLE,
2198
+ percentile_99 DOUBLE,
2199
+
2200
+ -- 业务指标
2201
+ conversion_rate DECIMAL(5,4),
2202
+ error_rate DECIMAL(5,4),
2203
+
2204
+ -- 批处理元信息
2205
+ batch_id VARCHAR(100),
2206
+ batch_timestamp TIMESTAMP DEFAULT current_timestamp(),
2207
+ processing_version VARCHAR(20) DEFAULT '2.2',
2208
+
2209
+ date_partition STRING GENERATED ALWAYS AS (date_format(window_start, 'yyyy-MM-dd'))
2210
+ )
2211
+ PARTITIONED BY (date_partition)
2212
+ HASH CLUSTERED BY (user_id, data_type)
2213
+ SORTED BY (window_start DESC)
2214
+ INTO 256 BUCKETS
2215
+ COMMENT '批处理聚合表 - Lambda架构批处理层,提供准确的历史分析';
2216
+
2217
+ -- 服务层统一视图(Serving Layer)
2218
+ CREATE TABLE serving_layer_unified_view (
2219
+ view_id BIGINT IDENTITY,
2220
+
2221
+ -- 标识信息
2222
+ user_id INT NOT NULL,
2223
+ metric_name VARCHAR(100) NOT NULL,
2224
+
2225
+ -- 实时数据(最近1小时)
2226
+ realtime_value DOUBLE,
2227
+ realtime_timestamp TIMESTAMP,
2228
+ realtime_confidence DECIMAL(3,2),
2229
+
2230
+ -- 批处理数据(历史聚合)
2231
+ batch_value DOUBLE,
2232
+ batch_timestamp TIMESTAMP,
2233
+ batch_window_type VARCHAR(20),
2234
+
2235
+ -- 统一结果(智能合并)
2236
+ unified_value DOUBLE,
2237
+ data_source VARCHAR(20), -- realtime, batch, hybrid
2238
+ confidence_level DECIMAL(3,2),
2239
+
2240
+ -- 更新信息
2241
+ last_updated TIMESTAMP DEFAULT current_timestamp(),
2242
+
2243
+ date_partition STRING GENERATED ALWAYS AS (date_format(last_updated, 'yyyy-MM-dd'))
2244
+ )
2245
+ PARTITIONED BY (date_partition)
2246
+ HASH CLUSTERED BY (user_id)
2247
+ SORTED BY (last_updated DESC)
2248
+ INTO 128 BUCKETS
2249
+ COMMENT '服务层统一视图 - 合并实时和批处理结果,对外提供统一查询接口';
2250
+
2251
+ -- 设置不同层的数据生命周期
2252
+ ALTER TABLE realtime_data_stream SET TBLPROPERTIES ('data_lifecycle' = '7'); -- 实时数据7天
2253
+ ALTER TABLE batch_aggregated_analytics SET TBLPROPERTIES ('data_lifecycle' = '365'); -- 批处理数据1年
2254
+ ALTER TABLE serving_layer_unified_view SET TBLPROPERTIES ('data_lifecycle' = '90'); -- 服务层3个月
2255
+ ```
2256
+
2257
+ ### 模式3:多租户SaaS数据架构(企业级)
2258
+
2259
+ **适用场景**: 企业SaaS平台、多租户应用、数据隔离要求严格的业务
2260
+
2261
+ ```sql
2262
+ -- 租户主数据表
2263
+ CREATE TABLE saas_tenant_registry (
2264
+ tenant_id VARCHAR(50) NOT NULL, -- 移除PRIMARY KEY以兼容HASH CLUSTERED BY
2265
+ tenant_name VARCHAR(200) NOT NULL,
2266
+
2267
+ -- 租户基本信息
2268
+ subscription_plan VARCHAR(50) NOT NULL, -- free, basic, premium, enterprise
2269
+ tenant_status TINYINT DEFAULT 1, -- 1=active, 0=suspended, 2=trial
2270
+
2271
+ -- 配置信息
2272
+ data_region VARCHAR(20) DEFAULT 'default', -- 数据存储区域
2273
+ schema_version VARCHAR(10) DEFAULT '2.2', -- 租户schema版本
2274
+ feature_flags JSON DEFAULT '{}', -- 功能开关配置
2275
+ quota_settings JSON DEFAULT '{}', -- 配额限制设置
2276
+
2277
+ -- 租户元数据
2278
+ created_at TIMESTAMP DEFAULT current_timestamp(),
2279
+ updated_at TIMESTAMP,
2280
+
2281
+ -- 联系信息
2282
+ admin_email VARCHAR(320),
2283
+ billing_contact JSON
2284
+ )
2285
+ HASH CLUSTERED BY (tenant_id)
2286
+ INTO 32 BUCKETS
2287
+ COMMENT '租户注册表 - 管理所有租户的基本信息和配置';
2288
+
2289
+ -- 多租户业务数据表(核心表)
2290
+ CREATE TABLE saas_multi_tenant_data (
2291
+ record_id BIGINT IDENTITY,
2292
+ tenant_id VARCHAR(50) NOT NULL,
2293
+
2294
+ -- 业务实体信息
2295
+ entity_type VARCHAR(50) NOT NULL, -- user, order, product, invoice等
2296
+ entity_id VARCHAR(100) NOT NULL, -- 在租户内的实体ID
2297
+ entity_status TINYINT DEFAULT 1, -- 实体状态
2298
+
2299
+ -- 业务数据
2300
+ core_data JSON NOT NULL, -- 核心业务数据
2301
+ extended_data JSON DEFAULT '{}', -- 扩展数据
2302
+ custom_fields JSON DEFAULT '{}', -- 租户自定义字段
2303
+
2304
+ -- 数据分类和标签
2305
+ data_category VARCHAR(50), -- 数据分类
2306
+ tags ARRAY<STRING>, -- 业务标签
2307
+ priority_level TINYINT DEFAULT 1, -- 优先级:1=normal, 2=high, 3=critical
2308
+
2309
+ -- 审计信息
2310
+ created_by VARCHAR(100),
2311
+ updated_by VARCHAR(100),
2312
+ created_at TIMESTAMP DEFAULT current_timestamp(),
2313
+ updated_at TIMESTAMP,
2314
+ version_number INT DEFAULT 1,
2315
+
2316
+ -- 数据治理
2317
+ data_classification VARCHAR(20) DEFAULT 'internal', -- public, internal, confidential, restricted
2318
+ retention_policy VARCHAR(50), -- 数据保留策略
2319
+
2320
+ -- 性能优化
2321
+ tenant_partition STRING GENERATED ALWAYS AS (tenant_id)
2322
+ )
2323
+ PARTITIONED BY (tenant_partition) -- 租户级数据隔离
2324
+ HASH CLUSTERED BY (entity_id) -- 实体维度分桶
2325
+ SORTED BY (updated_at DESC, priority_level DESC) -- 最新和高优先级数据优先
2326
+ INTO 256 BUCKETS
2327
+ COMMENT '多租户业务数据表 - 实现租户级数据隔离和高效查询';
2328
+
2329
+ -- 多租户查询优化索引
2330
+ CREATE BLOOMFILTER INDEX saas_entity_type_idx ON TABLE saas_multi_tenant_data(entity_type);
2331
+ CREATE BLOOMFILTER INDEX saas_entity_id_idx ON TABLE saas_multi_tenant_data(entity_id);
2332
+ CREATE INVERTED INDEX saas_tags_idx ON TABLE saas_multi_tenant_data(tags);
2333
+ CREATE INVERTED INDEX saas_core_data_idx ON TABLE saas_multi_tenant_data(core_data)
2334
+ PROPERTIES ('analyzer' = 'unicode');
2335
+
2336
+ -- 租户使用统计表(计费和监控)
2337
+ CREATE TABLE saas_tenant_usage_stats (
2338
+ usage_id BIGINT IDENTITY,
2339
+ tenant_id VARCHAR(50) NOT NULL,
2340
+
2341
+ -- 统计时间窗口
2342
+ stat_date DATE NOT NULL,
2343
+ stat_hour TINYINT, -- 0-23,NULL表示日级统计
2344
+
2345
+ -- 使用量统计
2346
+ api_calls_count INT DEFAULT 0,
2347
+ storage_bytes_used BIGINT DEFAULT 0,
2348
+ data_transfer_bytes BIGINT DEFAULT 0,
2349
+ compute_seconds_used INT DEFAULT 0,
2350
+
2351
+ -- 功能使用统计
2352
+ active_users_count INT DEFAULT 0,
2353
+ unique_sessions_count INT DEFAULT 0,
2354
+ feature_usage_stats JSON DEFAULT '{}',
2355
+
2356
+ -- 性能指标
2357
+ avg_response_time_ms INT,
2358
+ error_rate DECIMAL(5,4),
2359
+ availability_percentage DECIMAL(5,2),
2360
+
2361
+ -- 成本分摊
2362
+ estimated_cost_usd DECIMAL(10,4),
2363
+
2364
+ -- 更新信息
2365
+ last_updated TIMESTAMP DEFAULT current_timestamp(),
2366
+
2367
+ date_partition STRING GENERATED ALWAYS AS (string(stat_date))
2368
+ )
2369
+ PARTITIONED BY (date_partition)
2370
+ HASH CLUSTERED BY (tenant_id)
2371
+ SORTED BY (stat_date DESC, stat_hour DESC)
2372
+ INTO 64 BUCKETS
2373
+ COMMENT '租户使用统计表 - 支持计费、监控和资源管理';
2374
+
2375
+ -- 设置数据生命周期策略
2376
+ ALTER TABLE saas_multi_tenant_data SET TBLPROPERTIES ('data_lifecycle' = '1095'); -- 3年业务数据
2377
+ ALTER TABLE saas_tenant_usage_stats SET TBLPROPERTIES ('data_lifecycle' = '730'); -- 2年统计数据
2378
+ ```
2379
+
2380
+ ### 模式4:IoT时序数据架构(工业级)
2381
+
2382
+ **适用场景**: 工业IoT、智能制造、设备监控、传感器数据处理
2383
+
2384
+ ```sql
2385
+ -- 设备主数据表
2386
+ CREATE TABLE iot_device_registry (
2387
+ device_id VARCHAR(100) NOT NULL,
2388
+
2389
+ -- 设备基本信息
2390
+ device_name VARCHAR(200),
2391
+ device_type VARCHAR(50) NOT NULL, -- sensor, actuator, gateway, edge
2392
+ device_model VARCHAR(100),
2393
+ manufacturer VARCHAR(100),
2394
+ firmware_version VARCHAR(50),
2395
+
2396
+ -- 部署信息
2397
+ installation_location VARCHAR(200),
2398
+ geo_location JSON, -- {"lat": 39.9042, "lng": 116.4074}
2399
+ facility_id VARCHAR(50),
2400
+ production_line VARCHAR(50),
2401
+
2402
+ -- 设备配置
2403
+ measurement_interval_seconds INT DEFAULT 60,
2404
+ data_retention_days INT DEFAULT 90,
2405
+ alert_thresholds JSON DEFAULT '{}',
2406
+ calibration_params JSON DEFAULT '{}',
2407
+
2408
+ -- 设备状态
2409
+ device_status TINYINT DEFAULT 1, -- 1=online, 0=offline, 2=maintenance
2410
+ last_heartbeat TIMESTAMP,
2411
+ health_score DECIMAL(3,2), -- 0.00-1.00
2412
+
2413
+ -- 管理信息
2414
+ created_at TIMESTAMP DEFAULT current_timestamp(),
2415
+ updated_at TIMESTAMP
2416
+ )
2417
+ HASH CLUSTERED BY (device_id) -- 选择高基数的device_id分桶;device_type取值很少,分到32个桶会导致数据倾斜
2418
+ INTO 32 BUCKETS
2419
+ COMMENT 'IoT设备注册表 - 管理所有IoT设备的元数据信息';
2420
+
2421
+ -- 高频时序数据表
2422
+ CREATE TABLE iot_timeseries_measurements (
2423
+ measurement_id BIGINT IDENTITY,
2424
+
2425
+ -- 设备和测量标识
2426
+ device_id VARCHAR(100) NOT NULL,
2427
+ sensor_id VARCHAR(100), -- 复合设备中的传感器ID
2428
+ measurement_type VARCHAR(50) NOT NULL, -- temperature, pressure, vibration, current等
2429
+
2430
+ -- 测量数据
2431
+ measurement_value DOUBLE, -- 主要数值
2432
+ measurement_unit VARCHAR(20), -- 单位:℃, Pa, Hz, A等
2433
+ secondary_values JSON, -- 辅助测量值(多维传感器)
2434
+
2435
+ -- 时间信息(高精度)
2436
+ measurement_timestamp TIMESTAMP NOT NULL, -- 设备时间戳
2437
+ collection_timestamp TIMESTAMP DEFAULT current_timestamp(), -- 收集时间戳
2438
+
2439
+ -- 数据质量和状态
2440
+ data_quality_code TINYINT DEFAULT 1, -- 1=good, 2=uncertain, 3=bad
2441
+ measurement_status TINYINT DEFAULT 0, -- 0=normal, 1=warning, 2=alarm, 3=fault
2442
+ confidence_level DECIMAL(3,2), -- 测量置信度
2443
+
2444
+ -- 异常检测结果
2445
+ is_anomaly BOOLEAN DEFAULT false,
2446
+ anomaly_score DECIMAL(5,3), -- 异常评分
2447
+ anomaly_type VARCHAR(50), -- 异常类型
2448
+
2449
+ -- 上下文信息
2450
+ environment_context JSON, -- 环境参数(温湿度、气压等)
2451
+ operational_context JSON, -- 运行参数(负载、转速等)
2452
+
2453
+ -- 高频数据按小时分区
2454
+ hour_partition STRING GENERATED ALWAYS AS (
2455
+ date_format(measurement_timestamp, 'yyyy-MM-dd-HH')
2456
+ )
2457
+ )
2458
+ PARTITIONED BY (hour_partition) -- 按小时分区支持时间范围查询
2459
+ HASH CLUSTERED BY (device_id) -- 按设备分桶
2460
+ SORTED BY (measurement_timestamp DESC) -- 时间倒序,最新数据优先
2461
+ INTO 2048 BUCKETS -- 大量设备需要更多分桶
2462
+ COMMENT 'IoT时序测量数据表 - 存储高频传感器数据和异常检测结果';
2463
+
2464
+ -- 时序数据查询优化索引
2465
+ CREATE BLOOMFILTER INDEX iot_device_lookup_idx ON TABLE iot_timeseries_measurements(device_id);
2466
+ CREATE BLOOMFILTER INDEX iot_measurement_type_idx ON TABLE iot_timeseries_measurements(measurement_type);
2467
+ CREATE INVERTED INDEX iot_anomaly_filter_idx ON TABLE iot_timeseries_measurements(is_anomaly);
2468
+ CREATE INVERTED INDEX iot_status_filter_idx ON TABLE iot_timeseries_measurements(measurement_status);
2469
+
2470
+ -- 设备状态聚合表(实时计算结果)
2471
+ CREATE TABLE iot_device_status_aggregated (
2472
+ agg_id BIGINT IDENTITY,
2473
+ device_id VARCHAR(100) NOT NULL,
2474
+
2475
+ -- 聚合时间窗口
2476
+ window_start TIMESTAMP NOT NULL,
2477
+ window_end TIMESTAMP NOT NULL,
2478
+ window_type VARCHAR(20) NOT NULL, -- MINUTE, HOUR, DAY
2479
+ measurement_type VARCHAR(50) NOT NULL,
2480
+
2481
+ -- 统计指标
2482
+ measurement_count INT,
2483
+ valid_measurement_count INT, -- 质量良好的测量数
2484
+
2485
+ -- 数值统计
2486
+ min_value DOUBLE,
2487
+ max_value DOUBLE,
2488
+ avg_value DOUBLE,
2489
+ median_value DOUBLE,
2490
+ std_deviation DOUBLE,
2491
+
2492
+ -- 异常统计
2493
+ anomaly_count INT DEFAULT 0,
2494
+ alarm_count INT DEFAULT 0,
2495
+ fault_count INT DEFAULT 0,
2496
+
2497
+ -- 设备健康指标
2498
+ uptime_percentage DECIMAL(5,2),
2499
+ data_quality_avg DECIMAL(3,2),
2500
+ health_trend TINYINT, -- 1=improving, 0=stable, -1=degrading
2501
+
2502
+ -- 预测性维护指标
2503
+ maintenance_score DECIMAL(5,3), -- 维护需求评分
2504
+ estimated_rul_hours INT, -- 剩余使用寿命(小时)
2505
+ next_maintenance_date DATE,
2506
+
2507
+ -- 计算元数据
2508
+ computed_timestamp TIMESTAMP DEFAULT current_timestamp(),
2509
+ computation_version VARCHAR(20) DEFAULT '2.2',
2510
+ model_version VARCHAR(20), -- 预测模型版本
2511
+
2512
+ date_partition STRING GENERATED ALWAYS AS (date_format(window_start, 'yyyy-MM-dd'))
2513
+ )
2514
+ PARTITIONED BY (date_partition)
2515
+ HASH CLUSTERED BY (device_id)
2516
+ SORTED BY (window_start DESC)
2517
+ INTO 512 BUCKETS
2518
+ COMMENT '设备状态聚合表 - 实时计算的设备健康状态和预测性维护指标';
2519
+
2520
+ -- 设备告警事件表
2521
+ CREATE TABLE iot_device_alerts (
2522
+ alert_id BIGINT IDENTITY,
2523
+
2524
+ -- 告警标识
2525
+ device_id VARCHAR(100) NOT NULL,
2526
+ alert_type VARCHAR(50) NOT NULL, -- threshold, anomaly, fault, offline
2527
+ alert_level TINYINT NOT NULL, -- 1=info, 2=warning, 3=error, 4=critical
2528
+
2529
+ -- 告警内容
2530
+ alert_title VARCHAR(200),
2531
+ alert_description STRING,
2532
+ alert_data JSON, -- 告警相关数据
2533
+
2534
+ -- 告警状态
2535
+ alert_status TINYINT DEFAULT 1, -- 1=active, 2=acknowledged, 3=resolved
2536
+ acknowledged_by VARCHAR(100),
2537
+ resolved_by VARCHAR(100),
2538
+
2539
+ -- 时间信息
2540
+ alert_timestamp TIMESTAMP NOT NULL,
2541
+ acknowledged_at TIMESTAMP,
2542
+ resolved_at TIMESTAMP,
2543
+
2544
+ -- 业务影响
2545
+ business_impact VARCHAR(100), -- 业务影响描述
2546
+ estimated_downtime_minutes INT, -- 预估停机时间
2547
+
2548
+ date_partition STRING GENERATED ALWAYS AS (date_format(alert_timestamp, 'yyyy-MM-dd'))
2549
+ )
2550
+ PARTITIONED BY (date_partition)
2551
+ HASH CLUSTERED BY (device_id)
2552
+ SORTED BY (alert_timestamp DESC, alert_level DESC)
2553
+ INTO 128 BUCKETS
2554
+ COMMENT '设备告警事件表 - 记录和管理所有设备告警信息';
2555
+
2556
+ -- 设置分层数据生命周期
2557
+ ALTER TABLE iot_timeseries_measurements SET TBLPROPERTIES ('data_lifecycle' = '90'); -- 原始数据3个月
2558
+ ALTER TABLE iot_device_status_aggregated SET TBLPROPERTIES ('data_lifecycle' = '730'); -- 聚合数据2年
2559
+ ALTER TABLE iot_device_alerts SET TBLPROPERTIES ('data_lifecycle' = '1095'); -- 告警记录3年
2560
+ ```
2561
+
2562
+ ## 🧹 实验环境清理指南
2563
+
2564
+ 为确保资源合理使用并避免不必要的存储开销,在完成表设计实验后应当执行以下清理操作。注意:下列部分表名(如 `product_catalog`、`user_content`)较为通用,请仅在实验专用的schema中执行,避免误删同名的业务表:
2565
+
2566
+ ### 表资源清理
2567
+
2568
+ ```sql
2569
+ -- 1. 清理测试表
2570
+ DROP TABLE IF EXISTS test_identity_table;
2571
+ DROP TABLE IF EXISTS test_identity_seed_table;
2572
+ DROP TABLE IF EXISTS test_string_types;
2573
+ DROP TABLE IF EXISTS test_vector_table;
2574
+ DROP TABLE IF EXISTS test_complex_types;
2575
+ DROP TABLE IF EXISTS test_constraints;
2576
+ DROP TABLE IF EXISTS test_generated_columns;
2577
+
2578
+ -- 2. 清理分区测试表
2579
+ DROP TABLE IF EXISTS test_partition_daily;
2580
+ DROP TABLE IF EXISTS test_partition_hourly;
2581
+ DROP TABLE IF EXISTS test_partition_tenant;
2582
+ DROP TABLE IF EXISTS test_partition_multi;
2583
+ DROP TABLE IF EXISTS partition_type_solutions;
2584
+
2585
+ -- 3. 清理索引测试表
2586
+ DROP TABLE IF EXISTS test_vector_index_table;
2587
+ DROP TABLE IF EXISTS test_inverted_index_table;
2588
+ DROP TABLE IF EXISTS test_bloom_index_table;
2589
+ DROP TABLE IF EXISTS comprehensive_vector_demo;
2590
+ DROP TABLE IF EXISTS comprehensive_search_demo;
2591
+ DROP TABLE IF EXISTS user_management_optimized;
2592
+ DROP TABLE IF EXISTS product_catalog;
2593
+ DROP TABLE IF EXISTS user_content;
2594
+
2595
+ -- 4. 清理优化测试表
2596
+ DROP TABLE IF EXISTS user_behavior_optimized;
2597
+ DROP TABLE IF EXISTS financial_transactions_optimized;
2598
+ DROP TABLE IF EXISTS business_analytics_optimized;
2599
+ DROP TABLE IF EXISTS vector_search_performance;
2600
+ DROP TABLE IF EXISTS storage_cost_optimized;
2601
+ DROP TABLE IF EXISTS small_table_optimized;
2602
+ DROP TABLE IF EXISTS medium_table_optimized;
2603
+ DROP TABLE IF EXISTS large_table_optimized;
2604
+
2605
+ -- 5. 清理企业级架构模式表
2606
+ -- 事件溯源架构
2607
+ DROP TABLE IF EXISTS event_store_transactions;
2608
+ DROP TABLE IF EXISTS aggregate_snapshots;
2609
+
2610
+ -- 实时数据湖架构
2611
+ DROP TABLE IF EXISTS realtime_data_stream;
2612
+ DROP TABLE IF EXISTS batch_aggregated_analytics;
2613
+ DROP TABLE IF EXISTS serving_layer_unified_view;
2614
+
2615
+ -- 多租户SaaS架构
2616
+ DROP TABLE IF EXISTS saas_tenant_registry;
2617
+ DROP TABLE IF EXISTS saas_multi_tenant_data;
2618
+ DROP TABLE IF EXISTS saas_tenant_usage_stats;
2619
+
2620
+ -- IoT时序数据架构
2621
+ DROP TABLE IF EXISTS iot_device_registry;
2622
+ DROP TABLE IF EXISTS iot_timeseries_measurements;
2623
+ DROP TABLE IF EXISTS iot_device_status_aggregated;
2624
+ DROP TABLE IF EXISTS iot_device_alerts;
2625
+ ```
2626
+
2627
+ ***
2628
+
2629
+ ## 📋 总结
2630
+
2631
+ ### 验证成果
2632
+
2633
+ 本指南经过云器Lakehouse环境完整验证,所有关键功能点均已确认可用:
2634
+
2635
+ #### ✅ 已验证功能
2636
+
2637
+ * **数据类型**: IDENTITY(仅支持BIGINT类型)、向量类型、复杂类型(STRUCT/ARRAY/MAP)
2638
+ * **约束和生成列**: 确定性函数列表、默认值语法
2639
+ * **分区策略**: 支持的分区类型、生成列分区转换
2640
+ * **分桶排序**: 分桶数量配置、排序策略优化
2641
+ * **索引架构**: 向量索引5种距离函数、倒排索引分词器、布隆过滤器
2642
+ * **性能优化**: 查询剪枝、多维度索引协同
2643
+ * **企业架构**: 四种设计模式的完整实现
2644
+
2645
+ #### 🔧 重要发现和修正
2646
+
2647
+ 1. **索引命名**: 当前版本强制索引名称在schema级别唯一,创建索引时须使用在同一schema内不重复的名称
2648
+ 2. **向量维度**: 插入时必须严格匹配定义的维度
2649
+ 3. **ARRAY索引**: 不支持指定analyzer参数
2650
+ 4. **PRIMARY KEY限制**: PRIMARY KEY与HASH CLUSTERED BY同时使用时需满足严格条件
2651
+
2652
+ ### 核心价值
2653
+
2654
+ 本指南的核心价值在于:
2655
+
2656
+ 1. **实用性**: 所有示例均经实际验证,可直接应用于生产环境
2657
+ 2. **完整性**: 覆盖从基础类型到企业架构的全栈设计指导
2658
+ 3. **前瞻性**: 基于最新产品功能特性,适应技术发展趋势
2659
+ 4. **可维护性**: 提供完整的故障排查和设计评审体系
2660
+
2661
+ ### 使用建议
2662
+
2663
+ 1. **新项目**: 按照设计理念章节建立设计框架,参考企业级模式选择合适架构
2664
+ 2. **现有系统**: 使用设计评审检查清单进行系统优化和问题排查
2665
+ 3. **团队培训**: 结合实际业务场景,逐章节学习和实践
2666
+ 4. **持续优化**: 根据业务发展和数据增长,定期评估和调整设计策略
2667
+
2668
+ **最佳实践建议**: 严格遵循本指南的设计原则和验证过的SQL语法,将显著提升系统性能、降低运维复杂度,并为业务增长提供可靠的数据基础设施保障。
2669
+
2670
+ ## 参考资料
2671
+
2672
+ [Create Table语法](create-table-ddl.md)
2673
+
2674
+ ***
2675
+
2676
+ *注:本指南基于2025年5月的云器Lakehouse版本测试结果,后续版本可能有所变化。请定期检查官方文档以获取最新信息。*