@clickzetta/cz-cli-darwin-arm64 0.5.16 → 0.5.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/lakehouse-doc-en/SKILL.md +6 -11
- package/bin/skills/lakehouse-doc-en/references/AIGateway.md +58 -13
- package/bin/skills/lakehouse-doc-en/references/Computation.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/DataSource_Amazon_DocumentDB.md +3 -1
- package/bin/skills/lakehouse-doc-en/references/Foreach.md +14 -14
- package/bin/skills/lakehouse-doc-en/references/JDBC-Driver.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/LakehouseAI-overview.md +21 -8
- package/bin/skills/lakehouse-doc-en/references/LakehouseDataGPT-tour.md +4 -9
- package/bin/skills/lakehouse-doc-en/references/LakehouseStudio-tour.md +14 -19
- package/bin/skills/lakehouse-doc-en/references/Lakehouse_Zilliz_MakeDataReadyforBIandAI.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/Logstash.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/Migrate_Spark_DataEngineeringBestPractices_Project_to_Lakehouse.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/Notebook.md +17 -17
- package/bin/skills/lakehouse-doc-en/references/RemoteFunction-as-udf.md +14 -14
- package/bin/skills/lakehouse-doc-en/references/SQL_External_Catalog_Guide.md +1 -9
- package/bin/skills/lakehouse-doc-en/references/SUMMARY.md +59 -29
- package/bin/skills/lakehouse-doc-en/references/WINDOWFUNCTION.md +99 -57
- package/bin/skills/lakehouse-doc-en/references/Zettapark_Data_Engineering_Demo.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/access-control-configuration.md +1 -8
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-2-5-1.0.md +16 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-3-29-1.0.2.md +14 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-3-8-1.0.1.md +16 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-4-28-1.1.md +29 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-12-1.1.1.md +18 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-15-1.2.md +9 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-21-1.3.md +9 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-28-1.4.md +10 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-6-3-1.5.md +9 -0
- package/bin/skills/lakehouse-doc-en/references/alicloud-arn-externalid.md +0 -5
- package/bin/skills/lakehouse-doc-en/references/answer-accuracy-improve.md +120 -103
- package/bin/skills/lakehouse-doc-en/references/application-list.md +1 -3
- package/bin/skills/lakehouse-doc-en/references/approval-list.md +16 -17
- package/bin/skills/lakehouse-doc-en/references/batch-load-parquet-file-into-lakehouse.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/batch_sync.md +9 -9
- package/bin/skills/lakehouse-doc-en/references/batch_sync_Sop.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/batchloadparquetfileintoLakehouse.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/bulkloadv1-python-sdk.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/chart-auto-refresh-guide.md +12 -6
- package/bin/skills/lakehouse-doc-en/references/clickzetta-sample-data.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/code_approval.md +1 -5
- package/bin/skills/lakehouse-doc-en/references/composite_task.md +31 -42
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_environment_and_data_generate.md +6 -9
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_javasdk_bulkload_realtime.md +4 -10
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_kafka_realtime_sync.md +1 -10
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_local_file_into_table_by_studio.md +0 -6
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_batchload_public_network.md +0 -5
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_python_node.md +2 -7
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_realtime_cdc_public_network.md +13 -18
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_sql_insert.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/concepts.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/config-datasource.md +5 -7
- package/bin/skills/lakehouse-doc-en/references/connect-with-cli.md +116 -72
- package/bin/skills/lakehouse-doc-en/references/connect-with-cz-cli.md +151 -0
- package/bin/skills/lakehouse-doc-en/references/continue-job.md +9 -17
- package/bin/skills/lakehouse-doc-en/references/create-api-connection.md +315 -286
- package/bin/skills/lakehouse-doc-en/references/create-catalog-connection.md +1 -0
- package/bin/skills/lakehouse-doc-en/references/create-dynamic-table.md +4 -4
- package/bin/skills/lakehouse-doc-en/references/create-external-catalog.md +85 -22
- package/bin/skills/lakehouse-doc-en/references/create-table-ddl.md +45 -0
- package/bin/skills/lakehouse-doc-en/references/creating_alicloud_privatelinkendpoint.md +4 -6
- package/bin/skills/lakehouse-doc-en/references/creating_alicloud_privatelinkservice.md +4 -7
- package/bin/skills/lakehouse-doc-en/references/creating_tencentcloud_privatelinkendpoint.md +2 -7
- package/bin/skills/lakehouse-doc-en/references/creating_tencentcloud_privatelinkservice.md +1 -5
- package/bin/skills/lakehouse-doc-en/references/cz-cli-agent.md +15 -10
- package/bin/skills/lakehouse-doc-en/references/cz-cli-datasource.md +0 -8
- package/bin/skills/lakehouse-doc-en/references/cz-cli-sql.md +2 -45
- package/bin/skills/lakehouse-doc-en/references/cz-cli.md +53 -42
- package/bin/skills/lakehouse-doc-en/references/dashboard-version-management-guide.md +12 -4
- package/bin/skills/lakehouse-doc-en/references/data-integration-intro.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/data-integration.md +29 -27
- package/bin/skills/lakehouse-doc-en/references/data-load-summary.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/data-quality.md +25 -25
- package/bin/skills/lakehouse-doc-en/references/data-sharing.md +31 -54
- package/bin/skills/lakehouse-doc-en/references/data-sources.md +45 -45
- package/bin/skills/lakehouse-doc-en/references/data_catalog.md +23 -25
- package/bin/skills/lakehouse-doc-en/references/data_privacy.md +5 -2
- package/bin/skills/lakehouse-doc-en/references/data_sharing_between_accounts_guide.md +0 -4
- package/bin/skills/lakehouse-doc-en/references/data_visualization.md +4 -15
- package/bin/skills/lakehouse-doc-en/references/dataagent.md +39 -7
- package/bin/skills/lakehouse-doc-en/references/databricks-delta-to-lakehouse-migration.md +168 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-dlt-to-lakehouse-migration.md +331 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-external-catalog-practice.md +367 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-jobs-to-studio-migration.md +199 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-notebook-to-studio-migration.md +350 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-uc-governance-to-lakehouse-migration.md +327 -0
- package/bin/skills/lakehouse-doc-en/references/datagpt-model-config.md +34 -0
- package/bin/skills/lakehouse-doc-en/references/datagpt_data_source.md +50 -37
- package/bin/skills/lakehouse-doc-en/references/datagpt_introduction.md +55 -79
- package/bin/skills/lakehouse-doc-en/references/datagpt_quickstart.md +50 -64
- package/bin/skills/lakehouse-doc-en/references/datalake-acceleration.md +75 -2
- package/bin/skills/lakehouse-doc-en/references/dbt-databricks-to-clickzetta-migration.md +242 -0
- package/bin/skills/lakehouse-doc-en/references/dynamic-mask.md +30 -30
- package/bin/skills/lakehouse-doc-en/references/dynamic-table-bestpractice.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/dynamic-table-introduce.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/dynamic_table_summary.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/eco_integration/streamlit.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/eco_integration/superset.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/ecosystem-all.md +1 -3
- package/bin/skills/lakehouse-doc-en/references/ecosystem.md +145 -0
- package/bin/skills/lakehouse-doc-en/references/external-catalog-summary.md +33 -38
- package/bin/skills/lakehouse-doc-en/references/external-function-combo-practice.md +466 -0
- package/bin/skills/lakehouse-doc-en/references/f6fc6447ee.md +7 -9
- package/bin/skills/lakehouse-doc-en/references/federation-query.md +56 -6
- package/bin/skills/lakehouse-doc-en/references/finebi-mysql.md +2 -0
- package/bin/skills/lakehouse-doc-en/references/get-started-with-sample-data.md +10 -11
- package/bin/skills/lakehouse-doc-en/references/gitfolder.md +2 -3
- package/bin/skills/lakehouse-doc-en/references/grant-privileges.md +2 -0
- package/bin/skills/lakehouse-doc-en/references/iceberg-rest-catalog-databricks.md +166 -0
- package/bin/skills/lakehouse-doc-en/references/ide.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/if_else_task.md +59 -57
- package/bin/skills/lakehouse-doc-en/references/input_output.md +10 -7
- package/bin/skills/lakehouse-doc-en/references/jobprofile-bestpractices.md +60 -64
- package/bin/skills/lakehouse-doc-en/references/kafka-connection.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/key-concepts.md +146 -117
- package/bin/skills/lakehouse-doc-en/references/lakehouse-ai-gateway-cz-cli.md +317 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-ai-sql-analysis.md +345 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-dqc-guide.md +300 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-medallion-sql-dt-guide.md +543 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-multi-cloud-acceleration.md +274 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-multimodal-ai-pipeline.md +198 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-quick-experience_guide.md +49 -52
- package/bin/skills/lakehouse-doc-en/references/lakehouse-volume-pipe-acceleration-guide.md +380 -0
- package/bin/skills/lakehouse-doc-en/references/langchain-plug-installation.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/management.md +4 -9
- package/bin/skills/lakehouse-doc-en/references/medallion-lakehouse-from-scratch.md +2 -1
- package/bin/skills/lakehouse-doc-en/references/metrics_answer_build.md +58 -21
- package/bin/skills/lakehouse-doc-en/references/migrate-spark-data-engineering-best-practices-to-lakehouse.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/mindsdb.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/monitoring_and_alerting.md +65 -60
- package/bin/skills/lakehouse-doc-en/references/monitoring_item_specification.md +33 -33
- package/bin/skills/lakehouse-doc-en/references/multitable_batch_sync.md +16 -16
- package/bin/skills/lakehouse-doc-en/references/multitable_realtime_sync.md +65 -72
- package/bin/skills/lakehouse-doc-en/references/multitable_realtime_sync_sop.md +54 -52
- package/bin/skills/lakehouse-doc-en/references/navicat-mysql.md +2 -0
- package/bin/skills/lakehouse-doc-en/references/om-dynamic-table.md +71 -66
- package/bin/skills/lakehouse-doc-en/references/om-vcluster.md +2 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-create-session.md +79 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-generate-auth-token.md +63 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-overview.md +96 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-quick-start.md +286 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-response-guide.md +264 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-safe-question-poll.md +201 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-text2insight-query.md +99 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-text2insight-stop.md +74 -0
- package/bin/skills/lakehouse-doc-en/references/overview.md +6 -7
- package/bin/skills/lakehouse-doc-en/references/permission-application.md +5 -5
- package/bin/skills/lakehouse-doc-en/references/pipe-introduction.md +1 -0
- package/bin/skills/lakehouse-doc-en/references/pipe-kafka-table-stream.md +72 -70
- package/bin/skills/lakehouse-doc-en/references/pipe-kafka.md +105 -110
- package/bin/skills/lakehouse-doc-en/references/pipe-overview.md +40 -40
- package/bin/skills/lakehouse-doc-en/references/pipe-storage-object.md +43 -48
- package/bin/skills/lakehouse-doc-en/references/pipe-summary.md +14 -4
- package/bin/skills/lakehouse-doc-en/references/pipe-syntax.md +58 -151
- package/bin/skills/lakehouse-doc-en/references/practice_python_task.md +4 -4
- package/bin/skills/lakehouse-doc-en/references/pricing-ai-gateway.md +181 -0
- package/bin/skills/lakehouse-doc-en/references/pricing-lakehouse.md +316 -0
- package/bin/skills/lakehouse-doc-en/references/pricing.md +44 -288
- package/bin/skills/lakehouse-doc-en/references/private-link-general.md +0 -2
- package/bin/skills/lakehouse-doc-en/references/pyspark-to-zettapark-migration-f1.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/python-igs.md +7 -3
- package/bin/skills/lakehouse-doc-en/references/python-sample-put-github-rt-events.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/python-task.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/python_reference/connector.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/python_reference/connector_advanced.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/python_reference/connector_examples.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/python_sdk_guide.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/python_shell_datasource.md +11 -9
- package/bin/skills/lakehouse-doc-en/references/quick_start_batch_sync_data.md +9 -18
- package/bin/skills/lakehouse-doc-en/references/quick_start_bi_analysis.md +8 -25
- package/bin/skills/lakehouse-doc-en/references/quick_start_create_workspace.md +4 -6
- package/bin/skills/lakehouse-doc-en/references/quick_start_data_quality.md +8 -8
- package/bin/skills/lakehouse-doc-en/references/quick_start_etl.md +16 -20
- package/bin/skills/lakehouse-doc-en/references/quick_start_monitoring_and_alerting.md +10 -18
- package/bin/skills/lakehouse-doc-en/references/quick_start_sql_query.md +7 -10
- package/bin/skills/lakehouse-doc-en/references/quick_start_upload_data.md +5 -7
- package/bin/skills/lakehouse-doc-en/references/quick_start_user_management.md +8 -8
- package/bin/skills/lakehouse-doc-en/references/quick_start_workspace.md +0 -5
- package/bin/skills/lakehouse-doc-en/references/quick_start_workspace_user.md +8 -8
- package/bin/skills/lakehouse-doc-en/references/quickstart.md +69 -56
- package/bin/skills/lakehouse-doc-en/references/quickstart_datashare_between_companies.md +0 -5
- package/bin/skills/lakehouse-doc-en/references/quickstart_envirment_for_team.md +0 -24
- package/bin/skills/lakehouse-doc-en/references/realtime-pipeline-selection-guide.md +1 -2
- package/bin/skills/lakehouse-doc-en/references/realtime-sales-dashboard-with-dynamic-table.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/realtime_sync.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/release-note-2026-05-19.md +5 -3
- package/bin/skills/lakehouse-doc-en/references/revoke-privileges.md +3 -1
- package/bin/skills/lakehouse-doc-en/references/roles.md +2 -3
- package/bin/skills/lakehouse-doc-en/references/row-filter.md +165 -0
- package/bin/skills/lakehouse-doc-en/references/row_level_permission.md +30 -19
- package/bin/skills/lakehouse-doc-en/references/scheduled_task.md +28 -21
- package/bin/skills/lakehouse-doc-en/references/security_overview.md +99 -21
- package/bin/skills/lakehouse-doc-en/references/set-command.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/setup.md +13 -15
- package/bin/skills/lakehouse-doc-en/references/show-grants.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/snowflake-dynamic-tables-to-lakehouse.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/spark-connector-summary.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/sql_functions/context_functions/current_vcluster.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/sso-configuration.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/streaming_pipeline_with_dynamic_table.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/studio-incremental-sync-practice.md +27 -23
- package/bin/skills/lakehouse-doc-en/references/studio-shell-task.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/supported-cloud-platforms.md +32 -0
- package/bin/skills/lakehouse-doc-en/references/table_rendering.md +18 -12
- package/bin/skills/lakehouse-doc-en/references/task-develop.md +89 -91
- package/bin/skills/lakehouse-doc-en/references/task_development.md +19 -17
- package/bin/skills/lakehouse-doc-en/references/task_group.md +16 -14
- package/bin/skills/lakehouse-doc-en/references/task_instance.md +21 -21
- package/bin/skills/lakehouse-doc-en/references/task_param.md +38 -35
- package/bin/skills/lakehouse-doc-en/references/task_param_reference.md +81 -79
- package/bin/skills/lakehouse-doc-en/references/task_scheduling_dependency.md +20 -21
- package/bin/skills/lakehouse-doc-en/references/tencentcloud_arn_and_externalid.md +1 -5
- package/bin/skills/lakehouse-doc-en/references/trial-account-quotas-and-limits.md +1 -3
- package/bin/skills/lakehouse-doc-en/references/tutorial_connect_to_lakehouse.md +69 -0
- package/bin/skills/lakehouse-doc-en/references/tutorials.md +4 -1
- package/bin/skills/lakehouse-doc-en/references/unique-key.md +167 -0
- package/bin/skills/lakehouse-doc-en/references/usageandbillingview.md +138 -0
- package/bin/skills/lakehouse-doc-en/references/use-dbt-dev.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/use-java-sdk-realtime-uploaddata.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/use-java-sdk-upload-data-local.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/use-models.md +128 -0
- package/bin/skills/lakehouse-doc-en/references/use-mysql-client.md +81 -81
- package/bin/skills/lakehouse-doc-en/references/use-python-sdk-upload-data.md +10 -12
- package/bin/skills/lakehouse-doc-en/references/user-identification.md +2 -3
- package/bin/skills/lakehouse-doc-en/references/user_permission_grand_guide.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/using-udf-in-dynamic-table.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/vc_cache.md +18 -22
- package/bin/skills/lakehouse-doc-en/references/vcluster_size_description.md +33 -31
- package/bin/skills/lakehouse-doc-en/references/virtual-cluster.md +43 -45
- package/bin/skills/lakehouse-doc-en/references/web-job-history.md +94 -108
- package/bin/skills/lakehouse-doc-en/references/web_search.md +16 -7
- package/bin/skills/lakehouse-doc-en/references/zettapark-data-engineering-demo.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/zettapark-dataframe-guide.md +144 -70
- package/bin/skills/lakehouse-doc-en/references/zettapark-dynamic-table-guide.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/zettapark-etl-guide.md +73 -33
- package/bin/skills/lakehouse-doc-en/references/zettapark-feature-engineering.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/zettapark-functions-guide.md +75 -46
- package/bin/skills/lakehouse-doc-en/references/zettapark-quick-start.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/zettapark-stream-guide.md +4 -4
- package/bin/skills/lakehouse-doc-en/references/zettapark-volume-guide.md +93 -29
- package/package.json +1 -1
- package/bin/skills/lakehouse-doc-en/references/CLAUDE.md +0 -606
- package/bin/skills/lakehouse-doc-en/references/modelprice.md +0 -155
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
# Using Table Stream and Pipe to Import Kafka Data into Lakehouse
|
|
2
2
|
|
|
3
|
-
## 1. Background
|
|
3
|
+
## 1. Background
|
|
4
4
|
|
|
5
|
-
In
|
|
5
|
+
In big data processing, efficiently ingesting streaming data from Kafka into a Lakehouse is a common requirement. Singdata Lakehouse provides powerful Table Stream and Pipe functionality that makes this process simpler and more efficient. This article describes how to use Table Stream and Pipe to import Kafka data into the Lakehouse, covering the complete process of creating a Kafka external table and a Kafka Table Stream.
|
|
6
6
|
|
|
7
|
-
## 2.
|
|
7
|
+
## 2. Steps
|
|
8
8
|
|
|
9
|
-
###
|
|
9
|
+
### Create a Kafka External Table
|
|
10
10
|
|
|
11
|
-
Before using Table Stream and Pipe,
|
|
11
|
+
Before using Table Stream and Pipe, create an [external table integrated with Kafka](create-kafka-external.md) to access data in Kafka.
|
|
12
12
|
|
|
13
13
|
```sql
|
|
14
14
|
CREATE STORAGE CONNECTION pipe_kafka
|
|
@@ -24,23 +24,24 @@ OPTIONS ( 'group_id' = 'external_table_lh', 'topics' = 'my_topic')
|
|
|
24
24
|
CONNECTION pipe_kafka;
|
|
25
25
|
```
|
|
26
26
|
|
|
27
|
-
###
|
|
27
|
+
### Create a Table Stream
|
|
28
28
|
|
|
29
|
-
Create a Table Stream on the Kafka external table to capture real-time data changes
|
|
29
|
+
[Create a Table Stream](create-table-stream.md) on the Kafka external table to capture real-time data changes from Kafka.
|
|
30
30
|
|
|
31
31
|
```sql
|
|
32
32
|
CREATE TABLE STREAM kafka_table_stream_pipe1
|
|
33
33
|
ON TABLE external_table_kafka
|
|
34
34
|
WITH PROPERTIES (
|
|
35
35
|
'table_stream_mode' = 'append_only'
|
|
36
|
+
|
|
36
37
|
);
|
|
37
38
|
```
|
|
38
39
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
* `kafka_table_stream_pipe1`: Name of the Table Stream.
|
|
41
|
+
* `ON TABLE external_table_kafka`: Specifies that the Table Stream is created based on the previously created Kafka external table.
|
|
42
|
+
* `table_stream_mode='append_only'`: Sets the mode to append-only, meaning only newly added data rows are captured.
|
|
42
43
|
|
|
43
|
-
After creation,
|
|
44
|
+
After creation, verify the data in the Table Stream with the following query:
|
|
44
45
|
|
|
45
46
|
```sql
|
|
46
47
|
SELECT CAST(value AS STRING) FROM kafka_table_stream_pipe1;
|
|
@@ -48,61 +49,61 @@ SELECT CAST(value AS STRING) FROM kafka_table_stream_pipe1;
|
|
|
48
49
|
|
|
49
50
|
This query converts the `value` field in the Table Stream to a string type and returns it for subsequent processing.
|
|
50
51
|
|
|
51
|
-
###
|
|
52
|
+
### Create a Target Table
|
|
52
53
|
|
|
53
|
-
|
|
54
|
+
Create a target table to store data imported from Kafka.
|
|
54
55
|
|
|
55
56
|
```sql
|
|
56
|
-
CREATE TABLE
|
|
57
|
+
CREATE TABLE kafka_sink_table_1 (
|
|
57
58
|
a TIMESTAMP,
|
|
58
59
|
b STRING
|
|
59
60
|
);
|
|
60
61
|
```
|
|
61
62
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
63
|
+
* `kafka_sink_table_1`: Name of the target table.
|
|
64
|
+
* `a TIMESTAMP`: First field for storing timestamp data.
|
|
65
|
+
* `b STRING`: Second field for storing string data.
|
|
65
66
|
|
|
66
|
-
###
|
|
67
|
+
### Create a Pipe
|
|
67
68
|
|
|
68
|
-
|
|
69
|
+
Use a Pipe to continuously import data from the Table Stream into the target table.
|
|
69
70
|
|
|
70
71
|
```sql
|
|
71
72
|
CREATE PIPE kafka_pipe_stream
|
|
72
73
|
VIRTUAL_CLUSTER = 'test_alter'
|
|
73
74
|
AS
|
|
74
|
-
COPY INTO
|
|
75
|
+
COPY INTO kafka_sink_table_1
|
|
75
76
|
FROM (
|
|
76
77
|
SELECT CURRENT_TIMESTAMP(), CAST(value AS STRING) FROM kafka_table_stream_pipe1
|
|
77
78
|
);
|
|
78
79
|
```
|
|
79
80
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
81
|
+
* `kafka_pipe_stream`: Name of the Pipe.
|
|
82
|
+
* `VIRTUAL_CLUSTER = 'test_alter'`: Specifies the Virtual Cluster to use.
|
|
83
|
+
* `COPY INTO kafka_sink_table_1`: Copies data into the target table `kafka_sink_table_1`.
|
|
84
|
+
* `SELECT CURRENT_TIMESTAMP(), CAST(value AS STRING) FROM kafka_table_stream_pipe1`: Selects data from the Table Stream, using the current timestamp and the converted `value` field as the two columns for the target table.
|
|
84
85
|
|
|
85
|
-
Other
|
|
86
|
+
Other configurable properties:
|
|
86
87
|
- `INITIAL_DELAY_IN_SECONDS`: Initial job scheduling delay (optional, default 0 seconds)
|
|
87
|
-
- `BATCH_INTERVAL_IN_SECONDS`: (Optional)
|
|
88
|
-
- `BATCH_SIZE_PER_KAFKA_PARTITION`: (Optional)
|
|
89
|
-
- `MAX_SKIP_BATCH_COUNT_ON_ERROR`: (Optional)
|
|
90
|
-
- `RESET_KAFKA_GROUP_OFFSETS`: (Optional)
|
|
91
|
-
- `none`:
|
|
92
|
-
- `valid`: Checks if the current offset
|
|
93
|
-
- `earliest`: Resets to the current earliest
|
|
94
|
-
- `latest`: Resets to the current latest
|
|
95
|
-
- `${TIMESTAMP_MILLISECONDS}`: Resets to the offset corresponding to the millisecond timestamp, e.g.,
|
|
88
|
+
- `BATCH_INTERVAL_IN_SECONDS`: (Optional) Batch processing interval, default 60 seconds.
|
|
89
|
+
- `BATCH_SIZE_PER_KAFKA_PARTITION`: (Optional) Batch size per Kafka partition, default 500,000 records.
|
|
90
|
+
- `MAX_SKIP_BATCH_COUNT_ON_ERROR`: (Optional) Maximum number of batches to skip on error, default 30.
|
|
91
|
+
- `RESET_KAFKA_GROUP_OFFSETS`: (Optional) Initial Kafka offset when starting the Pipe. Cannot be modified after creation. Possible values: `latest`, `earliest`, `none`, `valid`, `${TIMESTAMP_MILLISECONDS}`
|
|
92
|
+
- `none`: No action (default)
|
|
93
|
+
- `valid`: Checks if the current group offset is expired and resets expired partition offsets to the current earliest
|
|
94
|
+
- `earliest`: Resets to the current earliest
|
|
95
|
+
- `latest`: Resets to the current latest
|
|
96
|
+
- `${TIMESTAMP_MILLISECONDS}`: Resets to the offset corresponding to the millisecond timestamp, e.g., `1737789688000` (2025-01-25 15:21:28 UTC+8)
|
|
96
97
|
|
|
97
|
-
## 3.
|
|
98
|
+
## 3. Verify Results
|
|
98
99
|
|
|
99
|
-
|
|
100
|
+
Verify whether data has been successfully imported by querying the target table:
|
|
100
101
|
|
|
101
102
|
```sql
|
|
102
|
-
SELECT * FROM
|
|
103
|
+
SELECT * FROM kafka_sink_table_1;
|
|
103
104
|
```
|
|
104
105
|
|
|
105
|
-
|
|
106
|
+
Check the running status of the Pipe to ensure it is working properly:
|
|
106
107
|
|
|
107
108
|
```sql
|
|
108
109
|
SHOW PIPES;
|
|
@@ -112,14 +113,15 @@ This command lists all created Pipes and their status information, including whe
|
|
|
112
113
|
|
|
113
114
|
## 4. Status Monitoring and Management
|
|
114
115
|
|
|
115
|
-
###
|
|
116
|
+
### Check Kafka Consumption Latency
|
|
116
117
|
|
|
117
|
-
Use the `DESC PIPE` command.
|
|
118
|
-
- `lastConsumeTimestamp`: The last consumed offset
|
|
119
|
-
- `offsetLag`: The backlog of Kafka data
|
|
120
|
-
- `timeLag`: Consumption latency, calculated as the current time minus the last consumed offset.
|
|
118
|
+
Use the `DESC PIPE` command. The JSON string in `pipe_latency` contains the following fields:
|
|
119
|
+
- `lastConsumeTimestamp`: The timestamp of the last consumed offset
|
|
120
|
+
- `offsetLag`: The backlog of Kafka data
|
|
121
|
+
- `timeLag`: Consumption latency, calculated as the current time minus the timestamp of the last consumed offset. The value is -1 when Kafka consumption is abnormal.
|
|
121
122
|
|
|
122
|
-
|
|
123
|
+
|
|
124
|
+
````
|
|
123
125
|
DESC PIPE EXTENDED kafka_pipe_stream
|
|
124
126
|
+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
|
125
127
|
| info_name | info_value |
|
|
@@ -130,53 +132,53 @@ DESC PIPE EXTENDED kafka_pipe_stream
|
|
|
130
132
|
| last_modified_time | 2025-03-05 10:40:55.405 |
|
|
131
133
|
| comment | |
|
|
132
134
|
| properties | ((virtual_cluster,test_alter)) |
|
|
133
|
-
| copy_statement | COPY INTO TABLE qingyun.pipe_schema.
|
|
135
|
+
| copy_statement | COPY INTO TABLE qingyun.pipe_schema.kafka_sink_table_1 FROM (SELECT `current_timestamp`() AS ```current_timestamp``()`, CAST(kafka_table_stream_pipe1.`value` AS string) AS `value` |
|
|
134
136
|
| pipe_status | RUNNING |
|
|
135
|
-
| output_name | xxxxxxx.pipe_schema.
|
|
137
|
+
| output_name | xxxxxxx.pipe_schema.kafka_sink_table_1 |
|
|
136
138
|
| input_name | kafka_table_stream:xxxxxxx.pipe_schema.kafka_table_stream_pipe1 |
|
|
137
139
|
| invalid_reason | |
|
|
138
140
|
| pipe_latency | {"kafka":{"lags":{"0":0,"1":0,"2":0,"3":0},"lastConsumeTimestamp":-1,"offsetLag":0,"timeLag":-1}} |
|
|
139
141
|
+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
|
140
|
-
```
|
|
141
142
|
|
|
142
|
-
|
|
143
|
+
````
|
|
143
144
|
|
|
144
|
-
|
|
145
|
+
### View Pipe Execution History
|
|
145
146
|
|
|
146
|
-
|
|
147
|
+
Since each Pipe execution is a COPY operation, you can view all operations in the job history. Filter by `query_tag` in [Job History](<web-job-history.md>). All Pipe COPY jobs are tagged in the format `pipe.workspace_name.schema_name.pipe_name` for easy tracking.
|
|
147
148
|
|
|
148
|
-
|
|
149
|
-
```sql
|
|
150
|
-
ALTER PIPE pipe_name SET PIPE_EXECUTION_PAUSED = true;
|
|
151
|
-
```
|
|
149
|
+
### Stop and Start a Pipe
|
|
152
150
|
|
|
151
|
+
- Pause a Pipe:
|
|
152
|
+
```
|
|
153
|
+
ALTER PIPE pipe_name SET PIPE_EXECUTION_PAUSED = true;
|
|
154
|
+
```
|
|
153
155
|
- Resume a Pipe:
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
156
|
+
```
|
|
157
|
+
ALTER PIPE pipe_name SET PIPE_EXECUTION_PAUSED = false;
|
|
158
|
+
```
|
|
157
159
|
|
|
158
|
-
###
|
|
160
|
+
### Modify Pipe Properties
|
|
159
161
|
|
|
160
|
-
You can modify Pipe properties
|
|
162
|
+
You can modify Pipe properties one at a time. If multiple properties need to be changed, run the `ALTER` command multiple times. Below are the modifiable properties and their syntax:
|
|
161
163
|
|
|
162
|
-
```
|
|
164
|
+
```SQL
|
|
163
165
|
ALTER PIPE pipe_name SET
|
|
164
|
-
|
|
166
|
+
[VIRTUAL_CLUSTER = 'virtual_cluster_name'],
|
|
165
167
|
[BATCH_INTERVAL_IN_SECONDS=''],
|
|
166
|
-
|
|
168
|
+
[BATCH_SIZE_PER_KAFKA_PARTITION=''],
|
|
167
169
|
[MAX_SKIP_BATCH_COUNT_ON_ERROR=''],
|
|
168
170
|
[COPY_JOB_HINT='']
|
|
169
171
|
```
|
|
170
172
|
|
|
171
173
|
Examples:
|
|
172
|
-
```
|
|
173
|
-
-- Modify
|
|
174
|
-
ALTER PIPE pipe_name SET VIRTUAL_CLUSTER = '
|
|
175
|
-
|
|
174
|
+
```
|
|
175
|
+
-- Modify the Virtual Cluster
|
|
176
|
+
ALTER PIPE pipe_name SET VIRTUAL_CLUSTER = 'DEFAULT'
|
|
176
177
|
-- Set COPY_JOB_HINT
|
|
177
|
-
ALTER PIPE pipe_name SET
|
|
178
|
+
ALTER PIPE pipe_name SET COPY_JOB_HINT='{"cz.mapper.kafka.message.size": "2000000"}'
|
|
179
|
+
|
|
178
180
|
```
|
|
179
181
|
|
|
180
|
-
**
|
|
181
|
-
- Modifying the COPY statement logic is not supported. If
|
|
182
|
-
- When modifying the `COPY_JOB_HINT` of a Pipe, the new settings will overwrite existing hints. If your Pipe already has hints
|
|
182
|
+
**Notes**
|
|
183
|
+
- Modifying the COPY statement logic is not supported. If you need to modify it, delete the Pipe and recreate it.
|
|
184
|
+
- When modifying the `COPY_JOB_HINT` of a Pipe, the new setting replaces all existing hints rather than merging with them. If your Pipe already has hints such as `{"cz.sql.split.kafka.strategy":"size"}`, you must include every hint you still need together with the new ones; any hint you omit will be lost. Separate multiple parameters with commas.
|
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
# Continuous Data
|
|
1
|
+
# Continuous Data Collection from Kafka Using Pipe
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
Pipe is the **continuous data ingestion** solution provided by the Lakehouse, designed to automatically and continuously import data from Kafka into Lakehouse tables. Pipe creates a persistent consumer group, maintains the consumption position, and runs continuously according to the configured scheduling strategy.
|
|
6
|
+
|
|
7
|
+
A Kafka Pipe is like a continuously running consumer group. You only need to define the consumption logic, and it automatically pulls data from the Topic and writes it to a table — no manual triggering or Cron configuration required.
|
|
2
8
|
|
|
3
9
|
## Kafka Pipe Syntax
|
|
4
10
|
|
|
@@ -6,34 +12,73 @@
|
|
|
6
12
|
-- Syntax for creating a Pipe from Kafka
|
|
7
13
|
CREATE PIPE [ IF NOT EXISTS ] <pipe_name>
|
|
8
14
|
VIRTUAL_CLUSTER = 'virtual_cluster_name'
|
|
15
|
+
[INITIAL_DELAY_IN_SECONDS='']
|
|
9
16
|
[BATCH_INTERVAL_IN_SECONDS='']
|
|
10
|
-
|
|
17
|
+
[BATCH_SIZE_PER_KAFKA_PARTITION='']
|
|
11
18
|
[MAX_SKIP_BATCH_COUNT_ON_ERROR='']
|
|
12
19
|
[RESET_KAFKA_GROUP_OFFSETS='']
|
|
13
20
|
[COPY_JOB_HINT='']
|
|
14
21
|
AS <copy_statement>;
|
|
15
22
|
```
|
|
16
23
|
|
|
17
|
-
* `<pipe_name>`: The name of the Pipe object
|
|
18
|
-
* `VIRTUAL_CLUSTER`:
|
|
19
|
-
* `
|
|
20
|
-
* `
|
|
21
|
-
* `
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
* `<pipe_name>`: The name of the Pipe object, used for management and monitoring.
|
|
25
|
+
* `VIRTUAL_CLUSTER`: Specifies the name of the Virtual Cluster to execute Pipe tasks.
|
|
26
|
+
* `INITIAL_DELAY_IN_SECONDS`: Initial job scheduling delay (optional, default 0 seconds).
|
|
27
|
+
* `BATCH_INTERVAL_IN_SECONDS`: (Optional) Controls how long to accumulate data per batch before writing — a shorter interval means fresher data, a longer interval means fewer, more efficient writes. The default of 60 seconds works for most scenarios.
|
|
28
|
+
* `BATCH_SIZE_PER_KAFKA_PARTITION`: (Optional) Batch size per Kafka partition, default 500,000 records.
|
|
29
|
+
* `MAX_SKIP_BATCH_COUNT_ON_ERROR`: (Optional) Maximum number of batches to skip on error, default 30.
|
|
30
|
+
* `RESET_KAFKA_GROUP_OFFSETS`: (Optional) Controls where the Pipe starts consuming Kafka data when it starts. Only settable at startup. If not set and the consumer group has no historical position, Kafka's [auto.offset.reset](https://kafka.apache.org/documentation/#consumerconfigs_auto.offset.reset) configuration is used (default `latest`). Supported values:
|
|
31
|
+
* `none`: No action; uses [auto.offset.reset](https://kafka.apache.org/documentation/#consumerconfigs_auto.offset.reset)
|
|
32
|
+
* `valid`: Checks if the current group offset is expired and resets expired partition offsets to the current earliest
|
|
33
|
+
* `earliest`: Resets to the current earliest
|
|
34
|
+
* `latest`: Resets to the current latest
|
|
35
|
+
* `${TIMESTAMP_MILLISECONDS}`: Resets to the offset corresponding to the millisecond timestamp, e.g., `1737789688000` (2025-01-25 15:21:28 UTC+8)
|
|
36
|
+
|
|
37
|
+
## Using READ\_KAFKA in a Pipe
|
|
38
|
+
|
|
39
|
+
For temporary exploration, you can use the READ_KAFKA function directly (see [READ_KAFKA Function](<sql_functions/table_functions/read_kafka.md>)). When using `READ_KAFKA` in a Pipe's COPY statement, the following **important differences** apply:
|
|
40
|
+
|
|
41
|
+
### Parameter Passing Rules
|
|
42
|
+
|
|
43
|
+
```sql
|
|
44
|
+
-- READ_KAFKA syntax in a Pipe
|
|
45
|
+
read_kafka (
|
|
46
|
+
'bootstrap_servers', -- Required: Kafka cluster address in host:port format, multiple brokers separated by commas — 2-3 broker addresses are sufficient, no need to list all nodes
|
|
47
|
+
'topic', -- Required: Topic name — one Pipe corresponds to one Topic; create multiple Pipes for multiple Topics
|
|
48
|
+
'', -- Required: Topic pattern (not yet supported, leave empty string)
|
|
49
|
+
'group_id', -- Required: Persistent consumer group ID — use a meaningful name (e.g., pipe_orders_group); different Pipes for the same Topic must use different group_ids
|
|
50
|
+
'', -- Leave empty: start position is managed automatically by Pipe (when using READ_KAFKA standalone, fill starting_offsets here)
|
|
51
|
+
'', -- Leave empty: end position managed automatically by Pipe
|
|
52
|
+
'', -- Leave empty: start timestamp managed automatically by Pipe
|
|
53
|
+
'', -- Leave empty: end timestamp managed automatically by Pipe
|
|
54
|
+
'raw', -- Key format
|
|
55
|
+
'raw', -- Value format
|
|
56
|
+
0, -- Max error count
|
|
57
|
+
map() -- Kafka config parameters — fill in SSL, SASL and other auth params here when needed, e.g., map('security.protocol','SASL_SSL',...)
|
|
58
|
+
)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Key Differences
|
|
62
|
+
|
|
63
|
+
| Feature | READ\_KAFKA Function (standalone) | READ\_KAFKA (in a Pipe) |
|
|
64
|
+
| ------ | ------------------------ | --------------------- |
|
|
65
|
+
| Consumer group | Temporary, destroyed after execution | Persistent, maintains consumption position |
|
|
66
|
+
| Position management | Manually specify starting\_offsets etc. | Managed automatically by Pipe; position parameters must be left empty |
|
|
67
|
+
| Execution mode | One-time query | Continuously scheduled |
|
|
68
|
+
| Default start position | earliest (explore historical data) | latest (process new data) |
|
|
69
|
+
|
|
70
|
+
### Best Practices
|
|
71
|
+
|
|
72
|
+
See [Efficiently Ingesting Kafka Data with Pipe](<pipe-kafka-bestpractice-1.md>)
|
|
28
73
|
|
|
29
74
|
## Usage Example
|
|
30
75
|
|
|
31
76
|
```SQL
|
|
32
|
-
/*Use Lakehouse Pipe task object to continuously import Kafka data into
|
|
77
|
+
/*Use a Lakehouse Pipe task object to continuously import Kafka data into a target table*/
|
|
33
78
|
---Step01: Create the target table for Kafka writes
|
|
34
79
|
create table kafka_raw(value string);
|
|
35
80
|
|
|
36
|
-
---Step02: Create PIPE task to read from Kafka and write to the target table
|
|
81
|
+
---Step02: Create a PIPE task to read from Kafka and write to the target table
|
|
37
82
|
CREATE PIPE load_kafka01
|
|
38
83
|
VIRTUAL_CLUSTER = 'DEFAULT'
|
|
39
84
|
BATCH_INTERVAL_IN_SECONDS = '10'
|
|
@@ -48,12 +93,12 @@ FROM (
|
|
|
48
93
|
'test',-- topic name
|
|
49
94
|
'', -- topic prefix not supported yet
|
|
50
95
|
'pipe_kafka_group',-- group id
|
|
51
|
-
'',--
|
|
52
|
-
'',--
|
|
53
|
-
'',--
|
|
54
|
-
'',--
|
|
55
|
-
'raw',-- format
|
|
56
|
-
'raw',-- format
|
|
96
|
+
'',-- offset-related parameter, leave empty in pipe ddl
|
|
97
|
+
'',-- offset-related parameter, leave empty in pipe ddl
|
|
98
|
+
'',-- offset-related parameter, leave empty in pipe ddl
|
|
99
|
+
'',-- offset-related parameter, leave empty in pipe ddl
|
|
100
|
+
'raw',-- key format, currently only supports binary
|
|
101
|
+
'raw',-- value format, currently only supports binary
|
|
57
102
|
0,
|
|
58
103
|
map()
|
|
59
104
|
)
|
|
@@ -88,72 +133,18 @@ SELECT * FROM kafka_raw LIMIT 100;
|
|
|
88
133
|
DROP PIPE load_kafka01;
|
|
89
134
|
```
|
|
90
135
|
|
|
91
|
-
## Function: read\_kafka
|
|
92
|
-
|
|
93
|
-
> Note: This function is currently in preview release
|
|
94
|
-
|
|
95
|
-
## Function Description
|
|
96
|
-
|
|
97
|
-
Read data from an Apache Kafka cluster and return the data in tabular form.
|
|
98
|
-
|
|
99
|
-
## Function Syntax
|
|
100
|
-
|
|
101
|
-
```SQL
|
|
102
|
-
read_kafka (
|
|
103
|
-
<bootstrapServers>,
|
|
104
|
-
<topic>,
|
|
105
|
-
<topic_prefix>,
|
|
106
|
-
<group_id>,
|
|
107
|
-
<STARTING_OFFSETS>,
|
|
108
|
-
<ENDING_OFFSETS>,
|
|
109
|
-
<STARTING_OFFSETS_TIMESTAMP>,
|
|
110
|
-
<ENDING_OFFSETS_TIMESTAMP>,
|
|
111
|
-
<KEY_FORMAT>,
|
|
112
|
-
<VALUE_FORMAT>,
|
|
113
|
-
<MAX_ERROR_NUMBER>,
|
|
114
|
-
<kafka_parameters>
|
|
115
|
-
)
|
|
116
|
-
```
|
|
117
|
-
|
|
118
|
-
## Parameter Description
|
|
119
|
-
|
|
120
|
-
* bootstrap: Comma-separated Kafka broker server addresses, such as `1.2.3.1:9092,1.2.3.2:9092`.
|
|
121
|
-
* topic: Kafka topic name, multiple topics separated by commas, such as `topicA,topicB`.
|
|
122
|
-
* topic\_pattern: Topic regex, not supported yet, leave it empty by default. For example: ''.
|
|
123
|
-
* group\_id: Kafka consumer group ID.
|
|
124
|
-
* STARTING\_OFFSETS: Specifies the starting offset to read from, default is `latest`. This parameter does not need to be passed in the pipe.
|
|
125
|
-
* ENDING\_OFFSETS: Specifies the ending offset, default is `latest`. This parameter does not need to be passed in the pipe.
|
|
126
|
-
* STARTING\_OFFSETS\_TIMESTAMP: Specifies the timestamp for the starting offset. This parameter does not need to be passed in the pipe.
|
|
127
|
-
* ENDING\_OFFSETS\_TIMESTAMP: Specifies the timestamp for the ending offset. This parameter does not need to be passed in the pipe.
|
|
128
|
-
* KEY\_FORMAT: Specifies the format of the key to read, case-insensitive STRING type. Currently, only raw format is supported.
|
|
129
|
-
* VALUE\_FORMAT: Specifies the format of the value to read, case-insensitive STRING type. Currently, only raw format is supported.
|
|
130
|
-
* MAX\_ERROR\_NUMBER: The maximum number of allowed error rows within the reading window. Must be greater than or equal to 0. The default is 0, which means no error rows are allowed, with a range of 0-100000.
|
|
131
|
-
* kafka\_parameters: Parameters to be passed to Kafka, prefixed with kafka., directly using Kafka's parameters. These options can be found in Kafka. The format is like MAP('kafka.security.protocol', 'PLAINTEXT', 'kafka.auto.offset.reset', 'latest'). For values, refer to the [Kafka documentation](https://kafka.apache.org/documentation/#consumerconfigs).
|
|
132
|
-
|
|
133
|
-
## Return Values
|
|
134
|
-
|
|
135
|
-
| Field | Meaning | Type |
|
|
136
|
-
| --------------- | ---------------------------- | -------------------- |
|
|
137
|
-
| topic | Kafka topic name | STRING |
|
|
138
|
-
| partition | Data partition ID | INT |
|
|
139
|
-
| offset | Offset in Kafka partition | BIGINT |
|
|
140
|
-
| timestamp | Kafka message timestamp | TIMESTAMP\_LTZ |
|
|
141
|
-
| timestamp\_type | Kafka message timestamp type | STRING |
|
|
142
|
-
| headers | Kafka message headers | MAP\<STRING, BINARY> |
|
|
143
|
-
| key | Kafka key value | BINARY |
|
|
144
|
-
| value | Kafka value | BINARY |
|
|
145
136
|
|
|
146
137
|
|
|
147
138
|
## Status Monitoring and Management
|
|
148
139
|
|
|
149
|
-
###
|
|
140
|
+
### Check Kafka Consumption Latency
|
|
150
141
|
|
|
151
|
-
Use the `DESC PIPE` command.
|
|
152
|
-
- `lastConsumeTimestamp`: The last consumed offset
|
|
153
|
-
- `offsetLag`: The backlog of Kafka data
|
|
154
|
-
- `timeLag`: Consumption latency, calculated as the current time minus the last consumed offset.
|
|
142
|
+
Use the `DESC PIPE` command. The JSON string in `pipe_latency` contains the following fields:
|
|
143
|
+
- `lastConsumeTimestamp`: The timestamp of the last consumed offset
|
|
144
|
+
- `offsetLag`: The backlog of Kafka data
|
|
145
|
+
- `timeLag`: Consumption latency, calculated as the current time minus the timestamp of the last consumed offset. The value is -1 when Kafka consumption is abnormal.
|
|
155
146
|
|
|
156
|
-
|
|
147
|
+
````
|
|
157
148
|
DESC PIPE EXTENDED kafka_pipe_stream
|
|
158
149
|
+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
|
159
150
|
| info_name | info_value |
|
|
@@ -164,54 +155,58 @@ DESC PIPE EXTENDED kafka_pipe_stream
|
|
|
164
155
|
| last_modified_time | 2025-03-05 10:40:55.405 |
|
|
165
156
|
| comment | |
|
|
166
157
|
| properties | ((virtual_cluster,test_alter)) |
|
|
167
|
-
| copy_statement | COPY INTO TABLE qingyun.pipe_schema.
|
|
158
|
+
| copy_statement | COPY INTO TABLE qingyun.pipe_schema.kafka_sink_table_1 FROM (SELECT `current_timestamp`() AS ```current_timestamp``()`, CAST(kafka_table_stream_pipe1.`value` AS string) AS `value` |
|
|
168
159
|
| pipe_status | RUNNING |
|
|
169
|
-
| output_name | xxxxxxx.pipe_schema.
|
|
160
|
+
| output_name | xxxxxxx.pipe_schema.kafka_sink_table_1 |
|
|
170
161
|
| input_name | kafka_table_stream:xxxxxxx.pipe_schema.kafka_table_stream_pipe1 |
|
|
171
162
|
| invalid_reason | |
|
|
172
163
|
| pipe_latency | {"kafka":{"lags":{"0":0,"1":0,"2":0,"3":0},"lastConsumeTimestamp":-1,"offsetLag":0,"timeLag":-1}} |
|
|
173
164
|
+--------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|
|
174
|
-
```
|
|
175
165
|
|
|
176
|
-
|
|
166
|
+
````
|
|
177
167
|
|
|
178
|
-
|
|
168
|
+
### View Pipe Execution History
|
|
179
169
|
|
|
180
|
-
|
|
170
|
+
Since each Pipe execution is a COPY operation, you can view all operations in the job history. Filter by `query_tag` in the [Job History](web-job-history.md). All Pipe COPY jobs are tagged in the format `pipe.workspace_name.schema_name.pipe_name` for easy tracking.
|
|
181
171
|
|
|
182
|
-
|
|
183
|
-
```sql
|
|
184
|
-
ALTER PIPE pipe_name SET PIPE_EXECUTION_PAUSED = true;
|
|
185
|
-
```
|
|
172
|
+
### Stop and Start a Pipe
|
|
186
173
|
|
|
187
|
-
|
|
188
|
-
```sql
|
|
189
|
-
ALTER PIPE pipe_name SET PIPE_EXECUTION_PAUSED = false;
|
|
190
|
-
```
|
|
174
|
+
* Pause a Pipe:
|
|
191
175
|
|
|
192
|
-
|
|
176
|
+
```
|
|
177
|
+
ALTER PIPE pipe_name SET PIPE_EXECUTION_PAUSED = true;
|
|
178
|
+
```
|
|
193
179
|
|
|
194
|
-
|
|
180
|
+
* Resume a Pipe:
|
|
195
181
|
|
|
196
|
-
```
|
|
182
|
+
```
|
|
183
|
+
ALTER PIPE pipe_name SET PIPE_EXECUTION_PAUSED = false;
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Modify Pipe Properties
|
|
187
|
+
|
|
188
|
+
You can modify Pipe properties one at a time. If multiple properties need to be changed, run the `ALTER` command multiple times. Below are the modifiable properties and their syntax:
|
|
189
|
+
|
|
190
|
+
```SQL
|
|
197
191
|
ALTER PIPE pipe_name SET
|
|
198
192
|
[VIRTUAL_CLUSTER = 'virtual_cluster_name'],
|
|
199
|
-
|
|
200
|
-
[
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
[COPY_JOB_HINT='']
|
|
193
|
+
[BATCH_INTERVAL_IN_SECONDS=''],
|
|
194
|
+
[BATCH_SIZE_PER_KAFKA_PARTITION=''],
|
|
195
|
+
[MAX_SKIP_BATCH_COUNT_ON_ERROR=''],
|
|
196
|
+
[COPY_JOB_HINT='']
|
|
204
197
|
```
|
|
205
198
|
|
|
206
199
|
Examples:
|
|
207
|
-
```sql
|
|
208
|
-
-- Modify the compute cluster
|
|
209
|
-
ALTER PIPE pipe_name SET VIRTUAL_CLUSTER = 'default';
|
|
210
200
|
|
|
201
|
+
```
|
|
202
|
+
-- Modify the Virtual Cluster
|
|
203
|
+
ALTER PIPE pipe_name SET VIRTUAL_CLUSTER = 'DEFAULT'
|
|
211
204
|
-- Set COPY_JOB_HINT
|
|
212
|
-
ALTER PIPE pipe_name SET
|
|
205
|
+
ALTER PIPE pipe_name SET COPY_JOB_HINT='{"cz.mapper.kafka.message.size": "2000000"}'
|
|
206
|
+
|
|
213
207
|
```
|
|
214
208
|
|
|
215
|
-
**
|
|
216
|
-
|
|
217
|
-
|
|
209
|
+
**Notes**
|
|
210
|
+
|
|
211
|
+
* Modifying the COPY statement logic is not supported. If you need to modify it, delete the Pipe and recreate it.
|
|
212
|
+
* When modifying the `COPY_JOB_HINT` of a Pipe, the new settings will overwrite all existing hints. If your Pipe already has hints such as `{"cz.sql.split.kafka.strategy":"size"}`, you must include all required hints together when setting new ones; otherwise any existing hint you omit will be lost. Separate multiple parameters with commas.
|