@clickzetta/cz-cli-darwin-arm64 0.5.16 → 0.5.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cz-cli +0 -0
- package/bin/skills/lakehouse-doc-en/SKILL.md +6 -11
- package/bin/skills/lakehouse-doc-en/references/AIGateway.md +58 -13
- package/bin/skills/lakehouse-doc-en/references/Computation.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/DataSource_Amazon_DocumentDB.md +3 -1
- package/bin/skills/lakehouse-doc-en/references/Foreach.md +14 -14
- package/bin/skills/lakehouse-doc-en/references/JDBC-Driver.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/LakehouseAI-overview.md +21 -8
- package/bin/skills/lakehouse-doc-en/references/LakehouseDataGPT-tour.md +4 -9
- package/bin/skills/lakehouse-doc-en/references/LakehouseStudio-tour.md +14 -19
- package/bin/skills/lakehouse-doc-en/references/Lakehouse_Zilliz_MakeDataReadyforBIandAI.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/Logstash.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/Migrate_Spark_DataEngineeringBestPractices_Project_to_Lakehouse.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/Notebook.md +17 -17
- package/bin/skills/lakehouse-doc-en/references/RemoteFunction-as-udf.md +14 -14
- package/bin/skills/lakehouse-doc-en/references/SQL_External_Catalog_Guide.md +1 -9
- package/bin/skills/lakehouse-doc-en/references/SUMMARY.md +59 -29
- package/bin/skills/lakehouse-doc-en/references/WINDOWFUNCTION.md +99 -57
- package/bin/skills/lakehouse-doc-en/references/Zettapark_Data_Engineering_Demo.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/access-control-configuration.md +1 -8
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-2-5-1.0.md +16 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-3-29-1.0.2.md +14 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-3-8-1.0.1.md +16 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-4-28-1.1.md +29 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-12-1.1.1.md +18 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-15-1.2.md +9 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-21-1.3.md +9 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-5-28-1.4.md +10 -0
- package/bin/skills/lakehouse-doc-en/references/aigw-2026-6-3-1.5.md +9 -0
- package/bin/skills/lakehouse-doc-en/references/alicloud-arn-externalid.md +0 -5
- package/bin/skills/lakehouse-doc-en/references/answer-accuracy-improve.md +120 -103
- package/bin/skills/lakehouse-doc-en/references/application-list.md +1 -3
- package/bin/skills/lakehouse-doc-en/references/approval-list.md +16 -17
- package/bin/skills/lakehouse-doc-en/references/batch-load-parquet-file-into-lakehouse.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/batch_sync.md +9 -9
- package/bin/skills/lakehouse-doc-en/references/batch_sync_Sop.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/batchloadparquetfileintoLakehouse.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/bulkloadv1-python-sdk.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/chart-auto-refresh-guide.md +12 -6
- package/bin/skills/lakehouse-doc-en/references/clickzetta-sample-data.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/code_approval.md +1 -5
- package/bin/skills/lakehouse-doc-en/references/composite_task.md +31 -42
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_environment_and_data_generate.md +6 -9
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_javasdk_bulkload_realtime.md +4 -10
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_kafka_realtime_sync.md +1 -10
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_local_file_into_table_by_studio.md +0 -6
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_batchload_public_network.md +0 -5
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_python_node.md +2 -7
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_realtime_cdc_public_network.md +13 -18
- package/bin/skills/lakehouse-doc-en/references/comprehensive_guide_to_ingesting_studio_sql_insert.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/concepts.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/config-datasource.md +5 -7
- package/bin/skills/lakehouse-doc-en/references/connect-with-cli.md +116 -72
- package/bin/skills/lakehouse-doc-en/references/connect-with-cz-cli.md +151 -0
- package/bin/skills/lakehouse-doc-en/references/continue-job.md +9 -17
- package/bin/skills/lakehouse-doc-en/references/create-api-connection.md +315 -286
- package/bin/skills/lakehouse-doc-en/references/create-catalog-connection.md +1 -0
- package/bin/skills/lakehouse-doc-en/references/create-dynamic-table.md +4 -4
- package/bin/skills/lakehouse-doc-en/references/create-external-catalog.md +85 -22
- package/bin/skills/lakehouse-doc-en/references/create-table-ddl.md +45 -0
- package/bin/skills/lakehouse-doc-en/references/creating_alicloud_privatelinkendpoint.md +4 -6
- package/bin/skills/lakehouse-doc-en/references/creating_alicloud_privatelinkservice.md +4 -7
- package/bin/skills/lakehouse-doc-en/references/creating_tencentcloud_privatelinkendpoint.md +2 -7
- package/bin/skills/lakehouse-doc-en/references/creating_tencentcloud_privatelinkservice.md +1 -5
- package/bin/skills/lakehouse-doc-en/references/cz-cli-agent.md +15 -10
- package/bin/skills/lakehouse-doc-en/references/cz-cli-datasource.md +0 -8
- package/bin/skills/lakehouse-doc-en/references/cz-cli-sql.md +2 -45
- package/bin/skills/lakehouse-doc-en/references/cz-cli.md +53 -42
- package/bin/skills/lakehouse-doc-en/references/dashboard-version-management-guide.md +12 -4
- package/bin/skills/lakehouse-doc-en/references/data-integration-intro.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/data-integration.md +29 -27
- package/bin/skills/lakehouse-doc-en/references/data-load-summary.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/data-quality.md +25 -25
- package/bin/skills/lakehouse-doc-en/references/data-sharing.md +31 -54
- package/bin/skills/lakehouse-doc-en/references/data-sources.md +45 -45
- package/bin/skills/lakehouse-doc-en/references/data_catalog.md +23 -25
- package/bin/skills/lakehouse-doc-en/references/data_privacy.md +5 -2
- package/bin/skills/lakehouse-doc-en/references/data_sharing_between_accounts_guide.md +0 -4
- package/bin/skills/lakehouse-doc-en/references/data_visualization.md +4 -15
- package/bin/skills/lakehouse-doc-en/references/dataagent.md +39 -7
- package/bin/skills/lakehouse-doc-en/references/databricks-delta-to-lakehouse-migration.md +168 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-dlt-to-lakehouse-migration.md +331 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-external-catalog-practice.md +367 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-jobs-to-studio-migration.md +199 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-notebook-to-studio-migration.md +350 -0
- package/bin/skills/lakehouse-doc-en/references/databricks-uc-governance-to-lakehouse-migration.md +327 -0
- package/bin/skills/lakehouse-doc-en/references/datagpt-model-config.md +34 -0
- package/bin/skills/lakehouse-doc-en/references/datagpt_data_source.md +50 -37
- package/bin/skills/lakehouse-doc-en/references/datagpt_introduction.md +55 -79
- package/bin/skills/lakehouse-doc-en/references/datagpt_quickstart.md +50 -64
- package/bin/skills/lakehouse-doc-en/references/datalake-acceleration.md +75 -2
- package/bin/skills/lakehouse-doc-en/references/dbt-databricks-to-clickzetta-migration.md +242 -0
- package/bin/skills/lakehouse-doc-en/references/dynamic-mask.md +30 -30
- package/bin/skills/lakehouse-doc-en/references/dynamic-table-bestpractice.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/dynamic-table-introduce.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/dynamic_table_summary.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/eco_integration/streamlit.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/eco_integration/superset.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/ecosystem-all.md +1 -3
- package/bin/skills/lakehouse-doc-en/references/ecosystem.md +145 -0
- package/bin/skills/lakehouse-doc-en/references/external-catalog-summary.md +33 -38
- package/bin/skills/lakehouse-doc-en/references/external-function-combo-practice.md +466 -0
- package/bin/skills/lakehouse-doc-en/references/f6fc6447ee.md +7 -9
- package/bin/skills/lakehouse-doc-en/references/federation-query.md +56 -6
- package/bin/skills/lakehouse-doc-en/references/finebi-mysql.md +2 -0
- package/bin/skills/lakehouse-doc-en/references/get-started-with-sample-data.md +10 -11
- package/bin/skills/lakehouse-doc-en/references/gitfolder.md +2 -3
- package/bin/skills/lakehouse-doc-en/references/grant-privileges.md +2 -0
- package/bin/skills/lakehouse-doc-en/references/iceberg-rest-catalog-databricks.md +166 -0
- package/bin/skills/lakehouse-doc-en/references/ide.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/if_else_task.md +59 -57
- package/bin/skills/lakehouse-doc-en/references/input_output.md +10 -7
- package/bin/skills/lakehouse-doc-en/references/jobprofile-bestpractices.md +60 -64
- package/bin/skills/lakehouse-doc-en/references/kafka-connection.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/key-concepts.md +146 -117
- package/bin/skills/lakehouse-doc-en/references/lakehouse-ai-gateway-cz-cli.md +317 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-ai-sql-analysis.md +345 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-dqc-guide.md +300 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-medallion-sql-dt-guide.md +543 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-multi-cloud-acceleration.md +274 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-multimodal-ai-pipeline.md +198 -0
- package/bin/skills/lakehouse-doc-en/references/lakehouse-quick-experience_guide.md +49 -52
- package/bin/skills/lakehouse-doc-en/references/lakehouse-volume-pipe-acceleration-guide.md +380 -0
- package/bin/skills/lakehouse-doc-en/references/langchain-plug-installation.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/management.md +4 -9
- package/bin/skills/lakehouse-doc-en/references/medallion-lakehouse-from-scratch.md +2 -1
- package/bin/skills/lakehouse-doc-en/references/metrics_answer_build.md +58 -21
- package/bin/skills/lakehouse-doc-en/references/migrate-spark-data-engineering-best-practices-to-lakehouse.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/mindsdb.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/monitoring_and_alerting.md +65 -60
- package/bin/skills/lakehouse-doc-en/references/monitoring_item_specification.md +33 -33
- package/bin/skills/lakehouse-doc-en/references/multitable_batch_sync.md +16 -16
- package/bin/skills/lakehouse-doc-en/references/multitable_realtime_sync.md +65 -72
- package/bin/skills/lakehouse-doc-en/references/multitable_realtime_sync_sop.md +54 -52
- package/bin/skills/lakehouse-doc-en/references/navicat-mysql.md +2 -0
- package/bin/skills/lakehouse-doc-en/references/om-dynamic-table.md +71 -66
- package/bin/skills/lakehouse-doc-en/references/om-vcluster.md +2 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-create-session.md +79 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-generate-auth-token.md +63 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-overview.md +96 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-quick-start.md +286 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-response-guide.md +264 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-safe-question-poll.md +201 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-text2insight-query.md +99 -0
- package/bin/skills/lakehouse-doc-en/references/open-api-text2insight-stop.md +74 -0
- package/bin/skills/lakehouse-doc-en/references/overview.md +6 -7
- package/bin/skills/lakehouse-doc-en/references/permission-application.md +5 -5
- package/bin/skills/lakehouse-doc-en/references/pipe-introduction.md +1 -0
- package/bin/skills/lakehouse-doc-en/references/pipe-kafka-table-stream.md +72 -70
- package/bin/skills/lakehouse-doc-en/references/pipe-kafka.md +105 -110
- package/bin/skills/lakehouse-doc-en/references/pipe-overview.md +40 -40
- package/bin/skills/lakehouse-doc-en/references/pipe-storage-object.md +43 -48
- package/bin/skills/lakehouse-doc-en/references/pipe-summary.md +14 -4
- package/bin/skills/lakehouse-doc-en/references/pipe-syntax.md +58 -151
- package/bin/skills/lakehouse-doc-en/references/practice_python_task.md +4 -4
- package/bin/skills/lakehouse-doc-en/references/pricing-ai-gateway.md +181 -0
- package/bin/skills/lakehouse-doc-en/references/pricing-lakehouse.md +316 -0
- package/bin/skills/lakehouse-doc-en/references/pricing.md +44 -288
- package/bin/skills/lakehouse-doc-en/references/private-link-general.md +0 -2
- package/bin/skills/lakehouse-doc-en/references/pyspark-to-zettapark-migration-f1.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/python-igs.md +7 -3
- package/bin/skills/lakehouse-doc-en/references/python-sample-put-github-rt-events.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/python-task.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/python_reference/connector.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/python_reference/connector_advanced.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/python_reference/connector_examples.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/python_sdk_guide.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/python_shell_datasource.md +11 -9
- package/bin/skills/lakehouse-doc-en/references/quick_start_batch_sync_data.md +9 -18
- package/bin/skills/lakehouse-doc-en/references/quick_start_bi_analysis.md +8 -25
- package/bin/skills/lakehouse-doc-en/references/quick_start_create_workspace.md +4 -6
- package/bin/skills/lakehouse-doc-en/references/quick_start_data_quality.md +8 -8
- package/bin/skills/lakehouse-doc-en/references/quick_start_etl.md +16 -20
- package/bin/skills/lakehouse-doc-en/references/quick_start_monitoring_and_alerting.md +10 -18
- package/bin/skills/lakehouse-doc-en/references/quick_start_sql_query.md +7 -10
- package/bin/skills/lakehouse-doc-en/references/quick_start_upload_data.md +5 -7
- package/bin/skills/lakehouse-doc-en/references/quick_start_user_management.md +8 -8
- package/bin/skills/lakehouse-doc-en/references/quick_start_workspace.md +0 -5
- package/bin/skills/lakehouse-doc-en/references/quick_start_workspace_user.md +8 -8
- package/bin/skills/lakehouse-doc-en/references/quickstart.md +69 -56
- package/bin/skills/lakehouse-doc-en/references/quickstart_datashare_between_companies.md +0 -5
- package/bin/skills/lakehouse-doc-en/references/quickstart_envirment_for_team.md +0 -24
- package/bin/skills/lakehouse-doc-en/references/realtime-pipeline-selection-guide.md +1 -2
- package/bin/skills/lakehouse-doc-en/references/realtime-sales-dashboard-with-dynamic-table.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/realtime_sync.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/release-note-2026-05-19.md +5 -3
- package/bin/skills/lakehouse-doc-en/references/revoke-privileges.md +3 -1
- package/bin/skills/lakehouse-doc-en/references/roles.md +2 -3
- package/bin/skills/lakehouse-doc-en/references/row-filter.md +165 -0
- package/bin/skills/lakehouse-doc-en/references/row_level_permission.md +30 -19
- package/bin/skills/lakehouse-doc-en/references/scheduled_task.md +28 -21
- package/bin/skills/lakehouse-doc-en/references/security_overview.md +99 -21
- package/bin/skills/lakehouse-doc-en/references/set-command.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/setup.md +13 -15
- package/bin/skills/lakehouse-doc-en/references/show-grants.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/snowflake-dynamic-tables-to-lakehouse.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/spark-connector-summary.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/sql_functions/context_functions/current_vcluster.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/sso-configuration.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/streaming_pipeline_with_dynamic_table.md +0 -1
- package/bin/skills/lakehouse-doc-en/references/studio-incremental-sync-practice.md +27 -23
- package/bin/skills/lakehouse-doc-en/references/studio-shell-task.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/supported-cloud-platforms.md +32 -0
- package/bin/skills/lakehouse-doc-en/references/table_rendering.md +18 -12
- package/bin/skills/lakehouse-doc-en/references/task-develop.md +89 -91
- package/bin/skills/lakehouse-doc-en/references/task_development.md +19 -17
- package/bin/skills/lakehouse-doc-en/references/task_group.md +16 -14
- package/bin/skills/lakehouse-doc-en/references/task_instance.md +21 -21
- package/bin/skills/lakehouse-doc-en/references/task_param.md +38 -35
- package/bin/skills/lakehouse-doc-en/references/task_param_reference.md +81 -79
- package/bin/skills/lakehouse-doc-en/references/task_scheduling_dependency.md +20 -21
- package/bin/skills/lakehouse-doc-en/references/tencentcloud_arn_and_externalid.md +1 -5
- package/bin/skills/lakehouse-doc-en/references/trial-account-quotas-and-limits.md +1 -3
- package/bin/skills/lakehouse-doc-en/references/tutorial_connect_to_lakehouse.md +69 -0
- package/bin/skills/lakehouse-doc-en/references/tutorials.md +4 -1
- package/bin/skills/lakehouse-doc-en/references/unique-key.md +167 -0
- package/bin/skills/lakehouse-doc-en/references/usageandbillingview.md +138 -0
- package/bin/skills/lakehouse-doc-en/references/use-dbt-dev.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/use-java-sdk-realtime-uploaddata.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/use-java-sdk-upload-data-local.md +3 -3
- package/bin/skills/lakehouse-doc-en/references/use-models.md +128 -0
- package/bin/skills/lakehouse-doc-en/references/use-mysql-client.md +81 -81
- package/bin/skills/lakehouse-doc-en/references/use-python-sdk-upload-data.md +10 -12
- package/bin/skills/lakehouse-doc-en/references/user-identification.md +2 -3
- package/bin/skills/lakehouse-doc-en/references/user_permission_grand_guide.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/using-udf-in-dynamic-table.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/vc_cache.md +18 -22
- package/bin/skills/lakehouse-doc-en/references/vcluster_size_description.md +33 -31
- package/bin/skills/lakehouse-doc-en/references/virtual-cluster.md +43 -45
- package/bin/skills/lakehouse-doc-en/references/web-job-history.md +94 -108
- package/bin/skills/lakehouse-doc-en/references/web_search.md +16 -7
- package/bin/skills/lakehouse-doc-en/references/zettapark-data-engineering-demo.md +1 -1
- package/bin/skills/lakehouse-doc-en/references/zettapark-dataframe-guide.md +144 -70
- package/bin/skills/lakehouse-doc-en/references/zettapark-dynamic-table-guide.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/zettapark-etl-guide.md +73 -33
- package/bin/skills/lakehouse-doc-en/references/zettapark-feature-engineering.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/zettapark-functions-guide.md +75 -46
- package/bin/skills/lakehouse-doc-en/references/zettapark-quick-start.md +2 -2
- package/bin/skills/lakehouse-doc-en/references/zettapark-stream-guide.md +4 -4
- package/bin/skills/lakehouse-doc-en/references/zettapark-volume-guide.md +93 -29
- package/package.json +1 -1
- package/bin/skills/lakehouse-doc-en/references/CLAUDE.md +0 -606
- package/bin/skills/lakehouse-doc-en/references/modelprice.md +0 -155
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
# Web Search
|
|
2
2
|
|
|
3
|
+
> \[Preview Release] This feature is currently in an invite-only preview release. Contact technical support if you need access.
|
|
4
|
+
|
|
3
5
|
## Feature Overview
|
|
4
6
|
|
|
5
|
-
Web Search adds internet search capability to Analytics Agent, enabling the
|
|
7
|
+
Web Search adds internet search capability to Analytics Agent, enabling the Agent to automatically invoke external search engines during data analysis to obtain real-time information, achieving combined analysis of "internal data + external knowledge." This is suitable for scenarios requiring data attribution analysis in conjunction with external events (such as weather, sports events, news, etc.).
|
|
6
8
|
|
|
7
|
-
##
|
|
9
|
+
## Usage
|
|
8
10
|
|
|
9
11
|
Simply ask questions directly in the conversation window; there is no need to manually select a tool. The Agent will automatically determine whether an internet search is needed based on the question content.
|
|
10
12
|
|
|
11
13
|
### Example
|
|
12
14
|
|
|
13
|
-
\> *Question: What caused the change in order volume between March 28
|
|
15
|
+
\> *Question: What caused the change in order volume between March 28–30, 2025?*
|
|
14
16
|
|
|
15
17
|
The Agent will automatically perform the following steps:
|
|
16
18
|
|
|
@@ -20,11 +22,18 @@ The Agent will automatically perform the following steps:
|
|
|
20
22
|
|
|
21
23
|
3\. Conduct multi-dimensional attribution analysis and output conclusions along with visualization charts
|
|
22
24
|
|
|
23
|
-
|
|
24
|
-

|
|
25
|
+

|
|
25
26
|
|
|
26
27
|
**Notes**
|
|
27
28
|
|
|
28
|
-
1\. Web search results are influenced by the content returned by the search engine; it is recommended to manually verify key conclusions
|
|
29
|
+
1\. Web search results are influenced by the content returned by the search engine; it is recommended to manually verify key conclusions.
|
|
30
|
+
|
|
31
|
+
2\. **The Web Search feature is currently in beta. To enable internet search capability, please contact the Singdata team**.
|
|
32
|
+
|
|
33
|
+
## Related Documentation
|
|
34
|
+
|
|
35
|
+
* [Improve Answer Accuracy](answer-accuracy-improve.md) — Further improve answer quality with a knowledge base and semantic layer
|
|
36
|
+
* [Scheduled Task](scheduled_task.md) — Automatically run data analysis on a schedule and push results
|
|
37
|
+
* [Conversational Data Analytics (Analytics Agent)](datagpt_introduction.md) — Return to the feature overview
|
|
29
38
|
|
|
30
|
-
|
|
39
|
+
^
|
|
@@ -85,7 +85,7 @@ XSMALL VCLUSTER ready
|
|
|
85
85
|
|
|
86
86
|
> ⚠️ **Note**: The vcluster\_size parameter for compute clusters supports both T-shirt sizes (XSMALL, SMALL, LARGE, etc.) and numeric values (1, 2, 4, 16, etc.) to provide a richer range of compute cluster specifications for different scenarios. For more information, see: [VCluster Size Specification Change Description](vcluster_size_description.md)
|
|
87
87
|
|
|
88
|
-
`config.json` file sample ([parameter description](
|
|
88
|
+
`config.json` file sample ([parameter description](jdbc-driver.md)):
|
|
89
89
|
|
|
90
90
|
```json
|
|
91
91
|
{
|
|
@@ -7,7 +7,7 @@ Zettapark is the Python DataFrame API for Singdata Lakehouse, providing a pandas
|
|
|
7
7
|
> 💡 **When to use what**:
|
|
8
8
|
> - Need DataFrame operations (pandas/PySpark-like) → Use Zettapark (this guide)
|
|
9
9
|
> - Need standard SQL execution or script automation → Use [Python Connector](python_reference/connector.md)
|
|
10
|
-
> - Need high-speed bulk writes (millions of rows) → Use [BulkLoad](java_reference/bulkload
|
|
10
|
+
> - Need high-speed bulk writes (millions of rows) → Use [BulkLoad](java_reference/bulkload.md)
|
|
11
11
|
|
|
12
12
|
---
|
|
13
13
|
|
|
@@ -27,11 +27,11 @@ from clickzetta.zettapark.session import Session
|
|
|
27
27
|
session = Session.builder.configs({
|
|
28
28
|
"username": "your_username",
|
|
29
29
|
"password": "your_password",
|
|
30
|
-
"service": "cn-shanghai-alicloud.api.
|
|
30
|
+
"service": "cn-shanghai-alicloud.api.clickzetta.com",
|
|
31
31
|
"instance": "your_instance",
|
|
32
32
|
"workspace": "your_workspace",
|
|
33
33
|
"schema": "public",
|
|
34
|
-
"vcluster": "
|
|
34
|
+
"vcluster": "DEFAULT"
|
|
35
35
|
}).create()
|
|
36
36
|
```
|
|
37
37
|
|
|
@@ -53,13 +53,16 @@ from clickzetta.zettapark.session import Session
|
|
|
53
53
|
data = [(1, "Alice", 1000.0), (2, "Bob", 2000.0), (3, "Carol", 500.0)]
|
|
54
54
|
df = session.create_dataframe(data, schema=["id", "name", "amount"])
|
|
55
55
|
df.show()
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
```Plain
|
|
59
|
+
+---+-----+------+
|
|
60
|
+
| id| name|amount|
|
|
61
|
+
+---+-----+------+
|
|
62
|
+
| 1|Alice| 1000|
|
|
63
|
+
| 2| Bob| 2000|
|
|
64
|
+
| 3|Carol| 500|
|
|
65
|
+
+---+-----+------+
|
|
63
66
|
```
|
|
64
67
|
|
|
65
68
|
### From an Existing Table
|
|
@@ -104,26 +107,47 @@ from clickzetta.zettapark import functions as F
|
|
|
104
107
|
|
|
105
108
|
data = [(1,"A",100.0),(2,"A",200.0),(3,"B",300.0),(4,"B",150.0)]
|
|
106
109
|
df = session.create_dataframe(data, schema=["id","category","amount"])
|
|
110
|
+
```
|
|
107
111
|
|
|
108
|
-
|
|
112
|
+
filter — filter rows
|
|
113
|
+
|
|
114
|
+
```python
|
|
109
115
|
df.filter(F.col("amount") > 150).show()
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
select — select columns
|
|
110
119
|
|
|
111
|
-
|
|
120
|
+
```python
|
|
112
121
|
df.select("category", "amount").show()
|
|
122
|
+
```
|
|
113
123
|
|
|
114
|
-
|
|
124
|
+
sort — sort rows
|
|
125
|
+
|
|
126
|
+
```python
|
|
115
127
|
df.sort("amount", ascending=False).show()
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
with\_column — add or replace a column
|
|
116
131
|
|
|
117
|
-
|
|
132
|
+
```python
|
|
118
133
|
df.with_column("amount_tax", F.col("amount") * 1.13).show()
|
|
134
|
+
```
|
|
119
135
|
|
|
120
|
-
|
|
136
|
+
with\_column\_renamed — rename a column
|
|
137
|
+
|
|
138
|
+
```python
|
|
121
139
|
df.with_column_renamed("amount", "price").show()
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
drop — drop a column
|
|
122
143
|
|
|
123
|
-
|
|
144
|
+
```python
|
|
124
145
|
df.drop("id").show()
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
limit — limit the number of rows
|
|
125
149
|
|
|
126
|
-
|
|
150
|
+
```python
|
|
127
151
|
df.limit(2).show()
|
|
128
152
|
```
|
|
129
153
|
|
|
@@ -131,8 +155,9 @@ df.limit(2).show()
|
|
|
131
155
|
|
|
132
156
|
## Aggregations
|
|
133
157
|
|
|
158
|
+
group\_by + agg
|
|
159
|
+
|
|
134
160
|
```python
|
|
135
|
-
# group_by + agg
|
|
136
161
|
result = df.group_by("category").agg(
|
|
137
162
|
F.sum("amount").alias("total"),
|
|
138
163
|
F.count("id").alias("cnt"),
|
|
@@ -141,12 +166,15 @@ result = df.group_by("category").agg(
|
|
|
141
166
|
F.min("amount").alias("min_amount")
|
|
142
167
|
)
|
|
143
168
|
result.show()
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
```Plain
|
|
172
|
+
+--------+-----+---+----------+----------+----------+
|
|
173
|
+
|category|total|cnt|avg_amount|max_amount|min_amount|
|
|
174
|
+
+--------+-----+---+----------+----------+----------+
|
|
175
|
+
| A| 300| 2| 150| 200| 100|
|
|
176
|
+
| B| 450| 2| 225| 300| 150|
|
|
177
|
+
+--------+-----+---+----------+----------+----------+
|
|
150
178
|
```
|
|
151
179
|
|
|
152
180
|
---
|
|
@@ -156,22 +184,34 @@ result.show()
|
|
|
156
184
|
```python
|
|
157
185
|
users = session.create_dataframe([(1,"Alice"),(2,"Bob"),(3,"Carol")], schema=["id","name"])
|
|
158
186
|
orders = session.create_dataframe([(1,500.0),(1,300.0),(2,800.0)], schema=["user_id","amount"])
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
inner join (default)
|
|
159
190
|
|
|
160
|
-
|
|
191
|
+
```python
|
|
161
192
|
users.join(orders, users["id"] == orders["user_id"]).show()
|
|
193
|
+
```
|
|
162
194
|
|
|
163
|
-
|
|
195
|
+
left join
|
|
196
|
+
|
|
197
|
+
```python
|
|
164
198
|
users.join(orders, users["id"] == orders["user_id"], "left").show()
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
```Plain
|
|
202
|
+
+---+-----+-------+------+
|
|
203
|
+
| id| name|user_id|amount|
|
|
204
|
+
+---+-----+-------+------+
|
|
205
|
+
| 1|Alice| 1| 300|
|
|
206
|
+
| 1|Alice| 1| 500|
|
|
207
|
+
| 2| Bob| 2| 800|
|
|
208
|
+
| 3|Carol| NULL| NULL|
|
|
209
|
+
+---+-----+-------+------+
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
cross join
|
|
213
|
+
|
|
214
|
+
```python
|
|
175
215
|
users.cross_join(orders).show()
|
|
176
216
|
```
|
|
177
217
|
|
|
@@ -195,24 +235,36 @@ df1.except_(df2).show() # difference (in df1 but not df2)
|
|
|
195
235
|
```python
|
|
196
236
|
data = [(1,"Alice",100.0),(2,None,200.0),(3,"Carol",None)]
|
|
197
237
|
df = session.create_dataframe(data, schema=["id","name","amount"])
|
|
238
|
+
```
|
|
198
239
|
|
|
199
|
-
|
|
240
|
+
Drop rows containing NULL
|
|
241
|
+
|
|
242
|
+
```python
|
|
200
243
|
df.dropna().show()
|
|
201
|
-
|
|
202
|
-
# | id| name|amount|
|
|
203
|
-
# +---+-----+------+
|
|
204
|
-
# | 1|Alice| 100|
|
|
205
|
-
# +---+-----+------+
|
|
244
|
+
```
|
|
206
245
|
|
|
207
|
-
|
|
246
|
+
```Plain
|
|
247
|
+
+---+-----+------+
|
|
248
|
+
| id| name|amount|
|
|
249
|
+
+---+-----+------+
|
|
250
|
+
| 1|Alice| 100|
|
|
251
|
+
+---+-----+------+
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Fill NULL values
|
|
255
|
+
|
|
256
|
+
```python
|
|
208
257
|
df.fillna({"name": "Unknown", "amount": 0.0}).show()
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
```Plain
|
|
261
|
+
+---+-------+------+
|
|
262
|
+
| id| name|amount|
|
|
263
|
+
+---+-------+------+
|
|
264
|
+
| 1| Alice| 100|
|
|
265
|
+
| 2|Unknown| 200|
|
|
266
|
+
| 3| Carol| 0|
|
|
267
|
+
+---+-------+------+
|
|
216
268
|
```
|
|
217
269
|
|
|
218
270
|
---
|
|
@@ -224,11 +276,17 @@ from clickzetta.zettapark.window import Window
|
|
|
224
276
|
|
|
225
277
|
data = [(1,"A",100),(2,"A",200),(3,"B",300),(4,"B",150),(5,"A",50)]
|
|
226
278
|
df = session.create_dataframe(data, schema=["id","category","amount"])
|
|
279
|
+
```
|
|
227
280
|
|
|
228
|
-
|
|
281
|
+
Rank within group
|
|
282
|
+
|
|
283
|
+
```python
|
|
229
284
|
w_rank = Window.partition_by("category").order_by(F.col("amount").desc())
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
Running sum within group
|
|
230
288
|
|
|
231
|
-
|
|
289
|
+
```python
|
|
232
290
|
w_sum = Window.partition_by("category").order_by("amount")
|
|
233
291
|
|
|
234
292
|
result = df \
|
|
@@ -236,15 +294,18 @@ result = df \
|
|
|
236
294
|
.with_column("running_total", F.sum("amount").over(w_sum))
|
|
237
295
|
|
|
238
296
|
result.show()
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
```Plain
|
|
300
|
+
+---+--------+------+----+-------------+
|
|
301
|
+
| id|category|amount|rank|running_total|
|
|
302
|
+
+---+--------+------+----+-------------+
|
|
303
|
+
| 5| A| 50| 3| 50|
|
|
304
|
+
| 1| A| 100| 2| 150|
|
|
305
|
+
| 2| A| 200| 1| 350|
|
|
306
|
+
| 4| B| 150| 2| 150|
|
|
307
|
+
| 3| B| 300| 1| 450|
|
|
308
|
+
+---+--------+------+----+-------------+
|
|
248
309
|
```
|
|
249
310
|
|
|
250
311
|
---
|
|
@@ -255,11 +316,17 @@ result.show()
|
|
|
255
316
|
|
|
256
317
|
```python
|
|
257
318
|
df = session.create_dataframe([(1,"Alice",100.0),(2,"Bob",200.0)], schema=["id","name","amount"])
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
Overwrite (creates the table if it doesn't exist)
|
|
258
322
|
|
|
259
|
-
|
|
323
|
+
```python
|
|
260
324
|
df.write.save_as_table("my_table", mode="overwrite")
|
|
325
|
+
```
|
|
261
326
|
|
|
262
|
-
|
|
327
|
+
Append
|
|
328
|
+
|
|
329
|
+
```python
|
|
263
330
|
df.write.save_as_table("my_table", mode="append")
|
|
264
331
|
```
|
|
265
332
|
|
|
@@ -286,8 +353,11 @@ print(pdf.head())
|
|
|
286
353
|
|
|
287
354
|
```python
|
|
288
355
|
df.filter(F.col("amount") > 100).create_or_replace_temp_view("high_value_orders")
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
Query the temporary view with SQL
|
|
289
359
|
|
|
290
|
-
|
|
360
|
+
```python
|
|
291
361
|
session.sql("SELECT * FROM high_value_orders").show()
|
|
292
362
|
```
|
|
293
363
|
|
|
@@ -299,8 +369,9 @@ df.filter(F.col("amount") > 100).create_or_replace_view("v_high_value_orders")
|
|
|
299
369
|
|
|
300
370
|
### Dynamic Table (auto incremental refresh)
|
|
301
371
|
|
|
372
|
+
Define transformation logic on a source table; the system auto-refreshes incrementally
|
|
373
|
+
|
|
302
374
|
```python
|
|
303
|
-
# Define transformation logic on a source table; the system auto-refreshes incrementally
|
|
304
375
|
source_df = session.table("raw_orders").filter(F.col("status") == "paid")
|
|
305
376
|
|
|
306
377
|
source_df.create_or_replace_dynamic_table(
|
|
@@ -323,11 +394,14 @@ df.filter(F.col("amount") > 150) \
|
|
|
323
394
|
.group_by("category") \
|
|
324
395
|
.agg(F.sum("amount").alias("total")) \
|
|
325
396
|
.explain()
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
Output:
|
|
326
400
|
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
401
|
+
```Plain
|
|
402
|
+
SELECT `category`, sum(`amount`) AS `total`
|
|
403
|
+
FROM ( SELECT ... WHERE (`amount` > CAST(150 AS bigint)))
|
|
404
|
+
GROUP BY `category`
|
|
331
405
|
```
|
|
332
406
|
|
|
333
407
|
---
|
|
@@ -339,4 +413,4 @@ df.filter(F.col("amount") > 150) \
|
|
|
339
413
|
| [Zettapark Quick Start](zettapark-quick-start.md) | Installation and basic examples |
|
|
340
414
|
| [Python Connector SDK](python_reference/connector.md) | Standard SQL execution interface |
|
|
341
415
|
| [Dynamic Table](dynamic-table.md) | Auto-incrementally refreshed data pipelines |
|
|
342
|
-
| [BulkLoad Batch Import](java_reference/bulkload
|
|
416
|
+
| [BulkLoad Batch Import](java_reference/bulkload.md) | High-speed writes for millions of rows |
|
|
@@ -13,11 +13,11 @@ from clickzetta.zettapark import functions as F
|
|
|
13
13
|
session = Session.builder.configs({
|
|
14
14
|
"username": "your_username",
|
|
15
15
|
"password": "your_password",
|
|
16
|
-
"service": "cn-shanghai-alicloud.api.
|
|
16
|
+
"service": "cn-shanghai-alicloud.api.clickzetta.com",
|
|
17
17
|
"instance": "your_instance",
|
|
18
18
|
"workspace": "your_workspace",
|
|
19
19
|
"schema": "public",
|
|
20
|
-
"vcluster": "
|
|
20
|
+
"vcluster": "DEFAULT"
|
|
21
21
|
}).create()
|
|
22
22
|
```
|
|
23
23
|
|
|
@@ -14,11 +14,11 @@ from clickzetta.zettapark.window import Window
|
|
|
14
14
|
session = Session.builder.configs({
|
|
15
15
|
"username": "your_username",
|
|
16
16
|
"password": "your_password",
|
|
17
|
-
"service": "cn-shanghai-alicloud.api.
|
|
17
|
+
"service": "cn-shanghai-alicloud.api.clickzetta.com",
|
|
18
18
|
"instance": "your_instance",
|
|
19
19
|
"workspace": "your_workspace",
|
|
20
20
|
"schema": "public",
|
|
21
|
-
"vcluster": "
|
|
21
|
+
"vcluster": "DEFAULT"
|
|
22
22
|
}).create()
|
|
23
23
|
```
|
|
24
24
|
|
|
@@ -28,8 +28,9 @@ session = Session.builder.configs({
|
|
|
28
28
|
|
|
29
29
|
All examples in this guide use the following two tables. Run this setup before proceeding:
|
|
30
30
|
|
|
31
|
+
Create tables
|
|
32
|
+
|
|
31
33
|
```python
|
|
32
|
-
# Create tables
|
|
33
34
|
session.sql("""
|
|
34
35
|
CREATE TABLE IF NOT EXISTS orders (
|
|
35
36
|
order_id BIGINT,
|
|
@@ -49,8 +50,11 @@ session.sql("""
|
|
|
49
50
|
level STRING
|
|
50
51
|
)
|
|
51
52
|
""").collect()
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Insert test data
|
|
52
56
|
|
|
53
|
-
|
|
57
|
+
```python
|
|
54
58
|
session.sql("""
|
|
55
59
|
INSERT INTO orders VALUES
|
|
56
60
|
(1001, 101, 'iPhone', 7999.00, 'paid', '2024-01-15'),
|
|
@@ -80,9 +84,11 @@ Join the orders and users tables, compute per-user spending summaries, and write
|
|
|
80
84
|
```python
|
|
81
85
|
orders = session.table("orders") # order_id, user_id, product, amount, status, order_date
|
|
82
86
|
users = session.table("users") # user_id, name, city, level
|
|
87
|
+
```
|
|
83
88
|
|
|
84
|
-
|
|
85
|
-
|
|
89
|
+
Note: when joining tables with a shared column name (user_id), rename it before joining to avoid ambiguity
|
|
90
|
+
|
|
91
|
+
```python
|
|
86
92
|
paid = orders.filter(F.col("status") == "paid") \
|
|
87
93
|
.select(
|
|
88
94
|
F.col("order_id"),
|
|
@@ -101,15 +107,21 @@ result = paid.join(users, paid["o_user_id"] == users["user_id"]) \
|
|
|
101
107
|
.sort(F.col("total_amount").desc())
|
|
102
108
|
|
|
103
109
|
result.show()
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
```Plain
|
|
113
|
+
+-------+-----+---------+------+-----------+------------+---------------+
|
|
114
|
+
|user_id| name| city| level|order_count|total_amount|last_order_date|
|
|
115
|
+
+-------+-----+---------+------+-----------+------------+---------------+
|
|
116
|
+
| 101|Alice| Beijing| gold| 2| 22998.00| 2024-01-17|
|
|
117
|
+
| 102| Bob| Shanghai|silver| 1| 14999.00| 2024-01-15|
|
|
118
|
+
| 103|Carol|Guangzhou|bronze| 1| 8999.00| 2024-01-16|
|
|
119
|
+
+-------+-----+---------+------+-----------+------------+---------------+
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Write to result table
|
|
123
|
+
|
|
124
|
+
```python
|
|
113
125
|
result.write.save_as_table("user_order_summary", mode="overwrite")
|
|
114
126
|
```
|
|
115
127
|
|
|
@@ -121,11 +133,17 @@ result.write.save_as_table("user_order_summary", mode="overwrite")
|
|
|
121
133
|
|
|
122
134
|
```python
|
|
123
135
|
summary = session.table("user_order_summary")
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Rank by spending amount descending
|
|
124
139
|
|
|
125
|
-
|
|
140
|
+
```python
|
|
126
141
|
w_rank = Window.order_by(F.col("total_amount").desc())
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Running total by city
|
|
127
145
|
|
|
128
|
-
|
|
146
|
+
```python
|
|
129
147
|
w_city = Window.partition_by("city").order_by(F.col("total_amount").desc())
|
|
130
148
|
|
|
131
149
|
result = summary \
|
|
@@ -134,21 +152,25 @@ result = summary \
|
|
|
134
152
|
.with_column("running_total", F.sum("total_amount").over(w_city))
|
|
135
153
|
|
|
136
154
|
result.show()
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
```Plain
|
|
158
|
+
+-------+-----+------+-----------+------------+----+---------+-------------+
|
|
159
|
+
|user_id| name| level|order_count|total_amount|rank|city_rank|running_total|
|
|
160
|
+
+-------+-----+------+-----------+------------+----+---------+-------------+
|
|
161
|
+
| 101|Alice| gold| 2| 22998.00| 1| 1| 22998.00|
|
|
162
|
+
| 102| Bob|silver| 1| 14999.00| 2| 1| 14999.00|
|
|
163
|
+
| 103|Carol|bronze| 1| 8999.00| 3| 1| 8999.00|
|
|
164
|
+
+-------+-----+------+-----------+------------+----+---------+-------------+
|
|
144
165
|
```
|
|
145
166
|
|
|
146
167
|
---
|
|
147
168
|
|
|
148
169
|
## Scenario 3: Create a View for BI
|
|
149
170
|
|
|
171
|
+
Create a paid orders view with year/month dimensions for BI analysis
|
|
172
|
+
|
|
150
173
|
```python
|
|
151
|
-
# Create a paid orders view with year/month dimensions for BI analysis
|
|
152
174
|
orders.filter(F.col("status") == "paid") \
|
|
153
175
|
.select(
|
|
154
176
|
F.col("order_id"),
|
|
@@ -159,8 +181,11 @@ orders.filter(F.col("status") == "paid") \
|
|
|
159
181
|
F.year(F.to_date(F.col("order_date"))).alias("year"),
|
|
160
182
|
F.month(F.to_date(F.col("order_date"))).alias("month"),
|
|
161
183
|
).create_or_replace_view("v_paid_orders")
|
|
184
|
+
```
|
|
162
185
|
|
|
163
|
-
|
|
186
|
+
BI tools can query the view directly
|
|
187
|
+
|
|
188
|
+
```python
|
|
164
189
|
session.table("v_paid_orders").show()
|
|
165
190
|
```
|
|
166
191
|
|
|
@@ -170,15 +195,19 @@ session.table("v_paid_orders").show()
|
|
|
170
195
|
|
|
171
196
|
Process only new data after a given point in time — suitable for scheduled incremental ETL:
|
|
172
197
|
|
|
198
|
+
Process only new orders from 2024-01-16 onwards
|
|
199
|
+
|
|
173
200
|
```python
|
|
174
|
-
# Process only new orders from 2024-01-16 onwards
|
|
175
201
|
cutoff = "2024-01-16"
|
|
176
202
|
new_orders = orders.filter(F.col("order_date") >= cutoff)
|
|
177
203
|
|
|
178
204
|
print(f"New orders: {new_orders.count()}")
|
|
179
205
|
new_orders.show()
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Append new paid orders to the archive table (append mode)
|
|
180
209
|
|
|
181
|
-
|
|
210
|
+
```python
|
|
182
211
|
new_orders.filter(F.col("status") == "paid") \
|
|
183
212
|
.write.save_as_table("paid_orders_archive", mode="append")
|
|
184
213
|
```
|
|
@@ -189,21 +218,28 @@ new_orders.filter(F.col("status") == "paid") \
|
|
|
189
218
|
|
|
190
219
|
Check data quality before writing:
|
|
191
220
|
|
|
221
|
+
Check for NULL values
|
|
222
|
+
|
|
192
223
|
```python
|
|
193
|
-
# Check for NULL values
|
|
194
224
|
null_counts = orders.select(
|
|
195
225
|
F.count(F.lit(1)).alias("total"),
|
|
196
226
|
F.sum(F.iff(F.is_null(F.col("amount")), F.lit(1), F.lit(0))).alias("null_amount"),
|
|
197
227
|
F.sum(F.iff(F.is_null(F.col("user_id")), F.lit(1), F.lit(0))).alias("null_user_id"),
|
|
198
228
|
)
|
|
199
229
|
null_counts.show()
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
Check status distribution
|
|
200
233
|
|
|
201
|
-
|
|
234
|
+
```python
|
|
202
235
|
orders.group_by("status").agg(
|
|
203
236
|
F.count(F.lit(1)).alias("cnt")
|
|
204
237
|
).sort("cnt", ascending=False).show()
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
Check for anomalous amounts (negative or excessively large)
|
|
205
241
|
|
|
206
|
-
|
|
242
|
+
```python
|
|
207
243
|
anomalies = orders.filter(
|
|
208
244
|
(F.col("amount") <= 0) | (F.col("amount") > 100000)
|
|
209
245
|
)
|
|
@@ -216,8 +252,9 @@ print(f"Anomalous orders: {anomalies.count()}")
|
|
|
216
252
|
|
|
217
253
|
Complex logic can be executed directly with `session.sql()`. The result is still a DataFrame and can continue to be chained:
|
|
218
254
|
|
|
255
|
+
Execute a complex query with SQL, return a DataFrame for further processing
|
|
256
|
+
|
|
219
257
|
```python
|
|
220
|
-
# Execute a complex query with SQL, return a DataFrame for further processing
|
|
221
258
|
df = session.sql("""
|
|
222
259
|
SELECT
|
|
223
260
|
user_id,
|
|
@@ -227,8 +264,11 @@ df = session.sql("""
|
|
|
227
264
|
WHERE status = 'paid'
|
|
228
265
|
GROUP BY user_id, DATE_TRUNC('month', TO_DATE(order_date))
|
|
229
266
|
""")
|
|
267
|
+
```
|
|
230
268
|
|
|
231
|
-
|
|
269
|
+
Continue processing with the DataFrame API
|
|
270
|
+
|
|
271
|
+
```python
|
|
232
272
|
w = Window.partition_by("user_id").order_by("month")
|
|
233
273
|
df.with_column("cumulative", F.sum("monthly_amount").over(w)).show()
|
|
234
274
|
```
|
|
@@ -14,11 +14,11 @@ from clickzetta.zettapark.window import Window
|
|
|
14
14
|
session = Session.builder.configs({
|
|
15
15
|
"username": "your_username",
|
|
16
16
|
"password": "your_password",
|
|
17
|
-
"service": "cn-shanghai-alicloud.api.
|
|
17
|
+
"service": "cn-shanghai-alicloud.api.clickzetta.com",
|
|
18
18
|
"instance": "your_instance",
|
|
19
19
|
"workspace": "your_workspace",
|
|
20
20
|
"schema": "public",
|
|
21
|
-
"vcluster": "
|
|
21
|
+
"vcluster": "DEFAULT"
|
|
22
22
|
}).create()
|
|
23
23
|
```
|
|
24
24
|
|