recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
import contextvars
|
|
2
|
+
import functools
|
|
3
|
+
import inspect
|
|
4
|
+
import os
|
|
5
|
+
import warnings
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from recurvedata.operators.task import BaseTask
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from fsspec.asyn import get_running_loop, sync # todo
|
|
13
|
+
except ImportError:
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
from recurvedata.operators.operator import BaseOperator, _registry, get_operator_class
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Context(object):
    """
    Entry object used by both the Web and the Worker. It provides:

    1. Function registration
       Callers must register a few functions before use; they back
       config_schema and validate:
         get_connection_names_by_type: return connection names for a
             connection type. May be implemented sync or async.
         get_connection_by_name: return the connection object for a
             connection name. May be implemented sync or async.
    2. list_config_schemas
    3. get_supported_operators
    4. get_config_schema
    5. Hooks the Worker needs while executing tasks

    sync/async usage
        Context supports both synchronous and asynchronous calling styles:
        1. Web calls are async, Worker calls are sync; the functions the Web
           side registers (get_connection_names_by_type, ...) are async, the
           ones the Worker side registers are sync.
        2. To keep operator code free of async/await syntax, operators only
           implement synchronous config_schema / validate / execute, and
           Context exposes synchronous helpers (get_connection_names_by_type,
           ...) for operators to call.
        3. When the Web registers an async get_connection_names_by_type,
           Context wraps it so the get_connection_names_by_type it exposes
           stays synchronous.
        4. Operator.config_schema is synchronous, while the Web side needs an
           async entry point, so Context.get_config_schema drives it through
           the event loop's executor.
    """

    def __init__(self):
        # Returns connection names for a connection type; sync or async.
        self._get_connection_names_by_type: Callable = None

        # Returns the connection object for a connection name; sync or async.
        self._get_connection_by_name: Callable = None

        self.current_project_id = contextvars.ContextVar("Recurve Project ID")

        self._pid = os.getpid()
        self._loop = None
        self.async_mode = False
        self._functions = {}
        # Bug fix: `client` used to be set only through its setter, so
        # reading `self.client` before any assignment raised AttributeError.
        self._client = None

    def init_context(self, get_connection_names_by_type: Callable = None, get_connection_by_name: Callable = None):
        """
        :param get_connection_names_by_type: returns connection names for a
            connection type; sync or async.
            Signature: get_connection_names_by_type(project_id, connection_type)
        :param get_connection_by_name: returns the connection object for a
            connection name; sync or async.
            Signature: get_connection_by_name(project_id, connection_name)
        """
        self._get_connection_names_by_type = get_connection_names_by_type
        self._get_connection_by_name = get_connection_by_name
        # An async registration implies the Web (async) calling convention.
        self.async_mode = inspect.iscoroutinefunction(self._get_connection_names_by_type)

    @property
    def loop(self):
        """Lazily resolved running event loop. Not fork-safe by design."""
        if self._pid != os.getpid():
            raise RuntimeError("This class is not fork-safe")
        if self._loop:
            return self._loop
        self._loop = get_running_loop()
        return self._loop

    def get_connection_names_by_type(self, connection_type: str) -> list[str]:
        """
        Return connection names for the given connection type.

        On the Web side the registered function is async and is driven to
        completion via ``sync``; on the Worker side it is called directly.
        :param connection_type:
        :return: list of connection names
        """
        project_id = self.current_project_id.get()
        if inspect.iscoroutinefunction(self._get_connection_names_by_type):
            return sync(self.loop, self._get_connection_names_by_type, project_id, connection_type)
        return self._get_connection_names_by_type(project_id, connection_type)

    def get_connection_by_name(self, connection_name: str):
        """Return the connection object registered under ``connection_name``."""
        project_id = self.current_project_id.get()
        if inspect.iscoroutinefunction(self._get_connection_by_name):
            return sync(self.loop, self._get_connection_by_name, project_id, connection_name)
        return self._get_connection_by_name(project_id, connection_name)

    def get_connection_choices_by_type(self, connection_type):
        """Deprecated alias of :meth:`get_connection_names_by_type`."""
        warnings.warn(
            "This function is deprecated. Please use `get_connection_names_by_type`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.get_connection_names_by_type(connection_type)

    async def async_call_synchronous_func(self, func, *args):
        """Run the synchronous ``func`` in the loop's default executor,
        propagating the current project id into the worker thread."""
        project_id = self.current_project_id.get()
        loop = self.loop
        res = await loop.run_in_executor(None, self.contextvars_wrapper(project_id, func), *args)
        return res

    def must_get_connection_by_name(self, connection_name: str):
        """Like :meth:`get_connection_by_name` but raises when missing.

        :raises ValueError: if no connection with that name exists.
        """
        connection = self.get_connection_by_name(connection_name)
        if not connection:
            raise ValueError(f"connection {connection_name} not exists")
        return connection

    async def validate_operator_configuration(self, operator_name: str, configuration: dict, project_id: str):
        """Async entry point used by the Web to validate an operator config."""
        self.current_project_id.set(project_id)
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if not operator_cls:
            raise ValueError(f"no operator {operator_name}")
        return await self.async_call_synchronous_func(operator_cls.ui_validate, configuration)

    def validate_operator_configuration_synchronously(self, operator_name: str, configuration: dict, project_id: str):
        """Synchronous counterpart of :meth:`validate_operator_configuration`."""
        self.current_project_id.set(project_id)
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if not operator_cls:
            raise ValueError(f"no operator {operator_name}")
        if not self.async_mode:
            return operator_cls.ui_validate(configuration)
        return sync(self.loop, operator_cls.ui_validate, configuration)

    @staticmethod
    def get_ds_name_field_values(operator_name: str, rendered_config: dict) -> list[str]:
        """Return the datasource-name field values for an operator config."""
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if not operator_cls:
            raise ValueError(f"no operator {operator_name}")
        return operator_cls.get_ds_name_field_values(rendered_config)

    def contextvars_wrapper(self, project_id, func):
        """
        Wrap ``func`` so ``current_project_id`` is bound to ``project_id`` for
        the duration of the call (needed when hopping threads via asyncio).
        """

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            token = self.current_project_id.set(project_id)
            try:
                return func(*args, **kwargs)
            finally:
                # Bug fix: reset even when func raises, so the project id
                # does not leak into later calls in the same context.
                self.current_project_id.reset(token)

        return wrapper

    async def get_config_schema(self, operator_name: str, project_id: str):
        """
        Return the operator's UI config schema (get_ui_config_schema by default).

        :param operator_name:
        :param project_id:
        :raises ValueError: if the operator is unknown. Previously this method
            silently returned ``None``, inconsistent with
            :meth:`get_config_schema_synchronously`, which raises.
        """
        self.current_project_id.set(project_id)
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if not operator_cls:
            raise ValueError(f"no operator {operator_name}")
        return await self.async_call_synchronous_func(operator_cls.ui_config_schema)

    def get_config_schema_synchronously(self, operator_name: str, project_id: str):
        """Synchronous counterpart of :meth:`get_config_schema`."""
        self.current_project_id.set(project_id)
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if not operator_cls:
            raise ValueError(f"no operator {operator_name}")
        if not self.async_mode:
            return operator_cls.ui_config_schema()
        return sync(self.loop, operator_cls.ui_config_schema)

    @staticmethod
    def get_supported_operators() -> list[str]:
        """Names of all registered operators whose ``enabled`` flag is set."""
        return [op_name for op_name, op_cls in _registry.items() if op_cls.enabled]

    async def list_config_schemas(self, project_id: str):
        """Return ``[{"name": ..., "config_schema": ...}]`` for every
        registered operator.

        NOTE(review): unlike :meth:`get_supported_operators` this does not
        filter on ``enabled`` — confirm that is intentional.
        """
        self.current_project_id.set(project_id)
        res_lst = []

        for operator_name, operator_cls in _registry.items():
            res_lst.append(
                {
                    "name": operator_name,
                    "config_schema": await self.async_call_synchronous_func(operator_cls.config_schema),
                }
            )
        return res_lst

    def list_config_schemas_synchronously(self, project_id: str):
        """Synchronous counterpart of :meth:`list_config_schemas`."""
        self.current_project_id.set(project_id)
        res_lst = []

        for operator_name, operator_cls in _registry.items():
            res_lst.append(
                {
                    "name": operator_name,
                    "config_schema": self.get_config_schema_synchronously(operator_name, project_id),
                }
            )
        return res_lst

    @property
    def client(self):
        # Set externally by the hosting process; defaults to None (see __init__).
        return self._client

    @client.setter
    def client(self, client):
        self._client = client

    def register_function(self, name: str, function: Callable):
        """Register a named hook callable (e.g. task-instance lifecycle hooks)."""
        self._functions[name] = function

    def init_task_instance_on_task_start(self, task: "BaseTask", *args, **kwargs) -> int:
        """Invoke the registered task-start hook, if any, and return its result."""
        func = self._functions.get("init_task_instance_on_task_start")
        if func:
            return func(task, *args, **kwargs)

    def update_task_instance_on_task_finish(
        self,
        task: "BaseTask",
        ti_id: int,
        task_status: str,
        meta: Any,
        error: Exception,
        error_stack: str,
        *args,
        **kwargs,
    ):
        """Invoke the registered task-finish hook, if any, and return its result."""
        func = self._functions.get("update_task_instance_on_task_finish")
        if func:
            return func(task, ti_id, task_status, meta, error, error_stack, *args, **kwargs)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
# Module-level singleton shared by the Web and Worker entry points.
context = Context()
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from recurvedata.core.translation import _l
|
|
2
|
+
from recurvedata.operators.link_operator import LinkOperator
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class LinkModelPipelineOperator(LinkOperator):
    """Link operator that points a task at a specific model-pipeline run."""

    @classmethod
    def config_schema(cls) -> dict:  # front-end does not use this config schema to show
        """Return the JSON schema for the model-pipeline link configuration."""

        def _reference_field(title, description) -> dict:
            # Shared shape of the three plain-text reference fields; the
            # translated title/description objects are passed in already
            # wrapped so gettext extraction still sees literal _l("...") calls.
            return {
                "type": "string",
                "title": title,
                "description": description,
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {
                    "type": "plain",
                },
            }

        variables_field = {
            "type": "string",
            "title": _l("Variables"),
            "default": "{}",
            "description": _l("Variables in JSON format"),
            "ui:field": "CodeEditorWithReferencesField",
            "ui:options": {
                "type": "code",
                "lang": "json",
            },
        }

        return {
            "type": "object",
            "properties": {
                "model_pipeline_id": _reference_field(_l("Model Pipeline ID"), _l("Model Pipeline ID")),
                "workflow_id": _reference_field(_l("Workflow ID"), _l("Workflow ID")),
                "workflow_version": _reference_field(_l("Workflow Version"), _l("Workflow Version")),
                "variables": variables_field,
            },
            "required": [
                "model_pipeline_id",
                "workflow_id",
                "workflow_version",
            ],
        }
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import logging
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import TYPE_CHECKING, Union
|
|
5
|
+
|
|
6
|
+
from recurvedata.core.translation import _l
|
|
7
|
+
from recurvedata.dbt.utils import parse_run_model_log
|
|
8
|
+
from recurvedata.exceptions import MaxRetriesExceededException
|
|
9
|
+
from recurvedata.operators.operator import BaseOperator
|
|
10
|
+
from recurvedata.operators.task import BaseTask
|
|
11
|
+
from recurvedata.utils.date_time import utcnow_aware
|
|
12
|
+
from recurvedata.utils.helpers import get_environment_variable
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from recurvedata.dbt.consts import DbtMaterialization
|
|
16
|
+
from recurvedata.dbt.schemas import PreviewResult
|
|
17
|
+
from recurvedata.dbt.service import DbtService
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
from dbt.cli.main import dbtRunnerResult
|
|
21
|
+
except ImportError:
|
|
22
|
+
dbtRunnerResult = None
|
|
23
|
+
|
|
24
|
+
# Module-level logger, keyed by module name per the stdlib logging convention.
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class TaskRuntimeException:
    """Wrapper for an exception raised while a dbt task executes.

    Serializes into the same failure-result shape used by dbt run results.
    """

    # The underlying exception captured at runtime.
    exception: Exception

    def to_dict(self):
        """Return a dbt-result-like failure payload describing the exception."""
        exc = self.exception
        exception_payload = {
            "type": f"TaskRuntimeException-{type(exc).__name__}",
            "message": str(exc),
        }
        return {"success": False, "exception": exception_payload}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class DbtResultConstructor:
    """Builds and patches dbt result payloads for a single model run.

    Attributes:
        project_id: Recurve project the model belongs to.
        model_name: Name of the dbt model.
        materialization: dbt materialization of the model.
        compiled_code: Compiled SQL for the model, when available.
    """

    project_id: int
    model_name: str
    materialization: "DbtMaterialization"
    compiled_code: str = None

    @staticmethod
    def _construct_timing(action_name: str, start_time: datetime.datetime, end_time: datetime.datetime) -> list[dict]:
        """Return a single-entry dbt-style ``timing`` list for one action.

        Timestamps are rendered in UTC as ``%Y-%m-%dT%H:%M:%S.%fZ``.
        NOTE(review): ``astimezone`` interprets naive datetimes as local
        time — callers appear to pass aware values; confirm.
        """

        def _as_utc_string(moment: datetime.datetime) -> str:
            return moment.astimezone(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

        entry = {
            "name": action_name,
            "started_at": _as_utc_string(start_time),
            "completed_at": _as_utc_string(end_time),
        }
        return [entry]

    def construct_ephemeral_materialized_result(
        self, materialized_result_dct: dict, start_time: datetime.datetime, end_time: datetime.datetime
    ) -> dict:
        """
        For ephemeral materialization, ``$result.results`` inside
        ``materialized_result_dct`` is empty, but CP relies on
        ``$result.results`` for the message and start/end times — so inject a
        synthetic success entry. The dict is modified in place and returned.
        """
        if not materialized_result_dct["success"]:
            return materialized_result_dct
        inner_result = materialized_result_dct["result"]
        if not inner_result:
            return materialized_result_dct
        if inner_result["results"]:
            # Non-ephemeral runs already carry real sub-results; leave as-is.
            return materialized_result_dct
        synthetic_entry = {
            "unique_id": self.format_model_unique_id(),
            "status": "success",
            "timing": self._construct_timing("execute", start_time=start_time, end_time=end_time),
            "message": "Ephemeral model compiled successfully",
            "compiled_code": self.compiled_code,
        }
        inner_result["results"] = [synthetic_entry]
        return materialized_result_dct

    def format_model_unique_id(self) -> str:
        """Return the dbt unique id, e.g. ``model.project_1.my_model``."""
        return f"model.project_{self.project_id}.{self.model_name}"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class DbtTask(BaseTask):
    """Task that materializes a single dbt model and runs its data tests.

    A Redis lock keyed on (project_id, model_name) serializes concurrent runs
    of the same model. Compiled SQL, run logs and materialization/test
    outcomes are reported back via :meth:`send_dbt_model_result`.
    """

    def execute_impl(self):
        """Run the model, optionally run its data tests, and report results.

        Raises:
            Exception: when the materialization fails, or when any data test
                errors / fails.
        """
        from recurvedata.dbt.schemas import PreviewResult
        from recurvedata.dbt.service import DbtService
        from recurvedata.dbt.utils import format_var
        from recurvedata.utils.redis_lock import RedisLock

        # Reuse the property instead of duplicating the config-fallback logic.
        model_name = self.model_name

        # Serialize concurrent runs of the same model within a project.
        lock = RedisLock(
            f"dbt_task_{self.dag.project_id}_{model_name}", auto_extend=True, expire=60, timeout=60 * 60 * 1
        )
        lock.acquire()

        # NOTE: the previous `except Exception as e: raise e from None` clause
        # was removed — it only suppressed exception chaining/context while
        # re-raising the same exception; try/finally alone is sufficient.
        try:
            service = DbtService(self.dag.project_id)
            service.prepare()
            model_id: int = int(self.rendered_config["entity_id"])
            var_str = format_var(service, self.get_template_context())

            materialize_start_time = utcnow_aware()
            full_refresh = self.dag.full_refresh_models

            model_run_result = service.run_model(model_name, var_str, full_refresh=full_refresh, include_run_log=True)
            compiled_code = model_run_result.compiled_sql
            materialized_result = model_run_result.result
            run_sql = model_run_result.run_sql
            run_log = model_run_result.run_log

            materialize_end_time = utcnow_aware()

            if not compiled_code:
                # Fall back to the raw model SQL so CP still has something to show.
                logger.info("compiled_code empty, use un-compiled sql")
                compiled_code = service.read_model_sql(model_name)

            if not compiled_code:
                logger.info("compiled_code still empty, set materialized_result to failed")
                materialized_result.success = False
                materialized_result.exception = RuntimeError("Materialization failed due to empty compiled_code")
                materialized_result.result = None

            if not materialized_result.success:
                # Report the failure before raising so CP records the attempt.
                self.send_dbt_model_result(
                    service,
                    compiled_sql=compiled_code,
                    run_sql=run_sql,
                    run_log=run_log,
                    try_number=get_environment_variable("AIRFLOW_RETRY_NUMBER", int),
                    materialized_result=materialized_result,
                    materialize_start_time=materialize_start_time,
                    materialize_end_time=materialize_end_time,
                )
                raise Exception(f"run model {model_name} materialized failed")

            test_case_skipped = False
            if self.dag.skip_data_tests:
                logger.info("skip data tests")
                test_result = None
                test_case_sample_result = None
                test_case_skipped = True
            else:
                logger.info("run data tests")
                test_result = service.run_test(model_id, var_str)
                test_case_sample_result: dict[str, PreviewResult] = service.run_test_sample_data(test_result)
            self.send_dbt_model_result(
                service,
                compiled_sql=compiled_code,
                run_sql=run_sql,
                run_log=run_log,
                try_number=get_environment_variable("AIRFLOW_RETRY_NUMBER", int),
                materialized_result=materialized_result,
                test_case_result=test_result,
                test_case_sample_result=test_case_sample_result,
                materialize_start_time=materialize_start_time,
                materialize_end_time=materialize_end_time,
                test_case_skipped=test_case_skipped,
            )

            test_case_result_dct = self.format_test_case_result(test_result)
            if test_case_result_dct and not test_case_result_dct["success"]:
                raise Exception("Task Run failed due to Error / Failed test cases")
        finally:
            lock.release()

    @staticmethod
    def format_materialized_result(
        project_id: int,
        model_name: str,
        materialization: Union["DbtMaterialization", str],
        compiled_code: str,
        materialized_result: Union["dbtRunnerResult", "TaskRuntimeException"],
        materialize_start_time: datetime.datetime | None = None,
        materialize_end_time: datetime.datetime | None = None,
    ) -> dict | None:
        """Convert a materialization result into a plain dict for reporting.

        materialized: model, ephemeral, view, incremental

        Returns None when there is no result at all. Marks the result as
        failed when dbt reports success but matched no nodes.
        """
        from recurvedata.dbt.consts import DbtMaterialization
        from recurvedata.dbt.utils import dbt_runner_result_to_dict

        if not materialized_result:
            return
        if isinstance(materialized_result, TaskRuntimeException):
            materialized_result_dct = materialized_result.to_dict()
        else:
            materialized_result_dct = dbt_runner_result_to_dict(materialized_result)

        if materialization == DbtMaterialization.EPHEMERAL:
            constructor = DbtResultConstructor(
                project_id=project_id,
                model_name=model_name,
                materialization=materialization,
                compiled_code=compiled_code,
            )
            materialized_result_dct = constructor.construct_ephemeral_materialized_result(
                materialized_result_dct, materialize_start_time, materialize_end_time
            )

        if materialized_result_dct["success"]:
            # "result" may be present but None (e.g. after a forced failure);
            # `.get("result", {})` would return that None and `.get("results")`
            # on it would raise — guard with `or {}`.
            results = (materialized_result_dct.get("result") or {}).get("results")
            if not results:
                # The selection criterion '' does not match any nodes
                materialized_result_dct["success"] = False

        return materialized_result_dct

    @staticmethod
    def format_test_case_result(test_case_result: Union["dbtRunnerResult", "TaskRuntimeException"]) -> dict | None:
        """Convert a test run result into a plain dict; None when absent."""
        from recurvedata.dbt.utils import dbt_runner_result_to_dict

        if not test_case_result:
            return
        if isinstance(test_case_result, TaskRuntimeException):
            return test_case_result.to_dict()
        return dbt_runner_result_to_dict(test_case_result)

    @property
    def model_name(self) -> str:
        # "model_name" with fallback to "entity_name" for older configs.
        return self.rendered_config.get("model_name") or self.rendered_config.get("entity_name")

    @property
    def materialization(self) -> str:
        # e.g. "table", "view", "incremental", "ephemeral".
        return self.rendered_config.get("materialized")

    def send_dbt_model_result(
        self,
        service: "DbtService",
        compiled_sql: str | None,
        try_number: int,
        run_sql: str | None = None,
        run_log: str | None = None,
        materialized_result: Union["dbtRunnerResult", "TaskRuntimeException"] = None,
        test_case_result: Union["dbtRunnerResult", "TaskRuntimeException"] = None,
        test_case_sample_result: dict[str, "PreviewResult"] = None,
        materialize_start_time: datetime.datetime | None = None,
        materialize_end_time: datetime.datetime | None = None,
        test_case_skipped: bool = False,
    ):
        """Serialize the run/test results and push them to the control plane.

        Best-effort on delivery: MaxRetriesExceededException is logged, not
        re-raised. Sets ``self.sent_dbt_model_result`` so the error hook does
        not double-report.
        """
        materialized_result_dct = self.format_materialized_result(
            self.dag.project_id,
            self.model_name,
            self.materialization,
            compiled_sql,
            materialized_result,
            materialize_start_time,
            materialize_end_time,
        )
        test_case_result_dct = self.format_test_case_result(test_case_result)

        if not compiled_sql:
            logger.info(f"compiled_sql empty, materialized_result_dct: {materialized_result_dct}")

        if test_case_sample_result:
            test_case_sample_result_dct = {
                unique_id: preview_obj.model_dump() for unique_id, preview_obj in test_case_sample_result.items()
            }
        else:
            test_case_sample_result_dct = None

        run_sql_log = parse_run_model_log(run_log)

        logger.info(f"debug: compiled sql: {compiled_sql}")
        logger.info(f"debug: run sql: {run_sql}")
        logger.info(f"debug: run_log: {run_log}")
        logger.info(f"debug: run_sql_log: {run_sql_log}")
        logger.info(f"debug: materialized_result_dct: {materialized_result_dct}")
        logger.info(f"debug: test_case_result_dct: {test_case_result_dct}")
        logger.info(f"debug: test_case_sample_result_dct: {test_case_sample_result_dct}")

        try:
            service.client.send_dbt_model_result(
                self.dag.id,
                self.node.node_key,
                compiled_sql,
                run_sql,
                run_sql_log=run_sql_log,
                raw_materialized_result=materialized_result_dct,
                raw_test_result=test_case_result_dct,
                test_case_sample_data=test_case_sample_result_dct,
                materialization=self.materialization,
                try_number=try_number,
                test_case_skipped=test_case_skipped,
            )
        except MaxRetriesExceededException as e:
            logger.exception(f"send_dbt_model_result failed, error: {e}")
        # Marked even when delivery failed above: the attempt was made, so the
        # error hook should not retry reporting.
        self.sent_dbt_model_result = True

    def on_execute_impl_error(self, err: Exception):
        """Report a task-level failure, unless a result was already sent."""
        from recurvedata.dbt.service import DbtService

        if getattr(self, "sent_dbt_model_result", False):
            return
        service = DbtService(self.dag.project_id)
        self.send_dbt_model_result(
            service,
            compiled_sql=None,
            try_number=get_environment_variable("AIRFLOW_RETRY_NUMBER", int),
            materialized_result=TaskRuntimeException(err),
            test_case_result=None,
        )
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
class DBTOperator(BaseOperator):
    """Operator wiring DbtTask into the DAG: UI schema plus config plumbing."""

    task_cls = DbtTask

    @classmethod
    def config_schema(cls) -> dict:
        """Return the JSON schema describing this operator's configuration UI."""
        materialization_choices = ["table", "view", "incremental", "ephemeral"]
        entity_name_schema = {
            "type": "string",
            "title": _l("Entity Name"),
            "description": _l("Entity Name"),
            "ui:field": "CodeEditorWithReferencesField",
            "ui:options": {
                "type": "plain",
            },
        }
        # for front-end display
        materialized_schema = {
            "type": "string",
            "title": _l("Materialized"),
            "default": "view",
            "enum": materialization_choices,
            "enumNames": materialization_choices,
        }
        return {
            "type": "object",
            "properties": {
                "entity_name": entity_name_schema,
                "materialized": materialized_schema,
            },
            "required": ["entity_name", "materialized"],
        }

    @classmethod
    def validate(cls, configuration) -> dict:
        """No extra validation is performed; the configuration passes through."""
        return configuration

    @classmethod
    def ui_config_to_config(cls, configuration: dict) -> dict:
        """Extract the runnable config: the UI payload's "source" section."""
        return configuration["source"]

    @classmethod
    def get_ds_name_field_values(cls, rendered_config: dict) -> list[str]:
        """dbt tasks reference no datasource-name fields."""
        return []
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from recurvedata.operators.link_operator.operator import LinkOperator
|