recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
from typing import Any, Optional
|
|
3
|
+
|
|
4
|
+
from recurvedata.core.translation import _l
|
|
5
|
+
from recurvedata.operators.operator import BaseOperator
|
|
6
|
+
from recurvedata.operators.task import BaseTask
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ConfigTask(BaseTask):
    """Plain ``BaseTask`` subclass that adds no attributes or methods of its own.

    ``LinkOperator.get_link_custom_variables`` instantiates it to read the
    node's ``rendered_config``.
    """
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LinkOperator(BaseOperator):
    """Operator that delegates execution to a ``LinkExecutor`` for a linked workflow/node."""

    @classmethod
    def get_link_setting(cls, node_config: dict) -> tuple[int, str, Optional[int]]:
        """Extract the link target from ``node_config["source"]``.

        Returns:
            ``(workflow_id, workflow_version_tag, link_node_key)``; the last
            item is ``None`` when the source has no ``link_node_key``.

        Raises:
            KeyError: if ``source``, ``workflow_id`` or ``workflow_version_tag``
                is missing.
        """
        source = node_config["source"]
        return source["workflow_id"], source["workflow_version_tag"], source.get("link_node_key")

    @classmethod
    def config_schema(cls) -> dict:  # front-end does not use this config schema to show
        """Return the JSON-schema style description of the operator's configuration."""
        return {
            "type": "object",
            "properties": {
                "workflow_id": {
                    "type": "string",
                    "title": _l("Target Workflow"),
                    "description": _l("ID of the workflow to link to"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "workflow_version": {
                    "type": "string",
                    "title": _l("Workflow Version Tag"),
                    "description": _l("Version tag of the target workflow (e.g. latest, v1.0)"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "node_id": {
                    "type": "string",
                    "title": _l("Target Node"),
                    "description": _l("ID of the node to link to in the target workflow"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "variables": {
                    "type": "string",
                    "title": _l("Custom Variables"),
                    "default": "{}",
                    "description": _l("Custom variables to pass to the linked workflow in JSON format"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "code",
                        "lang": "json",
                    },
                },
            },
            "required": [
                "workflow_id",
                "workflow_version",
            ],
        }

    def execute(self):
        """Build a ``LinkExecutor`` from the node's link settings and run it."""
        # Imported here rather than at module level, as in the original code.
        from recurvedata.executors import LinkExecutor

        link_custom_variables = self.get_link_custom_variables(
            self.dag, self.node, self.execution_date, self.variables, self.node.job_variable
        )
        link_settings = self.node.link_settings
        link_executor = LinkExecutor(
            self.dag,
            self.node,
            execution_date=self.execution_date,
            link_workflow_id=link_settings["workflow_id"],
            link_node_id=link_settings["node_id"],
            custom_variables=link_custom_variables,
            is_link_workflow=link_settings["is_link_workflow"],
        )
        # TODO(chenjingmeng): temporary solution to distinguish link operator
        link_executor.node.is_link_op = True
        link_executor.node.origin_node = self.node
        link_executor.run()

    @classmethod
    def validate(cls, configuration) -> dict:
        """Return ``configuration`` unchanged (no schema validation is applied)."""
        return configuration  # variables is dict type which will fail the json validation

    def run_stage(self, stage):
        """Run the operator; ``stage`` is ignored and ``execute()`` is always called."""
        return self.execute()

    @classmethod
    def ui_config_to_config(cls, configuration: dict[str, Any]) -> dict[str, Any]:
        """Flatten a UI configuration into the stored configuration.

        Returns a copy of ``configuration["source"]`` with the top-level
        ``variables`` (default ``{}``) added under the ``variables`` key.
        """
        # Shallow-copy so the caller's nested ``source`` mapping is not mutated.
        source = copy.copy(configuration["source"])
        source["variables"] = configuration.get("variables", {})
        return source

    @classmethod
    def get_link_custom_variables(cls, dag, node, execution_date, variables, job_variables: dict):
        """Merge job variables with the link node's rendered custom variables.

        Custom variables take precedence over ``job_variables``. When
        ``variables`` carries an ``execution_date`` that the custom variables do
        not define, it is forwarded as well.

        Returns:
            A new dict; neither ``job_variables`` nor the rendered config is modified.
        """
        # if linkOp has not configured the variables, use job_variables https://project.feishu.cn/recurvedata/issue/detail/5342288226
        link_variables = copy.deepcopy(job_variables) if job_variables else {}
        task_obj = ConfigTask(dag, node, execution_date, variables)
        # Copy before editing so the task's rendered config stays untouched;
        # a missing/None value is treated as "no custom variables".
        link_custom_variables = dict(task_obj.rendered_config["variables"] or {})

        if "execution_date" in variables and "execution_date" not in link_custom_variables:
            # user may update `execution_date` in variable, which may not appear in link custom variable,
            # so we need to pass the updated `execution_date` to link
            link_custom_variables["execution_date"] = variables["execution_date"]
        link_variables.update(link_custom_variables)
        return link_variables

    @classmethod
    def get_ds_name_field_values(cls, rendered_config: dict) -> list[str]:
        """Return the data source names used by this operator; currently always empty."""
        # todo: check linked node
        return []
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from recurvedata.utils.crontab import next_schedule, previous_schedule
|
|
4
|
+
from recurvedata.utils.date_time import _DATELIKE
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class DagBase:
    """
    Recurve Web Job object
    """

    id: int
    project_id: int
    name: str
    schedule_interval: str
    scheduler_type: str
    timezone: str
    owner: str
    full_refresh_models: bool = False
    retries: int = None
    retry_delay: int = None
    # for dbt operator
    skip_data_tests: bool = False

    @classmethod
    def normalize_schedule_interval(cls, interval: str) -> str:
        """Translate an ``@preset`` interval into its cron expression.

        ``@once`` maps to ``None``; any value that is not a known preset is
        returned unchanged.
        """
        presets = {
            "@once": None,
            "@hourly": "0 * * * *",
            "@daily": "0 0 * * *",
            "@weekly": "0 0 * * 0",
            "@monthly": "0 0 1 * *",
            "@yearly": "0 0 1 1 *",
        }
        # Falling back to the input itself keeps non-preset intervals as-is.
        return presets.get(interval, interval)

    def next_schedule(self, dttm: _DATELIKE) -> _DATELIKE:
        """Delegate to the module-level ``next_schedule`` with this job's interval."""
        return next_schedule(self.schedule_interval, dttm)

    def previous_schedule(self, dttm: _DATELIKE) -> _DATELIKE:
        """Delegate to the module-level ``previous_schedule`` with this job's interval."""
        return previous_schedule(self.schedule_interval, dttm)

    @property
    def is_once(self):
        """``True`` when the interval is ``"@once"``, ``None`` or an empty string."""
        return self.schedule_interval in ("@once", None, "")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class NodeBase:
    """Minimal record identifying a node by id, node key and name."""

    # Integer identifier of the node.
    id: int
    # String key of the node.
    node_key: str
    # Name of the node.
    name: str
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from recurvedata.operators.notify_operator.operator import NotifyOperator
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
import jsonschema
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from recurvedata.pigeon.connector.feishu import FeishuBot
|
|
8
|
+
except ImportError:
|
|
9
|
+
pass
|
|
10
|
+
|
|
11
|
+
from recurvedata.core.translation import _l
|
|
12
|
+
from recurvedata.operators.operator import BaseOperator
|
|
13
|
+
from recurvedata.operators.task import BaseTask
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class NotifyTask(BaseTask):
    """Task that sends a Feishu message to users and/or chat groups through a bot connection."""

    @staticmethod
    def split_str_lst(s: str, default_value: list[str] = None) -> list[str]:
        """Split a comma-separated string into unique, stripped, non-empty items.

        Args:
            s: Comma-separated text; may be empty or ``None``.
            default_value: Value returned when ``s`` is empty; ``None`` means ``[]``.

        Returns:
            The items in first-seen order, without duplicates or blank entries.
        """
        if default_value is None:
            default_value = []
        if not s:
            return default_value
        stripped = (item.strip() for item in s.split(","))
        # dict.fromkeys de-duplicates while keeping a deterministic order; blank
        # items (e.g. from a trailing comma) would otherwise become empty recipients.
        return list(dict.fromkeys(item for item in stripped if item))

    def execute_impl(self, *args, **kwargs):
        """Send the configured message to every configured recipient.

        For ``msg_type == "post"`` a plain text message is sent when neither
        mentioned users nor a subject are configured; otherwise ``send_text`` is
        used with the mention list and subject. Any other ``msg_type`` sends
        ``card_content`` through ``send_message``.
        """
        config = self.rendered_config
        bot = FeishuBot(**self.get_connection_by_name(config["feishu_bot"]).extra)

        send_method = bot.send_message
        send_conf = {"msg_type": config.msg_type}
        email_lst = self.split_str_lst(config.at_user_email)
        if config.msg_type == "post":
            send_conf.update({"content": config.text_content})
            # split_str_lst never returns None, so test for emptiness; the
            # previous ``is None`` check made this plain-text branch unreachable.
            if not email_lst and not config.subject:
                send_conf.update({"msg_type": "text"})
                send_conf.update({"content": json.dumps({"text": config.text_content})})
            else:
                send_method = bot.send_text
                send_conf.pop("msg_type")
                send_conf.update({"email_lst": email_lst, "subject": config.subject})
        else:
            send_conf.update({"content": config.card_content})

        user_lst = self.split_str_lst(config.email)
        chat_name_lst = self.split_str_lst(config.chat_name)
        chat_id_lst = self.split_str_lst(config.chat_id)
        for email in user_lst:
            send_method(receiver_type="user", user_email=email, **send_conf)
        for chat_name in chat_name_lst:
            send_method(receiver_type="group", chat_name=chat_name, **send_conf)
        for chat_id in chat_id_lst:
            send_method(receiver_type="group", chat_id=chat_id, **send_conf)
        logger.info("Message was successfully sent to the receivers.")

    @classmethod
    def config_schema(cls):
        """Return the JSON-schema style description of the task's configuration."""
        return {
            "type": "object",
            "properties": {
                "feishu_bot": {
                    "type": "string",
                    "title": _l("Feishu Bot Connection"),
                    "description": _l("Select the Feishu bot connection to use for sending messages"),
                    "ui:field": "ProjectConnectionSelectorField",
                    "ui:options": {
                        "supportTypes": [
                            "feishu_bot",
                        ],
                    },
                },
                "msg_type": {
                    "type": "string",
                    "title": _l("Message Format"),
                    "description": _l("Choose between simple text/post format or interactive card format"),
                    "default": "post",
                    "enum": ["post", "interactive"],
                    "enumNames": ["post", "interactive"],
                },
                "subject": {
                    "ui:hidden": '{{parentFormData.msg_type === "interactive"}}',
                    "type": "string",
                    "title": _l("Message Subject"),
                    "description": _l("Subject line for the message. Supports template variables."),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "at_user_email": {
                    "ui:hidden": '{{parentFormData.msg_type === "interactive"}}',
                    "type": "string",
                    "title": _l("Mention Users"),
                    "description": _l(
                        "Email addresses of users to @mention in the message. Separate multiple emails with commas. Use 'all' to @mention everyone."
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "text_content": {
                    "ui:hidden": '{{parentFormData.msg_type === "interactive"}}',
                    "type": "string",
                    "title": _l("Message Text"),
                    "default": "",
                    "description": _l("Main text content of the message. Supports template variables."),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "card_content": {
                    "ui:hidden": '{{parentFormData.msg_type === "post"}}',
                    "type": "string",
                    "title": _l("Interactive Card JSON"),
                    "description": _l("JSON definition for the interactive message card. Supports template variables."),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "code",
                        "lang": "json",
                    },
                },
                "email": {
                    "type": "string",
                    "title": _l("Individual Recipients"),
                    "description": _l(
                        "Email addresses of individual users to receive the message. Separate multiple emails with commas."
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "chat_name": {
                    "type": "string",
                    "title": _l("Group Recipients (by Name)"),
                    "description": _l(
                        "Names of Feishu chat groups to receive the message. Separate multiple names with commas. Bot must be a member of the groups."
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "chat_id": {
                    "type": "string",
                    "title": _l("Group Recipients (by ID)"),
                    "description": _l(
                        "IDs of Feishu chat groups to receive the message. Separate multiple IDs with commas. Bot must be a member of the groups."
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
            },
            "required": ["feishu_bot", "msg_type"],
        }

    @classmethod
    def validate(cls, configuration) -> dict:
        """Validate the configuration beyond the base schema check.

        Raises:
            jsonschema.ValidationError: if no recipient is configured, or if
                ``card_content`` is set but is not valid JSON.
        """
        config = super().validate(configuration)
        # The schema names the individual-recipient field "email"; the legacy
        # "user_email" key is still honoured. ``.get`` is used because all of
        # these fields are optional and may be absent.
        recipients = (
            config.get("email"),
            config.get("user_email"),
            config.get("chat_name"),
            config.get("chat_id"),
        )
        if not any(recipients):
            raise jsonschema.ValidationError(
                message="at least one of (User Email, Chat Group Name, Chat Group ID) must be entered",
                path=("email", "chat_name", "chat_id"),
            )
        card_content = config.get("card_content")
        if card_content:
            try:
                json.loads(card_content)
            except (TypeError, ValueError) as exc:
                raise jsonschema.ValidationError(
                    message="Card Content should be valid JSON", path=("card_content",)
                ) from exc
        return config
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class NotifyOperator(BaseOperator):
    """Operator whose task class is ``NotifyTask``."""

    task_cls = NotifyTask
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import TYPE_CHECKING, Callable
|
|
3
|
+
|
|
4
|
+
from recurvedata.operators.base import Configurable
|
|
5
|
+
from recurvedata.operators.ui import format_config_schema
|
|
6
|
+
from recurvedata.utils.registry import Registry
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from recurvedata.executors.models import ExecutorDag, ExecutorNode
|
|
10
|
+
from recurvedata.operators.task import BaseTask
|
|
11
|
+
|
|
12
|
+
_registry = Registry(key_callback=lambda x: x.name())
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_operator_class(name):
    """Look up ``name`` in the module-level operator registry and return the result."""
    operator_cls = _registry.get(name)
    return operator_cls
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class BaseOperator(Configurable):
    """Base class for operators; every subclass is added to the module registry.

    An operator wraps an optional task (an instance of ``task_cls``) and
    forwards execution and schema lookup to it.
    """

    stages = ()
    task_cls = None
    web_install_require = []  # the python modules needed by Recurve Web
    worker_install_require = []  # the python modules needed by Recurve Worker

    def __init__(
        self, dag: "ExecutorDag", node: "ExecutorNode", execution_date: datetime.datetime, variables: dict = None
    ):
        """Store the execution context and build the task when ``task_cls`` is set.

        Args:
            dag: DAG this operator belongs to.
            node: Node this operator is created for.
            execution_date: Execution date passed on to the task.
            variables: Optional variables; ``None`` is replaced by an empty dict.
        """
        self.dag: "ExecutorDag" = dag
        self.node: "ExecutorNode" = node
        self.execution_date: datetime.datetime = execution_date
        self.task: "BaseTask" = None
        self.variables: dict = variables or {}
        self.init_task()

    def __init_subclass__(cls, **kwargs):
        """Register every subclass in the module-level registry."""
        # NOTE(review): ``kwargs`` are accepted but not forwarded to
        # ``super().__init_subclass__`` — confirm this is intended.
        _registry.add(cls)

    def execute(self):
        """Run the wrapped task, if there is one."""
        task_obj = self.task
        if task_obj:
            task_obj.execute()

    def init_task(self):
        """Instantiate ``task_cls`` with this operator's context; no-op without a task class."""
        if self.task_cls:
            self.task = self.task_cls(self.dag, self.node, self.execution_date, self.variables)

    def set_execution_date(self, execution_date):
        """Update the execution date on the operator and on its task.

        Does nothing when ``execution_date`` equals the current value.
        """
        if execution_date == self.execution_date:
            return
        # Some T0 tasks need to change the operator's execution date;
        # the task must be given the new execution date as well.
        self.execution_date = execution_date
        if self.task:
            self.task.set_execution_date(execution_date)

    @classmethod
    def config_schema(cls) -> dict:
        """Return the task class's config schema.

        Returns ``None`` implicitly when no ``task_cls`` is set.
        """
        if cls.task_cls:
            return cls.task_cls.config_schema()

    @classmethod
    def to_dict(cls) -> dict:
        """Return the operator name together with its config schema under ``"source"``."""
        return {
            "name": cls.name(),
            "config_schema": {"source": cls.config_schema()},
        }

    @classmethod
    def get_setup_install_require(cls) -> dict:
        """Return the install requirements split into ``"web"`` and ``"worker"``."""
        return {
            "web": cls.web_install_require,
            "worker": cls.worker_install_require,
        }

    @classmethod
    def ui_config_schema(cls) -> dict:
        """Return the UI schema, containing a single ``"source"`` section."""
        res = {
            "source": {
                "name": "Source",
                "config_schema": format_config_schema(cls.config_schema(), "source"),
            }
        }
        return res

    @staticmethod
    def _add_schema_name_to_json_schema_error(schema_name: str, validate_func: Callable, *args, **kwargs):
        """Call ``validate_func`` and tag any raised exception with ``schema_name``.

        The exception is re-raised unchanged apart from the added
        ``schema_name`` attribute.
        """
        try:
            return validate_func(*args, **kwargs)
        except Exception as e:
            e.schema_name = schema_name
            # Bare ``raise`` re-raises the active exception with its traceback intact.
            raise

    @classmethod
    def ui_validate(cls, configuration: dict) -> dict:
        """Validate a UI configuration and return the result under ``"source"``."""
        return {
            "source": cls._add_schema_name_to_json_schema_error(
                "source", cls.validate, cls.ui_config_to_config(configuration)
            ),
        }

    @classmethod
    def ui_config_to_config(cls, configuration: dict) -> dict:
        """Extract the operator configuration from a UI configuration.

        ui_config: the configuration sent by the front end on save. It
            corresponds one-to-one with ``ui_config_schema`` and usually has
            the form ``{'source': source_dct, 'meta': meta_dct}``.
        config: the operator-specific configuration, usually the ``source``
            entry of the ui_config.

        Why the two are distinguished: the front-end page is split into two
        main sections, Source and Meta, according to the ui config schema.
        Meta holds scheduler-related configuration that has little to do with
        the specific operator. ui_config exists to keep scheduler-related
        validation logic and schema separate from the specific operator.
        """
        return configuration["source"]

    @classmethod
    def get_ds_name_field_values(cls, rendered_config: dict) -> list[str]:
        """Return the data-source name field values reported by the task class.

        Returns an empty list when no ``task_cls`` is set, matching the guard
        used by ``init_task`` and ``config_schema``.
        """
        if not cls.task_cls:
            return []
        config = cls.ui_config_to_config(rendered_config)
        return cls.task_cls.get_ds_name_field_values(config)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from recurvedata.operators.python_operator.operator import PythonOperator
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
from tempfile import NamedTemporaryFile
|
|
5
|
+
|
|
6
|
+
from recurvedata.config import PY_PACKAGES_PATH, RECURVE_EXECUTOR_PYENV_NAME
|
|
7
|
+
from recurvedata.core.translation import _l
|
|
8
|
+
from recurvedata.operators.config import CONF
|
|
9
|
+
from recurvedata.operators.operator import BaseOperator
|
|
10
|
+
from recurvedata.operators.task import BaseTask
|
|
11
|
+
from recurvedata.utils.mp import robust_run_subprocess, run_subprocess
|
|
12
|
+
|
|
13
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Fallback Python version for virtualenv creation; can be overridden through
# the RECURVE_OPERATOR_PYTHON_DEFAULT_VERSION environment variable.
DEFAULT_PY_VERSION = os.getenv("RECURVE_OPERATOR_PYTHON_DEFAULT_VERSION", "3.11.9")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PythonRequirementsMixin:
    """Mixin that installs pip requirements into a pyenv environment."""

    @staticmethod
    def _install_requirements(requirements: str, pyenv_name: str):
        """Install ``requirements`` with the interpreter of ``pyenv_name``.

        For any environment other than the executor's own
        (``RECURVE_EXECUTOR_PYENV_NAME``), ``recurvedata-lib[slim]`` is added
        to the requirements and is first installed from the local package
        directory without index access.

        Args:
            requirements: Contents of a pip requirements file; ``None`` is
                treated as empty.
            pyenv_name: Name of the pyenv environment to install into.
        """
        requirements = requirements or ""
        is_separate_env = pyenv_name != RECURVE_EXECUTOR_PYENV_NAME
        if is_separate_env:
            requirements += "\nrecurvedata-lib[slim]"
        if not requirements:
            return

        logger.info("installing requirements")
        python = CONF.PYENV_PYTHON_PATH.format(pyenv=pyenv_name)

        # Commands are built as argument lists rather than by splitting a
        # formatted string, so paths containing whitespace stay intact.
        if is_separate_env:
            run_subprocess(
                [
                    python,
                    "-m",
                    "pip",
                    "install",
                    "-v",
                    "--no-index",
                    f"--find-links={PY_PACKAGES_PATH}",
                    "recurvedata-lib[slim]",
                ]
            )

        with NamedTemporaryFile(mode="w+t", prefix="recurve_python_requirements_", suffix=".txt") as requirements_file:
            requirements_file.write(requirements)
            # Flush so pip sees the full content when it reads the file by name.
            requirements_file.flush()
            run_subprocess([python, "-m", "pip", "install", "-r", requirements_file.name])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class PythonTask(BaseTask, PythonRequirementsMixin):
    """Task that runs user-supplied Python code with a pyenv interpreter."""

    ds_name_fields = ("python_env",)

    @classmethod
    def config_schema(cls) -> dict:
        """Return the JSON schema describing the ``python_env`` and ``code`` fields."""
        return {
            "type": "object",
            "properties": {
                "python_env": {
                    "type": "string",
                    "title": _l("Python Env"),
                    "description": _l(
                        "Python virtual environment name that will be created and can be shared between tasks"
                    ),
                    "ui:field": "ProjectConnectionSelectorField",
                    "ui:options": {
                        "supportTypes": [
                            "python",
                        ]
                    },
                },
                "code": {
                    "type": "string",
                    "title": _l("Code"),
                    "description": _l(
                        "Python code that will be executed. Supports Jinja templating for dynamic code generation and variable substitution."
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "code",
                        "lang": "python",
                    },
                },
            },
            "required": ["python_env", "code"],
        }

    def __custom_os_env(self) -> dict:
        """Return a copy of the current process environment for the child process."""
        custom_env = os.environ.copy()
        return custom_env

    def __run_python(self, filename: str, pyenv: str, os_env: dict):
        """Run the script at ``filename`` with the interpreter of ``pyenv``.

        Raises:
            RuntimeError: If the subprocess reports a non-zero return code;
                the message carries the captured output.
        """
        script_path = os.path.abspath(filename)
        python = CONF.PYENV_PYTHON_PATH.format(pyenv=pyenv)
        output, ret_code = robust_run_subprocess([python, script_path], env=os_env, _logger=logger)
        if ret_code:
            raise RuntimeError(f"Python Operator Error:\n{output}")

    def _prepare_env(self, python_config: dict):
        """Create the virtualenv if needed and install its requirements.

        Args:
            python_config: Connection settings; reads ``pyenv``,
                ``python_version`` and ``requirements``.
        """
        pyenv_name: str = python_config.get("pyenv")
        # ``or`` also covers keys that are present but None/empty, which the
        # default argument of ``dict.get`` would not.
        py_version: str = python_config.get("python_version") or DEFAULT_PY_VERSION
        self._install_virtualenv(py_version, pyenv_name)
        self._install_requirements(python_config.get("requirements") or "", pyenv_name)

    @staticmethod
    def _install_virtualenv(py_version: str, pyenv_name: str):
        """Create the pyenv virtualenv unless its interpreter path already exists."""
        python_path: str = CONF.PYENV_PYTHON_PATH.format(pyenv=pyenv_name)
        if os.path.exists(python_path):
            return
        run_subprocess(["pyenv", "virtualenv", py_version, pyenv_name])

    def execute_impl(self, *args, **kwargs):
        """Prepare the environment, write the code to a temp file and run it.

        Raises:
            KeyError: If the connection settings have no ``pyenv`` entry.
            RuntimeError: If the script exits with a non-zero return code.
        """
        config = self.rendered_config
        code = config.code
        os_env = self.__custom_os_env()

        conn_config: dict = self.get_connection_by_name(config.python_env).extra
        # Read the required key first so a missing ``pyenv`` fails before any
        # virtualenv or package installation is attempted.
        pyenv = conn_config["pyenv"]
        self._prepare_env(conn_config)

        prefix = f"recurve_python_{self.dag.id}_{self.node.id}_"
        with NamedTemporaryFile(mode="w+t", prefix=prefix, suffix=".py") as tmp_file:
            tmp_file.write(code)
            # Flush so the interpreter sees the full script when reading by name.
            tmp_file.flush()
            logger.info(code)
            self.__run_python(tmp_file.name, pyenv, os_env)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class PythonOperator(BaseOperator):
    """Operator that executes Python code through ``PythonTask``."""

    task_cls = PythonTask

    @classmethod
    def validate(cls, configuration) -> dict:
        """Return the parent class's validation result unchanged."""
        # A syntax check of the submitted code (see
        # ``_get_python_code_syntax_error``) is currently disabled here.
        validated = super().validate(configuration)
        return validated

    @staticmethod
    def _get_python_code_syntax_error(code):
        """Parse ``code`` with ``ast.parse``.

        Returns the ``SyntaxError`` raised by the parser, or ``None`` when
        the code parses cleanly.
        """
        try:
            ast.parse(code)
        except SyntaxError as syntax_error:
            return syntax_error
        return None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from recurvedata.operators.sensor_operator.operator import SensorOperator
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from airflow.models import DAG
|
|
5
|
+
from airflow.timetables.interval import CronDataIntervalTimetable
|
|
6
|
+
|
|
7
|
+
from recurvedata.schedulers.airflow import AirflowScheduler
|
|
8
|
+
from recurvedata.schedulers.model import SchedulerNode
|
|
9
|
+
from recurvedata.schedulers.utils import format_dag_id
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def prepare_airflow_env():
    """Remove ``_AIRFLOW__AS_LIBRARY`` from the process environment, if set."""
    os.environ.pop("_AIRFLOW__AS_LIBRARY", None)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_dag_from_db(dag_id):
    """Load the DAG identified by ``dag_id`` through a database-backed ``DagBag``.

    Raises:
        AirflowException: If the ``DagBag`` returns no DAG for ``dag_id``.
    """
    from airflow.exceptions import AirflowException
    from airflow.models import DagBag

    found = DagBag(read_dags_from_db=True).get_dag(dag_id)
    if found:
        return found
    raise AirflowException(f"Dag {dag_id!r} could not be found; either it does not exist or it failed to parse.")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def build_execute_context(dag, task, run_id) -> dict:
    """Assemble an execution context for ``task`` in the DAG run ``run_id``.

    The DAG run is fetched with ``dag.get_dagrun`` and the task instance with
    ``dag_run.get_task_instance(task.task_id)``; the run's execution date,
    logical date and data interval end are copied into the result.
    """
    run = dag.get_dagrun(run_id=run_id)
    return {
        "dag": dag,
        "task": task,
        "dag_run": run,
        "task_instance": run.get_task_instance(task.task_id),
        "execution_date": run.execution_date,
        "logical_date": run.logical_date,
        "data_interval_end": run.data_interval_end,
    }
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def format_external_dag_id(job_id: int) -> str:
    """Return the DAG id for ``job_id`` as produced by ``format_dag_id``."""
    dag_id = format_dag_id(job_id)
    return dag_id
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def format_external_task_id(node_key: str) -> str:
    """Return the task id ``AirflowScheduler`` assigns to the node ``node_key``.

    A placeholder ``SchedulerNode`` is built around ``node_key`` so that
    ``AirflowScheduler.format_task_id`` can be reused.
    """
    placeholder = SchedulerNode(
        node_key=node_key,
        operator="external_operator",
        id=0,
        name="external_task",
    )
    return AirflowScheduler.format_task_id(placeholder)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def data_interval_end_to_data_interval_start(
    dag: DAG, data_interval_end: datetime.datetime
) -> datetime.datetime | None:
    """Derive the data interval start that belongs to ``data_interval_end``.

    When the DAG's timetable is a ``CronDataIntervalTimetable``, the start is
    the previous tick before ``data_interval_end``. If the next tick after
    that start is not ``data_interval_end`` (presumably because the end is
    not aligned to the schedule), the start is moved back one more tick.

    For any other DAG, ``data_interval_end`` is returned unchanged.
    """
    timetable = getattr(dag, "timetable", None)
    if not isinstance(timetable, CronDataIntervalTimetable):
        return data_interval_end

    start = timetable._get_prev(data_interval_end)
    if timetable._get_next(start) != data_interval_end:
        start = timetable._get_prev(start)
    return start
|