recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import dateutil
|
|
6
|
+
import typer
|
|
7
|
+
from airflow.models import DAG
|
|
8
|
+
|
|
9
|
+
from recurvedata.schedulers.airflow_db_process import AirflowDbService
|
|
10
|
+
from recurvedata.schedulers.schemas import WorkflowNodeDebugDetail
|
|
11
|
+
from recurvedata.schedulers.service import get_job_dag
|
|
12
|
+
from recurvedata.schedulers.task_status import TaskStatusScanner
|
|
13
|
+
from recurvedata.schedulers.utils import clear_task_instance, format_dag_id, init_client
|
|
14
|
+
from recurvedata.utils import init_logging
|
|
15
|
+
from recurvedata.utils._typer import RecurveTyper
|
|
16
|
+
from recurvedata.utils.date_time import to_local_datetime
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
cli = RecurveTyper()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _ensure_dag_exists(job_id: int, raise_error=True) -> DAG | None:
    """Look up the Airflow DAG for *job_id*.

    Returns the DAG when it exists. When the job has been deleted:
    raises ValueError if *raise_error* is true, otherwise logs and
    returns None.
    """
    dag = get_job_dag(job_id)
    if dag:
        return dag
    # No DAG found — the job was deleted.
    if raise_error:
        raise ValueError(f"dag not exists: {job_id}")
    logger.info(f"dag missing for job {job_id}")
    return None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@cli.callback()
def init():
    """Typer app callback: configure logging before any subcommand runs."""
    init_logging()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@cli.command()
def update_dag(job_id: int = typer.Option(..., "--job_id")):
    """Refresh the stored DAG for *job_id* via AirflowDbService.

    The DAG may be None when the job was deleted (raise_error=False);
    presumably AirflowDbService.update_dag tolerates that — confirm.
    """
    dag = _ensure_dag_exists(job_id, raise_error=False)
    AirflowDbService.update_dag(dag)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@cli.command()
def activate_dag(job_id: int = typer.Option(..., "--job_id")):
    """Activate the DAG for *job_id* (no-op lookup failure: dag may be None)."""
    dag = _ensure_dag_exists(job_id, raise_error=False)
    AirflowDbService.activate_dag(dag)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@cli.command()
def deactivate_dag(job_id: int = typer.Option(..., "--job_id")):
    """Deactivate the DAG for *job_id* (dag may be None if the job was deleted)."""
    dag = _ensure_dag_exists(job_id, raise_error=False)
    AirflowDbService.deactivate_dag(dag)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@cli.command()
def delete_dag(job_id: int = typer.Option(..., "--job_id"), job_name: str = typer.Option(..., "--job_name")):
    """Delete the DAG derived from *job_id*/*job_name* from the Airflow DB.

    Unlike the other commands, this only needs the dag_id string, so no
    DAG object is loaded.
    """
    dag_id = format_dag_id(job_id)
    AirflowDbService.delete_dag(dag_id, job_name)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@cli.command()
def clear(
    job_id: int = typer.Option(..., "--job_id"),
    node_key: str = typer.Option(..., "--node_key"),
    execution_date: str = typer.Option(..., "--execution_date", callback=to_local_datetime),
    only_failed: bool = typer.Option(False, "--only_failed"),
    including_downstream: bool = typer.Option(False, "--including_downstream"),
):
    """Clear task instances of one node of a job at *execution_date*.

    NOTE(review): this calls get_job_dag directly rather than
    _ensure_dag_exists, so a deleted job yields dag=None here —
    confirm clear_task_instance handles that.
    """
    dag = get_job_dag(job_id)
    clear_task_instance(dag, node_key, execution_date, only_failed, including_downstream)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@cli.command()
def start_workflow_node_debug(
    workflow_id: int = typer.Option(..., "--workflow_id"),
    node_key: str = typer.Option(..., "--node_key"),
    schedule_type: str = typer.Option(..., "--schedule_type"),
    schedule_interval: str = typer.Option(..., "--schedule_interval"),
    execution_date: str = typer.Option(..., "--execution_date"),
    timezone: str = typer.Option(..., "--timezone"),
):
    """Submit an asynchronous debug run of a single workflow node via Celery.

    Returns a dict with the celery task id so the caller can track/abort it.
    """
    # Imported lazily so the CLI does not always pull in Celery.
    from celery.result import AsyncResult

    from recurvedata.schedulers.debug_celery import debug_node

    celery_kwargs = dict(
        workflow_id=workflow_id,
        node_key=node_key,
        schedule_type=schedule_type,
        schedule_interval=schedule_interval,
        execution_date=execution_date,
        timezone=timezone,
    )
    result: AsyncResult = debug_node.apply_async(kwargs=celery_kwargs)
    logger.info(f"sent debug_node {celery_kwargs}, celery_id: {result.task_id}")
    return {"celery_task_id": result.task_id}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@cli.command()
def abort_workflow_node_debug(
    workflow_id: int = typer.Option(..., "--workflow_id"),
    node_key: str = typer.Option(..., "--node_key"),
    celery_task_id: str = typer.Option(None, "--celery_task_id"),
):
    """Revoke a running workflow-node debug task.

    When --celery_task_id is omitted, it is looked up from the backend's
    debug-detail endpoint; if still unknown, the command logs and exits.
    """
    # Lazy import: debug_celery pulls in the Airflow Celery app.
    import recurvedata.schedulers.debug_celery

    if not celery_task_id:
        client = init_client()
        detail: WorkflowNodeDebugDetail = client.get_workflow_node_debug_detail(
            workflow_id=workflow_id, node_key=node_key
        )
        celery_task_id = detail.celery_task_id

    if not celery_task_id:
        logger.info("skip revoke_debug, no celery_task_id found")
        return
    logger.info(f"start revoke debug: {workflow_id} {node_key} {celery_task_id}")
    recurvedata.schedulers.debug_celery.revoke_task(celery_task_id)
    logger.info(f"finish revoke debug: {workflow_id} {node_key} {celery_task_id}")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@cli.command()
def sync_task_status(interval: int = typer.Option(5, "--interval")):
    """Run the task-status scanner loop.

    NOTE(review): *interval* is presumably seconds — confirm against
    TaskStatusScanner.run.
    """
    scanner = TaskStatusScanner()
    scanner.run(interval)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@cli.command()
def trigger_job_run(
    job_id: int = typer.Option(..., "--job_id"),
    execution_date: str = typer.Option(..., "--execution_date", callback=to_local_datetime),
    include_past: bool = typer.Option(False, "--include_past"),
    include_future: bool = typer.Option(False, "--include_future"),
    run_type: str = typer.Option(None, "--run_type"),
    conf: str = typer.Option(None, "--conf"),
):
    """Trigger a DAG run for *job_id*, optionally carrying a JSON run conf."""
    dag = _ensure_dag_exists(job_id)
    # Parse the JSON conf into its own name instead of rebinding the str param.
    parsed_conf: dict[str, Any] | str | None = json.loads(conf) if conf else conf
    AirflowDbService.trigger_job_run(dag, execution_date, include_past, include_future, run_type, parsed_conf)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@cli.command()
def rerun_job_run(
    job_id: int = typer.Option(..., "--job_id"),
    run_id: str = typer.Option(None, "--run_id"),
    min_execution_date: str = typer.Option(None, "--min_execution_date"),
    max_execution_date: str = typer.Option(None, "--max_execution_date"),
    failed_only: bool = typer.Option(False, "--failed_only"),
):
    """Re-run DAG runs of *job_id*, by run id or an execution-date window.

    Date options are ISO-ish strings and are parsed with dateutil.
    """
    # Bug fix: `import dateutil` alone does not load the `parser` submodule,
    # so `dateutil.parser.parse` could raise AttributeError; import it explicitly.
    from dateutil import parser as dateutil_parser

    dag = _ensure_dag_exists(job_id)
    if min_execution_date:
        min_execution_date = dateutil_parser.parse(min_execution_date)
    if max_execution_date:
        max_execution_date = dateutil_parser.parse(max_execution_date)
    AirflowDbService.rerun_job_run(dag, run_id, min_execution_date, max_execution_date, failed_only)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
@cli.command()
def rerun_task_run(
    job_id: int = typer.Option(..., "--job_id"),
    run_id: str = typer.Option(None, "--run_id"),
    node_key: str = typer.Option(..., "--node_key"),
    include_upstream: bool = typer.Option(False, "--include_upstream"),
    include_downstream: bool = typer.Option(False, "--include_downstream"),
    min_execution_date: str = typer.Option(None, "--min_execution_date"),
    max_execution_date: str = typer.Option(None, "--max_execution_date"),
    failed_only: bool = typer.Option(False, "--failed_only"),
):
    """Re-run one node's task runs, optionally including up/downstream tasks."""
    # Bug fix: `import dateutil` alone does not load the `parser` submodule,
    # so `dateutil.parser.parse` could raise AttributeError; import it explicitly.
    from dateutil import parser as dateutil_parser

    dag = _ensure_dag_exists(job_id)
    if min_execution_date:
        min_execution_date = dateutil_parser.parse(min_execution_date)
    if max_execution_date:
        max_execution_date = dateutil_parser.parse(max_execution_date)
    AirflowDbService.rerun_task_run(
        dag=dag,
        run_id=run_id,
        node_key=node_key,
        min_execution_date=min_execution_date,
        max_execution_date=max_execution_date,
        include_upstream=include_upstream,
        include_downstream=include_downstream,
        failed_only=failed_only,
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@cli.command()
def init_airflow_tables():
    """Initialize the Airflow metadata tables."""
    AirflowDbService.init_airflow_tables()
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@cli.command()
def stop_dev_run(job_id: int = typer.Option(..., "--job_id")):
    """Stop a dev run by marking the whole dag run failed."""
    logger.info(f"start stop dev run job_id: {job_id}")
    dag = _ensure_dag_exists(job_id)

    AirflowDbService.mark_dag_run_failed(dag, whole_dag=True)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@cli.command()
def start_dev_run(
    job_id: int = typer.Option(..., "--job_id"),
    execution_date: str = typer.Option(..., "--execution_date", callback=to_local_datetime),
):
    """Reset and launch a fresh dev run of the job's DAG.

    Updates the serialized DAG, deletes any previous dag runs / task
    instances, re-activates the DAG, then triggers a manual run at
    *execution_date*.
    """
    # Bug fix: the log message was copy-pasted from stop_dev_run
    # ("start stop dev run").
    logger.info(f"start dev run job_id: {job_id}")
    dag = _ensure_dag_exists(job_id)

    AirflowDbService.update_dag(dag)
    AirflowDbService.delete_whole_dag_dr_ti(dag)
    AirflowDbService.activate_dag(dag)
    AirflowDbService.trigger_job_run(dag, execution_date, False, False, "manual")
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@cli.command()
def terminate_task_run(
    job_id: int = typer.Option(..., "--job_id"),
    run_id: str = typer.Option(..., "--run_id"),
    node_key: str = typer.Option(..., "--node_key"),
):
    """Terminate one task (node) of a specific dag run."""
    dag = _ensure_dag_exists(job_id)
    AirflowDbService.terminate_task_run(dag, run_id, node_key)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from recurvedata.client import Client
|
|
2
|
+
from recurvedata.schedulers.schemas import JobListResponse, TaskStatusCursor, WorkflowNodeDebugDetail
|
|
3
|
+
from recurvedata.utils import get_env_id
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SchedulerClient(Client):
    """HTTP client for the scheduler-facing endpoints of the backend API.

    Every call scopes its request to the current environment via
    get_env_id().
    """

    def list_jobs(self, sharding_size: int = None, sharding_key: int = None) -> JobListResponse:
        """List schedulable jobs, optionally restricted to one shard."""
        if not sharding_size:
            # No sharding requested: behave as the single shard 0 of 1.
            sharding_size, sharding_key = 1, 0

        query = dict(
            env_id=get_env_id(),
            sharding_key=sharding_key,
            sharding_size=sharding_size,
        )
        return self.request("GET", path="/api/scheduler/jobs", response_model_class=JobListResponse, params=query)

    def get_task_status_cursor(self) -> TaskStatusCursor:
        """Fetch the task-status sync cursor for this environment."""
        return self.request(
            "GET",
            path="/api/scheduler/task-status-cursor",
            response_model_class=TaskStatusCursor,
            params=dict(env_id=get_env_id()),
        )

    def sync_task_status(self, job_runs: list[dict] | None = None, task_runs: list[dict] | None = None):
        """Push job-run and task-run status updates to the backend."""
        body = dict(job_runs=job_runs, task_runs=task_runs)
        return self.request(
            "POST",
            path="/api/scheduler/sync-task-status",
            params=dict(env_id=get_env_id()),
            json=body,
        )

    def get_workflow_node_debug_detail(self, workflow_id: int, node_key: str) -> WorkflowNodeDebugDetail:
        """Fetch debug-run details (e.g. the celery task id) of one node."""
        query = dict(
            env_id=get_env_id(),
            workflow_id=workflow_id,
            node_key=node_key,
        )
        return self.request(
            "GET",
            path="/api/scheduler/workflow-node-debug-detail",
            response_model_class=WorkflowNodeDebugDetail,
            params=query,
        )

    def on_job_run_finished(self, job_run_result: dict):
        """Notify the backend that a job run reached a terminal state."""
        wanted = ("job_id", "run_id", "task_info_map", "state", "data_interval_end")
        body = {name: job_run_result[name] for name in wanted}
        return self.request(
            "POST",
            path="/api/scheduler/on-job-run-finished",
            params=dict(env_id=get_env_id()),
            json=body,
        )
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from enum import Enum
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class OperatorEnum(str, Enum):
    """Operator type names a workflow node can be scheduled as.

    NOTE(review): duplicates the `Operator` enum below (itself marked
    "todo") — consider consolidating.
    """

    SQLOperator = "SQLOperator"
    TransferOperator = "TransferOperator"
    PythonOperator = "PythonOperator"
    SparkOperator = "SparkOperator"
    NotifyOperator = "NotifyOperator"
    LinkOperator = "LinkOperator"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
WORK_DIR = "/opt/airflow"  # todo: use /opt/recurve
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Operator(str, Enum):  # todo: duplicate of OperatorEnum above — consolidate
    """Operator type names (same members as OperatorEnum)."""

    SQLOperator = "SQLOperator"
    TransferOperator = "TransferOperator"
    PythonOperator = "PythonOperator"
    SparkOperator = "SparkOperator"
    NotifyOperator = "NotifyOperator"
    LinkOperator = "LinkOperator"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ScheduleType(str, Enum):
    """How a job's runs are scheduled."""

    crontab = "crontab"
    customization = "customization"  # quick/preset schedule settings
    manual = "manual"  # triggered manually
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# dag_id reserved for the built-in status-sync system DAG
SYSTEM_SYNC_STATUS_DAG_ID = "system_sync_status"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def format_recurve_env_key(key: str) -> str:
    """Build the Recurve environment-variable name for *key* (RECURVE__<KEY>)."""
    return "RECURVE__" + key.upper()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_dag_file_loc(job_id: int, sharding_count: int = 7) -> str:
    """Return the path of the autogenerated DAG file that hosts *job_id*.

    Jobs are spread across *sharding_count* files by ``job_id % sharding_count``.
    The count (previously a hard-coded 7) is now a parameter with the same
    default, per the original "todo: configuration" note; the base directory
    is still hard-coded.
    """
    idx = job_id % sharding_count
    return f"/opt/airflow/dags/autogen_sharding_{idx}.py"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Default retry policy applied to scheduled tasks — confirm usage at call sites.
DEFAULT_RETRY_NUMBER = 2
DEFAULT_RETRY_DELAY = 60 * 5  # seconds (5 minutes)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def is_dev_run_job(job_name: str) -> bool:
    """Tell whether *job_name* follows the dev-run naming scheme.

    Dev-run jobs look like ``dev_run_<name>_<num>_<num>``. The check uses
    ``re.match``, i.e. it anchors at the start only, so trailing extra text
    is still accepted.
    """
    return re.match(r"dev_run_.*_\d+_\d+", job_name) is not None
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from airflow.providers.celery.executors.celery_executor import app as celery_app
|
|
5
|
+
from celery import Task
|
|
6
|
+
|
|
7
|
+
from recurvedata.executors.client import ExecutorClient
|
|
8
|
+
from recurvedata.operators.config import CONF
|
|
9
|
+
from recurvedata.utils.mp import run_subprocess
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@celery_app.task(bind=True)
def debug_node(
    self: Task,
    workflow_id: int,
    node_key: str,
    schedule_type: str,
    schedule_interval: str,
    execution_date: str,
    timezone: str,
):
    """Run a single workflow node in debug mode in a subprocess.

    Reports debug start/end to the executor service; the end call carries
    whether the subprocess completed without raising.
    """
    task_id = self.request.id
    # Bug fix: was `logging.info(...)` — logged via the root logger instead
    # of this module's logger.
    logger.info(
        f"start {task_id} {workflow_id} {node_key} {schedule_type} {schedule_interval} {execution_date} {timezone}"
    )

    executor_client = ExecutorClient()

    executor_client.debug_start(workflow_id, node_key, task_id)
    try:
        run_subprocess(
            [
                os.path.join(CONF.RECURVE_EXECUTOR_PYENV_BIN_PATH, "recurve_executor"),
                "debug",
                "--workflow_id",
                f"{workflow_id}",
                "--node_key",
                f"{node_key}",
                "--schedule_type",
                schedule_type,
                "--schedule_interval",
                schedule_interval,
                "--execution_date",
                execution_date,
                "--timezone",
                timezone,
                "--celery_task_id",
                task_id,
            ],
            env=os.environ.copy(),
        )
        is_success = True
    except Exception as e:
        # Best-effort: a failed debug run is reported, not re-raised.
        logger.exception(f"{workflow_id} {node_key} {execution_date} debug failed, err: {e}")
        is_success = False
    executor_client.debug_end(workflow_id, node_key, task_id, is_success)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def revoke_task(task_id: str | None = None, terminate: bool = True):
    """Revoke a Celery task by id, forwarding to ``celery_app.control.revoke``.

    With ``terminate=True`` the worker is asked to terminate the running task.
    """
    return celery_app.control.revoke(task_id, terminate=terminate)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from recurvedata.operators.models import DagBase, NodeBase
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class SchedulerDag(DagBase):
    """Scheduler-side Dag object; extends DagBase with a workflow version."""

    # Version identifier of the associated workflow, if known.
    workflow_version: str | None = None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class SchedulerNode(NodeBase):
    """Scheduler-side Node object."""

    # Operator type name (presumably one of recurvedata.consts.Operator values — confirm).
    operator: str

    # Per-node scheduler overrides; None means unset.
    scheduler_settings: Optional[dict] = None
    # Skip flags for this node / its downstream nodes (None = unset).
    skip_self: Optional[bool] = None
    skip_downstream: Optional[bool] = None
    # Only run for the latest interval (None = unset).
    latest_only: Optional[bool] = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class LinkNodeItem:
    """The node linked by LinkOperator."""

    link_wf_id: int
    link_wf_version: str
    link_node_id: int
    link_node_name: str
    link_node_key: str
    link_latest_only: bool
    link_operator: str
    link_skip_downstream: bool
    link_skip_self: bool
    link_scheduler_settings: Optional[dict] = None
    link_config: Optional[dict] = None  # used in CustomAirflowOperator
    node_id: Optional[int] = None
    plan_id: Optional[int] = None

    @property
    def config(self):
        # for CustomAirflowOperator: expose link_config under the ``config`` name
        return self.link_config
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class LinkWorkflowItem:
    """LinkOperator - link workflow."""

    node_id: int
    link_wf_id: int
    link_wf_name: str
    link_wf_version: str
    # Edges of the linked workflow: [(upstream_node_key, downstream_node_key), ...]
    link_graph: list[tuple[str, str]] = field(default_factory=list)
    link_nodes: Optional[list[LinkNodeItem]] = None
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from recurvedata.consts import Operator, ScheduleType
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SchedulerLinkWorkflowNodeItem(BaseModel):
    """A node of a linked workflow as delivered to the scheduler."""

    link_wf_id: int
    link_wf_version: str
    link_node_id: int
    link_node_key: str
    link_node_name: str
    link_operator: Operator
    link_skip_self: bool
    link_skip_downstream: bool
    link_latest_only: bool
    link_scheduler_settings: dict | None
    plan_id: int | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SchedulerLinkNodeItem(BaseModel):
    """Link information for a node pointing at another workflow's node."""

    # Id of the local node holding the link.
    node_id: int
    link_wf_id: int
    link_wf_version: str
    link_node_id: int
    link_node_key: str
    link_node_name: str
    link_operator: Operator
    link_skip_self: bool
    link_skip_downstream: bool
    link_latest_only: bool
    link_scheduler_settings: dict | None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class SchedulerNodeItem(BaseModel):
    """A workflow node within a scheduled job."""

    id: int
    node_key: str
    name: str
    operator: Operator
    scheduler_settings: dict | None
    skip_self: bool
    skip_downstream: bool
    latest_only: bool
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class JobItem(BaseModel):
    """A schedulable job together with its workflow nodes and graph."""

    id: int
    name: str
    schedule_type: ScheduleType
    schedule_interval: str
    timezone: str | None
    workflow_version: str
    start_date: datetime.datetime | None
    end_date: datetime.datetime | None
    scheduler_settings: dict | None
    owner_username: str

    nodes: list[SchedulerNodeItem]

    # Edges between nodes; element shape is not constrained here — confirm with producer.
    graph: list
    project_id: int | None = None
    project_name: str | None = None
    workflow_id: int | None = None
    workflow_name: str | None = None

    skip_data_tests: bool = False
    # Retry overrides; None means not overridden (presumably scheduler defaults apply — confirm).
    retries: int | None = None
    retry_delay: int | None = None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class SchedulerLinkWorkflowItem(BaseModel):
    """A linked workflow (LinkOperator target) with its nodes and edge list."""

    # Id of the local node holding the link.
    node_id: int
    link_wf_id: int
    link_wf_name: str
    link_wf_version: str
    link_nodes: list[SchedulerLinkWorkflowNodeItem]
    # Edges of the linked workflow; element shape is not constrained here.
    link_graph: list
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class JobListResponse(BaseModel):
    """Payload bundling jobs plus link-node/link-workflow metadata."""

    jobs: list[JobItem]
    link_nodes: list[SchedulerLinkNodeItem]
    link_workflows: list[SchedulerLinkWorkflowItem]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class TaskStatusCursor(BaseModel):
    """Cursor state for incrementally syncing job/task run status."""

    # Presumably last-seen watermarks for job runs / task runs — confirm with consumer.
    job_run: datetime.datetime | None = None
    task_run: datetime.datetime | None = None
    # Max records per poll.
    limit: int = 30
    # Sliding-window size; units not evident here — confirm with consumer.
    sliding_time: int = 1
    # State for runs not yet in a terminal status.
    unfinished: dict | None = None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class WorkflowNodeDebugDetail(BaseModel):
    """Debug-run status of a workflow node."""

    celery_task_id: str | None = None
    state: str | None = None
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from airflow.models import DAG
|
|
5
|
+
|
|
6
|
+
from recurvedata.schedulers.airflow import AirflowScheduler
|
|
7
|
+
from recurvedata.schedulers.consts import get_dag_file_loc
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_job_dag(job_id: int) -> Optional["DAG"]:
    """Build and return the Airflow DAG for *job_id*, or None if none was produced.

    NOTE(review): the scheduler is constructed with ``sharding_size=job_id,
    sharding_key=0`` — verify this is intentional and not a swap of the two
    arguments (cf. ``get_dag_file_loc``, which shards by ``job_id % 7``).
    """
    scheduler = AirflowScheduler(sharding_size=job_id, sharding_key=0)
    # execute() returns a mapping of dag_id -> DAG.
    dag_dct = scheduler.execute()
    dag_ids = list(dag_dct.keys())
    if not dag_ids:
        return
    # Only the first DAG is used; point its fileloc at the shard file so
    # Airflow attributes the DAG to that file.
    dag = dag_dct[dag_ids[0]]
    dag.fileloc = get_dag_file_loc(job_id)
    return dag
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from datetime import datetime, timedelta
|
|
3
|
+
|
|
4
|
+
from airflow import DAG
|
|
5
|
+
from airflow.operators.bash import BashOperator
|
|
6
|
+
|
|
7
|
+
from recurvedata.schedulers.consts import SYSTEM_SYNC_STATUS_DAG_ID
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_system_dags():
    """Instantiate and return every built-in system DAG."""
    system_dags = [create_sync_status_dag()]
    return system_dags
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _prepare_bash_env():
|
|
17
|
+
dct = {}
|
|
18
|
+
for key, val in os.environ.items():
|
|
19
|
+
if key.startswith("RECURVE__"):
|
|
20
|
+
dct[key] = val
|
|
21
|
+
elif key.startswith("AIRFLOW"):
|
|
22
|
+
dct[key] = val
|
|
23
|
+
elif key in (
|
|
24
|
+
"PATH",
|
|
25
|
+
"PYENV_ROOT",
|
|
26
|
+
):
|
|
27
|
+
dct[key] = os.environ[key]
|
|
28
|
+
return dct
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def create_sync_status_dag():
    """Create the system DAG that runs ``recurve_scheduler sync-task-status``.

    Scheduled every 6 hours; retries are frequent and short (150 attempts,
    10s base delay, exponential backoff capped at 30s).
    """
    start_date = datetime(2024, 8, 5)
    default_args = {
        "depends_on_past": False,
        "retries": 150,
        "retry_delay": timedelta(seconds=10),
        # Prioritize over ordinary tasks in the executor queue.
        "priority_weight": 100,
        "retry_exponential_backoff": True,
        "max_retry_delay": timedelta(seconds=30),
    }
    dag = DAG(
        SYSTEM_SYNC_STATUS_DAG_ID,
        default_args=default_args,
        description="A DAG to sync db status",
        schedule_interval="0 */6 * * *",  # Run every 6 hours
        start_date=start_date,
        catchup=False,
        # Give each run up to a full schedule interval (6h) before timing out.
        dagrun_timeout=timedelta(minutes=60 * 6),
        max_active_runs=1,  # todo: retry may delay the future dag_run
        is_paused_upon_creation=False,
    )

    BashOperator(
        task_id="sync_status",
        bash_command="recurve_scheduler sync-task-status --interval=5",
        dag=dag,
        # Forward only the Recurve/Airflow-related environment variables.
        env=_prepare_bash_env(),
    )
    return dag
|