recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. The information in it is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import logging
|
|
3
|
+
import typing
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from recurvedata.pigeon.utils.fs import new_stagefile_factory
|
|
7
|
+
except ImportError:
|
|
8
|
+
pass
|
|
9
|
+
|
|
10
|
+
from recurvedata.operators.config import CONF
|
|
11
|
+
from recurvedata.operators.models import DagBase, NodeBase
|
|
12
|
+
from recurvedata.operators.operator import BaseOperator
|
|
13
|
+
from recurvedata.operators.transfer_operator.task import get_dump_classes, get_load_classes, get_task_class
|
|
14
|
+
from recurvedata.operators.ui import format_config_schema
|
|
15
|
+
from recurvedata.utils import md5hash
|
|
16
|
+
|
|
17
|
+
if typing.TYPE_CHECKING:
|
|
18
|
+
from recurvedata.operators.transfer_operator.task import DumpTask, LoadTask
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TransferOperator(BaseOperator):
    """
    Operator that handles data transfer operations between dump and load stages.

    Manages the execution of dump and load tasks with appropriate configurations.
    """

    stages = ("dump", "load")

    def __init__(self, dag: DagBase, node: NodeBase, execution_date: datetime.datetime, variables: dict = None) -> None:
        # Tasks are created later by init_task(); the annotations stay quoted
        # because DumpTask/LoadTask are imported under TYPE_CHECKING only.
        self.dump_task: "DumpTask" = None
        self.load_task: "LoadTask" = None
        self.filename: str = self._determine_filename(dag, node, execution_date)
        # self.execution_date = as_local_datetime(execution_date)

        super().__init__(dag, node, execution_date, variables)

    def init_task(self):
        """Create the dump and load task instances from the node configuration."""
        params = {
            "dag": self.dag,
            "node": self.node,
            "execution_date": self.execution_date,
            "filename": self.filename,
            "variables": self.variables,
        }

        load_config = self.node.configuration["load"]
        load_cls = self.get_task_class(load_config["name"])
        logger.debug(f"create load task with {params}")

        # BUG FIX: the annotation must be a string literal. LoadTask/DumpTask are
        # only imported under typing.TYPE_CHECKING, and PEP 526 annotations on
        # attribute targets are evaluated at runtime, so a bare name raises
        # NameError here.
        self.load_task: "LoadTask" = load_cls(config=load_config["config"], **params)

        # TODO: ideally remove this kind of configuration; dump and load should
        # both use a single unified CSV format and let the loader adapt itself.
        handler_options = {
            "encoding": None,
            "write_header": self._determine_write_header(),
        }
        if self.load_task.default_dumper_handler_options:
            handler_options.update(self.load_task.default_dumper_handler_options)

        dump_config = self.node.configuration["dump"]
        dump_cls = self.get_task_class(dump_config["name"])
        logger.debug(f"create dump task with {params}")
        self.dump_task: "DumpTask" = dump_cls(config=dump_config["config"], handler_options=handler_options, **params)
        self.load_task.dump_task_type = dump_cls.__name__

    def set_execution_date(self, execution_date):
        """Propagate a new execution date to both tasks."""
        self.dump_task.set_execution_date(execution_date)
        self.load_task.set_execution_date(execution_date)

    def _determine_write_header(self):
        # The load side decides whether the intermediate CSV carries a header row.
        return self.load_task.should_write_header

    @staticmethod
    def _determine_filename(dag: DagBase, node: NodeBase, execution_date: datetime.datetime) -> str:
        """
        Generate a unique staging filename for the transfer operation.

        Args:
            dag: The DAG instance
            node: The node instance
            execution_date: The execution datetime

        Returns:
            str: Generated filename
        """
        dag_id = dag.id
        node_id = node.node_key
        is_link_node = getattr(node, "is_link_op", False)
        if not is_link_node:
            hash_txt = md5hash(f"{dag_id}|{node_id}|{execution_date}")
            prefix = f"{dag_id}_{node_id}_"
        else:
            # Link nodes also mix the origin node key into the hash/prefix so
            # the linked copy gets its own staging file.
            origin_node = node.origin_node
            hash_txt = md5hash(f"{dag_id}|{origin_node.node_key}|{node_id}|{execution_date}")
            prefix = f"{dag_id}_{origin_node.node_key}_{node_id}_"
            logger.info(f"link op _determine_filename: {prefix} {hash_txt}")

        # Keep at least 8 hash characters while roughly bounding total length.
        hash_len = max(8, len(hash_txt) - len(prefix))
        return new_stagefile_factory(CONF.DATA_ROOT)(prefix + hash_txt[:hash_len])

    def dump(self):
        return self.dump_task.execute()

    def load(self):
        return self.load_task.execute()

    def execute(self):
        self.dump()
        self.load()

    @classmethod
    def validate(cls, configuration: dict):
        """Validate both stage configurations; returns the cleaned config dict."""
        config = {
            "dump": cls._validate_task_config(configuration["dump"]),
            "load": cls._validate_task_config(configuration["load"]),
        }
        return config

    @classmethod
    def _validate_task_config(cls, config: dict):
        task_cls = cls.get_task_class(config["name"])
        cfg = task_cls.validate(config["config"])
        return {"name": config["name"], "config": cfg}

    @classmethod
    def to_dict(cls) -> dict:
        # NOTE(review): uses the module-level get_dump_classes()/get_load_classes()
        # (no `enabled` filter), unlike config_schema() below — confirm intended.
        return {
            "name": cls.name(),
            "config_schema": {
                "dump": [x.to_dict() for x in get_dump_classes()],
                "load": [x.to_dict() for x in get_load_classes()],
            },
        }

    @classmethod
    def config_schema(cls):
        return {
            "dump": [{"name": x.name(), "config_schema": x.config_schema()} for x in cls.get_dump_classes()],
            "load": [{"name": x.name(), "config_schema": x.config_schema()} for x in cls.get_load_classes()],
        }

    @classmethod
    def ui_config_schema(cls):
        """Schema shaped for the UI: per-stage display name plus formatted schemas."""
        return {
            "dump": {
                "name": "Dump",
                "config_schema": [
                    {"name": x.name(), "config_schema": format_config_schema(x.config_schema(), "dump")}
                    for x in cls.get_dump_classes()
                ],
            },
            "load": {
                "name": "Load",
                "config_schema": [
                    {"name": x.name(), "config_schema": format_config_schema(x.config_schema(), "load")}
                    for x in cls.get_load_classes()
                ],
            },
        }

    @classmethod
    def ui_validate(cls, configuration: dict) -> dict:
        res = {
            "dump": cls._add_schema_name_to_json_schema_error("dump", cls._validate_task_config, configuration["dump"]),
            "load": cls._add_schema_name_to_json_schema_error("load", cls._validate_task_config, configuration["load"]),
        }
        return res

    @classmethod
    def ui_config_to_config(cls, configuration: dict) -> dict:
        return {
            "dump": configuration["dump"],
            "load": configuration["load"],
        }

    @classmethod
    def get_ds_name_field_values(cls, rendered_config: dict) -> list[str]:
        """Collect datasource-name field values referenced by both stages."""
        config = cls.ui_config_to_config(rendered_config)
        res = []
        dump_cls = cls.get_task_class(config["dump"]["name"])
        if dump_cls:
            res.extend(dump_cls.get_ds_name_field_values(config["dump"]["config"]))
        load_cls = cls.get_task_class(config["load"]["name"])
        if load_cls:
            res.extend(load_cls.get_ds_name_field_values(config["load"]["config"]))
        return res

    @classmethod
    def get_task_class(cls, name: str):
        return get_task_class(name)

    @classmethod
    def get_dump_classes(cls, check_enabled=True):
        res_lst = get_dump_classes()
        if check_enabled:
            res_lst = [dump_cls for dump_cls in res_lst if dump_cls.enabled]
        return res_lst

    @classmethod
    def get_load_classes(cls, check_enabled=True):
        res_lst = get_load_classes()
        if check_enabled:
            res_lst = [load_cls for load_cls in res_lst if load_cls.enabled]
        return res_lst

    @classmethod
    def get_setup_install_require(cls) -> dict:
        """
        Build the pip extras mapping for this operator.

        Returns a dict keyed by "<side>.<op>.<stage>.<task>" for each task class
        with requirements, plus aggregate "web" and "worker" keys holding
        de-duplicated, sorted requirement lists.
        """
        require_dct = {}
        op_name = cls.name()
        op_web_requires = cls.web_install_require[:]
        op_worker_requires = cls.worker_install_require[:]
        for dump_cls in cls.get_dump_classes():
            if dump_cls.web_install_require:
                require_dct[f"web.{op_name}.dump.{dump_cls.name()}"] = dump_cls.web_install_require
                op_web_requires.extend(dump_cls.web_install_require)
            if dump_cls.worker_install_require:
                require_dct[f"worker.{op_name}.dump.{dump_cls.name()}"] = dump_cls.worker_install_require
                op_worker_requires.extend(dump_cls.worker_install_require)

        for load_cls in cls.get_load_classes():
            if load_cls.web_install_require:
                require_dct[f"web.{op_name}.load.{load_cls.name()}"] = load_cls.web_install_require
                op_web_requires.extend(load_cls.web_install_require)
            if load_cls.worker_install_require:
                require_dct[f"worker.{op_name}.load.{load_cls.name()}"] = load_cls.worker_install_require
                op_worker_requires.extend(load_cls.worker_install_require)
        require_dct["web"] = sorted(set(op_web_requires))
        require_dct["worker"] = sorted(set(op_worker_requires))
        return require_dct
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import tempfile
|
|
6
|
+
import traceback
|
|
7
|
+
|
|
8
|
+
import jsonschema
|
|
9
|
+
|
|
10
|
+
from recurvedata.core.transformer import Transformer
|
|
11
|
+
from recurvedata.operators.task import BaseTask
|
|
12
|
+
from recurvedata.utils.attrdict import AttrDict
|
|
13
|
+
from recurvedata.utils.date_time import round_time_resolution
|
|
14
|
+
from recurvedata.utils.helpers import first
|
|
15
|
+
from recurvedata.utils.registry import Registry
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
from recurvedata.pigeon.handler.csv_handler import create_csv_file_handler_factory
|
|
19
|
+
from recurvedata.pigeon.utils import fs, trim_suffix
|
|
20
|
+
except ImportError:
|
|
21
|
+
pass
|
|
22
|
+
|
|
23
|
+
from recurvedata.operators.transfer_operator import utils
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
_registry = Registry(key_callback=lambda x: x.name())
|
|
27
|
+
_load_task_registry = Registry(key_callback=lambda x: x.ds_types)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Task(BaseTask):
    """Common base for transfer tasks (dump and load stages).

    Holds the stage configuration as an attribute-accessible dict and the
    staging filename shared between the dump and load sides.
    """

    worker_install_require = []
    web_install_require = []

    def __init__(self, dag, node, execution_date, variables, config, filename):
        super().__init__(dag, node, execution_date, variables)

        self.filename = filename
        self.config = AttrDict(config)

    @classmethod
    def type(cls):
        """Stage identifier; the abstract base has none."""
        return None

    @staticmethod
    def first_or_default(dss, default=""):
        """Return the first item of *dss*, or *default* when it is empty."""
        return first(dss, default)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class DumpTask(Task):
|
|
50
|
+
_AUTO_REGISTER = True
|
|
51
|
+
_MAX_ERROR_RATE = 0
|
|
52
|
+
no_template_fields = (
|
|
53
|
+
"data_source_name",
|
|
54
|
+
"filter_engine",
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
@classmethod
|
|
58
|
+
def type(cls):
|
|
59
|
+
return "dump"
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def stage(self) -> str:
|
|
63
|
+
return "dump"
|
|
64
|
+
|
|
65
|
+
def __init_subclass__(cls, **kwargs):
|
|
66
|
+
if cls._AUTO_REGISTER:
|
|
67
|
+
_registry.add(cls)
|
|
68
|
+
|
|
69
|
+
def __init__(self, handler_options=None, *args, **kwargs):
|
|
70
|
+
self.handler_options = handler_options or {}
|
|
71
|
+
super().__init__(*args, **kwargs)
|
|
72
|
+
|
|
73
|
+
@classmethod
|
|
74
|
+
def validate(cls, configuration):
|
|
75
|
+
config = super().validate(configuration)
|
|
76
|
+
|
|
77
|
+
transformer_code = configuration.get("transform", "").strip()
|
|
78
|
+
if not transformer_code:
|
|
79
|
+
return config
|
|
80
|
+
try:
|
|
81
|
+
utils.validate_transform(transformer_code)
|
|
82
|
+
except (ValueError, TypeError) as e:
|
|
83
|
+
raise jsonschema.ValidationError(message=str(e), path=("transform",))
|
|
84
|
+
except Exception:
|
|
85
|
+
tb = traceback.format_exc(limit=0)
|
|
86
|
+
msg = "\n".join(tb.splitlines()[1:])
|
|
87
|
+
raise jsonschema.ValidationError(message=msg, path=("transform",))
|
|
88
|
+
|
|
89
|
+
if "custom_handler_options" in config:
|
|
90
|
+
try:
|
|
91
|
+
value = json.loads(config["custom_handler_options"])
|
|
92
|
+
except Exception:
|
|
93
|
+
raise jsonschema.ValidationError(
|
|
94
|
+
message="custom_handler_options should be valid JSON", path=("custom_handler_options",)
|
|
95
|
+
)
|
|
96
|
+
if not isinstance(value, dict):
|
|
97
|
+
raise jsonschema.ValidationError(
|
|
98
|
+
message="custom_handler_options should be dict", path=("custom_handler_options",)
|
|
99
|
+
)
|
|
100
|
+
return config
|
|
101
|
+
|
|
102
|
+
def create_handler_factory(self):
|
|
103
|
+
self.remove_intermediate_files()
|
|
104
|
+
transformer = self.create_transformer()
|
|
105
|
+
kwargs = self.handler_options.copy()
|
|
106
|
+
encoding = self.rendered_config.get("middle_file_encoding")
|
|
107
|
+
kwargs.update(
|
|
108
|
+
{
|
|
109
|
+
"filename": self.filename,
|
|
110
|
+
"encoding": encoding,
|
|
111
|
+
"transformer": transformer,
|
|
112
|
+
"max_error_rate": self._MAX_ERROR_RATE,
|
|
113
|
+
}
|
|
114
|
+
)
|
|
115
|
+
# FIXME: ugly way to get more handler options from Transformer definition
|
|
116
|
+
kwargs.update(getattr(transformer, "handler_options", {}))
|
|
117
|
+
|
|
118
|
+
# allow user to override the default handler options
|
|
119
|
+
if self.rendered_config.custom_handler_options:
|
|
120
|
+
custom_handler_options = json.loads(self.rendered_config.custom_handler_options)
|
|
121
|
+
kwargs.update(custom_handler_options)
|
|
122
|
+
hf = create_csv_file_handler_factory(**kwargs)
|
|
123
|
+
return hf
|
|
124
|
+
|
|
125
|
+
def create_transformer(self) -> Transformer:
|
|
126
|
+
transformer_code = self.rendered_config.get("transform", "").strip()
|
|
127
|
+
if transformer_code:
|
|
128
|
+
transformer = utils.validate_transform(transformer_code)
|
|
129
|
+
else:
|
|
130
|
+
transformer = None
|
|
131
|
+
return transformer
|
|
132
|
+
|
|
133
|
+
def has_custom_transformer(self):
    """Whether custom transform code is configured.

    Note: returns the raw config value (a truthy string or a falsy
    empty/missing value), not a bool.
    """
    return self.rendered_config.get("transform")
|
|
135
|
+
|
|
136
|
+
def get_schedule_time_range(self):
    """Return the (start, end) schedule window ending at this run.

    ``start`` is the previous scheduled run time, ``end`` is the current
    execution date. When ``time_auto_round`` is enabled in the config,
    both are rounded to the DAG's schedule-interval resolution.
    """
    end = self.execution_date
    start = self.dag.previous_schedule(self.execution_date)
    if self.config.get("time_auto_round", False):
        interval = self.dag.schedule_interval
        start = round_time_resolution(start, interval)
        end = round_time_resolution(end, interval)
    return start, end
|
|
143
|
+
|
|
144
|
+
def remove_intermediate_files(self):
    """Delete every intermediate file derived from this task's filename."""
    glob_pattern = f"{self.filename}.*"
    logger.info(f"remove intermediate files {glob_pattern}")
    fs.remove_files_by_pattern(glob_pattern)
|
|
148
|
+
|
|
149
|
+
def on_execute_impl_error(self, exc: Exception):
    """Error hook: log the failure with traceback, then remove any
    partially written intermediate files."""
    logger.exception(f"caught error: {exc}")
    self.remove_intermediate_files()
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class LoadTask(Task):
    """Base class for tasks that load dumped data into a destination.

    Subclasses are auto-registered in both the global task registry and
    the load-task registry.
    """

    # Data source types this load task supports (overridden by subclasses).
    ds_types = ()
    # Whether the upstream dump should write a CSV header row.
    should_write_header = False
    # Extra handler options passed to the dumper.
    # NOTE: class-level mutable default — subclasses should replace it,
    # never mutate it in place, or the change leaks to every LoadTask.
    default_dumper_handler_options = {}
    # Optional override of the dump task type paired with this load task.
    dump_task_type = None

    def __init_subclass__(cls, **kwargs):
        # Cooperate with other bases in the MRO before registering; the
        # original skipped super(), which breaks multiple-inheritance hooks.
        super().__init_subclass__(**kwargs)
        _registry.add(cls)
        _load_task_registry.add(cls)

    @classmethod
    def type(cls):
        return "load"

    @property
    def stage(self) -> str:
        return "load"

    @staticmethod
    def compress_file(filename, target_filename=None, compress_mode="None"):
        """Compress *filename* before loading.

        Supported modes: "None" (no-op), "Gzip", "Zip", "Bzip2".

        Returns:
            (compressed_file, ext): path of the produced archive and its
            extension, or (filename, None) when compress_mode is "None".

        Raises:
            ValueError: for an unsupported compress_mode.
        """
        if compress_mode == "None":
            return filename, None
        if compress_mode not in ("Gzip", "Zip", "Bzip2"):
            raise ValueError(f"{compress_mode} is not supported")

        logger.info(f"Compressing file using {compress_mode}")
        compress_method, ext = {
            "Gzip": (fs.gzip_compress, ".gz"),
            "Zip": (fs.zip_compress, ".zip"),
            "Bzip2": (fs.bzip2_compress, ".bz2"),
        }[compress_mode]

        # If a name inside the archive was requested, temporarily rename the
        # file to that name, compress it, then rename it back afterwards.
        if target_filename:
            inner_filename = trim_suffix(os.path.basename(target_filename), ext)
            tmp_dir = tempfile.mkdtemp(dir=os.path.dirname(filename))
            file_to_compress = os.path.join(tmp_dir, inner_filename)
            os.rename(filename, file_to_compress)
        else:
            # NOTE(review): "(unknown)" looks like a placeholder default for
            # the archive name — confirm the intended value (e.g. derived
            # from *filename*).
            target_filename = f"(unknown){ext}"
            file_to_compress = filename

        # The original had `except BaseException as e: raise e`, a no-op
        # clause; try/finally alone gives the same behavior.
        try:
            compressed_file = compress_method(file_to_compress, target_filename=target_filename, using_cmd=True)
        finally:
            # Always restore the original filename and drop the temp dir,
            # whether compression succeeded or raised.
            if file_to_compress != filename:
                os.rename(file_to_compress, filename)
                shutil.rmtree(os.path.dirname(file_to_compress))
        return compressed_file, ext
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def get_task_class(name):
    """Look up a registered task class by name; raises KeyError if absent."""
    return _registry[name]
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def get_dump_classes():
    """All registered dump task classes, sorted by their name."""
    dump_classes = (cls for cls in _registry.values() if cls.type() == "dump")
    return sorted(dump_classes, key=lambda cls: cls.name())
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_load_classes():
    """All registered load task classes, sorted by their name."""
    load_classes = (cls for cls in _registry.values() if cls.type() == "load")
    return sorted(load_classes, key=lambda cls: cls.name())
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def get_load_by_ds_type(ds_type):
    """Return the name of the load task class registered for *ds_type*.

    Raises:
        ValueError: if no load task supports the given data source type.
    """
    klass = _load_task_registry.get(ds_type)
    if klass is None:
        # Previously this crashed with an opaque AttributeError on None.
        raise ValueError(f"no load task registered for ds_type {ds_type!r}")
    return klass.name()
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
import importlib.util
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from recurvedata.core.transformer import Transformer
|
|
5
|
+
from recurvedata.core.translation import _l
|
|
6
|
+
from recurvedata.operators.transfer_operator import const
|
|
7
|
+
|
|
8
|
+
# Load modes a transfer task may use when writing into the target table.
allowed_modes = (const.LOAD_OVERWRITE, const.LOAD_MERGE, const.LOAD_APPEND)

# Default source shown in the "Custom Transformation" code editor: a
# working no-op Transformer the user can edit in place.
_TRANSFORM_SKELETON = """\
from recurvedata.core.transformer import Transformer


class MyTransformer(Transformer):
    def transform_impl(self, row, *args, **kwargs):
        # The row is an OrderedDict. Write your custom transformation logic here.
        return row


# Instantiate the transformer, the name must be `transformer`
transformer = MyTransformer()
"""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# JSON-schema fragment for the optional "transform" field: Python source
# edited in a code editor, pre-filled with the no-op skeleton above.
TRANSFORM = {
    "type": "string",
    "title": _l("Custom Transformation"),
    "description": _l(
        "Python code to transform data during transfer. Must implement a Transformer class with "
        "transform_impl method that processes each row. See example code below."
    ),
    "default": _TRANSFORM_SKELETON,
    "ui:field": "CodeEditorWithReferencesField",
    "ui:options": {
        "type": "code",
        "lang": "python",
    },
}
|
|
39
|
+
|
|
40
|
+
# Shared JSON-schema properties used by every load task's config form:
# load mode, merge keys, and optional pre-load deduplication settings.
LOAD_COMMON = {
    "mode": {
        "type": "string",
        "title": _l("Load Mode"),
        "description": _l("How to handle existing data in the target table"),
        "enum": list(allowed_modes),
        "enumNames": list(allowed_modes),
        "default": const.LOAD_OVERWRITE,
    },
    "primary_keys": {
        "type": "string",
        "title": _l("Primary Keys"),
        "description": _l(
            "Comma-separated list of columns used for deduplication in MERGE mode. "
            "Should be primary or unique key columns."
        ),
        "ui:field": "CodeEditorWithReferencesField",
        "ui:options": {
            "type": "plain",
        },
        # Only meaningful in MERGE mode; hidden otherwise.
        "ui:hidden": '{{parentFormData.mode !== "MERGE"}}',
    },
    "dedup": {
        "type": "boolean",
        "title": _l("Enable Deduplication"),
        "default": False,
        "description": _l("Remove duplicate rows from the data before loading"),
        "ui:widget": "BaseCheckbox",
        "ui:options": {
            "label": _l("Enable Deduplication"),
        },
    },
    "dedup_uniq_keys": {
        "type": "string",
        "title": _l("Deduplication Keys"),
        "description": _l("Comma-separated list of columns that uniquely identify each row"),
        "ui:field": "CodeEditorWithReferencesField",
        "ui:options": {
            "type": "plain",
        },
        # Only shown when deduplication is enabled.
        "ui:hidden": "{{!parentFormData.dedup}}",
    },
    "dedup_orderby": {
        "type": "string",
        "title": _l("Sort Order"),
        "description": _l("Comma-separated list of columns to sort by before deduplication"),
        "ui:field": "CodeEditorWithReferencesField",
        "ui:options": {
            "type": "plain",
        },
        "ui:hidden": "{{!parentFormData.dedup}}",
    },
    # Kept for reference: pre/post-load SQL hooks, currently disabled.
    # "pre_queries": {
    #     "type": "string",
    #     "title": "Queries Ran Before Loading",
    #     "description": 'SQL run before the new data is loaded; separate multiple statements with `;`. Variables are supported, see the <a target="_blank" href="https://bit.ly/2JMutjn">docs</a>',
    #     "ui:field": "CodeEditorWithReferencesField",
    #     "ui:options": {
    #         "type": "code",
    #         "lang": "sql",
    #         "sqlLang": "sql",
    #     },
    # },
    # "post_queries": {
    #     "type": "string",
    #     "title": "Queries Ran After Loading",
    #     "description": 'SQL run after the new data is loaded; separate multiple statements with `;`. Variables are supported, see the <a target="_blank" href="https://bit.ly/2JMutjn">docs</a>',
    #     "ui:field": "CodeEditorWithReferencesField",
    #     "ui:options": {
    #         "type": "code",
    #         "lang": "sql",
    #         "sqlLang": "sql",
    #     },
    # },
}
|
|
115
|
+
|
|
116
|
+
# Create an empty in-memory module to serve as the exec() namespace for
# user-supplied transform code. It is registered in sys.modules —
# presumably so objects defined inside it have a resolvable __module__;
# verify before changing. NOTE: the namespace is shared across calls.
__spec = importlib.util.spec_from_loader("recurve_hack", None)
__recurve_hack = importlib.util.module_from_spec(__spec)
sys.modules["recurve_hack"] = __recurve_hack
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def validate_transform(raw_code):
    """Compile and execute user transform code, returning its ``transformer``.

    The code must define a module-level object named ``transformer`` that is
    an instance of Transformer (core or pigeon), or whose class is literally
    named ``MyTransformer``.

    Raises:
        ValueError: if the code defines no truthy ``transformer`` object.
        TypeError: if ``transformer`` has an unexpected type.
    """
    from recurvedata.pigeon.transformer import Transformer as PigeonTransformer

    # The exec namespace is a long-lived shared module: drop any transformer
    # left over from a previous call so a stale object can't satisfy this
    # validation when the new code forgets to define one.
    __recurve_hack.__dict__.pop("transformer", None)

    # NOTE: exec of user-supplied code — callers must only pass trusted input.
    code = compile(raw_code, "", "exec")
    exec(code, __recurve_hack.__dict__)
    transformer = __recurve_hack.__dict__.get("transformer")
    if not transformer:
        raise ValueError("transformer is required")
    if (
        not isinstance(transformer, (Transformer, PigeonTransformer))
        and transformer.__class__.__name__ != "MyTransformer"
    ):
        raise TypeError(f"transformer should be type of pigeon.transformer.Transformer, {type(transformer)}")
    return transformer
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
def format_config_schema(config_schema: dict, schema_name: str):
    """Recursively re-format *config_schema* in place for the frontend.

    Object-typed properties are recursed into (using the property name as
    the new schema name); leaf fields get the per-field UI tweaks applied
    by format_field_schema.

    Returns the same (mutated) dict for convenience.
    """
    if "properties" not in config_schema:
        return config_schema
    for field_name, field_dct in config_schema["properties"].items():
        # .get() so schema entries without an explicit "type" (e.g. oneOf
        # branches) are treated as leaf fields instead of raising KeyError.
        if field_dct.get("type") == "object":
            format_config_schema(field_dct, field_name)
        else:
            format_field_schema(field_dct, schema_name)
    return config_schema
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def format_field_schema(field_dct: dict, schema_name: str):
    """Apply frontend-specific tweaks to a single field schema, in place.

    Example *field_dct*:
    {
        'type': 'string',
        'title': 'Data Source',
        "ui:field": "ProjectConnectionSelectorField",
        "ui:options": {
            "supportTypes": ["mysql", "postgres",],
        },
    },
    """
    _add_option_id(field_dct)
    _format_input_with_variable(field_dct, schema_name)
    _format_aliases_select_field(field_dct)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _add_option_id(field_dct: dict):
|
|
33
|
+
ui_field = field_dct.get("ui:field")
|
|
34
|
+
if ui_field == "CodeEditorWithReferencesField":
|
|
35
|
+
return
|
|
36
|
+
if "ui:options" not in field_dct:
|
|
37
|
+
field_dct["ui:options"] = {}
|
|
38
|
+
if "id" in field_dct["ui:options"]:
|
|
39
|
+
return
|
|
40
|
+
field_dct["ui:options"]["id"] = ""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _format_input_with_variable(field_dct: dict, schema_name: str):
|
|
44
|
+
ui_field = field_dct.get("ui:field")
|
|
45
|
+
if ui_field != "CodeEditorWithReferencesField":
|
|
46
|
+
return
|
|
47
|
+
ui_options: dict = field_dct.get("ui:options")
|
|
48
|
+
if not ui_options:
|
|
49
|
+
return
|
|
50
|
+
ui_type = ui_options.get("type")
|
|
51
|
+
if ui_type != "code":
|
|
52
|
+
return
|
|
53
|
+
# 全屏相关配置
|
|
54
|
+
if "toParent" in ui_options:
|
|
55
|
+
return
|
|
56
|
+
ui_options["toParent"] = ".expanded_code_position"
|
|
57
|
+
ui_options["parentName"] = schema_name
|
|
58
|
+
ui_options["needExpandBtn"] = True
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _format_aliases_select_field(field_dct: dict):
    """Rewrite supportTypes from backend connection types to UI types.

    The frontend displays ProjectConnectionSelectorField options by the
    connection's ui_type rather than its backend type; unmapped (falsy)
    entries are dropped.
    """
    from recurvedata.connectors import get_connection_ui_type

    if field_dct.get("ui:field") != "ProjectConnectionSelectorField":
        return
    ui_options: dict = field_dct.get("ui:options")
    if not ui_options:
        return
    support_types = ui_options.get("supportTypes")
    if not support_types:
        return

    mapped = (get_connection_ui_type(backend_type) for backend_type in support_types)
    ui_options["supportTypes"] = [ui_type for ui_type in mapped if ui_type]
|