recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
from recurvedata.connectors.base import RecurveConnectorBase
|
|
7
|
+
from recurvedata.connectors.proxy import HttpProxyMixin
|
|
8
|
+
from recurvedata.core.translation import _l
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
DEFAULT_TIMEOUT = 10 # in seconds
|
|
13
|
+
|
|
14
|
+
# TODO(chenjingmeng): generate this list automatically with scripts/gen_const.py
|
|
15
|
+
ALL_CONNECTION_SECRET_WORDS = [
|
|
16
|
+
"account_key",
|
|
17
|
+
"api_key",
|
|
18
|
+
"api_secret_key",
|
|
19
|
+
"app_secret",
|
|
20
|
+
"blob_options.sas_token",
|
|
21
|
+
"password",
|
|
22
|
+
"private_key",
|
|
23
|
+
"sas_token",
|
|
24
|
+
"secret_access_key",
|
|
25
|
+
"secret_key",
|
|
26
|
+
"key_dict.private_key",
|
|
27
|
+
"client_secret",
|
|
28
|
+
"access_key_secret",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _format_connector_module_name(connection_type: str) -> str:
|
|
33
|
+
"""
|
|
34
|
+
connection_type can be ui_connection_type
|
|
35
|
+
"""
|
|
36
|
+
connection_type = connection_type.replace(" ", "_").lower()
|
|
37
|
+
return f"recurvedata.connectors.connectors.{connection_type}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
DBAPI_TYPES = [
|
|
41
|
+
"azure_synapse",
|
|
42
|
+
"bigquery",
|
|
43
|
+
"clickhouse",
|
|
44
|
+
"elasticsearch",
|
|
45
|
+
"hive",
|
|
46
|
+
"impala",
|
|
47
|
+
"mongodb",
|
|
48
|
+
"mssql",
|
|
49
|
+
"mysql",
|
|
50
|
+
"phoenix",
|
|
51
|
+
"postgres",
|
|
52
|
+
"redis",
|
|
53
|
+
"redshift",
|
|
54
|
+
"starrocks",
|
|
55
|
+
"tidb",
|
|
56
|
+
"doris",
|
|
57
|
+
"microsoft_fabric",
|
|
58
|
+
]
|
|
59
|
+
CONNECTION_TYPE_MODULE_MAPPING = { # todo(chenjingmeng): auto generated
|
|
60
|
+
"Tencent COS": "recurvedata.connectors.connectors.tencent_cos",
|
|
61
|
+
"cos": "recurvedata.connectors.connectors.tencent_cos",
|
|
62
|
+
"Elastic Search": "recurvedata.connectors.connectors.es",
|
|
63
|
+
"elasticsearch": "recurvedata.connectors.connectors.es",
|
|
64
|
+
"Ding Talk": "recurvedata.connectors.connectors.dingtalk",
|
|
65
|
+
"feishu_bot": "recurvedata.connectors.connectors.feishu",
|
|
66
|
+
"SelectDB(Doris)": "recurvedata.connectors.connectors.doris",
|
|
67
|
+
"azure_mssql": "recurvedata.connectors.connectors.mssql",
|
|
68
|
+
"google_bigquery": "recurvedata.connectors.connectors.bigquery",
|
|
69
|
+
"Google BigQuery": "recurvedata.connectors.connectors.bigquery",
|
|
70
|
+
"BigQuery": "recurvedata.connectors.connectors.bigquery",
|
|
71
|
+
"PostgreSQL": "recurvedata.connectors.connectors.postgres",
|
|
72
|
+
"MongoDB": "recurvedata.connectors.connectors.mongo",
|
|
73
|
+
"mongodb": "recurvedata.connectors.connectors.mongo",
|
|
74
|
+
"Microsoft SQL Server": "recurvedata.connectors.connectors.mssql",
|
|
75
|
+
"selectdb(doris)": "recurvedata.connectors.connectors.doris",
|
|
76
|
+
"apache impala": "recurvedata.connectors.connectors.impala",
|
|
77
|
+
"Apache Impala": "recurvedata.connectors.connectors.impala",
|
|
78
|
+
"Aliyun OSS": "recurvedata.connectors.connectors.oss",
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def get_module_name(connection_type: str) -> str:
    """Resolve the connector module path for ``connection_type``.

    An explicit entry in ``CONNECTION_TYPE_MODULE_MAPPING`` wins; any other
    type falls back to the name derived by ``_format_connector_module_name``.
    """
    try:
        return CONNECTION_TYPE_MODULE_MAPPING[connection_type]
    except KeyError:
        # No hand-written override: derive the module name from the type.
        return _format_connector_module_name(connection_type)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
SQL_OPERATOR_TYPES = [
|
|
89
|
+
"azure_mssql",
|
|
90
|
+
"azure_synapse",
|
|
91
|
+
"bigquery",
|
|
92
|
+
"clickhouse",
|
|
93
|
+
"google_bigquery",
|
|
94
|
+
"hive",
|
|
95
|
+
"impala",
|
|
96
|
+
"mssql",
|
|
97
|
+
"mysql",
|
|
98
|
+
"phoenix",
|
|
99
|
+
"postgres",
|
|
100
|
+
"redshift",
|
|
101
|
+
"starrocks",
|
|
102
|
+
"tidb",
|
|
103
|
+
"doris",
|
|
104
|
+
"microsoft_fabric",
|
|
105
|
+
]
|
|
106
|
+
JUICE_SYNC_ABLE_DBAPI_TYPES = ["azure_blob", "cos", "google_cloud_storage", "oss", "s3", "sftp"]
|
|
107
|
+
|
|
108
|
+
# This constant is maintained by hand; see "https://docs.getdbt.com/docs/supported-data-platforms"
|
|
109
|
+
# Is there a web API that could provide this list instead?
|
|
110
|
+
DBT_SUPPORTED_TYPES = [
|
|
111
|
+
# Officially trusted adapters; these database / data
# warehouse connectors seem to be more robust.
|
|
113
|
+
# ------------------------------------------------
|
|
114
|
+
"spark",
|
|
115
|
+
"azure_synapse",
|
|
116
|
+
"bigquery",
|
|
117
|
+
"postgres",
|
|
118
|
+
"redshift",
|
|
119
|
+
# not implemented connectors
|
|
120
|
+
"alloy_db",
|
|
121
|
+
"athena",
|
|
122
|
+
"databricks",
|
|
123
|
+
"dremio",
|
|
124
|
+
"glue",
|
|
125
|
+
"materialize",
|
|
126
|
+
"microsoft_fabric",
|
|
127
|
+
"oracle_autonomous_database",
|
|
128
|
+
"snowflake",
|
|
129
|
+
"starburst",
|
|
130
|
+
"teradata",
|
|
131
|
+
# --------------------------------------------------
|
|
132
|
+
# community maintained
|
|
133
|
+
# --------------------------------------------------
|
|
134
|
+
"mysql",
|
|
135
|
+
"starrocks",
|
|
136
|
+
"clickhouse",
|
|
137
|
+
"doris",
|
|
138
|
+
"tidb",
|
|
139
|
+
"hive",
|
|
140
|
+
"impala",
|
|
141
|
+
# not implemented connectors
|
|
142
|
+
"duckdb",
|
|
143
|
+
"exasol_analytics",
|
|
144
|
+
"extrica",
|
|
145
|
+
"ibm_db2",
|
|
146
|
+
"infer",
|
|
147
|
+
"iomete",
|
|
148
|
+
"mindsdb",
|
|
149
|
+
"risingwave",
|
|
150
|
+
"rockset",
|
|
151
|
+
"single_store",
|
|
152
|
+
"sql_server",
|
|
153
|
+
"sqlite",
|
|
154
|
+
"timescaledb",
|
|
155
|
+
"upsolver",
|
|
156
|
+
"vertica",
|
|
157
|
+
"databend_cloud",
|
|
158
|
+
"yellowbrick",
|
|
159
|
+
]
|
|
160
|
+
|
|
161
|
+
# This list is also maintained by hand; see https://cube.dev/docs/product/configuration/data-sources
|
|
162
|
+
CUBE_SUPPORTED_TYPES = [
|
|
163
|
+
"doris",
|
|
164
|
+
"postgres",
|
|
165
|
+
"bigquery",
|
|
166
|
+
"starrocks",
|
|
167
|
+
"mysql",
|
|
168
|
+
]
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class ProcessDBMixin(object):
    """Connection-type specific hooks that preprocess a connection config.

    Every hook first runs ``RecurveConnectorBase.preprocess_conf`` on the
    config and then applies the adjustment its connection type needs.
    The hooks are looked up by connection type through
    ``CONNECTION_TYPE_PREPROCESS_CONF_MAPPING``.
    """

    @staticmethod
    def _preprocess_with_proxy_check(data: dict) -> dict:
        """Run the base preprocessing, then drop proxies that fail the check.

        Shared by the hooks whose only extra step is the proxy check. When
        ``proxies`` is set but ``HttpProxyMixin.check_proxy`` rejects it, a
        warning is logged and ``data["proxies"]`` is reset to ``None``.
        """
        data = RecurveConnectorBase.preprocess_conf(data)
        proxies = data.get("proxies")
        if proxies and not HttpProxyMixin.check_proxy(proxies):
            logger.warning(f"proxies {proxies} is not available, use direct connect")
            data["proxies"] = None
        return data

    @staticmethod
    def auth_preprocess_conf(data):
        """Preprocess an auth config, decoding ``extra`` when it is a JSON string."""
        data = RecurveConnectorBase.preprocess_conf(data)
        json_data = data.get("extra")
        if json_data and isinstance(json_data, str):
            data["extra"] = json.loads(json_data)
        return data

    @classmethod
    def bigquery_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess a BigQuery config and discard proxies that fail the check."""
        return cls._preprocess_with_proxy_check(data)

    @classmethod
    def google_cloud_storage_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess a Google Cloud Storage config and discard proxies that fail the check."""
        return cls._preprocess_with_proxy_check(data)

    @classmethod
    def google_service_account_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess a Google service account config and discard proxies that fail the check."""
        return cls._preprocess_with_proxy_check(data)

    @classmethod
    def oss_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess an OSS config and discard proxies that fail the check."""
        return cls._preprocess_with_proxy_check(data)

    @staticmethod
    def other_preprocess_conf(data):
        """Preprocess a config whose real payload may be JSON text under ``data``.

        When ``data["data"]`` is a non-empty string, the decoded JSON value is
        returned in place of the surrounding config; otherwise the
        preprocessed config itself is returned.
        """
        data = RecurveConnectorBase.preprocess_conf(data)
        json_data = data.get("data")
        if json_data and isinstance(json_data, str):
            real_data = json.loads(json_data)
            return real_data
        return data

    @classmethod
    def s3_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess an S3 config and discard proxies that fail the check."""
        return cls._preprocess_with_proxy_check(data)

    @staticmethod
    def spark_preprocess_conf(data):
        """Preprocess a Spark config, decoding ``execution_config["conf"]`` from JSON.

        The nested ``conf`` value is replaced in place only when it is a
        non-empty string; any other value is left untouched.
        """
        data = RecurveConnectorBase.preprocess_conf(data)
        execution_config = data.get("execution_config")
        if execution_config:
            execution_config_conf = execution_config.get("conf")
            if execution_config_conf and isinstance(execution_config_conf, str):
                execution_config_conf = json.loads(execution_config_conf)
                execution_config["conf"] = execution_config_conf
        return data
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
CONNECTION_TYPE_PREPROCESS_CONF_MAPPING = {
|
|
247
|
+
"auth": ProcessDBMixin.auth_preprocess_conf,
|
|
248
|
+
"Auth": ProcessDBMixin.auth_preprocess_conf,
|
|
249
|
+
"bigquery": ProcessDBMixin.bigquery_preprocess_conf,
|
|
250
|
+
"BigQuery": ProcessDBMixin.bigquery_preprocess_conf,
|
|
251
|
+
"google_cloud_storage": ProcessDBMixin.google_cloud_storage_preprocess_conf,
|
|
252
|
+
"Google Cloud Storage": ProcessDBMixin.google_cloud_storage_preprocess_conf,
|
|
253
|
+
"google_service_account": ProcessDBMixin.google_service_account_preprocess_conf,
|
|
254
|
+
"Google Service Account": ProcessDBMixin.google_service_account_preprocess_conf,
|
|
255
|
+
"oss": ProcessDBMixin.oss_preprocess_conf,
|
|
256
|
+
"OSS": ProcessDBMixin.oss_preprocess_conf,
|
|
257
|
+
"s3": ProcessDBMixin.s3_preprocess_conf,
|
|
258
|
+
"S3": ProcessDBMixin.s3_preprocess_conf,
|
|
259
|
+
"spark": ProcessDBMixin.spark_preprocess_conf,
|
|
260
|
+
"Spark": ProcessDBMixin.spark_preprocess_conf,
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# End of the auto-generated section.
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def preprocess_conf(connection_type: str, data: dict):
    """Preprocess ``data`` with the hook registered for ``connection_type``.

    Connection types without an entry in
    ``CONNECTION_TYPE_PREPROCESS_CONF_MAPPING`` are handled by
    ``RecurveConnectorBase.preprocess_conf``.
    """
    handler = CONNECTION_TYPE_PREPROCESS_CONF_MAPPING.get(connection_type)
    if handler is None:
        handler = RecurveConnectorBase.preprocess_conf
    return handler(data)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
SSH_TUNNEL_CONFIG_SCHEMA = {
|
|
273
|
+
"type": "object",
|
|
274
|
+
"title": _l("SSH Tunnel Configuration"),
|
|
275
|
+
"description": _l("Configuration for establishing an SSH tunnel connection"),
|
|
276
|
+
"properties": {
|
|
277
|
+
"host": {"type": "string", "title": _l("Host Address")},
|
|
278
|
+
"user": {"type": "string", "title": _l("Username")},
|
|
279
|
+
"port": {
|
|
280
|
+
"type": "number",
|
|
281
|
+
"title": _l("Port Number"),
|
|
282
|
+
"default": 22,
|
|
283
|
+
},
|
|
284
|
+
"password": {"type": "string", "title": _l("Password")},
|
|
285
|
+
"private_key_str": {
|
|
286
|
+
"type": "string",
|
|
287
|
+
"title": _l("SSH Private Key"),
|
|
288
|
+
"description": _l("Private key content for SSH key-based authentication"),
|
|
289
|
+
},
|
|
290
|
+
"private_key_passphrase": {
|
|
291
|
+
"type": "string",
|
|
292
|
+
"title": _l("SSH Private Key Passphrase"),
|
|
293
|
+
"description": _l("Passphrase to decrypt the SSH private key if encrypted"),
|
|
294
|
+
},
|
|
295
|
+
},
|
|
296
|
+
"order": ["host", "user", "port", "password", "private_key_str", "private_key_passphrase"],
|
|
297
|
+
"secret": ["password", "private_key_str", "private_key_passphrase"],
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
class LoadMode(str, Enum):
    """Load modes, as string-valued enum members.

    Members also subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # NOTE(review): presumably replaces existing target data; confirm with the loaders.
    OVERWRITE = "OVERWRITE"
    # NOTE(review): presumably adds to existing target data; confirm with the loaders.
    APPEND = "APPEND"
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
ENV_VAR_DBT_USER = '{{ env_var("DBT_USER") }}' # after yaml dump, single quote will become '', which cause dbt error
|
|
307
|
+
ENV_VAR_DBT_PASSWORD = '{{ env_var("DBT_PASSWORD") }}'
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def set_env_dbt_user(user_name: str):
    """Store ``user_name`` in the ``DBT_USER`` environment variable of this process."""
    os.environ.update({"DBT_USER": user_name})
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def set_env_dbt_password(password: str):
    """Store ``password`` in the ``DBT_PASSWORD`` environment variable of this process."""
    os.environ.update({"DBT_PASSWORD": password})
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pigeon used to call this concept a DataSource.
For now a similar class is wrapped here;
it will be merged into Base later.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import copy
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from recurvedata.connectors.const import DBAPI_TYPES, preprocess_conf
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class DataSourceBase:
    """A connection described by its type and its configuration dict.

    On construction the configuration is preprocessed for its connection
    type, legacy pigeon keywords are copied to their Recurve names, and a
    deep copy of the resulting configuration is stored in ``extra``.
    """

    connection_type: str
    data: dict
    name: str = ""
    # Always overwritten in __post_init__ with a deep copy of the processed
    # ``data``, so a value passed by the caller is not kept.
    extra: Optional[dict] = None

    def __post_init__(self):
        self.data = preprocess_conf(self.connection_type, self.data)
        self.process_pigeon_keyword()
        self.extra = copy.deepcopy(self.data)

    def process_pigeon_keyword(self):
        """Copy legacy pigeon keywords in ``data`` to their Recurve names.

        OneFlow used pigeon's keywords, some of which differ from Recurve's.
        For historical reasons the migration from OneFlow copied the database
        rows as they were, without converting the keywords, so both sets of
        keywords exist in the Recurve database. This converts the old pigeon
        keywords to the new Recurve ones.

        A Recurve keyword that is already present is never overwritten, and
        the pigeon keyword stays in ``data`` next to the copy.
        """
        from recurvedata.connectors.pigeon import DataSource as PigeonDataSource

        keyword_renames: dict = PigeonDataSource.PIGEON_KEYWORD_MAPPING.get(self.connection_type, {})
        for recurve_keyword, pigeon_keyword in keyword_renames.items():
            # Skip empty mappings, keys already migrated, and keys absent from the config.
            if not pigeon_keyword or recurve_keyword in self.data or pigeon_keyword not in self.data:
                continue
            self.data[recurve_keyword] = self.data[pigeon_keyword]

    @property
    def recurve_connector_cls(self):
        """Connector class registered for ``connection_type`` (looked up on every access)."""
        from recurvedata.connectors._register import get_connection_class

        return get_connection_class(self.connection_type)

    @property
    def recurve_connector(self):
        """Build a new Recurve connector from ``data``.

        Kept separate from the pigeon connector.

        :raises ValueError: when no connector class is found for the connection type
        """
        recurve_cls = self.recurve_connector_cls
        if not recurve_cls:
            raise ValueError(f"Unknown connection type: {self.connection_type}")
        recurve_con = recurve_cls(self.data)
        return recurve_con

    def juice_sync_path(self, path: str) -> tuple[str, str]:
        """
        Return the paths used in juice sync.
        The first return value is the path with a secret key,
        and the second return value is the path without a secret key, intended for display purposes

        :raises ValueError: when no connector class is found for the connection type
        """
        if not self.recurve_connector_cls:
            raise ValueError(f"{self.connection_type} is not juice sync able")
        return self.recurve_connector.juice_sync_path(path)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class DataSource(DataSourceBase):
    """
    In pigeon a DataSource is the same thing as a Connection; both names are kept here.
    """

    @property
    def connector(self):
        """
        For now the earlier OneFlow behaviour is kept: return the pigeon
        connector object when pigeon supports this connection type.
        Otherwise build the Recurve connector from ``data``.

        :raises ValueError: when neither pigeon nor Recurve knows the connection type
        """
        from recurvedata.connectors.pigeon import DataSource as PigeonDataSource

        if PigeonDataSource.is_support_connection_type(self.connection_type):
            return PigeonDataSource(
                connection_type=self.connection_type,
                name=self.name,
                data=self.data,
            ).connector

        connector_cls = self.recurve_connector_cls
        if connector_cls:
            return connector_cls(self.data)
        raise ValueError(f"Unknown connection type: {self.connection_type}")

    def create_engine(self):
        """Delegate to ``create_engine()`` of the resolved connector."""
        return self.connector.create_engine()

    @property
    def host(self):
        """Value of ``url`` if configured, else ``host``, else ``None``."""
        conf = self.data
        return next((conf[key] for key in ("url", "host") if key in conf), None)

    @property
    def ds_type(self):
        """Alias of ``connection_type``, kept for compatibility with OneFlow lineage."""
        return self.connection_type

    @property
    def database(self):
        """Configured ``database`` or ``None`` (used in postgres load)."""
        return self.data.get("database")

    @property
    def port(self):
        """Configured ``port`` or ``None`` (used in email load)."""
        return self.data.get("port")

    @property
    def password(self):
        """Configured ``password`` or ``None`` (used in email load)."""
        return self.data.get("password")

    @property
    def user(self):
        """Configured ``user`` or ``None`` (used in email load)."""
        return self.data.get("user")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class DataSourceWrapper(object):
    """Wrap a DataSource and expose only the necessary read-only features.

    Besides the explicit properties, any key of ``data`` can be read as an
    attribute (``wrapper.some_key``) through ``__getattr__``.
    """

    def __init__(self, ds: "DataSource"):
        # Stored under the name-mangled attribute ``_DataSourceWrapper__ds``.
        self.__ds = ds

    @property
    def name(self) -> str:
        """Name of the wrapped data source."""
        return self.__ds.name

    @property
    def ds_type(self) -> str:
        """Connection type of the wrapped data source."""
        return self.__ds.connection_type

    @property
    def host(self) -> Optional[str]:
        """``host`` of the wrapped data source."""
        return self.__ds.host

    @property
    def database(self) -> Optional[str]:
        """``database`` of the wrapped data source."""
        return self.__ds.database

    @property
    def user(self) -> Optional[str]:
        """``user`` of the wrapped data source."""
        return self.__ds.user

    @property
    def password(self) -> Optional[str]:
        """``password`` of the wrapped data source.

        In the original design the wrapper did not expose the password, but
        it later turned out to be needed in many places.
        """
        return self.__ds.password

    @property
    def port(self) -> Optional[int]:
        """``port`` of the wrapped data source."""
        return self.__ds.port

    @property
    def is_dbapi(self) -> bool:
        """Whether the connection type is listed in ``DBAPI_TYPES``."""
        return self.ds_type in DBAPI_TYPES

    @property
    def connector(self):
        """``connector`` of the wrapped data source."""
        return self.__ds.connector

    @property
    def recurve_connector(self):
        """``recurve_connector`` of the wrapped data source."""
        return self.__ds.recurve_connector

    @property
    def data(self) -> dict:
        """Configuration dict; for type ``"other"`` the nested ``"data"`` entry if present."""
        if self.ds_type == "other":
            return self.__ds.data.get("data", self.__ds.data)
        return self.__ds.data

    @property
    def extra(self) -> dict:
        """Extra dict; for type ``"other"`` the nested ``"data"`` entry if present."""
        if self.ds_type == "other":
            return self.__ds.extra.get("data", self.__ds.extra)
        return self.__ds.extra

    def __getattr__(self, name):
        """Fall back to the keys of ``data`` for attributes not found normally.

        :raises AttributeError: when ``name`` is not a key of ``data`` or the
            wrapped data source has not been set yet
        """
        # copy/pickle create the instance without calling __init__ and then
        # probe attributes such as ``__setstate__``. Without this guard the
        # lookup of the missing ``__ds`` would re-enter __getattr__ forever
        # and end in RecursionError instead of AttributeError.
        if "_DataSourceWrapper__ds" not in self.__dict__:
            raise AttributeError(name)
        if name in self.data:
            return self.data[name]
        return super().__getattribute__(name)  # raise
|