recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""
|
|
2
|
+
使用 pigeon connector 作为底层实现
|
|
3
|
+
返回 pigeon connector
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import copy
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
from recurvedata.connectors.datasource import DataSourceBase
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataSource(DataSourceBase):
    """Data source whose connector is implemented by a pigeon connector.

    In pigeon a DataSource is equivalent to a Connection; both names are kept here.
    """

    def __post_init__(self):
        """Rewrite ``self.data`` / ``self.extra`` so they use pigeon's keyword names."""
        super().__post_init__()
        keyword_renames: dict = self.PIGEON_KEYWORD_MAPPING.get(self.connection_type, {})
        for recurve_keyword, pigeon_keyword in keyword_renames.items():
            # The recurve key is always removed; a falsy target (None) means "drop it".
            val = self.data.pop(recurve_keyword, None)
            if pigeon_keyword:
                self.data[pigeon_keyword] = val
                self.extra[pigeon_keyword] = self.extra.get(recurve_keyword)
        if "passwd" in self.data:
            self.data["password"] = self.data["passwd"]
            self.extra["password"] = self.extra["passwd"]
        if self.connection_type == "bigquery":
            # The rename loop above has already moved "database" to "dataset" and
            # removed "database" from self.data.  Unconditionally re-reading
            # "database" here would therefore reset the dataset to None, so only
            # fall back to "database" when no dataset value is present.
            if self.data.get("dataset") is None:
                self.data["dataset"] = self.data.get("database")
            if "key_dict" not in self.data:
                # some are not saved through the web page, have key_dict field, no need to modify
                key_dict = copy.deepcopy(self.data)
                key_dict.pop("location", None)
                key_dict.pop("proxies", None)
                self.data["key_dict"] = key_dict
                self.extra["key_dict"] = key_dict
        if self.connection_type == "sftp":
            pk_path = self.data.get("rsa_private_key_file")
            if pk_path == "":
                # An empty path is removed instead of being passed to the connector.
                self.data.pop("rsa_private_key_file")
            elif pk_path:
                # Expand a leading "~" so the connector receives a usable path.
                self.data["rsa_private_key_file"] = os.path.expanduser(pk_path)
                self.extra["rsa_private_key_file"] = os.path.expanduser(pk_path)

    # Per connection type: recurve config key -> pigeon keyword.
    # A None target means the recurve key is dropped (see __post_init__).
    PIGEON_KEYWORD_MAPPING = {
        "s3": {"access_key_id": "aws_access_key_id", "secret_access_key": "aws_secret_access_key", "bucket": None},
        "filebrowser": {"url": "host"},
        "kafka": {
            "bootstrap_servers": "bootstrap.servers",
            "value_deserializer": "value.deserializer",
            "value_schema_registry_client_url": "schema_registry_client_url",
        },
        "oss": {"secret_access_key": "access_key_secret", "access_key_id": "access_key_id", "bucket": "bucket_name"},
        "sftp": {"user": "username", "private_key_path": "rsa_private_key_file"},
        "mongodb": {"user": "username"},
        "bigquery": {"database": "dataset"},
    }

    # Pigeon connection type -> module that is imported when the connector class
    # cannot be looked up yet (see get_pigeon_connector_class).
    PIGEON_TYPE_CLS_MAPPING = {
        "s3": "recurvedata.pigeon.connector.awss3",
        "oss": "recurvedata.pigeon.connector.aliyun_oss",
        "azure_blob": "recurvedata.pigeon.connector.azure_blob",
        "azure_synapse": "recurvedata.pigeon.connector.azure_synapse",
        "clickhouse": "recurvedata.pigeon.connector.clickhouse_native",
        "es": "recurvedata.pigeon.connector.es",
        "ftp": "recurvedata.pigeon.connector.ftp",
        "google_bigquery": "recurvedata.pigeon.connector.google_bigquery",
        "phoenix": "recurvedata.pigeon.connector.hbase_phoenix",
        "hdfs": "recurvedata.pigeon.connector.hdfs",
        "hive": "recurvedata.pigeon.connector.hive_impala",
        "impala": "recurvedata.pigeon.connector.hive_impala",
        "mongodb": "recurvedata.pigeon.connector.mongodb",
        "mssql": "recurvedata.pigeon.connector.mssql",
        "mysql": "recurvedata.pigeon.connector.mysql",
        "postgresql": "recurvedata.pigeon.connector.postgresql",
        "postgres": "recurvedata.pigeon.connector.postgresql",
        "qcloud_cos": "recurvedata.pigeon.connector.qcloud_cos",
        "cos": "recurvedata.pigeon.connector.qcloud_cos",
        "redshift": "recurvedata.pigeon.connector.redshift",
        "sftp": "recurvedata.pigeon.connector.sftp",
        "owncloud": "recurvedata.pigeon.connector.owncloud",
        "starrocks": "recurvedata.pigeon.connector.starrocks",
        "doris": "recurvedata.pigeon.connector.doris",
        "microsoft_fabric": "recurvedata.pigeon.connector.microsoft_fabric",
    }

    # Recurve connection type -> pigeon connection type, for types whose names differ.
    PIGEON_TYPE_MAPPING = {
        "tidb": "mysql",
        "bigquery": "google_bigquery",
    }

    @property
    def connector(self):
        """
        Keep the previous OneFlow approach for now: return the pigeon Connector object.
        """
        return self.get_pigeon_connector()

    def create_engine(self):
        """Delegate to the pigeon connector's ``create_engine``."""
        return self.connector.create_engine()

    def get_pigeon_connector(self):
        """Instantiate the pigeon connector class with ``self.data`` as keyword arguments.

        :raises ValueError: if no pigeon connector class is returned for this connection type
        """
        klass = self.get_pigeon_connector_class()
        if not klass:
            raise ValueError(f"{self.connection_type} has no pigeon class")

        return klass(**self.data)

    def get_pigeon_connector_class(self):
        """Look up the pigeon connector class for this connection type.

        When the first lookup raises ``KeyError``, the module listed in
        ``PIGEON_TYPE_CLS_MAPPING`` is imported and the lookup is retried.

        :raises KeyError: if the type is also missing from ``PIGEON_TYPE_CLS_MAPPING``
        """
        from recurvedata.pigeon.connector import get_connector_class

        connection_type = self.PIGEON_TYPE_MAPPING.get(self.connection_type, self.connection_type)
        try:
            return get_connector_class(connection_type)
        except KeyError:
            pigeon_cls = self.PIGEON_TYPE_CLS_MAPPING[connection_type]
            # presumably importing the module registers its connector class — TODO confirm
            __import__(pigeon_cls)
            return get_connector_class(connection_type)

    @property
    def host(self):
        """Return ``data["url"]`` if present, else ``data["host"]``, else ``None``."""
        for key in ["url", "host"]:
            if key in self.data:
                return self.data[key]

    @property
    def ds_type(self):
        # kept for compatibility with oneflow lineage
        return self.connection_type

    @property
    def database(self):
        # used in postgres load
        return self.data.get("database")

    @property
    def port(self):
        # used in email load
        return self.data.get("port")

    @property
    def user(self):
        # used in email load
        return self.data.get("user")

    @property
    def password(self):
        # used in email load
        return self.data.get("password")

    @classmethod
    def is_support_connection_type(cls, connection_type: str) -> bool:
        """Return whether the (alias-resolved) type is listed in ``PIGEON_TYPE_CLS_MAPPING``."""
        connection_type = cls.PIGEON_TYPE_MAPPING.get(connection_type, connection_type)
        return connection_type in cls.PIGEON_TYPE_CLS_MAPPING

    @property
    def recurve_connector(self):
        """
        The recurve connector, as opposed to the pigeon connector.

        :return: an instance of ``self.recurve_connector_cls`` built from ``self.extra``
        :raises ValueError: if ``self.recurve_connector_cls`` is falsy
        """
        recurve_cls = self.recurve_connector_cls
        if not recurve_cls:
            raise ValueError(f"Unknown connection type: {self.connection_type}")
        recurve_con = recurve_cls(self.extra)
        return recurve_con
|
|
167
|
+
|
|
168
|
+
# todo: is_dbapi
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def get_pigeon_connector(connection_type: str, data: dict):
    """Build a ``DataSource`` from the given type and config and return its pigeon connector."""
    datasource = DataSource(connection_type=connection_type, data=data)
    return datasource.get_pigeon_connector()
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import socket
|
|
2
|
+
import urllib.parse
|
|
3
|
+
|
|
4
|
+
from recurvedata.connectors.base import RecurveConnectorBase
|
|
5
|
+
from recurvedata.connectors.utils import EnvContextManager
|
|
6
|
+
from recurvedata.core.translation import _l
|
|
7
|
+
from recurvedata.utils.log import LoggingMixin
|
|
8
|
+
|
|
9
|
+
logger = LoggingMixin.logger()
|
|
10
|
+
|
|
11
|
+
# Schema fragment describing the optional "proxies" property; it is merged into a
# connector's config schema by add_http_proxy_to_config_schema().
HTTP_PROXY_CONFIG_SCHEMA = {
    "proxies": {
        "type": "object",
        "title": _l("HTTP/HTTPS Proxy Settings"),
        "description": _l("Configure proxy servers for HTTP and HTTPS connections"),
        "properties": {
            "https": {
                "type": "string",
                "title": _l("HTTPS Proxy"),
                "description": _l("HTTPS proxy URL in format https://host:port, or http://host:port"),
            },
            "http": {
                "type": "string",
                "title": _l("HTTP Proxy"),
                "description": _l("HTTP proxy URL in format http://host:port"),
            },
        },
        "order": ["https", "http"],
    },
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ProxyEnvContextManager(EnvContextManager):
    """``EnvContextManager`` preloaded with ``http_proxy`` / ``https_proxy`` variables."""

    def __init__(self, proxy: str = None, http_proxy: str = None, https_proxy: str = None):
        """
        :param proxy: fallback used for either scheme when its specific value is falsy
        :param http_proxy: value for the ``http_proxy`` variable
        :param https_proxy: value for the ``https_proxy`` variable
        """
        resolved = {
            "http_proxy": http_proxy or proxy,
            "https_proxy": https_proxy or proxy,
        }
        # Only variables that resolved to a truthy value are handed to the parent.
        super().__init__({name: value for name, value in resolved.items() if value})
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ProxyMixinBase:
    """Empty base class for the proxy mixins."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class HttpProxyMixin(ProxyMixinBase):
    """Mixin that adds HTTP/HTTPS proxy configuration handling to a connector class."""

    @classmethod
    def format_config_schema(cls):
        """Return the parent's config schema with the ``proxies`` property added."""
        config_schema = super(HttpProxyMixin, cls).format_config_schema()
        return add_http_proxy_to_config_schema(config_schema)

    def _init_proxy_manager(self):
        """Create a ``ProxyEnvContextManager`` from ``self.proxies`` (treated as ``{}`` when falsy)."""
        proxies = self.proxies or {}
        return ProxyEnvContextManager(http_proxy=proxies.get("http"), https_proxy=proxies.get("https"))

    @classmethod
    def preprocess_conf(cls, data: dict) -> dict:
        """Preprocess the config and clear ``proxies`` when no configured proxy is reachable."""
        data = RecurveConnectorBase.preprocess_conf(data)
        proxies = data.get("proxies")
        if proxies and not cls.check_proxy(proxies):
            logger.warning(f"proxies {proxies} is not available, use direct connect")
            data["proxies"] = None
        return data

    @classmethod
    def check_proxy(cls, proxies: dict, timeout=10):
        """
        Check whether a proxy is available.

        Returns True as soon as either the ``http`` or the ``https`` entry accepts a
        TCP connection; returns False for an empty/None ``proxies``.

        proxies example: {'http': 'http://proxy_host:proxy_port', 'https': 'https://proxy_host:proxy_port'}
        """
        if not proxies:
            return False
        return cls._check_proxy_connection(proxies.get("http"), timeout) or cls._check_proxy_connection(
            proxies.get("https"), timeout
        )

    @classmethod
    def _check_proxy_connection(cls, proxy_url, timeout):
        """Return True if a TCP connection to the proxy's host and port can be opened.

        :param proxy_url: proxy URL, with or without a scheme; may be None or empty
        :param timeout: connection timeout passed to ``socket.create_connection``
        """
        # check_proxy() passes proxies.get(...), so a missing entry arrives as None.
        # Without this guard it would be formatted into "http://None" and probed.
        if not proxy_url:
            return False

        parsed_url = urllib.parse.urlparse(proxy_url)
        if not parsed_url.netloc:
            # No authority part was parsed, e.g. "host" or "host:8080" (urlparse may
            # treat the latter's "host" as the scheme).  Retry with an explicit scheme.
            parsed_url = urllib.parse.urlparse(f"http://{proxy_url}")
        hostname = parsed_url.hostname
        if not hostname:
            # create_connection((None, port)) would not target the proxy at all.
            logger.info("Proxy url has no hostname, treat it as unavailable")
            return False
        port = parsed_url.port or (443 if parsed_url.scheme == "https" else 80)

        try:
            with socket.create_connection((hostname, port), timeout=timeout):
                return True
        except socket.timeout:
            logger.info(f"Connection timed out while connecting to {hostname}:{port}")
            return False
        except socket.error as e:
            logger.info(f"Error connecting to {hostname}:{port}: {e}")
            return False
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def add_http_proxy_to_config_schema(config_schema: dict) -> dict:
    """Add the ``proxies`` property to ``config_schema`` in place and return it.

    A schema that already declares ``proxies`` is returned untouched.
    """
    properties = config_schema["properties"]
    if "proxies" not in properties:
        properties.update(HTTP_PROXY_CONFIG_SCHEMA)
        config_schema["order"].append("proxies")
    return config_schema
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import traceback
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from sqlalchemy.engine.url import URL
|
|
8
|
+
|
|
9
|
+
from recurvedata.connectors._register import get_connection_class
|
|
10
|
+
from recurvedata.connectors.config_schema import ALL_CONFIG_SCHEMA_DCT, get_complex_config_schema
|
|
11
|
+
from recurvedata.connectors.const import ( # noqa
|
|
12
|
+
ALL_CONNECTION_SECRET_WORDS,
|
|
13
|
+
DBAPI_TYPES,
|
|
14
|
+
JUICE_SYNC_ABLE_DBAPI_TYPES,
|
|
15
|
+
SQL_OPERATOR_TYPES,
|
|
16
|
+
)
|
|
17
|
+
from recurvedata.connectors.datasource import DataSource, DataSourceWrapper
|
|
18
|
+
from recurvedata.connectors.pigeon import DataSource as PigeonDataSource
|
|
19
|
+
from recurvedata.consts import PROJECT_ID_KEY
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def list_config_schemas(only_enabled=True) -> list[dict[str, Any]]:
    """
    Return the entries of ``ALL_CONFIG_SCHEMA_DCT`` as a list.

    todo: decide on the returned type (consider using pydantic)
    :param only_enabled: when truthy, keep only entries whose ``"enabled"`` value is truthy
    :return: list of config schema dicts
    """
    all_schemas = ALL_CONFIG_SCHEMA_DCT.values()
    if not only_enabled:
        return list(all_schemas)
    return [schema for schema in all_schemas if schema["enabled"]]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_config_schema(connection_type: str):
    """Return the ``"config_schema"`` entry for the connection type, or None if no schema is found."""
    schema_entry = get_complex_config_schema(connection_type)
    if not schema_entry:
        return None
    return schema_entry["config_schema"]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_connection_category(connection_type: str):
    """Return the ``"category"`` entry for the connection type, or None if no schema is found."""
    schema_entry = get_complex_config_schema(connection_type)
    if not schema_entry:
        return None
    return schema_entry["category"]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_connection_ui_category(connection_type: str):
    """Return the ``"ui_category"`` entry for the connection type, or None if no schema is found."""
    schema_entry = get_complex_config_schema(connection_type)
    if not schema_entry:
        return None
    return schema_entry["ui_category"]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_connection_type(connection_type: str):
    """Return the ``"type"`` entry for the connection type, or None if no schema is found."""
    schema_entry = get_complex_config_schema(connection_type)
    if not schema_entry:
        return None
    return schema_entry["type"]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def get_connection_ui_type(connection_type: str):
    """Return the ``"ui_type"`` entry for the connection type, or None if no schema is found."""
    schema_entry = get_complex_config_schema(connection_type)
    if not schema_entry:
        return None
    return schema_entry["ui_type"]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_connection_host(ui_type: str, connection_conf: dict):
    """
    Pick a host-like value out of a connection config.

    The keys "host", "endpoint", "access_key_id" and "url" are tried in that
    order and the value of the first one present is returned.

    :param ui_type: the ui_type shown on the front-end list page (not used by the lookup)
    :param connection_conf: connection configuration dict
    :return: the first matching value, or "" when none of the keys is present
    """
    # todo: not so good
    candidate_keys = ("host", "endpoint", "access_key_id", "url")
    return next((connection_conf[key] for key in candidate_keys if key in connection_conf), "")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_all_secret_keywords():
    """Return ``ALL_CONNECTION_SECRET_WORDS`` as imported from ``recurvedata.connectors.const``."""
    secret_keywords = ALL_CONNECTION_SECRET_WORDS
    return secret_keywords
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_connection(connection_type: str, connection_conf: dict) -> tuple[bool, str]:
    """Instantiate the connection class and run its ``test_connection``.

    :return: ``(True, "")`` on success, otherwise ``(False, <formatted traceback>)``
    """
    connector_factory = get_connection_class(connection_type)
    try:
        connector_factory(connection_conf).test_connection()
    except Exception:
        # Any failure while building or testing the connection is reported to the
        # caller as text instead of being raised.
        return False, traceback.format_exc()
    return True, ""
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def init_connector(connection_type: str, connection_conf: dict):
    """Placeholder: currently does nothing with its arguments and returns ``None``."""
    return None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def list_dbapi_types():
    """Return the module-level ``DBAPI_TYPES`` object as-is (no copy)."""
    return DBAPI_TYPES
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def list_juice_sync_able_dbapi_types():
    """Return the module-level ``JUICE_SYNC_ABLE_DBAPI_TYPES`` object as-is (no copy)."""
    return JUICE_SYNC_ABLE_DBAPI_TYPES
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def list_sql_operator_types():
    """Return the module-level ``SQL_OPERATOR_TYPES`` object as-is (no copy)."""
    return SQL_OPERATOR_TYPES
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_datasource_by_name(project_connection_name: str, project_id: int | None = None) -> DataSourceWrapper:
    """Fetch a project connection by name and wrap it as a data source.

    :param project_connection_name: name of the connection inside the project
    :param project_id: project id; when ``None`` the value of the
        ``PROJECT_ID_KEY`` environment variable is used instead
    :return: a ``DataSourceWrapper`` around a ``PigeonDataSource`` when that
        class supports the connection type and can be built, otherwise around a
        plain ``DataSource``
    :raises ValueError: if no project id is given and the environment variable is unset
    """
    # Imported lazily inside the function, as in the original implementation.
    from recurvedata.executors.client import ExecutorClient
    from recurvedata.executors.schemas import ConnectionItem

    if project_id is None:
        project_id = os.environ.get(PROJECT_ID_KEY)
        if project_id is None:
            raise ValueError("project id is not set")

    client = ExecutorClient()
    conn: ConnectionItem = client.get_connection(project_id=project_id, connection_name=project_connection_name)

    pigeon_supported = PigeonDataSource.is_support_connection_type(conn.type)
    if pigeon_supported:
        try:
            pigeon_source = PigeonDataSource(connection_type=conn.type, name=conn.name, data=conn.data)
            return DataSourceWrapper(pigeon_source)
        except ModuleNotFoundError:
            # A required module is missing: fall through to the generic DataSource.
            pass

    generic_source = DataSource(connection_type=conn.type, name=conn.name, data=conn.data)
    return DataSourceWrapper(generic_source)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def get_datasource_by_config(
    connection_type: str,
    config: dict,
    name: str | None = None,
    database: str | None = None,
    schema: str | None = None,
) -> DataSourceWrapper:
    """
    Get a DataSourceWrapper instance based on the connection type and configuration dictionary.

    The ``database`` / ``schema`` overrides are applied to a shallow copy of
    ``config``; the dictionary passed in by the caller is left unmodified.

    :param connection_type: The type of the connection (e.g., 'mysql', 'postgresql', 'snowflake')
    :param config: A dictionary containing the connection configuration
    :param name: optional name given to the created data source
    :param database: project database name; when truthy it overrides ``config["database"]``
    :param schema: project schema name, if any; when truthy it overrides ``config["schema"]``
    :return: A DataSourceWrapper instance
    """
    # Build a merged copy instead of calling config.update(): mutating the
    # caller's dict would leak the overrides into every later use of it.
    merged_config = dict(config)
    if database:
        merged_config["database"] = database
    if schema:
        merged_config["schema"] = schema

    if PigeonDataSource.is_support_connection_type(connection_type):
        try:
            return DataSourceWrapper(
                PigeonDataSource(connection_type=connection_type, name=name, data=merged_config)
            )
        except ModuleNotFoundError:
            # A required module is missing: fall back to the generic DataSource.
            pass
    return DataSourceWrapper(DataSource(connection_type=connection_type, name=name, data=merged_config))
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def get_sqlalchemy_url_by_connection(connection_orm) -> URL:
    """Build the SQLAlchemy URL for a stored connection record.

    :param connection_orm: object exposing ``type``, ``name`` and ``data`` attributes
    :return: the ``sqlalchemy_url`` of the wrapped data source's recurve connector
    :raises ValueError: if the wrapped data source reports ``is_dbapi`` as falsy
    """
    source = DataSource(connection_type=connection_orm.type, name=connection_orm.name, data=connection_orm.data)
    wrapper = DataSourceWrapper(source)
    if wrapper.is_dbapi:
        return wrapper.recurve_connector.sqlalchemy_url
    raise ValueError(f"{wrapper.ds_type} is not dbapi, not support this function")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# todo: cache
|
|
162
|
+
def list_column_data_types():
    """Return a sorted list of lower-cased SQLAlchemy column type names.

    Names are gathered from ``sqlalchemy.sql.sqltypes`` and, when the imports
    succeed, from ``sqlalchemy.dialects.mysql.types`` and
    ``clickhouse_sqlalchemy.types``.
    """
    import sqlalchemy.sql.sqltypes

    def _get_module_types(module):
        """Collect lower-cased ``__visit_name__`` values of the module's public TypeEngine subclasses."""
        return {
            candidate.__visit_name__.lower()
            for attr_name, candidate in module.__dict__.items()
            if not attr_name.startswith("_")
            and isinstance(candidate, type)
            and issubclass(candidate, sqlalchemy.sql.sqltypes.TypeEngine)
            and hasattr(candidate, "__visit_name__")
            # TypeDecorator itself is a wrapper, not a concrete column type.
            and candidate.__visit_name__ not in ("TypeDecorator", "type_decorator")
        }

    type_names = _get_module_types(sqlalchemy.sql.sqltypes)

    # Optional sources: skipped silently when the import fails.
    try:
        import sqlalchemy.dialects.mysql.types

        type_names |= _get_module_types(sqlalchemy.dialects.mysql.types)
    except ImportError:
        pass

    try:
        import clickhouse_sqlalchemy.types.common

        type_names |= _get_module_types(clickhouse_sqlalchemy.types)
    except ImportError:
        pass

    return sorted(type_names)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def convert_connection_to_dbt_profile(
    connection_type: str, connection_config: dict, database: str, schema: str = None
) -> dict:
    """Convert a connection configuration into a dbt profile dictionary.

    :param connection_type: type of the connection
    :param connection_config: connection configuration dictionary
    :param database: database name forwarded to the data source and the connector
    :param schema: optional schema name forwarded the same way
    :return: the connector's ``convert_config_to_dbt_profile`` result, or ``{}``
        when the data source reports ``is_dbapi`` as falsy
    """
    datasource = get_datasource_by_config(connection_type, connection_config, database=database, schema=schema)
    if datasource.is_dbapi:
        connector = datasource.recurve_connector
        return connector.convert_config_to_dbt_profile(database, schema)
    return {}
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def convert_connection_to_cube_config(
    connection_type: str, connection_config: dict, database: str, schema: str = None, masking: bool = None
) -> dict:
    """Convert a connection configuration into a cube config dictionary.

    :param connection_type: type of the connection
    :param connection_config: connection configuration dictionary
    :param database: database name forwarded to the data source and the connector
    :param schema: optional schema name forwarded the same way
    :param masking: whether to replace every value by the MD5 hex digest of its
        JSON dump; when ``None`` it defaults to ``not ds.is_dbapi``
    :return: the (possibly masked) cube config, or ``{}`` when the connector
        returns something falsy or not a ``dict``
    """
    datasource = get_datasource_by_config(connection_type, connection_config, database=database, schema=schema)
    connector = datasource.recurve_connector
    cube_config = connector.convert_config_to_cube_config(database, schema, datasource)
    if not (cube_config and isinstance(cube_config, dict)):
        return {}

    if masking is None:
        masking = not datasource.is_dbapi
    if not masking:
        return cube_config

    return {key: hashlib.md5(json.dumps(value).encode("utf-8")).hexdigest() for key, value in cube_config.items()}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
from urllib.parse import quote
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class EnvContextManager:
    """Context manager that temporarily sets environment variables.

    On entry every key of ``env_vars`` is written to ``os.environ``. On exit
    each of those keys is restored to the value it had before entry, or removed
    if it was not set before.
    """

    def __init__(self, env_vars: dict):
        """
        :param env_vars: mapping of environment variable names to the values to
            set while the context is active
        """
        self.env_vars = env_vars
        # Previous values captured on entry; None marks "was not set".
        self.old_env_vars = {}

    def __enter__(self):
        """Apply the overrides, remembering the previous values, and return ``self``."""
        for key, value in self.env_vars.items():
            self.old_env_vars[key] = os.environ.get(key)
            os.environ[key] = value
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Restore the environment captured on entry; exceptions are not suppressed."""
        for key, value in self.old_env_vars.items():
            if value is None:
                # pop() with a default: the variable may already have been
                # removed inside the with-body, which must not raise KeyError.
                os.environ.pop(key, None)
            else:
                os.environ[key] = value
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def juice_sync_process_special_character_within_secret(secret_part: str) -> str:
    """Escape ``/`` as ``%2F`` inside an access-key or secret-key fragment.

    `When you get "/" in ACCESS_KEY or SECRET_KEY strings,you need to replace "/" with "%2F".`

    :param secret_part: the key fragment to escape
    :return: ``secret_part`` with every ``/`` replaced by ``%2F``
    """
    return secret_part.replace("/", "%2F")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def juice_sync_process_special_character_within_path(path: str) -> str:
    """Prepare a path for use with juice sync.

    1. Colons have to be processed twice for juice sync: a first pass turns
       them into ``%3A``; this function performs the second pass, quoting every
       existing ``%3A`` once more.
    2. Some paths need to be wrapped in double quotes; here that is any path
       containing a space or ``&``.

    :param path: the path to process; falsy values are returned unchanged
    :return: the processed path
    """
    if not path:
        return path

    encoded_colon = quote(":")
    path = path.replace(encoded_colon, quote(encoded_colon))

    needs_quotes = re.search("[ &]", path)
    return f'"{path}"' if needs_quotes else path
|
recurvedata/consts.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
# docker container environ key for recurve environment_id
|
|
4
|
+
ENV_ID_KEY = "RECURVE__ENVIRONMENT_ID"
|
|
5
|
+
PROJECT_ID_KEY = "RECURVE__PROJECT_ID"
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ScheduleType(str, Enum):
    """String-valued enumeration of schedule types."""

    crontab = "crontab"
    customization = "customization"
    manual = "manual"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Operator(str, Enum):
    """String-valued enumeration of operator names."""

    SQLOperator = "SQLOperator"
    TransferOperator = "TransferOperator"
    PythonOperator = "PythonOperator"
    SparkOperator = "SparkOperator"
    NotifyOperator = "NotifyOperator"
    LinkOperator = "LinkOperator"
    DBTOperator = "DBTOperator"
    LinkModelPipelineOperator = "LinkModelPipelineOperator"
    SensorOperator = "SensorOperator"

    @classmethod
    def is_link(cls, op: str):
        """Return ``True`` when ``op`` equals one of the two link operator members."""
        link_operators = (cls.LinkOperator, cls.LinkModelPipelineOperator)
        return op in link_operators
|
|
28
|
+
|
|
29
|
+
# todo(chenjingmeng): support dynamically added Operator
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ETLExecutionStatus(str, Enum):
    """String-valued enumeration of ETL execution statuses."""

    SUCCESS = "success"
    FAILED = "failed"
    RETRY = "retry"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ConnectionCategory(str, Enum):
    """String-valued enumeration of connection categories."""

    DATABASE = "database"
    STORAGE = "storage"
    WAREHOUSE = "warehouse"
    OTHERS = "others"
    SERVICE = "service"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ConnectorGroup(str, Enum):
    """String-valued enumeration of connector groups."""

    SOURCE = "source"
    DESTINATION = "destination"
    INTEGRATION = "integration"
|
|
File without changes
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Config:
|
|
6
|
+
CONFIG_FOLDER_ENV_NAME = "RECURVE_CONFIG_FOLDER"
|
|
7
|
+
__instance = None
|
|
8
|
+
|
|
9
|
+
def __new__(cls, *args, **kwargs):
|
|
10
|
+
if cls.__instance is None:
|
|
11
|
+
cls.__instance = super().__new__(cls, *args, **kwargs)
|
|
12
|
+
return cls.__instance
|
|
13
|
+
|
|
14
|
+
def __init__(self, folder: str = None):
|
|
15
|
+
folder = folder or os.environ.get(self.CONFIG_FOLDER_ENV_NAME, "~/.recurve")
|
|
16
|
+
if not folder:
|
|
17
|
+
raise ValueError(f"config folder is required, got {repr(folder)}")
|
|
18
|
+
|
|
19
|
+
self._folder = pathlib.Path(folder).expanduser()
|
|
20
|
+
self._folder.mkdir(mode=0o755, parents=True, exist_ok=True)
|
|
21
|
+
self._config_file = self._folder / "config"
|
|
22
|
+
|
|
23
|
+
self.__defaults = self.load_defaults()
|
|
24
|
+
|
|
25
|
+
def load_defaults(self) -> dict:
|
|
26
|
+
"""load default config from env file or environment variables
|
|
27
|
+
|
|
28
|
+
file format:
|
|
29
|
+
RECURVE_HOST = https://recurve.test.recurvedata.com
|
|
30
|
+
RECURVE_USERNAME = foo
|
|
31
|
+
RECURVE_PASSWORD = pwd123
|
|
32
|
+
"""
|
|
33
|
+
from dotenv import dotenv_values
|
|
34
|
+
|
|
35
|
+
# failed to install dotenv in 3.11 todo
|
|
36
|
+
return {
|
|
37
|
+
**dotenv_values(self._config_file),
|
|
38
|
+
**os.environ,
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
def get_or_default(self, value: str, env_key: str) -> str:
|
|
42
|
+
if value:
|
|
43
|
+
return value
|
|
44
|
+
if env_key not in self.__defaults:
|
|
45
|
+
raise ValueError("value is required")
|
|
46
|
+
return self.__defaults[env_key]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from typing import Any, ClassVar, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, ConfigDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BaseConfigModel(BaseModel):
    """Base pydantic model for configuration objects; configured with ``extra="forbid"``."""

    model_config = ConfigDict(extra="forbid")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Configurable:
    """Base class for components described by a pydantic configuration model.

    Subclasses set ``config_model`` to a ``BaseConfigModel`` subclass; the
    class-level helpers below expose its schema and validation.
    """

    # The model *class*, not an instance: the helpers call its classmethods
    # ``model_json_schema`` and ``model_validate``.
    config_model: ClassVar[type[BaseConfigModel]]

    @classmethod
    def config_schema(cls) -> dict[str, Any]:
        """Return the JSON schema produced by ``config_model.model_json_schema()``."""
        return cls.config_model.model_json_schema()

    @classmethod
    def name(cls) -> str:
        """Return the class name."""
        return cls.__name__

    @classmethod
    def to_dict(cls) -> dict[str, Any]:
        """Return a dict with the class ``name`` and its ``config_schema``."""
        return {"name": cls.name(), "config_schema": cls.config_schema()}

    @classmethod
    def validate(cls, configuration: Union[dict, BaseConfigModel]) -> BaseConfigModel:
        """Validate ``configuration`` with ``config_model.model_validate`` and return the result.

        :param configuration: a plain dict or a model instance to validate
        :return: the object returned by ``config_model.model_validate``
        """
        return cls.config_model.model_validate(configuration)
|