recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
recurvedata/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.487"
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, TypeVar, overload
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
from recurvedata.__version__ import __version__
|
|
10
|
+
from recurvedata.config import AgentConfig
|
|
11
|
+
from recurvedata.exceptions import APIError, MaxRetriesExceededException, UnauthorizedError
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# Type variable for the pydantic model class a caller passes as
# ``response_model_class``; lets ``request`` be typed as returning that model.
ResponseModelType = TypeVar("ResponseModelType", bound=BaseModel)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Client:
    """Thin wrapper around ``httpx.Client`` for calling the server API.

    Every request gets an ``Authorization`` bearer header built from the
    agent id and token plus an ``X-Tenant-Domain`` header (see
    ``prepare_header``). Failed requests are retried with exponential
    backoff. The instance can be used as a context manager to make sure the
    underlying connection pool is closed.
    """

    _config: AgentConfig
    _client: httpx.Client

    def __init__(self, config: AgentConfig | None = None):
        """Create a client from ``config``, loading the default config when omitted."""
        if not config:
            config = AgentConfig.load()
        self.set_config(config)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def set_config(self, config: AgentConfig):
        """Replace the active configuration and rebuild the HTTP client.

        A previously created ``httpx.Client`` is closed after the new one is in
        place so that repeated calls do not leak connection pools.
        """
        previous = getattr(self, "_client", None)
        self._config = config
        self._client = httpx.Client(
            base_url=config.server_url,
            timeout=config.request_timeout,
            headers={"User-Agent": f"RecurveLib/{__version__}"},
        )
        if previous is not None:
            previous.close()

    def prepare_header(self, kwargs: dict):
        """Inject the authentication headers into the request ``kwargs`` in place.

        The caller-supplied ``headers`` mapping is copied first, so the object
        the caller passed in is never mutated.
        """
        headers = dict(kwargs.pop("headers", None) or {})
        headers["Authorization"] = f"Bearer {self._config.agent_id}:{self._config.token.get_secret_value()}"
        headers["X-Tenant-Domain"] = self._config.tenant_domain
        kwargs["headers"] = headers

    def _send_with_retries(
        self,
        method: str,
        path: str,
        retries: int,
        http_error_log_level: int,
        request_kwargs: dict,
    ) -> httpx.Response:
        """Send one request, retrying on HTTP status and transport errors.

        At least one attempt is always made. Between attempts the call sleeps
        ``2**attempt`` seconds. A 401 response aborts immediately.

        Raises:
            UnauthorizedError: the server answered 401.
            MaxRetriesExceededException: every attempt failed; the last
                underlying ``httpx`` error is attached as ``__cause__``.
        """
        attempts = max(retries, 1)
        last_err: httpx.HTTPError | None = None
        for attempt in range(attempts):
            try:
                resp = self._client.request(method, path, **request_kwargs)
                resp.raise_for_status()
                return resp
            except httpx.HTTPStatusError as e:
                last_err = e
                logger.log(
                    http_error_log_level,
                    "HTTP error on attempt %s for url '%s' : %s - %s",
                    attempt + 1,
                    e.request.url,
                    e.response.status_code,
                    e.response.text,
                )
                if e.response.status_code == 401:
                    # Retrying with the same credentials cannot succeed.
                    raise UnauthorizedError("Unauthorized, please check your agent_id and token") from e
            except httpx.RequestError as e:
                # Transport-level failure (connect/read timeout, DNS, ...).
                last_err = e
                logger.debug("Request error on attempt %s for url '%s': %s", attempt + 1, e.request.url, e)

            if attempt < attempts - 1:
                time.sleep(2**attempt)  # Exponential backoff

        raise MaxRetriesExceededException(
            f"Failed to complete {method} request to {path} after {attempts} attempts, {last_err}"
        ) from last_err

    @staticmethod
    def _raise_for_api_error(resp_content: Any) -> None:
        """Raise ``APIError`` when the response envelope carries a non-"0" code."""
        # TODO(yangliang): handle errors more gracefully
        if "code" in resp_content and resp_content["code"] != "0":
            raise APIError(f"API request failed: {resp_content['msg']}\n{resp_content.get('data')}")

    @overload
    def request(self, method: str, path: str, response_model_class: None = None, retries: int = 1, **kwargs) -> Any:
        ...

    @overload
    def request(
        self, method: str, path: str, response_model_class: type[ResponseModelType], retries: int = 1, **kwargs
    ) -> ResponseModelType:
        ...

    def request(
        self,
        method: str,
        path: str,
        response_model_class: type[ResponseModelType] | None = None,
        retries: int = 1,
        **kwargs,
    ) -> Any:
        """Call a JSON endpoint and return its payload.

        Args:
            method: HTTP method name.
            path: URL path, resolved against the configured base URL.
            response_model_class: optional pydantic model used to validate the
                payload. When the response has a ``code`` key the ``data``
                member is validated, otherwise the whole body is.
            retries: total number of attempts (values below 1 mean one attempt).
            **kwargs: forwarded to ``httpx.Client.request``.

        Returns:
            The validated model, or the ``data`` member of the body when no
            model class is given.

        Raises:
            APIError: the body carries a ``code`` other than ``"0"``.
            UnauthorizedError: the server answered 401.
            MaxRetriesExceededException: all attempts failed.
        """
        self.prepare_header(kwargs)
        resp = self._send_with_retries(method, path, retries, logging.ERROR, kwargs)
        resp_content = resp.json()
        self._raise_for_api_error(resp_content)

        if response_model_class is None:
            return resp_content.get("data")
        if "code" in resp_content:
            return response_model_class.model_validate(resp_content["data"])
        return response_model_class.model_validate(resp_content)

    def request_file(
        self,
        method: str,
        path: str,
        file_name: str,
        retries: int = 1,
        **kwargs,
    ) -> bool:
        """Download a response body to ``file_name``.

        If the body happens to be a JSON object with a non-"0" ``code`` it is
        treated as an API error instead of file content.

        Returns:
            ``True`` when a non-empty body was written, ``False`` when the
            response body was empty (no file is written in that case).

        Raises:
            APIError: the body is an error envelope.
            UnauthorizedError: the server answered 401.
            MaxRetriesExceededException: all attempts failed.
        """
        self.prepare_header(kwargs)
        resp = self._send_with_retries(method, path, retries, logging.DEBUG, kwargs)

        try:
            resp_content = resp.json()
        except (json.JSONDecodeError, TypeError, UnicodeDecodeError):
            # Not JSON: this is regular file content.
            resp_content = None
        # Only a JSON object can be an error envelope; any other JSON value
        # (number, string, list) is file content.
        if isinstance(resp_content, dict):
            self._raise_for_api_error(resp_content)

        if not resp.content:
            return False

        with open(file_name, "wb") as f:
            f.write(resp.content)
        return True

    def close(self):
        """Close the underlying ``httpx.Client``."""
        self._client.close()

    @property
    def base_url(self) -> str:
        """Base URL of the underlying HTTP client, as a string."""
        return str(self._client.base_url)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
from recurvedata.client.client import Client, ResponseModelType
|
|
6
|
+
from recurvedata.config import AgentConfig
|
|
7
|
+
from recurvedata.exceptions import APIError
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from recurvedata.dbt.schemas import CompileResponseWithError, PreviewResponseWithError
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ServerDbtClient(Client):
    """Client for the dbt endpoints (``/api/dbt/compile`` and ``/api/dbt/preview``).

    Differences from ``Client`` visible in this class: no authentication
    headers are added (``prepare_header`` is a no-op) and ``request`` performs
    a single attempt without retry handling.
    """

    def __init__(
        self,
        server_host: str = os.environ.get("RECURVE__DPSERVER__HOST", "http://0.0.0.0:25103"),
        request_timeout: int = 60,
    ):
        # Start from the stored agent config and override host and timeout.
        agent_config = AgentConfig.load()
        agent_config.server_host = server_host
        agent_config.request_timeout = request_timeout
        super().__init__(agent_config)

    def prepare_header(self, kwargs: dict):
        """Intentionally add no headers."""
        pass

    def compile(
        self, project_id: int, sql: str, alias: str, force_regenerate_dir: bool = False
    ) -> "CompileResponseWithError":
        """POST a compile payload and return the validated response model."""
        from recurvedata.dbt.schemas import CompilePayload, CompileResponseWithError

        body = CompilePayload(
            project_id=project_id,
            sql=sql,
            alias=alias,
            force_regenerate_dir=force_regenerate_dir,
        ).model_dump()
        return self.request(
            "POST",
            path="/api/dbt/compile",
            json=body,
            response_model_class=CompileResponseWithError,
        )

    def preview(
        self,
        project_id: int,
        sql: str,
        alias: str,
        limit: int,
        force_regenerate_dir: bool = False,
        is_compiled: bool = False,
    ) -> "PreviewResponseWithError":
        """POST a preview payload and return the validated response model."""
        from recurvedata.dbt.schemas import PreviewPayload, PreviewResponseWithError

        body = PreviewPayload(
            project_id=project_id,
            sql=sql,
            alias=alias,
            limit=limit,
            force_regenerate_dir=force_regenerate_dir,
            is_compiled=is_compiled,
        ).model_dump()
        return self.request(
            "POST",
            path="/api/dbt/preview",
            json=body,
            response_model_class=PreviewResponseWithError,
        )

    def request(
        self,
        method: str,
        path: str,
        response_model_class: type[ResponseModelType] | None = None,
        retries: int = 1,
        **kwargs,
    ) -> Any:
        """Send a single request and unwrap the response.

        Unlike ``super().request`` there is no retry logic here, so failures
        surface with their original exception type instead of always being
        wrapped in ``MaxRetriesExceededException``. ``retries`` is accepted for
        signature compatibility and is not used.
        """
        self.prepare_header(kwargs)
        response = self._client.request(method, path, **kwargs)
        response.raise_for_status()
        body = response.json()

        has_envelope = "code" in body
        if has_envelope and body["code"] != "0":
            raise APIError(f"API request failed: {body['msg']}\n{body.get('data')}")

        if response_model_class is None:
            return body.get("data")
        # With an envelope the model lives under "data"; otherwise the body is the model.
        raw_model = body["data"] if has_envelope else body
        return response_model_class.model_validate(raw_model)
|
recurvedata/config.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import uuid
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING, Any, ClassVar, get_origin
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field, SecretStr
|
|
9
|
+
from typing_extensions import Self
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from pydantic.fields import FieldInfo
|
|
13
|
+
|
|
14
|
+
# Recurve home directory; taken from the RECURVE_HOME environment variable,
# falling back to ~/.recurve.
RECURVE_HOME = Path(os.environ.get("RECURVE_HOME", Path.home() / ".recurve"))
# Default file used by AgentConfig.load() / AgentConfig.save().
CONFIG_FILE_PATH = RECURVE_HOME / "config.json"

# Base directory for the paths below; taken from RECURVE__WORKER__BASE,
# falling back to /opt/recurve.
EXECUTOR_RECURVE_HOME = Path(os.environ.get("RECURVE__WORKER__BASE", "/opt/recurve"))
RECURVE_DBT_HOME = EXECUTOR_RECURVE_HOME / "dbt"
EXECUTOR_META_PATH = EXECUTOR_RECURVE_HOME / "meta"
EXECUTOR_WORKER_DATA_PATH = EXECUTOR_RECURVE_HOME / "worker_data"
SERVER_RESULT_STAGING_PATH = EXECUTOR_RECURVE_HOME / "server" / "staging"

# pyenv root; taken from RECURVE__WORKER__PYENV__BASE, falling back to /opt/pyenv.
PYENV_HOME = Path(os.environ.get("RECURVE__WORKER__PYENV__BASE", "/opt/pyenv"))
RECURVE_EXECUTOR_PYENV_NAME = "recurve_executor"
RECURVE_DBT_PYENV_NAME = "dbt"
RECURVE_EXECUTOR_CLI_NAME = "recurve_executor"
# <PYENV_HOME>/versions/<env name>/bin/recurve_executor for each of the two
# environment names above.
RECURVE_EXECUTOR_CLI = PYENV_HOME / "versions" / RECURVE_EXECUTOR_PYENV_NAME / "bin" / RECURVE_EXECUTOR_CLI_NAME
RECURVE_EXECUTOR_DBT_CLI = PYENV_HOME / "versions" / RECURVE_DBT_PYENV_NAME / "bin" / RECURVE_EXECUTOR_CLI_NAME

# Redis URL; taken from RECURVE__REDIS_LOCK_URL, falling back to a local instance.
# NOTE(review): presumably consumed by the Redis lock utilities - confirm.
REDIS_LOCK_URL: str = os.environ.get("RECURVE__REDIS_LOCK_URL", "redis://localhost:6381/1")
# Hard-coded path; not configurable through the environment.
PY_PACKAGES_PATH: str = "/opt/reorc_packages"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class AgentConfig(BaseModel):
    """Agent settings, persisted as a JSON file (``CONFIG_FILE_PATH`` by default).

    Unknown keys found in the file are ignored (``extra="ignore"``).
    """

    model_config = ConfigDict(extra="ignore")
    # Names of the fields listed as editable.
    # NOTE(review): presumably enforced by the config CLI - confirm against callers.
    editable_fields: ClassVar[set[str]] = {
        "token",
        "request_timeout",
    }

    agent_id: uuid.UUID = Field(default_factory=uuid.uuid4, description="The unique identifier of the agent.")
    server_host: str = Field(..., description="The hostname of the server.")
    tenant_domain: str = Field(..., description="The domain of the tenant.")
    token: SecretStr = Field(..., description="The authentication token.")
    request_timeout: int = Field(5, description="The timeout in seconds for HTTP requests.")

    def is_valid(self) -> bool:
        """Return True when token, server host and tenant domain are all non-empty."""
        return all((self.token.get_secret_value(), self.server_host, self.tenant_domain))

    @property
    def server_url(self) -> str:
        """Server base URL; ``https://`` is prepended when no HTTP(S) scheme is present."""
        # Match the full scheme so that a bare host such as "httpbin.org" is
        # not mistaken for an absolute URL.
        if self.server_host.startswith(("http://", "https://")):
            return self.server_host
        return f"https://{self.server_host}"

    def set_auth_token(self, encoded_token: str):
        """Populate tenant domain, server host and token from an encoded token.

        ``encoded_token`` is URL-safe base64 of
        ``<tenant_domain>::<server_host>::<raw token bytes>``. The raw token
        bytes are stored re-encoded as URL-safe base64.

        Raises:
            ValueError: the input is not valid base64 or does not contain the
                three ``::``-separated parts.
        """
        raw = base64.urlsafe_b64decode(encoded_token.encode())
        # Split at most twice: the trailing token is arbitrary bytes and may
        # itself contain b"::".
        parts = raw.split(b"::", 2)
        if len(parts) != 3:
            raise ValueError("Invalid auth token: expected '<tenant_domain>::<server_host>::<token>'")
        tenant_bytes, host_bytes, token_bytes = parts

        # Decode everything before assigning so a malformed input leaves the
        # config untouched.
        tenant_domain = tenant_bytes.decode()
        server_host = host_bytes.decode()
        token = SecretStr(base64.urlsafe_b64encode(token_bytes).decode())

        self.tenant_domain = tenant_domain
        self.server_host = server_host
        self.token = token

    def clear_auth_token(self):
        """Reset the token to an empty secret."""
        self.token = SecretStr("")

    @classmethod
    def load(cls, filename: Path | str | None = None) -> Self:
        """Load the config from ``filename`` (default ``CONFIG_FILE_PATH``).

        When the file does not exist, a config with empty host, tenant and
        token is written there first and then loaded.
        """
        path = Path(CONFIG_FILE_PATH if filename is None else filename)
        if not path.exists():
            cls(server_host="", tenant_domain="", token="").save(path)
        return cls.model_validate_json(path.read_text(encoding="utf-8"))

    def save(self, filename: Path | str | None = None):
        """Write the config as indented JSON to ``filename`` (default ``CONFIG_FILE_PATH``).

        The token is written in clear text; parent directories are created as
        needed.
        """
        path = Path(CONFIG_FILE_PATH if filename is None else filename)

        # Serialise before touching the file so a serialisation error cannot
        # leave a truncated config behind.
        content: dict = self.model_dump(mode="json")
        # model_dump masks SecretStr values; persist the real token instead.
        content["token"] = self.token.get_secret_value()

        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(json.dumps(content, indent=2) + "\n", encoding="utf-8")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def parse_value(key: str, value: str) -> Any:
    """Convert the string ``value`` to the type declared for ``AgentConfig`` field ``key``.

    Secret fields are wrapped in ``SecretStr``; booleans accept
    ``true``/``yes``/``y``/``1`` (case-insensitive) as true; any other type is
    built by calling the field's type on the string. An unknown ``key`` raises
    ``KeyError``.
    """
    annotation = AgentConfig.model_fields[key].annotation
    # For parameterised annotations use the unsubscripted origin type.
    target_type = get_origin(annotation) or annotation

    if target_type is SecretStr:
        return SecretStr(value)
    if target_type is bool:
        return value.lower() in {"true", "yes", "y", "1"}
    return target_type(value)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Module-level configuration, loaded when this module is imported.
# NOTE: AgentConfig.load() writes a config file with empty values to
# CONFIG_FILE_PATH when none exists, so importing this module can touch disk.
CONFIG: AgentConfig = AgentConfig.load()
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from recurvedata.connectors.pigeon import get_pigeon_connector
|
|
2
|
+
from recurvedata.connectors.service import (
|
|
3
|
+
get_all_secret_keywords,
|
|
4
|
+
get_config_schema,
|
|
5
|
+
get_connection_category,
|
|
6
|
+
get_connection_class,
|
|
7
|
+
get_connection_host,
|
|
8
|
+
get_connection_type,
|
|
9
|
+
get_connection_ui_category,
|
|
10
|
+
get_connection_ui_type,
|
|
11
|
+
get_datasource_by_config,
|
|
12
|
+
get_datasource_by_name,
|
|
13
|
+
get_sqlalchemy_url_by_connection,
|
|
14
|
+
init_connector,
|
|
15
|
+
list_column_data_types,
|
|
16
|
+
list_config_schemas,
|
|
17
|
+
list_dbapi_types,
|
|
18
|
+
list_juice_sync_able_dbapi_types,
|
|
19
|
+
test_connection,
|
|
20
|
+
)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING, Type, Union
|
|
2
|
+
|
|
3
|
+
from recurvedata.connectors.const import get_module_name
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from recurvedata.connectors.base import RecurveConnectorBase
|
|
7
|
+
|
|
8
|
+
# Maps a connection type name to the connector class registered for it.
# Filled by the ``register_connector_class`` decorator, read by ``get_connection_class``.
_registry = {}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_connection_class(connection_type: str, only_enabled=True) -> Type["RecurveConnectorBase"]:
    """Look up the connector class registered for ``connection_type``.

    When the type is not in the registry yet, the module named by
    ``get_module_name(connection_type)`` is imported first (presumably so that
    its registration decorator runs -- confirm against the connector modules).

    Args:
        connection_type: Name the connector was registered under.
        only_enabled: When true, classes whose ``enabled`` flag is falsy are
            treated as missing.

    Returns:
        The registered class, or ``None`` when nothing is registered for the
        type or, with ``only_enabled``, when the class is disabled.
    """
    if connection_type not in _registry:
        module_name = get_module_name(connection_type)
        if module_name:
            __import__(module_name)

    connector_cls = _registry.get(connection_type)
    if only_enabled and not (connector_cls and connector_cls.enabled):
        return None
    return connector_cls
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class register_connector_class(object):  # todo: use meta class
    """Class decorator that records a connector class in ``_registry``.

    Usage: ``@register_connector_class("name")`` or
    ``@register_connector_class(["name", "alias"])``.
    """

    def __init__(self, connection_types: Union[str, list[str]]):
        """Remember the type name(s); a single string is treated as a one-item list."""
        self.connection_types: list = (
            [connection_types] if isinstance(connection_types, str) else connection_types
        )

    def __call__(self, connector_cls):
        """Register ``connector_cls`` under every name, fill in its derived attributes, return it."""
        _registry.update((type_name, connector_cls) for type_name in self.connection_types)
        self.add_connection_type(connector_cls)
        self.set_connection_keys(connector_cls)
        return connector_cls

    def add_connection_type(self, connection_cls):
        """Default ``connection_type`` to the first registered name when the class has none."""
        if connection_cls.connection_type:
            return
        connection_cls.connection_type = self.connection_types[0]

    def set_connection_keys(self, connection_cls):
        """Copy the ``required`` / ``order`` / ``secret`` lists of ``config_schema`` onto the class."""
        schema = connection_cls.config_schema
        for attr_name, schema_key in (
            ("required_keys", "required"),
            ("connection_keys", "order"),
            ("secret_keys", "secret"),
        ):
            # A fresh empty list is used whenever the schema lacks the entry.
            setattr(connection_cls, attr_name, schema.get(schema_key, []))
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from recurvedata.consts import ConnectionCategory, ConnectorGroup
|
|
4
|
+
from recurvedata.core.configurable import BaseConfigModel, Configurable
|
|
5
|
+
from recurvedata.utils.log import LoggingMixin
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseConnector(Configurable, LoggingMixin):
    """Abstract base class for connectors."""

    def __init__(self, config: BaseConfigModel, **kwargs):
        """Keep ``config`` and the extra keyword options on the instance.

        A truthy ``logger_name`` keyword is stored as ``_logger_name``;
        ``_log_config_logger_name`` is always set to ``"recurvedata.connectors"``.
        """
        self.config = config
        self.kwargs = kwargs
        self._log_config_logger_name = "recurvedata.connectors"

        custom_logger_name = kwargs.get("logger_name")
        if custom_logger_name:
            self._logger_name = custom_logger_name

    def connect(self) -> Any:
        """Open the connection; subclasses must override this."""
        raise NotImplementedError("connect method not implemented")

    def test_connection(self) -> bool:
        """Check that the connection works; subclasses must override this."""
        raise NotImplementedError("test_connection method not implemented")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class RecurveConnectorBase(object):
    """Base class for Recurve connectors.

    An instance wraps a ``conf`` mapping plus the extra positional and keyword
    arguments it was built with. Attribute access that fails through the
    normal lookup falls back to ``conf`` and then ``kwargs`` (see
    ``__getattribute__``).
    """

    category: list[ConnectionCategory] = [
        ConnectionCategory.OTHERS,
    ]
    ui_category = ""
    setup_extras_require = []
    connection_type = ""
    ui_connection_type = ""
    config_schema = {}
    enabled = True
    juice_sync_able = False  # whether this connector can be used for juice sync
    group: list[ConnectorGroup] = []
    test_required = True
    available_column_types = []

    def __init__(self, conf, *args, **kwargs):
        """Store ``conf`` and the extra arguments unchanged on the instance."""
        self.conf = conf
        self.args = args
        self.kwargs = kwargs

    def __getattribute__(self, key):  # todo: use s.attr
        """Resolve ``key`` normally, then fall back to ``conf`` and ``kwargs``.

        When the regular lookup raises ``AttributeError`` the fallbacks are, in
        order: ``self.conf[key]``, ``self.kwargs[key]``, and ``None`` for keys
        listed in ``connection_keys`` but not in ``required_keys``.

        Raises:
            AttributeError: when none of the fallbacks applies.
        """
        try:
            return super().__getattribute__(key)
        except AttributeError:
            # ``conf`` and ``kwargs`` are what the fallback itself reads. When
            # they are not set yet (instance created without ``__init__``, e.g.
            # while ``copy``/``pickle`` probe for ``__setstate__``), going on
            # would re-enter this handler for the same missing attribute until
            # RecursionError. Re-raise so callers get a plain AttributeError.
            if key in ("conf", "kwargs"):
                raise
            if key in self.conf:
                return self.conf[key]
            if key in self.kwargs:
                return self.kwargs[key]
            if key in self.connection_keys and key not in self.required_keys:
                return None
            raise

    def test_connection(self):
        """Do nothing by default; subclasses override this with a real check."""
        pass

    @classmethod
    def get_secret_keys(cls):
        """Return the ``secret`` list of ``config_schema`` (empty list when absent)."""
        return cls.config_schema.get("secret", [])

    @staticmethod
    def preprocess_conf(conf: dict) -> dict:
        """Normalise ``conf`` before the connector object is initialised.

        Reasons given by the original authors:
        1. Some JSON data is stored in the database as a text field and is
           handled here.
        2. Some generic field mappings are added.

        The base implementation only copies ``passwd`` to ``password`` when
        ``passwd`` is present. ``conf`` is modified in place.

        :return: the same ``conf`` dict
        """
        if "passwd" in conf:
            conf["password"] = conf["passwd"]
        return conf

    required_keys = []  # todo: cache property from config_schema
    connection_keys = []

    def juice_sync_path(self, path: str) -> tuple[str, str]:
        """
        Return the paths used in juice sync.
        The first return value is the path with a secret key,
        and the second return value is the path without a secret key, intended for display purposes.

        Raises:
            ValueError: when ``juice_sync_able`` is false for this connector.
            NotImplementedError: always in this base class otherwise; subclasses
                that support juice sync must override the method.
        """
        if not self.juice_sync_able:
            raise ValueError(f"{self.connection_type} is not juice sync able")
        raise NotImplementedError

    @classmethod
    def format_config_schema(cls):
        """Return ``config_schema`` as-is; a hook for subclasses to adjust the schema."""
        return cls.config_schema

    @classmethod
    def is_dbt_supported(cls):
        """Return whether ``connection_type`` is listed in ``DBT_SUPPORTED_TYPES``."""
        # Imported here rather than at module level (presumably to avoid an import cycle -- confirm).
        from recurvedata.connectors.const import DBT_SUPPORTED_TYPES

        return cls.connection_type in DBT_SUPPORTED_TYPES

    def convert_config_to_dbt_profile(self, database: str, schema: str = None) -> dict:
        """Build a dbt profile from this connector's config; subclasses must override."""
        raise NotImplementedError("convert_config_to_dbt_profile method not implemented")

    @classmethod
    def is_cube_supported(cls):
        """Return whether ``connection_type`` is listed in ``CUBE_SUPPORTED_TYPES``."""
        # Imported here rather than at module level (presumably to avoid an import cycle -- confirm).
        from recurvedata.connectors.const import CUBE_SUPPORTED_TYPES

        return cls.connection_type in CUBE_SUPPORTED_TYPES

    def convert_config_to_cube_config(self, database: str = None, schema: str = None, ds=None) -> dict:
        """Build a cube config from this connector's config; subclasses must override."""
        # The message now names this method (it used to say "convert_config_to_cube").
        raise NotImplementedError("convert_config_to_cube_config method not implemented")
|