recurvedata_lib-0.1.487-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
recurvedata/executors/schemas.py

@@ -0,0 +1,222 @@
+import os
+import traceback
+from typing import Any, Generic, Optional, Self, TypeVar
+
+from pydantic import BaseModel, Field
+
+from recurvedata.consts import ETLExecutionStatus, Operator, ScheduleType
+from recurvedata.exceptions import RecurveException, WrapRecurveException
+from recurvedata.executors.utils import get_airflow_run_id, get_airflow_try_number, get_recurve_node_key
+
+T = TypeVar("T")
+
+
+class JobNodeItem(BaseModel):
+    id: int = Field(default=None)
+    node_key: str
+    project_id: int
+    job_id: int
+    job_name: str
+    workflow_id: int
+    workflow_name: str
+    job_schedule_type: ScheduleType
+    job_schedule_interval: str
+    job_timezone: str
+    job_owner: str
+    name: str
+    operator: Operator
+    config: dict
+    variable: dict[str, Any]
+    job_variable: dict[str, Any]
+    full_refresh_models: bool = False
+    skip_data_tests: bool = False
+    retries: int | None = None
+    retry_delay: int | None = None
+
+
+class WorkflowNodeItem(BaseModel):
+    id: int = Field(default=None, title="Node ID")
+    node_key: str
+    project_id: int
+    workflow_id: int
+    workflow_version: str
+    workflow_name: str
+    name: str = Field(title="Node Name")
+    operator: Operator
+    config: dict
+    variable: dict[str, Any]
+
+
+class ConnectionItem(BaseModel):
+    type: str
+    name: str
+    display_name: str
+    database: str
+    database_schema: str | None = None
+    data: dict
+    description: str | None
+
+    def model_post_init(self, context: dict):
+        if self.database:
+            self.data["database"] = self.database
+        if self.database_schema:
+            self.data["schema"] = self.database_schema
+
+
+class TaskInstanceStart(BaseModel):
+    job_id: int
+    node_id: int
+    operator: Operator = Field(title="Node Operator", description="Operator type of the node")
+    task: str = Field(title="Task Name", description="Task name of the node")
+    stage: Optional[str] = Field(default=None, title="Stage", description="Stage of the task run")
+    execution_date: str = Field(title="Execution Date", description="Scheduled execution time")
+    rendered_config: dict = Field(title="Rendered Config", description="Task config, already rendered")
+    start_time: str = Field(title="Task Start Time", description="Task start time")
+    hostname: Optional[str] = Field(default=None, title="Machine Host Name", description="Hostname of the machine running the task")
+    pid: Optional[int] = Field(default=None, title="Process ID", description="Task process ID")
+
+
+class TaskInstanceStartResponse(BaseModel):
+    task_instance_id: int = Field(title="Task Instance ID", description="Task Instance id")
+
+
+class TaskInstanceEnd(BaseModel):
+    job_id: int
+    node_id: int
+    run_id: str
+    end_time: str
+    execution_date: str
+    meta: Optional[Any]
+    traceback: Optional[Any]
+    status: Optional[ETLExecutionStatus]
+
+    current_retry_number: Optional[int]
+    max_retry_number: Optional[int]
+    link_workflow_id: Optional[int]
+    link_node_id: Optional[int]
+    data_interval_end: Optional[str] = None
+
+
+class TaskLogRecord(BaseModel):
+    job_id: int
+    node_key: str
+    run_id: str
+    try_number: int
+    logs: list[str]
+
+    @classmethod
+    def init(cls, job_id: int, logs: list[str]) -> "TaskLogRecord":
+        return cls(
+            job_id=job_id,
+            node_key=get_recurve_node_key(),
+            run_id=get_airflow_run_id(),
+            try_number=get_airflow_try_number(),
+            logs=logs,
+        )
+
+
+class DebugLogRecord(BaseModel):
+    workflow_id: int
+    node_key: str
+    celery_task_id: str
+    logs: list[str]
+
+    @classmethod
+    def init(cls, workflow_id: int, node_key: str, celery_task_id: str, logs: list[str]) -> "DebugLogRecord":
+        return cls(
+            workflow_id=workflow_id,
+            node_key=node_key,
+            celery_task_id=celery_task_id,
+            logs=logs,
+        )
+
+
+class DebugStart(BaseModel):
+    workflow_id: int
+    node_key: str
+    celery_task_id: str
+
+
+class DebugEnd(DebugStart):
+    is_success: bool
+
+
+class ConnectionRuntimePayload(BaseModel):
+    connection_type: str
+    config: dict
+    result_filename: str | None = None
+
+
+class TestConnectionPayload(ConnectionRuntimePayload):
+    timeout: int = 30
+
+
+class ListDatabases(BaseModel):
+    items: list[str] | None
+
+
+class ResponseError(BaseModel):
+    code: str
+    reason: str | None
+    exception: str | None = None
+    traceback: str | None = None
+    data: dict | str | None = None
+
+    @classmethod
+    def from_recurve_exception(cls, recurve_exception: RecurveException) -> Self:
+        if recurve_exception.data:
+            reason = f"{recurve_exception.code.message} {recurve_exception.data}"
+        else:
+            reason = recurve_exception.code.message
+        if isinstance(recurve_exception, WrapRecurveException):
+            exception = str(recurve_exception.exception)
+        else:
+            exception = None
+        return cls(
+            code=recurve_exception.code.code,
+            reason=reason,
+            exception=exception,
+            traceback=traceback.format_exc(),
+            data=recurve_exception.data,
+        )
+
+
+class ResponseModel(BaseModel, Generic[T]):
+    ok: bool
+    error: ResponseError | None = None
+    data: T = None
+
+    def model_dump_json_file(self, filename: str):
+        os.makedirs(os.path.dirname(filename), exist_ok=True)
+        with open(filename, "w") as f:
+            f.write(self.model_dump_json(indent=2))
+
+
+class ColumnItem(BaseModel):
+    name: str
+    type: str
+    comment: str | None = None
+    normalized_type: str
+
+
+class TableItem(BaseModel):
+    name: str
+
+
+class Pagination(BaseModel, Generic[T]):
+    total: int
+    items: list[T]
+
+
+class TableListPayload(ConnectionRuntimePayload):
+    database: str
+
+
+class ColumnListPayload(TableListPayload):
+    table: str
+
+
+class FullDatabaseItem(BaseModel):
+    name: str
+    tables: list[TableItem]
+    views: list[TableItem]
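The file above defines the executor's wire schemas. Two of them are generic pydantic models: `Pagination[T]` wraps list results, and `ResponseModel[T]` adds an `ok`/`error` envelope that `model_dump_json_file` can persist for another process to pick up. A minimal usage sketch, assuming `recurvedata-lib` is installed; the table names and output path below are made up for illustration:

```python
# Hedged sketch: exercises the Pagination / ResponseModel generics from
# recurvedata/executors/schemas.py. Table names and the output path are
# illustrative, not values used by the package.
from recurvedata.executors.schemas import (
    Pagination,
    ResponseError,
    ResponseModel,
    TableItem,
)

tables = [TableItem(name="orders"), TableItem(name="users")]
page = Pagination[TableItem](total=len(tables), items=tables)

# Success and failure travel in the same envelope instead of raising
# across the process boundary.
ok_resp = ResponseModel[Pagination[TableItem]](ok=True, data=page)
err_resp = ResponseModel[None](
    ok=False,
    error=ResponseError(code="E001", reason="connection refused"),
)

# model_dump_json_file creates the parent directory and writes indented JSON.
ok_resp.model_dump_json_file("/tmp/executor-demo/response.json")
```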
recurvedata/executors/service/connector.py

@@ -0,0 +1,380 @@
+import concurrent.futures
+import datetime
+import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from decimal import Decimal
+from typing import TYPE_CHECKING
+import json
+
+
+from recurvedata.connectors.base import RecurveConnectorBase
+from recurvedata.connectors.dbapi import DBAPIBase
+from recurvedata.connectors.service import DataSourceWrapper, get_datasource_by_config
+from recurvedata.executors.schemas import ColumnItem, FullDatabaseItem, ListDatabases, Pagination, TableItem
+from recurvedata.pigeon.connector.dbapi import DBAPIConnector
+from recurvedata.pigeon.schema import Schema
+from recurvedata.utils.normalizer import ColumnTypeNormalizer
+
+if TYPE_CHECKING:
+    from recurvedata.dbt.schemas import PreviewResult
+
+
+class ConnectionService:
+    @staticmethod
+    def test_connection(connection_type: str, connection_config: dict):
+        logging.info(f"Connection of type '{connection_type}' with provided config is valid.")
+        datasource = get_datasource_by_config(connection_type, connection_config)
+
+        def test_connection_with_timeout():
+            datasource.recurve_connector.test_connection()
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            future = executor.submit(test_connection_with_timeout)
+            future.result()
+
+        logging.info("Connection test successful.")
+
+    @staticmethod
+    def list_databases(connection_type: str, connection_config: dict) -> ListDatabases:
+        datasource = get_datasource_by_config(connection_type, connection_config)
+        logging.info(f"Listing databases for connection of type '{connection_type}' with provided config.")
+        if not datasource.is_dbapi:
+            logging.info(f"{datasource.ds_type} is not dbapi, not support this function")
+            raise ValueError(f"{datasource.ds_type} is not dbapi, not support this function")
+
+        return ListDatabases(items=datasource.recurve_connector.get_databases())
+
+    @staticmethod
+    def list_tables(connection_type: str, connection_config: dict, database: str) -> Pagination[TableItem]:
+        datasource = get_datasource_by_config(connection_type, connection_config)
+        logging.info(f"Listing tables for connection of type '{connection_type}' with provided config.")
+        if not datasource.is_dbapi:
+            raise ValueError(f"{datasource.ds_type} is not dbapi, not support this function")
+
+        tables = datasource.recurve_connector.get_tables(database)
+        tables = [TableItem(name=table) for table in tables]
+        return Pagination[TableItem](items=tables, total=len(tables))
+
+    @staticmethod
+    def list_columns(
+        connection_type: str, connection_config: dict, database_name: str, table_name: str
+    ) -> Pagination[ColumnItem]:
+        datasource = get_datasource_by_config(connection_type, connection_config)
+        logging.info(f"Listing columns for connection of type '{connection_type}' with provided config.")
+        if not datasource.is_dbapi:
+            raise ValueError(f"{datasource.ds_type} is not dbapi, not support this function")
+
+        columns = datasource.recurve_connector.get_columns(table_name, database_name)
+        result = []
+        for column in columns:
+            normalizer = ColumnTypeNormalizer(
+                connection_type, custom_mappings=datasource.recurve_connector.column_type_mapping
+            )
+            normalized_type = normalizer.normalize(column["type"])
+            result.append(
+                ColumnItem(
+                    name=column["name"],
+                    type=column["type"],
+                    normalized_type=normalized_type,
+                    comment=column.get("comment"),
+                )
+            )
+
+        return Pagination[ColumnItem](items=result, total=len(result))
+
+    @staticmethod
+    def list_full_databases(connection_type: str, connection_config: dict) -> Pagination[FullDatabaseItem]:
+        datasource = get_datasource_by_config(connection_type, connection_config)
+        databases = datasource.recurve_connector.get_databases()
+
+        def process_database(database: str):
+            con: DBAPIBase = datasource.recurve_connector
+            tables = con.get_tables(database)
+            views = con.get_views(database)
+            if con.connection_type == "impala":
+                tables = [table for table in tables if table not in views]
+            return FullDatabaseItem(
+                name=database,
+                tables=[TableItem(name=table) for table in tables],
+                views=[TableItem(name=view) for view in views],
+            )
+
+        results = []
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            future_to_db = {executor.submit(process_database, db): db for db in databases}
+
+            for future in as_completed(future_to_db):
+                db = future_to_db[future]
+                try:
+                    result = future.result()
+                    results.append(result)
+                except Exception as e:
+                    logging.error(f"Error processing database {db}: {e}")
+                    raise e
+
+        return Pagination[FullDatabaseItem](items=results, total=len(results))
+
+    def preview_sql(
+        self,
+        ds: DataSourceWrapper,
+        sql: str,
+        limit: int = 100,
+        max_limit: int = 10000,
+        orders: list[dict[str, str]] | None = None,
+        offset: int = 0,
+    ) -> "PreviewResult":
+        from recurvedata.dbt.schemas import PreviewResult
+
+        recurve_con = ds.recurve_connector
+        limit = min(limit, max_limit)
+        limited_sql = recurve_con.limit_sql(sql, limit, orders=orders, offset=offset)
+        logging.info(f"preview_sql - limited_sql: {limited_sql}")
+        column_schema, data = self._fetch_many_return_type(ds, limited_sql, limit)
+        logging.info(f"preview_sql - fetched {len(data) if data else 0} rows")
+        try:
+            fields_log = []
+            for field in column_schema.fields:
+                field_info = {
+                    "name": field.name,
+                    "type": field.type,
+                }
+                if field.comment is not None:
+                    field_info["comment"] = field.comment
+                fields_log.append(field_info)
+            logging.info(f"preview_sql - column_schema: {json.dumps(fields_log, default=str)}")
+        except Exception as e:
+            logging.info(f"preview_sql - column_schema: {column_schema} (failed to serialize: {e})")
+
+        data = self._jsonable_value(data)
+        normalizer = ColumnTypeNormalizer(recurve_con.connection_type, custom_mappings=recurve_con.column_type_mapping)
+        columns = [
+            ColumnItem(
+                name=field.name,
+                type=field.type,
+                normalized_type=normalizer.normalize(field.type),
+                comment=field.comment,
+            )
+            for field in column_schema.fields
+        ]
+        return PreviewResult(
+            compiled_sql=sql,
+            columns=columns,
+            data=data,
+        )
+
+    def validate_sql(
+        self,
+        ds: DataSourceWrapper,
+        sql: str,
+        limit: int = 0,
+        max_limit: int = 100,
+    ) -> "SqlValidationResult":
+        """
+        Validate SQL by executing it and checking for syntax/runtime errors.
+
+        This function executes ANY SQL (SELECT, DDL, DML) to validate syntax and logic.
+        Use limit=0 to avoid returning large datasets for non-SELECT statements.
+
+        Args:
+            ds: DataSourceWrapper containing connection info
+            sql: SQL statement(s) to validate
+            limit: Maximum rows to return (0 = no data returned, just validation)
+            max_limit: Maximum allowed limit
+
+        Returns:
+            SqlValidationResult with validation status and error details
+        """
+        from recurvedata.server.data_service.schemas import SqlValidationResult
+        import traceback
+
+        try:
+            recurve_con = ds.recurve_connector
+            limit = min(limit, max_limit)
+            rollback_supported = True  # Default to True, will be set to False for databases that don't support it
+
+            # For validation, we don't need to limit non-SELECT statements
+            validation_sql = sql
+            if limit > 0:
+                # Only apply limit if we want to return data (SELECT statements)
+                validation_sql = recurve_con.limit_sql(sql, limit)
+
+            logging.info(f"validate_sql - executing: {validation_sql}")
+
+            # Detect if this is a SELECT query or not
+            sql_upper = validation_sql.strip().upper()
+            is_select_query = sql_upper.startswith('SELECT') or sql_upper.startswith('WITH')
+
+            if is_select_query and limit > 0:
+                # For SELECT queries, fetch results if limit > 0
+                column_schema, data = self._fetch_many_return_type(ds, validation_sql, limit)
+                logging.info(f"validate_sql - SELECT executed successfully, fetched {len(data) if data else 0} rows")
+
+                # Prepare column information
+                columns = []
+                if column_schema and hasattr(column_schema, 'fields'):
+                    normalizer = ColumnTypeNormalizer(
+                        recurve_con.connection_type,
+                        custom_mappings=recurve_con.column_type_mapping
+                    )
+                    columns = [
+                        {
+                            "name": field.name,
+                            "type": field.type,
+                            "normalized_type": normalizer.normalize(field.type),
+                            "comment": field.comment,
+                        }
+                        for field in column_schema.fields
+                    ]
+
+                # Convert data to JSON-serializable format
+                json_data = self._jsonable_value(data) if data else []
+            else:
+                # For DDL/DML queries or SELECT with limit=0, validate WITHOUT committing changes
+                cursor_options = {"commit_on_close": False}  # Don't commit - we'll rollback
+                connector: DBAPIConnector = ds.connector
+                with connector.cursor(**cursor_options) as cursor:
+                    # Initialize connection state variables outside try block
+                    conn = cursor.connection
+                    original_autocommit = None
+
+                    try:
+                        # Save original autocommit state and ensure it's disabled for transactions
+                        if hasattr(conn, 'autocommit'):
+                            original_autocommit = conn.autocommit
+                            if original_autocommit:
+                                conn.autocommit = False
+
+                        # Execute the SQL to validate syntax and logic
+                        cursor.execute(validation_sql)
+
+                        # Get affected rows count for logging
+                        affected_rows = cursor.rowcount if hasattr(cursor, 'rowcount') else 0
+
+                        # IMPORTANT: Rollback to undo any changes - this is validation only!
+                        # But only if the connection supports rollback (PostgreSQL, MySQL, etc.)
+                        if hasattr(conn, 'rollback'):
+                            conn.rollback()
+                            rollback_supported = True
+                            rollback_status = "(rolled back)"
+                        else:
+                            # BigQuery and some other databases don't support rollback
+                            # The DDL/DML will be executed and committed automatically
+                            rollback_supported = False
+                            rollback_status = "(auto-committed - no rollback support)"
+
+                        # Restore original autocommit state if we changed it
+                        if original_autocommit is not None and original_autocommit:
+                            conn.autocommit = original_autocommit
+
+                        logging.info(f"validate_sql - DDL/DML validated successfully {rollback_status}, would affect {affected_rows} rows")
+
+                    except Exception as e:
+                        # If there's an error, rollback anyway to be safe (if supported)
+                        try:
+                            if hasattr(conn, 'rollback'):
+                                conn.rollback()
+                            # Restore original autocommit state if we changed it
+                            if original_autocommit is not None and original_autocommit:
+                                conn.autocommit = original_autocommit
+                        except:
+                            pass  # Ignore rollback errors if connection is broken
+                        raise e  # Re-raise the original validation error
+
+                columns = []
+                json_data = []
+
+            # Add warning message if rollback is not supported
+            warning_message = None
+            if not rollback_supported and not is_select_query:
+                warning_message = "WARNING: Database does not support rollback. DDL/DML changes were permanently applied to the database during validation."
+
+            return SqlValidationResult(
+                is_valid=True,
+                compiled_sql=sql,
+                columns=columns,
+                data=json_data if limit > 0 else [],
+                error_message=warning_message,
+                error_code=None,
+                error_traceback=None,
+            )
+
+        except Exception as e:
+            # SQL validation failed - capture error details
+            error_message = str(e)
+            error_traceback = traceback.format_exc()
+
+            logging.error(f"validate_sql - failed: {error_message}")
+            logging.error(f"validate_sql - traceback: {error_traceback}")
+
+            return SqlValidationResult(
+                is_valid=False,
+                compiled_sql=sql,
+                columns=[],
+                data=[],
+                error_message=error_message,
+                error_code=getattr(e, 'code', 'VALIDATION_ERROR'),
+                error_traceback=error_traceback,
+            )
+
+    def _fetch_many_return_type(self, ds: DataSourceWrapper, sql: str, limit: int) -> tuple[Schema, list[tuple]]:
+        cursor_options = {"commit_on_close": False}
+        connector: DBAPIConnector = ds.connector
+        with connector.cursor(**cursor_options) as cursor:
+            cursor.execute(sql)
+
+            # Postgres use server side cursor, need fetch first to get cursor.description
+            first_row = None
+            if connector.is_postgres() or connector.is_redshift():
+                limit = max(0, limit - 1)
+                first_row = cursor.fetchone()
+
+            schema = self._extract_column_info_from_cursor(ds.recurve_connector, cursor)
+            rv = cursor.fetchmany(limit)
+            if first_row:
+                rv = [first_row] + rv
+            if connector.is_google_bigquery():
+                # row is google.cloud.bigquery.table.Row type
+                rv = [row.values() for row in rv]
+            if connector.is_mssql():
+                rv = [tuple(row) for row in rv]
+
+            return schema, rv
+
+    @staticmethod
+    def _extract_column_info_from_cursor(recurve_con: RecurveConnectorBase, cursor) -> Schema:
+        schema = Schema()
+        for item in cursor.description:
+            name = item[0]
+            if "." in name:
+                name = name.split(".")[1]
+
+            type_code = item[1]
+            size = item[3]
+            ttype = recurve_con.sqlalchemy_column_type_code_to_name(type_code, size)
+            schema.add_field_by_attrs(name, ttype, size)
+        return schema
+
+    def _jsonable_value(self, value):
+        if value is None:
+            return value
+        elif isinstance(value, (int, float, Decimal)):
+            return str(value)
+        elif isinstance(value, bool):
+            return value
+        elif isinstance(value, dict):
+            return {k: self._jsonable_value(v) for k, v in value.items()}
+        elif isinstance(value, (list, tuple, set)):
+            return [self._jsonable_value(v) for v in value]
+        elif isinstance(value, (datetime.datetime, datetime.date)):
+            return value.isoformat()
+        else:
+            return str(value)
+
+    @staticmethod
+    def fetch_total(ds: DataSourceWrapper, sql: str) -> int:
+        recurve_con: RecurveConnectorBase = ds.recurve_connector
+        count_sql = recurve_con.count_sql(sql)
+        connector: DBAPIConnector = ds.connector
+        with connector.cursor() as cursor:
+            cursor.execute(count_sql)
+            return cursor.fetchone()[0]
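`ConnectionService` above is a stateless façade: the `list_*` static methods rebuild a datasource from a raw config dict on every call, while `preview_sql` and `validate_sql` take an already-constructed `DataSourceWrapper`; `validate_sql` additionally runs DDL/DML with autocommit disabled and rolls the statement back where the driver supports it. A hedged call sketch, assuming `recurvedata-lib` is installed; the `"postgres"` type string and the config keys are assumptions for illustration, not the package's documented schema:

```python
# Hedged sketch of driving ConnectionService from
# recurvedata/executors/service/connector.py. The connection-type string
# and config keys below are illustrative assumptions.
from recurvedata.connectors.service import get_datasource_by_config
from recurvedata.executors.service.connector import ConnectionService

config = {"host": "127.0.0.1", "port": 5432, "user": "app", "password": "secret"}

# Static helpers construct the datasource from the raw config each call.
ConnectionService.test_connection("postgres", config)        # raises on failure
dbs = ConnectionService.list_databases("postgres", config)   # -> ListDatabases
tables = ConnectionService.list_tables("postgres", config, database="analytics")

# preview_sql / validate_sql work on a pre-built DataSourceWrapper.
svc = ConnectionService()
ds = get_datasource_by_config("postgres", config)
preview = svc.preview_sql(ds, "SELECT * FROM orders", limit=50)  # limit capped at max_limit
check = svc.validate_sql(ds, "DELETE FROM orders WHERE 1=0")     # executed, then rolled back if supported
print(check.is_valid, check.error_message)
```

Note the caveat encoded in `validate_sql` itself: on backends without rollback support (for example BigQuery, per the inline comments), the statement is auto-committed, so validating DDL/DML there mutates the target database.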