recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,222 @@
1
+ import os
2
+ import traceback
3
+ from typing import Any, Generic, Optional, Self, TypeVar
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+ from recurvedata.consts import ETLExecutionStatus, Operator, ScheduleType
8
+ from recurvedata.exceptions import RecurveException, WrapRecurveException
9
+ from recurvedata.executors.utils import get_airflow_run_id, get_airflow_try_number, get_recurve_node_key
10
+
# Payload type carried by the generic response/pagination models below.
T = TypeVar("T")
12
+
13
+
14
class JobNodeItem(BaseModel):
    """A single node of a scheduled job, as dispatched to an executor.

    Carries the job/workflow coordinates, scheduling metadata, the node's
    operator and rendered config, plus variable maps and retry settings.
    """

    # The node may not be persisted yet, so the id can be absent.  The
    # original annotation was a bare ``int`` with ``default=None``, which
    # contradicts itself; ``int | None`` is the backward-compatible widening.
    id: int | None = Field(default=None)
    node_key: str
    project_id: int
    job_id: int
    job_name: str
    workflow_id: int
    workflow_name: str
    job_schedule_type: ScheduleType
    job_schedule_interval: str
    job_timezone: str
    job_owner: str
    name: str
    operator: Operator
    config: dict
    variable: dict[str, Any]
    job_variable: dict[str, Any]
    full_refresh_models: bool = False
    skip_data_tests: bool = False
    retries: int | None = None
    retry_delay: int | None = None
35
+
36
+
37
class WorkflowNodeItem(BaseModel):
    """A single node of a workflow (debug/draft context, no job binding)."""

    # Same fix as JobNodeItem: ``default=None`` requires an optional type.
    id: int | None = Field(default=None, title="Node ID")
    node_key: str
    project_id: int
    workflow_id: int
    workflow_version: str
    workflow_name: str
    name: str = Field(title="Node Name")
    operator: Operator
    config: dict
    variable: dict[str, Any]
48
+
49
+
50
class ConnectionItem(BaseModel):
    """Connection definition plus its raw driver config (``data``)."""

    type: str
    name: str
    display_name: str
    database: str
    database_schema: str | None = None
    data: dict
    description: str | None

    def model_post_init(self, context: dict) -> None:
        # Mirror the top-level database/schema into the driver config so
        # downstream connectors only need to read ``data``.
        if self.database:
            self.data["database"] = self.database
        if self.database_schema:
            self.data["schema"] = self.database_schema
64
+
65
+
66
class TaskInstanceStart(BaseModel):
    """Payload announcing that a task instance has started running."""

    job_id: int
    node_id: int
    operator: Operator = Field(title="Node Operator", description="节点的 Operator 类型")
    task: str = Field(title="Task Name", description="节点的任务名称")
    stage: Optional[str] = Field(default=None, title="Stage", description="任务运行阶段")
    execution_date: str = Field(title="Execution Date", description="调度时间")
    rendered_config: dict = Field(title="Rendered Config", description="任务 Config, 已渲染")
    start_time: str = Field(title="Task Start Time", description="任务开始时间")
    hostname: Optional[str] = Field(default=None, title="Machine Host Name", description="任务所在的机器 hostname")
    pid: Optional[int] = Field(default=None, title="Process ID", description="任务进程 ID")
77
+
78
+
79
class TaskInstanceStartResponse(BaseModel):
    """Server response to ``TaskInstanceStart``: the persisted instance id."""

    task_instance_id: int = Field(title="Task Instance ID", description="Task Instance id")
81
+
82
+
83
class TaskInstanceEnd(BaseModel):
    """Payload reporting the completion (success or failure) of a task run."""

    job_id: int
    node_id: int
    run_id: str
    end_time: str
    execution_date: str
    # NOTE(review): ``Optional[...]`` without a default is still a *required*
    # field in pydantic v2 -- callers must pass these explicitly (None is
    # accepted as a value).  Confirm this is intended before adding defaults.
    meta: Optional[Any]
    traceback: Optional[Any]
    status: Optional[ETLExecutionStatus]

    current_retry_number: Optional[int]
    max_retry_number: Optional[int]
    link_workflow_id: Optional[int]
    link_node_id: Optional[int]
    data_interval_end: Optional[str] = None
98
+
99
+
100
class TaskLogRecord(BaseModel):
    """A batch of log lines for one try of a scheduled task."""

    job_id: int
    node_key: str
    run_id: str
    try_number: int
    logs: list[str]

    @classmethod
    def init(cls, job_id: int, logs: list[str]) -> "TaskLogRecord":
        """Build a record, resolving node/run/try from the Airflow runtime."""
        attrs = dict(
            job_id=job_id,
            node_key=get_recurve_node_key(),
            run_id=get_airflow_run_id(),
            try_number=get_airflow_try_number(),
            logs=logs,
        )
        return cls(**attrs)
116
+
117
+
118
class DebugLogRecord(BaseModel):
    """A batch of log lines captured while debugging a workflow node."""

    workflow_id: int
    node_key: str
    celery_task_id: str
    logs: list[str]

    @classmethod
    def init(cls, workflow_id: int, node_key: str, celery_task_id: str, logs: list[str]) -> "DebugLogRecord":
        """Assemble a record from the individual identifiers."""
        attrs = dict(
            workflow_id=workflow_id,
            node_key=node_key,
            celery_task_id=celery_task_id,
            logs=logs,
        )
        return cls(**attrs)
132
+
133
+
134
class DebugStart(BaseModel):
    """Payload announcing the start of a node debug run."""

    workflow_id: int
    node_key: str
    celery_task_id: str
138
+
139
+
140
class DebugEnd(DebugStart):
    """Payload announcing the end of a node debug run and its outcome."""

    is_success: bool
142
+
143
+
144
class ConnectionRuntimePayload(BaseModel):
    """Base payload for running an operation against an ad-hoc connection."""

    connection_type: str
    config: dict
    # When set, the operation's result is written to this file instead of
    # (or in addition to) being returned inline.
    result_filename: str | None = None
148
+
149
+
150
class TestConnectionPayload(ConnectionRuntimePayload):
    """Payload for a connectivity test; ``timeout`` is in seconds."""

    timeout: int = 30
152
+
153
+
154
class ListDatabases(BaseModel):
    """Result of a database-listing call; ``items`` holds database names."""

    items: list[str] | None
156
+
157
+
158
class ResponseError(BaseModel):
    """Structured error payload attached to a failed ``ResponseModel``."""

    code: str
    reason: str | None
    exception: str | None = None
    traceback: str | None = None
    data: dict | str | None = None

    @classmethod
    def from_recurve_exception(cls, recurve_exception: RecurveException) -> Self:
        """Convert a RecurveException (raised in an ``except`` block) into
        a serializable error, capturing the active traceback."""
        message = recurve_exception.code.message
        # Append the exception's payload to the reason when present.
        reason = f"{message} {recurve_exception.data}" if recurve_exception.data else message
        # Only wrapper exceptions carry an underlying exception to surface.
        exception = (
            str(recurve_exception.exception)
            if isinstance(recurve_exception, WrapRecurveException)
            else None
        )
        return cls(
            code=recurve_exception.code.code,
            reason=reason,
            exception=exception,
            traceback=traceback.format_exc(),
            data=recurve_exception.data,
        )
182
+
183
+
184
class ResponseModel(BaseModel, Generic[T]):
    """Generic response envelope: ``ok`` flag, optional error, typed data."""

    ok: bool
    error: ResponseError | None = None
    # NOTE(review): the ``None`` default does not match the bare ``T``
    # annotation; kept as-is because callers may rely on the current schema.
    data: T = None

    def model_dump_json_file(self, filename: str):
        """Serialize this response as indented JSON to ``filename``,
        creating parent directories as needed."""
        parent = os.path.dirname(filename)
        # Guard: os.makedirs("") raises FileNotFoundError when the filename
        # has no directory component (e.g. a bare "result.json").
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(filename, "w") as f:
            f.write(self.model_dump_json(indent=2))
193
+
194
+
195
class ColumnItem(BaseModel):
    """A table column: native type plus its normalized (canonical) type."""

    name: str
    type: str
    comment: str | None = None
    normalized_type: str
200
+
201
+
202
class TableItem(BaseModel):
    """A table (or view) identified by name."""

    name: str
204
+
205
+
206
class Pagination(BaseModel, Generic[T]):
    """Generic paginated collection: total count plus the page of items."""

    total: int
    items: list[T]
209
+
210
+
211
class TableListPayload(ConnectionRuntimePayload):
    """Payload for listing tables within one database."""

    database: str
213
+
214
+
215
class ColumnListPayload(TableListPayload):
    """Payload for listing the columns of one table."""

    table: str
217
+
218
+
219
class FullDatabaseItem(BaseModel):
    """A database together with its tables and views."""

    name: str
    tables: list[TableItem]
    views: list[TableItem]
File without changes
@@ -0,0 +1,380 @@
1
+ import concurrent.futures
2
+ import datetime
3
+ import logging
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+ from decimal import Decimal
6
+ from typing import TYPE_CHECKING
7
+ import json
8
+
9
+
10
+ from recurvedata.connectors.base import RecurveConnectorBase
11
+ from recurvedata.connectors.dbapi import DBAPIBase
12
+ from recurvedata.connectors.service import DataSourceWrapper, get_datasource_by_config
13
+ from recurvedata.executors.schemas import ColumnItem, FullDatabaseItem, ListDatabases, Pagination, TableItem
14
+ from recurvedata.pigeon.connector.dbapi import DBAPIConnector
15
+ from recurvedata.pigeon.schema import Schema
16
+ from recurvedata.utils.normalizer import ColumnTypeNormalizer
17
+
18
+ if TYPE_CHECKING:
19
+ from recurvedata.dbt.schemas import PreviewResult
20
+
21
+
22
+ class ConnectionService:
23
+ @staticmethod
24
+ def test_connection(connection_type: str, connection_config: dict):
25
+ logging.info(f"Connection of type '{connection_type}' with provided config is valid.")
26
+ datasource = get_datasource_by_config(connection_type, connection_config)
27
+
28
+ def test_connection_with_timeout():
29
+ datasource.recurve_connector.test_connection()
30
+
31
+ with concurrent.futures.ThreadPoolExecutor() as executor:
32
+ future = executor.submit(test_connection_with_timeout)
33
+ future.result()
34
+
35
+ logging.info("Connection test successful.")
36
+
37
+ @staticmethod
38
+ def list_databases(connection_type: str, connection_config: dict) -> ListDatabases:
39
+ datasource = get_datasource_by_config(connection_type, connection_config)
40
+ logging.info(f"Listing databases for connection of type '{connection_type}' with provided config.")
41
+ if not datasource.is_dbapi:
42
+ logging.info(f"{datasource.ds_type} is not dbapi, not support this function")
43
+ raise ValueError(f"{datasource.ds_type} is not dbapi, not support this function")
44
+
45
+ return ListDatabases(items=datasource.recurve_connector.get_databases())
46
+
47
+ @staticmethod
48
+ def list_tables(connection_type: str, connection_config: dict, database: str) -> Pagination[TableItem]:
49
+ datasource = get_datasource_by_config(connection_type, connection_config)
50
+ logging.info(f"Listing tables for connection of type '{connection_type}' with provided config.")
51
+ if not datasource.is_dbapi:
52
+ raise ValueError(f"{datasource.ds_type} is not dbapi, not support this function")
53
+
54
+ tables = datasource.recurve_connector.get_tables(database)
55
+ tables = [TableItem(name=table) for table in tables]
56
+ return Pagination[TableItem](items=tables, total=len(tables))
57
+
58
+ @staticmethod
59
+ def list_columns(
60
+ connection_type: str, connection_config: dict, database_name: str, table_name: str
61
+ ) -> Pagination[ColumnItem]:
62
+ datasource = get_datasource_by_config(connection_type, connection_config)
63
+ logging.info(f"Listing columns for connection of type '{connection_type}' with provided config.")
64
+ if not datasource.is_dbapi:
65
+ raise ValueError(f"{datasource.ds_type} is not dbapi, not support this function")
66
+
67
+ columns = datasource.recurve_connector.get_columns(table_name, database_name)
68
+ result = []
69
+ for column in columns:
70
+ normalizer = ColumnTypeNormalizer(
71
+ connection_type, custom_mappings=datasource.recurve_connector.column_type_mapping
72
+ )
73
+ normalized_type = normalizer.normalize(column["type"])
74
+ result.append(
75
+ ColumnItem(
76
+ name=column["name"],
77
+ type=column["type"],
78
+ normalized_type=normalized_type,
79
+ comment=column.get("comment"),
80
+ )
81
+ )
82
+
83
+ return Pagination[ColumnItem](items=result, total=len(result))
84
+
85
+ @staticmethod
86
+ def list_full_databases(connection_type: str, connection_config: dict) -> Pagination[FullDatabaseItem]:
87
+ datasource = get_datasource_by_config(connection_type, connection_config)
88
+ databases = datasource.recurve_connector.get_databases()
89
+
90
+ def process_database(database: str):
91
+ con: DBAPIBase = datasource.recurve_connector
92
+ tables = con.get_tables(database)
93
+ views = con.get_views(database)
94
+ if con.connection_type == "impala":
95
+ tables = [table for table in tables if table not in views]
96
+ return FullDatabaseItem(
97
+ name=database,
98
+ tables=[TableItem(name=table) for table in tables],
99
+ views=[TableItem(name=view) for view in views],
100
+ )
101
+
102
+ results = []
103
+ with ThreadPoolExecutor(max_workers=5) as executor:
104
+ future_to_db = {executor.submit(process_database, db): db for db in databases}
105
+
106
+ for future in as_completed(future_to_db):
107
+ db = future_to_db[future]
108
+ try:
109
+ result = future.result()
110
+ results.append(result)
111
+ except Exception as e:
112
+ logging.error(f"Error processing database {db}: {e}")
113
+ raise e
114
+
115
+ return Pagination[FullDatabaseItem](items=results, total=len(results))
116
+
117
    def preview_sql(
        self,
        ds: DataSourceWrapper,
        sql: str,
        limit: int = 100,
        max_limit: int = 10000,
        orders: list[dict[str, str]] | None = None,
        offset: int = 0,
    ) -> "PreviewResult":
        """Execute ``sql`` with an enforced row limit and return a preview.

        The statement is wrapped by the connector's ``limit_sql`` (applying
        ``limit``/``offset`` and optional ``orders``), executed, and the
        result converted to JSON-safe rows plus normalized column metadata.
        ``limit`` is capped at ``max_limit``.
        """
        # Imported lazily to avoid a circular import at module load time --
        # TODO confirm; the TYPE_CHECKING guard at the top suggests this.
        from recurvedata.dbt.schemas import PreviewResult

        recurve_con = ds.recurve_connector
        limit = min(limit, max_limit)
        limited_sql = recurve_con.limit_sql(sql, limit, orders=orders, offset=offset)
        logging.info(f"preview_sql - limited_sql: {limited_sql}")
        column_schema, data = self._fetch_many_return_type(ds, limited_sql, limit)
        logging.info(f"preview_sql - fetched {len(data) if data else 0} rows")
        # Best-effort debug logging of the column schema; never let a
        # serialization problem break the preview itself.
        try:
            fields_log = []
            for field in column_schema.fields:
                field_info = {
                    "name": field.name,
                    "type": field.type,
                }
                if field.comment is not None:
                    field_info["comment"] = field.comment
                fields_log.append(field_info)
            logging.info(f"preview_sql - column_schema: {json.dumps(fields_log, default=str)}")
        except Exception as e:
            logging.info(f"preview_sql - column_schema: {column_schema} (failed to serialize: {e})")

        # Convert driver-native values (Decimal, datetime, ...) to
        # JSON-serializable ones -- presumably; _jsonable_value is defined
        # elsewhere in this class.
        data = self._jsonable_value(data)
        normalizer = ColumnTypeNormalizer(recurve_con.connection_type, custom_mappings=recurve_con.column_type_mapping)
        columns = [
            ColumnItem(
                name=field.name,
                type=field.type,
                normalized_type=normalizer.normalize(field.type),
                comment=field.comment,
            )
            for field in column_schema.fields
        ]
        return PreviewResult(
            compiled_sql=sql,
            columns=columns,
            data=data,
        )
164
+
165
+ def validate_sql(
166
+ self,
167
+ ds: DataSourceWrapper,
168
+ sql: str,
169
+ limit: int = 0,
170
+ max_limit: int = 100,
171
+ ) -> "SqlValidationResult":
172
+ """
173
+ Validate SQL by executing it and checking for syntax/runtime errors.
174
+
175
+ This function executes ANY SQL (SELECT, DDL, DML) to validate syntax and logic.
176
+ Use limit=0 to avoid returning large datasets for non-SELECT statements.
177
+
178
+ Args:
179
+ ds: DataSourceWrapper containing connection info
180
+ sql: SQL statement(s) to validate
181
+ limit: Maximum rows to return (0 = no data returned, just validation)
182
+ max_limit: Maximum allowed limit
183
+
184
+ Returns:
185
+ SqlValidationResult with validation status and error details
186
+ """
187
+ from recurvedata.server.data_service.schemas import SqlValidationResult
188
+ import traceback
189
+
190
+ try:
191
+ recurve_con = ds.recurve_connector
192
+ limit = min(limit, max_limit)
193
+ rollback_supported = True # Default to True, will be set to False for databases that don't support it
194
+
195
+ # For validation, we don't need to limit non-SELECT statements
196
+ validation_sql = sql
197
+ if limit > 0:
198
+ # Only apply limit if we want to return data (SELECT statements)
199
+ validation_sql = recurve_con.limit_sql(sql, limit)
200
+
201
+ logging.info(f"validate_sql - executing: {validation_sql}")
202
+
203
+ # Detect if this is a SELECT query or not
204
+ sql_upper = validation_sql.strip().upper()
205
+ is_select_query = sql_upper.startswith('SELECT') or sql_upper.startswith('WITH')
206
+
207
+ if is_select_query and limit > 0:
208
+ # For SELECT queries, fetch results if limit > 0
209
+ column_schema, data = self._fetch_many_return_type(ds, validation_sql, limit)
210
+ logging.info(f"validate_sql - SELECT executed successfully, fetched {len(data) if data else 0} rows")
211
+
212
+ # Prepare column information
213
+ columns = []
214
+ if column_schema and hasattr(column_schema, 'fields'):
215
+ normalizer = ColumnTypeNormalizer(
216
+ recurve_con.connection_type,
217
+ custom_mappings=recurve_con.column_type_mapping
218
+ )
219
+ columns = [
220
+ {
221
+ "name": field.name,
222
+ "type": field.type,
223
+ "normalized_type": normalizer.normalize(field.type),
224
+ "comment": field.comment,
225
+ }
226
+ for field in column_schema.fields
227
+ ]
228
+
229
+ # Convert data to JSON-serializable format
230
+ json_data = self._jsonable_value(data) if data else []
231
+ else:
232
+ # For DDL/DML queries or SELECT with limit=0, validate WITHOUT committing changes
233
+ cursor_options = {"commit_on_close": False} # Don't commit - we'll rollback
234
+ connector: DBAPIConnector = ds.connector
235
+ with connector.cursor(**cursor_options) as cursor:
236
+ # Initialize connection state variables outside try block
237
+ conn = cursor.connection
238
+ original_autocommit = None
239
+
240
+ try:
241
+ # Save original autocommit state and ensure it's disabled for transactions
242
+ if hasattr(conn, 'autocommit'):
243
+ original_autocommit = conn.autocommit
244
+ if original_autocommit:
245
+ conn.autocommit = False
246
+
247
+ # Execute the SQL to validate syntax and logic
248
+ cursor.execute(validation_sql)
249
+
250
+ # Get affected rows count for logging
251
+ affected_rows = cursor.rowcount if hasattr(cursor, 'rowcount') else 0
252
+
253
+ # IMPORTANT: Rollback to undo any changes - this is validation only!
254
+ # But only if the connection supports rollback (PostgreSQL, MySQL, etc.)
255
+ if hasattr(conn, 'rollback'):
256
+ conn.rollback()
257
+ rollback_supported = True
258
+ rollback_status = "(rolled back)"
259
+ else:
260
+ # BigQuery and some other databases don't support rollback
261
+ # The DDL/DML will be executed and committed automatically
262
+ rollback_supported = False
263
+ rollback_status = "(auto-committed - no rollback support)"
264
+
265
+ # Restore original autocommit state if we changed it
266
+ if original_autocommit is not None and original_autocommit:
267
+ conn.autocommit = original_autocommit
268
+
269
+ logging.info(f"validate_sql - DDL/DML validated successfully {rollback_status}, would affect {affected_rows} rows")
270
+
271
+ except Exception as e:
272
+ # If there's an error, rollback anyway to be safe (if supported)
273
+ try:
274
+ if hasattr(conn, 'rollback'):
275
+ conn.rollback()
276
+ # Restore original autocommit state if we changed it
277
+ if original_autocommit is not None and original_autocommit:
278
+ conn.autocommit = original_autocommit
279
+ except:
280
+ pass # Ignore rollback errors if connection is broken
281
+ raise e # Re-raise the original validation error
282
+
283
+ columns = []
284
+ json_data = []
285
+
286
+ # Add warning message if rollback is not supported
287
+ warning_message = None
288
+ if not rollback_supported and not is_select_query:
289
+ warning_message = "WARNING: Database does not support rollback. DDL/DML changes were permanently applied to the database during validation."
290
+
291
+ return SqlValidationResult(
292
+ is_valid=True,
293
+ compiled_sql=sql,
294
+ columns=columns,
295
+ data=json_data if limit > 0 else [],
296
+ error_message=warning_message,
297
+ error_code=None,
298
+ error_traceback=None,
299
+ )
300
+
301
+ except Exception as e:
302
+ # SQL validation failed - capture error details
303
+ error_message = str(e)
304
+ error_traceback = traceback.format_exc()
305
+
306
+ logging.error(f"validate_sql - failed: {error_message}")
307
+ logging.error(f"validate_sql - traceback: {error_traceback}")
308
+
309
+ return SqlValidationResult(
310
+ is_valid=False,
311
+ compiled_sql=sql,
312
+ columns=[],
313
+ data=[],
314
+ error_message=error_message,
315
+ error_code=getattr(e, 'code', 'VALIDATION_ERROR'),
316
+ error_traceback=error_traceback,
317
+ )
318
+
319
+ def _fetch_many_return_type(self, ds: DataSourceWrapper, sql: str, limit: int) -> tuple[Schema, list[tuple]]:
320
+ cursor_options = {"commit_on_close": False}
321
+ connector: DBAPIConnector = ds.connector
322
+ with connector.cursor(**cursor_options) as cursor:
323
+ cursor.execute(sql)
324
+
325
+ # Postgres use server side cursor, need fetch first to get cursor.description
326
+ first_row = None
327
+ if connector.is_postgres() or connector.is_redshift():
328
+ limit = max(0, limit - 1)
329
+ first_row = cursor.fetchone()
330
+
331
+ schema = self._extract_column_info_from_cursor(ds.recurve_connector, cursor)
332
+ rv = cursor.fetchmany(limit)
333
+ if first_row:
334
+ rv = [first_row] + rv
335
+ if connector.is_google_bigquery():
336
+ # row is google.cloud.bigquery.table.Row type
337
+ rv = [row.values() for row in rv]
338
+ if connector.is_mssql():
339
+ rv = [tuple(row) for row in rv]
340
+
341
+ return schema, rv
342
+
343
+ @staticmethod
344
+ def _extract_column_info_from_cursor(recurve_con: RecurveConnectorBase, cursor) -> Schema:
345
+ schema = Schema()
346
+ for item in cursor.description:
347
+ name = item[0]
348
+ if "." in name:
349
+ name = name.split(".")[1]
350
+
351
+ type_code = item[1]
352
+ size = item[3]
353
+ ttype = recurve_con.sqlalchemy_column_type_code_to_name(type_code, size)
354
+ schema.add_field_by_attrs(name, ttype, size)
355
+ return schema
356
+
357
+ def _jsonable_value(self, value):
358
+ if value is None:
359
+ return value
360
+ elif isinstance(value, (int, float, Decimal)):
361
+ return str(value)
362
+ elif isinstance(value, bool):
363
+ return value
364
+ elif isinstance(value, dict):
365
+ return {k: self._jsonable_value(v) for k, v in value.items()}
366
+ elif isinstance(value, (list, tuple, set)):
367
+ return [self._jsonable_value(v) for v in value]
368
+ elif isinstance(value, (datetime.datetime, datetime.date)):
369
+ return value.isoformat()
370
+ else:
371
+ return str(value)
372
+
373
+ @staticmethod
374
+ def fetch_total(ds: DataSourceWrapper, sql: str) -> int:
375
+ recurve_con: RecurveConnectorBase = ds.recurve_connector
376
+ count_sql = recurve_con.count_sql(sql)
377
+ connector: DBAPIConnector = ds.connector
378
+ with connector.cursor() as cursor:
379
+ cursor.execute(count_sql)
380
+ return cursor.fetchone()[0]