recurvedata-lib 0.1.487 (recurvedata_lib-0.1.487-py2.py3-none-any.whl)

Files changed (333)
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
recurvedata/server/data_service/api.py
@@ -0,0 +1,126 @@
+ from fastapi import APIRouter
+
+ from recurvedata.core.tracing import Tracing
+ from recurvedata.dbt.schemas import PreviewResponseWithError
+ from recurvedata.executors.schemas import ResponseModel
+ from recurvedata.executors.utils import run_with_result_handling_v2
+ from recurvedata.server.data_service.schemas import (
+     DownloadPayload,
+     DownloadResponseWithError,
+     FetchCountPayload,
+     PreviewPayload,
+     PreviewTotalResponseWithError,
+     SqlValidationPayload,
+     SqlValidationResponseWithError,
+ )
+ from recurvedata.server.data_service.service import DataServiceService
+ from recurvedata.utils.sql import trim_replace_special_character
+
+ tracer = Tracing()
+ router = APIRouter()
+
+
+ @router.post("/preview")
+ @tracer.create_span(sampling_rate=0.1)
+ async def preview(*, payload: PreviewPayload) -> PreviewResponseWithError:
+     sql = trim_replace_special_character(payload.sql, strip_sufix=True)
+
+     service = DataServiceService(
+         project_id=payload.project_id,
+         project_connection_id=payload.project_connection_id,
+     )
+     res: ResponseModel = await run_with_result_handling_v2(
+         service.preview,
+         sql=sql,
+         limit=payload.limit,
+         no_data=payload.no_data,
+         orders=payload.orders,
+     )
+
+     return PreviewResponseWithError.model_validate(res.model_dump())
+
+
+ @router.post("/download")
+ @tracer.create_span(sampling_rate=0.1)
+ async def download(*, payload: DownloadPayload) -> DownloadResponseWithError:
+     sql = trim_replace_special_character(payload.sql, strip_sufix=True)
+
+     service = DataServiceService(
+         project_id=payload.project_id,
+         project_connection_id=payload.project_connection_id,
+     )
+     res: ResponseModel = await run_with_result_handling_v2(
+         service.download,
+         storage_type=payload.storage_type,
+         storage_options=payload.storage_options,
+         sql=sql,
+         orders=payload.orders,
+         fields=payload.fields,
+         file_type=payload.file_type,
+         file_name=payload.file_name,
+         tenant_id=payload.tenant_id,
+         user_id=payload.user_id,
+         project_id=payload.project_id,
+     )
+
+     return DownloadResponseWithError.model_validate(res.model_dump())
+
+
+ @router.post("/preview-total")
+ @tracer.create_span(sampling_rate=0.1)
+ async def preview_total(*, payload: PreviewPayload) -> PreviewTotalResponseWithError:
+     sql = trim_replace_special_character(payload.sql, strip_sufix=True)
+
+     service = DataServiceService(
+         project_id=payload.project_id,
+         project_connection_id=payload.project_connection_id,
+     )
+     res = await run_with_result_handling_v2(
+         service.preview_total,
+         sql=sql,
+         limit=payload.limit,
+         no_data=payload.no_data,
+         orders=payload.orders,
+         offset=payload.offset,
+     )
+
+     return PreviewTotalResponseWithError.model_validate(res.model_dump())
+
+
+ @router.post("/fetch-count")
+ @tracer.create_span(sampling_rate=0.1)
+ async def fetch_count(*, payload: FetchCountPayload) -> dict:
+     sql = trim_replace_special_character(payload.sql, strip_sufix=True)
+
+     service = DataServiceService(
+         project_id=payload.project_id,
+         project_connection_id=payload.project_connection_id,
+     )
+     res: ResponseModel = await run_with_result_handling_v2(
+         service.fetch_count,
+         sql=sql,
+     )
+
+     return res.model_dump()
+
+
+ @router.post("/validate-sql")
+ @tracer.create_span(sampling_rate=0.1)
+ async def validate_sql(*, payload: SqlValidationPayload) -> SqlValidationResponseWithError:
+     """
+     Validate SQL by executing it and checking for syntax/runtime errors.
+     Supports any SQL statement type including DDL, DML, and SELECT.
+     """
+     sql = trim_replace_special_character(payload.sql, strip_sufix=True)
+
+     service = DataServiceService(
+         project_id=payload.project_id,
+         project_connection_id=payload.project_connection_id,
+     )
+     res: ResponseModel = await run_with_result_handling_v2(
+         service.validate_sql,
+         sql=sql,
+         limit=payload.limit,
+     )
+
+     return SqlValidationResponseWithError.model_validate(res.model_dump())
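For orientation, a hypothetical request against the preview endpoint above could look like the sketch below. The host, port, and mount prefix are assumptions (the router registration lives in recurvedata/server/main.py, not shown in this hunk); only the payload fields come from PreviewPayload later in this diff.

import requests

# Sketch only: base URL and route prefix are assumed, not taken from this package.
payload = {
    "sql": "select 1 as one",       # PreviewPayload.sql
    "project_id": 1,                # PreviewPayload.project_id
    "project_connection_id": 2,     # PreviewPayload.project_connection_id
    "limit": 100,                   # PreviewPayload.limit
}
resp = requests.post("http://localhost:8000/data-service/preview", json=payload)
print(resp.json())  # body shaped like PreviewResponseWithError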
recurvedata/server/data_service/client.py
@@ -0,0 +1,18 @@
+ from recurvedata.client.client import Client
+ from recurvedata.server.schemas import ConnectionAndVariables
+ from recurvedata.utils.helpers import get_env_id
+
+
+ class DataServiceClient(Client):
+     def get_connection_and_variables(self, project_id: int, project_connection_id: int) -> ConnectionAndVariables:
+         params = {
+             "env_id": get_env_id(),
+             "project_id": project_id,
+             "project_connection_id": project_connection_id,
+         }
+         return self.request(
+             "GET",
+             path="/api/data-service/connection-and-variable",
+             response_model_class=ConnectionAndVariables,
+             params=params,
+         )
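A minimal usage sketch of this client, assuming the base Client picks up its server URL and credentials from the package configuration (not shown in this diff); the ids are placeholders.

from recurvedata.server.data_service.client import DataServiceClient

# Sketch: relies on whatever base-URL/auth configuration the base Client normally uses.
client = DataServiceClient()
item = client.get_connection_and_variables(project_id=1, project_connection_id=2)
print(item.connection.type)  # connection metadata later consumed by DataServiceService
print(item.variables)        # project variables fed into the template renderer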
recurvedata/server/data_service/consts.py
@@ -0,0 +1 @@
+ FIELD_TYPE_MAP = {"varchar": str, "string": str}
recurvedata/server/data_service/schemas.py
@@ -0,0 +1,68 @@
+ from typing import Any, Literal
+
+ from pydantic import BaseModel, Field
+
+ from recurvedata.executors.schemas import Pagination, ResponseModel
+ from recurvedata.filestorage import StorageType
+
+
+ class PreviewPayload(BaseModel):
+     sql: str
+     project_id: int
+     project_connection_id: int
+     limit: int
+     no_data: bool = False
+     orders: list[dict[str, str]] | None = None
+     offset: int = 0
+
+
+ class DownloadPayload(BaseModel):
+     sql: str
+     project_id: int
+     project_connection_id: int
+     orders: list[dict[str, str]] | None = None
+     fields: list[dict] | None = None
+     file_name: str
+     file_type: Literal["csv", "xlsx"]
+     storage_type: StorageType
+     storage_options: dict[str, Any]
+     tenant_id: int
+     user_id: int
+
+
+ class DownloadResult(BaseModel):
+     file_name: str
+
+
+ class DownloadResponseWithError(ResponseModel):
+     data: DownloadResult | None
+
+
+ class PreviewTotalResponseWithError(ResponseModel):
+     data: Pagination[dict[str, Any]] | None
+
+
+ class FetchCountPayload(BaseModel):
+     sql: str
+     project_id: int
+     project_connection_id: int
+
+ class SqlValidationResult(BaseModel):
+     """Result of SQL validation including any type of SQL statement"""
+     is_valid: bool
+     compiled_sql: str
+     columns: list[dict] = Field(default_factory=list)  # Column info if applicable
+     data: list[list] = Field(default_factory=list)  # Data if applicable and requested
+     error_message: str | None = None
+     error_code: str | None = None
+     error_traceback: str | None = None
+
+ class SqlValidationResponseWithError(ResponseModel):
+     data: SqlValidationResult | None
+
+ class SqlValidationPayload(BaseModel):
+     sql: str
+     project_id: int
+     project_connection_id: int
+     limit: int = 0  # Default to 0 to avoid large data returns
+
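As a quick illustration of the request shapes defined above, a PreviewPayload can be built directly (a minimal sketch; values are placeholders):

from recurvedata.server.data_service.schemas import PreviewPayload

payload = PreviewPayload(
    sql="select id, name from users",
    project_id=1,
    project_connection_id=2,
    limit=100,
)
# no_data, orders and offset fall back to their defaults (False, None, 0)
print(payload.model_dump())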
recurvedata/server/data_service/service.py
@@ -0,0 +1,218 @@
+ import os
+ import tempfile
+ from dataclasses import dataclass
+ from functools import cached_property
+ from typing import Any, Literal
+
+ import pandas as pd
+ from loguru import logger
+
+ from recurvedata.config import SERVER_RESULT_STAGING_PATH
+ from recurvedata.connectors.service import get_datasource_by_config
+ from recurvedata.core.templating import Renderer
+ from recurvedata.dbt.schemas import PreviewResult
+ from recurvedata.exceptions import ERR, WrapRecurveException, wrap_error
+ from recurvedata.executors.cli.connector import ConnectionService
+ from recurvedata.executors.executor import Executor
+ from recurvedata.executors.schemas import Pagination
+ from recurvedata.filestorage import StorageType
+ from recurvedata.filestorage import factory as filestorage_factory
+ from recurvedata.pigeon.dumper import new_to_csv_dumper
+ from recurvedata.server.data_service.client import DataServiceClient
+ from recurvedata.server.data_service.consts import FIELD_TYPE_MAP
+ from recurvedata.server.data_service.schemas import DownloadResult, SqlValidationResult
+ from recurvedata.utils.date_time import now
+
+
+ @dataclass
+ class DataServiceService:
+     project_id: int
+     project_connection_id: int
+     variables: dict = None
+
+     @cached_property
+     def client(self):
+         return DataServiceClient()
+
+     def prepare_variables(self, variables: dict | None) -> dict:
+         logger.info("start process variables")
+         execution_date, schedule_interval = now(), "@daily"
+         processed_variables = Executor.process_variables(variables or {}, {}, execution_date, schedule_interval)
+         result_variables = Renderer().init_context(execution_date, schedule_interval)
+         result_variables.update(processed_variables)
+         return result_variables
+
+     @wrap_error(ERR.DP_FETCH_CONNECTION_FAILED)
+     def fetch_connection_and_variables(self):
+         logger.info("start fetch connection and variables")
+         item = self.client.get_connection_and_variables(self.project_id, self.project_connection_id)
+         con_item = item.connection
+         logger.info("after fetch connection and variables")
+         self.ds = get_datasource_by_config(
+             con_item.type, config=con_item.data, database=con_item.database, schema=con_item.database_schema
+         )
+         self.variables = self.prepare_variables(item.variables)
+
+     def preview(
+         self, sql: str, limit: int, no_data: bool = False, orders: list[dict[str, str]] | None = None, offset: int = 0
+     ) -> PreviewResult:
+         self.fetch_connection_and_variables()
+         rendered_sql = Renderer().render_template(sql, self.variables)
+         if no_data:
+             limit = 0
+
+         con_service = ConnectionService()
+         try:
+             result = con_service.preview_sql(self.ds, rendered_sql, limit, orders=orders, offset=offset)
+             columns = result.columns
+             column_names = set()
+             for col in columns:
+                 if col.name in column_names:
+                     raise ValueError(f"duplicate column name: {col.name}, please check your sql query")
+                 column_names.add(col.name)
+
+             return result
+
+         except Exception as e:
+             logger.error(f"Failed to preview data: {e}")
+             raise WrapRecurveException(ERR.PREVIEW_DATA_FAILED, e)
+
+     async def download(
+         self,
+         *,
+         storage_type: StorageType,
+         storage_options: dict[str, Any],
+         file_name: str,
+         sql: str,
+         tenant_id: int,
+         project_id: int,
+         user_id: int,
+         orders: list[dict[str, str]] | None = None,
+         file_type: Literal["csv", "xlsx"] = "csv",
+         fields: list[dict] | None = None,
+     ) -> PreviewResult:
+         self.fetch_connection_and_variables()
+         rendered_sql = Renderer().render_template(sql, self.variables)
+
+         connection = self.ds.data.copy()
+         connection.pop("database", None)
+         recurve_con = self.ds.recurve_connector
+         ordered_sql = recurve_con.order_sql(rendered_sql, orders=orders)
+
+         result_file_name = tempfile.mktemp(dir=SERVER_RESULT_STAGING_PATH)
+         os.makedirs(os.path.dirname(result_file_name), exist_ok=True)
+         open(result_file_name, "w").close()
+         logger.info(f"result_file_name: {result_file_name}")
+
+         logger.info("start dump data")
+         dumper = new_to_csv_dumper(
+             dbtype=self.ds.ds_type,
+             connection=connection,
+             database=self.ds.database,
+             connector=self.ds.connector,
+             sql=ordered_sql,
+             filename=result_file_name,
+             write_header=True,
+             merge_files=True,
+         )
+
+         dumper.execute()
+
+         logger.info(f"result_file_name size: {os.path.getsize(result_file_name)}")
+
+         if os.path.getsize(result_file_name) == 0:
+             return DownloadResult(file_name="")
+
+         dtype = {}
+         rename_dict = None
+
+         if fields:
+             for field in fields:
+                 field_type = FIELD_TYPE_MAP.get(field.get("field_type"))
+                 if field_type:
+                     dtype[field["name"]] = field_type
+
+             _rename_dict = {field["name"]: field["alias"] or field["name"] for field in fields}
+             if any([k != v for k, v in _rename_dict.items()]):
+                 rename_dict = _rename_dict
+
+         if file_type == "xlsx" or rename_dict:
+             df = pd.read_csv(result_file_name, dtype=dtype)
+
+             if rename_dict:
+                 df.rename(columns=lambda x: rename_dict.get(x, x), inplace=True)
+
+             if file_type == "xlsx":
+                 if not result_file_name.endswith(".xlsx"):
+                     result_file_name += ".xlsx"
+                 df.to_excel(result_file_name, index=False)
+             else:
+                 df.to_csv(result_file_name, index=False)
+
+         if not file_name.endswith(file_type):
+             file_name += f".{file_type}"
+
+         file_name = f"{tenant_id}/{project_id}/{user_id}/{file_name}"
+
+         logger.info("start upload file")
+         storage = filestorage_factory.create(storage_type, storage_options)
+         with open(result_file_name, "rb") as f:
+             file_content = f.read()
+             await storage.write_bytes(file_name, file_content)
+         logger.info("upload file success")
+
+         return DownloadResult(file_name=file_name)
+
+     def preview_total(
+         self,
+         sql: str,
+         limit: int,
+         no_data: bool = False,
+         orders: list[dict[str, str]] | None = None,
+         offset: int = 0,
+     ) -> Pagination[dict[str, Any]]:
+         preview_result = self.preview(sql, limit, no_data, orders, offset)
+
+         items = []
+         for row in preview_result.data:
+             row_data = {}
+             for col, value in zip(preview_result.columns, row):
+                 row_data[col.name] = {
+                     "type": col.type,
+                     "name": col.name,
+                     "normalized_type": col.normalized_type,
+                     "value": value,
+                 }
+             items.append(row_data)
+
+         con_service = ConnectionService()
+         total = con_service.fetch_total(self.ds, sql)
+
+         return Pagination[dict[str, Any]](
+             items=items,
+             total=total,
+         )
+
+     def fetch_count(self, sql: str) -> int:
+         self.fetch_connection_and_variables()
+         rendered_sql = Renderer().render_template(sql, self.variables)
+         con_service = ConnectionService()
+         return con_service.fetch_total(self.ds, rendered_sql)
+
+     def validate_sql(self, sql: str, limit: int = 0) -> SqlValidationResult:
+         """
+         Validate SQL by executing it and checking for syntax/runtime errors.
+         Supports any SQL statement type including DDL, DML, and SELECT.
+
+         Args:
+             sql: SQL statement(s) to validate
+             limit: Maximum rows to return (0 = no data returned, just validation)
+
+         Returns:
+             SqlValidationResult with validation status and error details
+         """
+
+         self.fetch_connection_and_variables()
+         rendered_sql = Renderer().render_template(sql, self.variables)
+         con_service = ConnectionService()
+         return con_service.validate_sql(self.ds, rendered_sql, limit=limit)
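A minimal usage sketch of the service above; it assumes a reachable control-plane API behind DataServiceClient and valid project/connection ids, since fetch_connection_and_variables resolves the datasource lazily on each call.

from recurvedata.server.data_service.service import DataServiceService

service = DataServiceService(project_id=1, project_connection_id=2)

# preview() renders the SQL with project variables, runs it, and returns a PreviewResult
result = service.preview(sql="select 1 as one", limit=10)
print(result.columns)  # column metadata
print(result.data)     # up to `limit` rows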
recurvedata/server/dbt/__init__.py
File without changes
recurvedata/server/dbt/api.py
@@ -0,0 +1,116 @@
+ import json
+ import tempfile
+
+ from fastapi import APIRouter
+ from loguru import logger
+
+ from recurvedata.config import SERVER_RESULT_STAGING_PATH
+ from recurvedata.core.tracing import Tracing
+ from recurvedata.dbt.schemas import (
+     BuildPayload,
+     BuildResponseWithError,
+     CompilePayload,
+     CompileResponseWithError,
+     CompileResult,
+     PreviewPayload,
+     PreviewResponseWithError,
+ )
+ from recurvedata.dbt.service import DbtService
+ from recurvedata.dbt.utils import format_var
+ from recurvedata.executors.utils import run_with_result_handling
+ from recurvedata.utils.sql import trim_replace_special_character
+
+ tracer = Tracing()
+ router = APIRouter()
+
+
+ @router.post("/compile")
+ @tracer.create_span(sampling_rate=0.1, context_payload_name="payload")
+ def compile(*, payload: CompilePayload) -> CompileResponseWithError:
+     sql = trim_replace_special_character(payload.sql)
+     service = DbtService(
+         project_id=payload.project_id,
+         project_connection_name=payload.alias,
+         force_regenerate_dir=payload.force_regenerate_dir,
+         need_fetch_variable=True,
+     )
+
+     result_file_name = tempfile.mktemp(dir=SERVER_RESULT_STAGING_PATH)
+     logger.info(f"compile result_file_name: {result_file_name}, payload: {payload}")
+
+     run_with_result_handling(
+         service.compile, inline_sql=sql, result_filename=result_file_name, validate_sql=payload.validate_sql
+     )
+     with open(result_file_name, "r") as temp_file:
+         data: dict = json.load(temp_file)
+
+     logger.info(f"finish compile {result_file_name}")
+
+     return CompileResponseWithError.model_validate(data)
+
+
+ @router.post("/preview")
+ @tracer.create_span(sampling_rate=0.1, context_payload_name="payload")
+ async def preview(*, payload: PreviewPayload) -> PreviewResponseWithError:
+     sql = trim_replace_special_character(payload.sql, strip_sufix=True)
+     service = DbtService(
+         project_id=payload.project_id,
+         project_connection_name=payload.alias,
+         force_regenerate_dir=payload.force_regenerate_dir,
+         need_fetch_variable=True,
+     )
+
+     result_file_name = tempfile.mktemp(dir=SERVER_RESULT_STAGING_PATH)
+     logger.info(f"preview result_file_name: {result_file_name}, payload: {payload}")
+
+     run_with_result_handling(
+         service.preview,
+         inline_sql=sql,
+         limit=payload.limit,
+         result_filename=result_file_name,
+         no_data=payload.no_data,
+         is_compiled=payload.is_compiled,
+     )
+     with open(result_file_name, "r") as temp_file:
+         data: dict = json.load(temp_file)
+
+     logger.info(f"finish preview {result_file_name}")
+
+     return PreviewResponseWithError.model_validate(data)
+
+
+ @router.post("/build")
+ @tracer.create_span(sampling_rate=0.1, context_payload_name="payload")
+ def build(*, payload: BuildPayload) -> BuildResponseWithError:
+     service = DbtService(
+         project_id=payload.project_id,
+         project_connection_name=payload.alias,
+         force_regenerate_dir=payload.force_regenerate_dir,
+         need_fetch_variable=True,
+     )
+     service.prepare()
+
+     logger.info(f"build data model {payload.model_name}")
+
+     var_str = format_var(service, service.variables | (payload.variables or {}))
+
+     def run_model(model_name: str, dbt_vars: str = None, full_refresh: bool = False):
+         compiled_code, _ = service._run_model(
+             model_name=model_name,
+             dbt_vars=dbt_vars,
+             full_refresh=full_refresh,
+         )
+         return CompileResult(compiled_sql=compiled_code)
+
+     result_file_name = tempfile.mktemp(dir=SERVER_RESULT_STAGING_PATH)
+     run_with_result_handling(
+         run_model,
+         model_name=payload.model_name,
+         dbt_vars=var_str,
+         full_refresh=payload.full_refresh,
+         result_filename=result_file_name,
+     )
+     with open(result_file_name, "r") as temp_file:
+         data: dict = json.load(temp_file)
+     logger.info(f"finished build model {payload.model_name}")
+     return BuildResponseWithError.model_validate(data)
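For reference, a hypothetical call to the /compile endpoint might look like the sketch below; the base URL and mount prefix are assumptions, and the payload fields mirror what the handler reads from CompilePayload (sql, project_id, alias, force_regenerate_dir, validate_sql).

import requests

# Sketch only: host and route prefix are assumed, not taken from this package.
payload = {
    "sql": "select * from {{ ref('stg_orders') }}",
    "project_id": 1,
    "alias": "warehouse",          # project connection name, placeholder value
    "force_regenerate_dir": False,
    "validate_sql": True,
}
resp = requests.post("http://localhost:8000/dbt/compile", json=payload)
print(resp.json())  # body shaped like CompileResponseWithError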
recurvedata/server/error_code.py
@@ -0,0 +1,49 @@
+ from enum import Enum, EnumMeta
+
+
+ class ErrorCodeMeta(EnumMeta):
+     _error_codes = set()
+
+     def __new__(metacls, clsname, bases, classdict):
+         enum_members = {k: v for k, v in classdict.items() if not k.startswith("_")}
+         for name, code in enum_members.items():
+             if type(code) is not tuple:
+                 continue
+             for error_code in metacls._error_codes:
+                 if code[0] == error_code[1][0]:
+                     raise ValueError(f"Error code {code[0]} in {clsname} already exists globally")
+             metacls._error_codes.add((name, code))
+         return super().__new__(metacls, clsname, bases, classdict)
+
+     @classmethod
+     def error_codes(cls):
+         return sorted(list(cls._error_codes), key=lambda x: x[1][0])
+
+
+ class BaseErrorCode(Enum, metaclass=ErrorCodeMeta):
+     @property
+     def code(self):
+         return self.value[0]
+
+     @property
+     def message(self):
+         return self.value[1]
+
+     def to_dict(self):
+         return {"code": self.code, "msg": self.message}
+
+     def exception(self, data: dict | None = None, status_code: int | None = None):
+         from recurvedata.server.exceptions import RecurveException
+
+         return RecurveException(self, data, status_code)
+
+
+ class ErrorCode(BaseErrorCode):
+     # 00: General
+     INTERNAL_SERVER_ERROR = ("B0001", "Internal Server Error")
+     NOT_IMPLEMENTED = ("B0002", "Not Implemented")
+
+     UNKNOWN_ERROR = ("D0001", "Unknown Error")
+
+
+ ERR = ErrorCode  # shortcut
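Because ErrorCodeMeta keeps a process-wide registry of codes, defining a second enum that reuses an existing code string fails at class-definition time. A small sketch of that behavior (the MyErrors enum is hypothetical):

from recurvedata.server.error_code import BaseErrorCode

try:
    class MyErrors(BaseErrorCode):
        # "B0001" is already claimed by ErrorCode.INTERNAL_SERVER_ERROR above
        DUPLICATE = ("B0001", "Reused code")
except ValueError as exc:
    print(exc)  # Error code B0001 in MyErrors already exists globally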
recurvedata/server/exceptions.py
@@ -0,0 +1,19 @@
+ from recurvedata.server.error_code import ERR
+
+
+ class RecurveException(Exception):
+     _default_code: ERR = ERR.UNKNOWN_ERROR
+     _default_status_code: int = 200
+
+     def __init__(self, code: ERR = None, data: dict | str = None, status_code: int = None):
+         self.code = code or self._default_code
+         self.data = data
+         self.status_code = status_code or self._default_status_code
+
+     def to_dict(self) -> dict:
+         return self.code.to_dict() | {"data": self.data}
+
+
+ class InternalServerError(RecurveException):
+     _default_code = ERR.INTERNAL_SERVER_ERROR
+     _default_status_code = 500
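Putting the two modules together, an error code can be turned into an exception and serialized into a response body. A minimal sketch:

from recurvedata.server.error_code import ERR

exc = ERR.NOT_IMPLEMENTED.exception(data={"feature": "preview"})
print(exc.to_dict())    # {'code': 'B0002', 'msg': 'Not Implemented', 'data': {'feature': 'preview'}}
print(exc.status_code)  # 200 unless an explicit status_code is passed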
recurvedata/server/executor/__init__.py
File without changes
recurvedata/server/executor/api.py
@@ -0,0 +1,37 @@
+ from fastapi import APIRouter
+ from loguru import logger
+
+ from recurvedata.core.tracing import Tracing
+ from recurvedata.executors.schemas import ResponseModel
+ from recurvedata.executors.utils import run_with_result_handling_v2
+ from recurvedata.server.executor.schemas import (
+     ValidatePythonScriptPayload,
+     ValidatePythonScriptResponse,
+ )
+ from recurvedata.server.executor.service import ExecutorService
+
+ tracer = Tracing()
+ router = APIRouter()
+
+
+ @router.post("/validate-python-script")
+ @tracer.create_span(sampling_rate=0.1)
+ async def validate_python_script(*, payload: ValidatePythonScriptPayload) -> ValidatePythonScriptResponse:
+     """
+     Validate Python script by executing it in the configured Python environment.
+
+     This endpoint runs the Python code using the same infrastructure as PythonOperator,
+     including proper environment setup and requirements installation.
+     """
+     logger.info(f"validate_python_script: project_id={payload.project_id}, python_env={payload.python_env}")
+
+     res: ResponseModel = await run_with_result_handling_v2(
+         ExecutorService.validate_python_script,
+         payload.timeout,
+         payload.project_id,
+         payload.python_code,
+         payload.python_env,
+     )
+
+     logger.info("finish validate_python_script")
+     return ValidatePythonScriptResponse.model_validate(res.model_dump())
recurvedata/server/executor/schemas.py
@@ -0,0 +1,30 @@
+ from typing import Optional
+
+ from pydantic import BaseModel, Field
+
+ from recurvedata.executors.schemas import ResponseModel
+
+
+ class ValidatePythonScriptPayload(BaseModel):
+     """Payload for Python script validation request"""
+
+     project_id: int = Field(..., description="Project ID")
+     python_code: str = Field(..., description="Python code to validate")
+     python_env: str = Field(..., description="Python environment connection name")
+     timeout: int = Field(default=30, description="Validation timeout in seconds")
+
+
+ class PythonScriptValidationResult(BaseModel):
+     """Result of Python script validation"""
+
+     valid: bool = Field(..., description="Whether the script is valid")
+     message: str = Field(default="", description="Validation message")
+     error: Optional[dict] = Field(default=None, description="Error details if validation failed")
+     execution_time_ms: Optional[float] = Field(default=None, description="Execution time in milliseconds")
+     installed_requirements: Optional[str] = Field(default=None, description="Installed requirements")
+
+
+ class ValidatePythonScriptResponse(ResponseModel):
+     """Response for Python script validation"""
+
+     data: Optional[PythonScriptValidationResult] = None
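A minimal sketch of the request body the /validate-python-script endpoint expects, built from the payload model above (the python_env value is a placeholder connection name):

from recurvedata.server.executor.schemas import ValidatePythonScriptPayload

payload = ValidatePythonScriptPayload(
    project_id=1,
    python_code="print('hello from validation')",
    python_env="python-default",  # placeholder connection name
)
# timeout defaults to 30 seconds
print(payload.model_dump())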