recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333):
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,220 @@
1
+ import logging
2
+ import os
3
+ import time
4
+ import traceback
5
+ from typing import Dict, Any
6
+ from tempfile import NamedTemporaryFile
7
+
8
+ from recurvedata.executors.client import ExecutorClient
9
+ from recurvedata.operators.config import CONF
10
+ from recurvedata.config import RECURVE_EXECUTOR_PYENV_NAME, PY_PACKAGES_PATH
11
+ from recurvedata.operators.python_operator.operator import PythonTask, PythonRequirementsMixin
12
+ from recurvedata.server.executor.schemas import PythonScriptValidationResult
13
+ from recurvedata.utils.mp import robust_run_subprocess
14
+
15
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Python version used when a connection config does not provide one.
# Overridable through the RECURVE_OPERATOR_PYTHON_DEFAULT_VERSION env variable.
DEFAULT_PY_VERSION = os.getenv("RECURVE_OPERATOR_PYTHON_DEFAULT_VERSION", "3.11.9")
18
+
19
+
20
class ExecutorService:
    """Service for executing Python code validation using the same infrastructure as PythonOperator.

    Every method is a static method; the class only serves as a namespace.
    """

    @staticmethod
    def validate_python_script(
        project_id: int,
        python_code: str,
        python_env_conn_name: str,
    ) -> PythonScriptValidationResult:
        """
        Validate a Python script by executing it in the specified Python environment.

        The environment configuration is resolved first, then the virtualenv and
        its requirements are prepared, and finally the code is run in a subprocess.
        Any exception raised along the way is caught and reported in the result
        instead of being propagated.

        Args:
            project_id: Project ID for context
            python_code: Python code to validate
            python_env_conn_name: Python environment from project connection name

        Returns:
            PythonScriptValidationResult with ``valid=True`` on success, or
            ``valid=False`` plus an ``error`` dict (type, message, traceback) on
            failure. ``execution_time_ms`` and ``installed_requirements`` are set
            in both cases.
        """
        start_time = time.time()
        # Kept outside the try block so the failure result can still report it.
        requirements = ""

        try:
            logger.info(f"Starting Python script validation for project: {project_id} with python env conn name: {python_env_conn_name}")

            # Get connection configuration from gateway executor service API by project connection name
            conn_config = ExecutorService._get_python_env_config(python_env_conn_name, project_id)

            pyenv_name: str = conn_config["pyenv"]
            py_version: str = conn_config["python_version"]
            requirements = conn_config["requirements"]

            logger.info(f"Preparing Python environment: {pyenv_name} with version {py_version} and requirements: {requirements}")

            # Reuse the existing static method from PythonTask to set up the virtualenv
            PythonTask._install_virtualenv(py_version, pyenv_name)
            if requirements:
                # Use custom requirements installation to capture error output
                ExecutorService._install_requirements_with_output_capture(requirements, pyenv_name)

            # Execute the Python code
            ExecutorService._execute_python_code(python_code, conn_config, project_id)

            execution_time_ms = (time.time() - start_time) * 1000

            logger.info(f"Python script validation completed successfully in {execution_time_ms:.2f}ms")

            return PythonScriptValidationResult(
                valid=True,
                message="Python script executed successfully",
                execution_time_ms=execution_time_ms,
                installed_requirements=requirements,
            )

        except Exception as e:
            # Top-level boundary: every failure is converted into a result object.
            execution_time_ms = (time.time() - start_time) * 1000
            error_details = {
                "type": type(e).__name__,
                "message": str(e),
                "traceback": traceback.format_exc(),
            }

            logger.error(f"Python script validation failed: {error_details}")

            return PythonScriptValidationResult(
                valid=False,
                message=f"Python script validation failed: {str(e)}",
                error=error_details,
                execution_time_ms=execution_time_ms,
                installed_requirements=requirements,
            )

    @staticmethod
    def _get_python_env_config(python_env_project_conn_name: str, project_id: int) -> Dict[str, Any]:
        """
        Get Python environment configuration using ExecutorClient.

        Args:
            python_env_project_conn_name: Project connection name of the Python environment
            project_id: Project the connection belongs to

        Returns:
            Dict with the keys ``pyenv``, ``python_version`` and ``requirements``
            (a newline-separated string). Missing or empty ``pyenv`` /
            ``python_version`` values are replaced by the defaults, and the full
            default configuration is returned when the lookup fails or yields
            nothing usable. This method never raises.
        """
        try:
            logger.info(f"Fetching Python environment configuration for: {python_env_project_conn_name} in project: {project_id}")

            # Create ExecutorClient to fetch connection configuration
            executor_client = ExecutorClient()

            # Use the get_py_conn_configs method to fetch configuration
            py_conn_config = executor_client.get_py_conn_configs(
                conn_type="python",
                pyenv_name=RECURVE_EXECUTOR_PYENV_NAME,
                project_conn_name=python_env_project_conn_name,
                project_id=project_id,
            )

            # NOTE(review): the whole config is logged; confirm it cannot contain secrets.
            logger.info(f"Retrieved Python connection config: {py_conn_config}")

            if py_conn_config and isinstance(py_conn_config, dict):
                requirements_data = py_conn_config.get("requirements", "")

                # Handle requirements - could be a list or string
                if isinstance(requirements_data, list):
                    # str() keeps a stray non-string item from raising in join(),
                    # which would otherwise discard the whole configuration.
                    requirements_str = "\n".join(str(item) for item in requirements_data)
                else:
                    requirements_str = requirements_data or ""

                config = {
                    # `or` also covers keys that are present but null/empty, so a
                    # None never reaches the interpreter path template.
                    "pyenv": py_conn_config.get("pyenv") or RECURVE_EXECUTOR_PYENV_NAME,
                    "python_version": py_conn_config.get("python_version") or DEFAULT_PY_VERSION,
                    "requirements": requirements_str,
                }
                logger.info(f"Using python env configuration: {config}")
                return config

            logger.warning(f"No configuration found for {python_env_project_conn_name} in project: {project_id}, using defaults: {DEFAULT_PY_VERSION}")

        except Exception as e:
            # Deliberate best effort: log with traceback and fall back to defaults.
            logger.error(
                f"Failed to fetch Python environment config for {python_env_project_conn_name} in project: {project_id}: {e}",
                exc_info=True,
            )
            logger.info("Falling back to default configuration")

        # Fallback to default configuration
        return {
            "pyenv": RECURVE_EXECUTOR_PYENV_NAME,
            "python_version": DEFAULT_PY_VERSION,
            "requirements": "",
        }

    @staticmethod
    def _install_requirements_with_output_capture(requirements: str, pyenv_name: str) -> None:
        """
        Install requirements with output capture to include pip errors in exceptions.

        For environments other than the executor's own one, ``recurvedata-lib[slim]``
        is appended to the requirements and installed first from the local
        package directory (``PY_PACKAGES_PATH``).

        Args:
            requirements: Newline-separated pip requirements
            pyenv_name: Name of the virtualenv to install into

        Raises:
            RuntimeError: If a pip invocation exits with a non-zero code; the
                captured pip output is part of the message.
        """
        is_custom_env = pyenv_name != RECURVE_EXECUTOR_PYENV_NAME
        if is_custom_env:
            requirements += "\nrecurvedata-lib[slim]"
        if not requirements:
            return

        logger.info("installing requirements")

        python = CONF.PYENV_PYTHON_PATH.format(pyenv=pyenv_name)

        # Install recurvedata-lib from local package if it's a new virtualenv
        if is_custom_env:
            # Explicit argv list: splitting a formatted string on whitespace
            # would break paths that contain spaces.
            output, ret_code = robust_run_subprocess(
                [
                    python,
                    "-m",
                    "pip",
                    "install",
                    "-v",
                    "--no-index",
                    f"--find-links={PY_PACKAGES_PATH}",
                    "recurvedata-lib[slim]",
                ],
                _logger=logger,
            )
            if ret_code:
                raise RuntimeError(f"Failed to install recurvedata-lib:\n{output}")

        # Install user requirements
        with NamedTemporaryFile(mode="w+t", prefix="recurve_python_requirements_", suffix=".txt") as requirements_file:
            requirements_file.write(requirements)
            # Flush so pip, which reads the file by name, sees the full content.
            requirements_file.flush()

            output, ret_code = robust_run_subprocess(
                [python, "-m", "pip", "install", "-r", requirements_file.name],
                _logger=logger,
            )

            if ret_code:
                raise RuntimeError(f"Failed to install requirements:\n{output}")

    @staticmethod
    def _execute_python_code(python_code: str, conn_config: Dict[str, Any], project_id: int) -> None:
        """
        Execute Python code in a subprocess using the interpreter of the configured environment.

        Args:
            python_code: Source code to run
            conn_config: Environment configuration; only the ``pyenv`` key is read
            project_id: Used for the temporary file name prefix

        Raises:
            RuntimeError: If the script exits with a non-zero code; the captured
                output is part of the message.
        """
        pyenv = conn_config["pyenv"]

        # Python reads source files as UTF-8 by default, so write the script as
        # UTF-8 explicitly instead of relying on the locale encoding.
        prefix = f"recurve_python_validation_{project_id}_"
        with NamedTemporaryFile(mode="w+t", prefix=prefix, suffix=".py", encoding="utf-8") as tmp_file:
            tmp_file.write(python_code)
            # Flush so the child interpreter sees the full script on disk.
            tmp_file.flush()

            logger.info(f"Executing Python code in environment {pyenv}")
            logger.debug(f"Code to execute:\n{python_code}")

            script_path = os.path.abspath(tmp_file.name)
            python_path = CONF.PYENV_PYTHON_PATH.format(pyenv=pyenv)
            os_env = os.environ.copy()

            output, ret_code = robust_run_subprocess([python_path, script_path], env=os_env, _logger=logger)

            if ret_code:
                raise RuntimeError(f"Python Operator Error:\n{output}")  # Same error message as PythonTask

            logger.info("Python script executed successfully")
@@ -0,0 +1,32 @@
1
+ from fastapi import APIRouter, FastAPI
2
+ from fastapi.responses import ORJSONResponse
3
+
4
+ from recurvedata.core.tracing import Tracing
5
+ from recurvedata.server.connector.api import router as connector_router
6
+ from recurvedata.server.data_service.api import router as data_service_router
7
+ from recurvedata.server.dbt.api import router as dbt_router
8
+ from recurvedata.server.executor.api import router as executor_router
9
+ from recurvedata.server.schedulers.api import router as schedulers_router
10
+ from recurvedata.utils.log import init_logging, setup_loguru
11
+
12
__all__ = ["create_app"]


def create_app() -> FastAPI:
    """Build the "Recurve Lib Server" FastAPI application.

    Initializes logging, creates the tracer if none has been instantiated yet,
    and mounts the dbt, connector, data-service, executor and schedulers
    routers under the common ``/api`` prefix.

    Returns:
        The configured FastAPI application.
    """
    init_logging()
    setup_loguru()

    if not Tracing.is_instantiated():
        # Imported lazily, only when a tracer actually has to be created.
        from recurvedata.utils.tracing import create_dp_tracer

        create_dp_tracer("recurve-lib-server")

    # (prefix, router) pairs, listed in registration order.
    mounts = (
        ("/dbt", dbt_router),
        ("/connector", connector_router),
        ("/data-service", data_service_router),
        ("/executor", executor_router),
        ("/schedulers", schedulers_router),
    )

    application = FastAPI(title="Recurve Lib Server", default_response_class=ORJSONResponse)
    api_router = APIRouter(prefix="/api")
    for sub_prefix, sub_router in mounts:
        api_router.include_router(sub_router, prefix=sub_prefix)

    application.include_router(api_router)
    return application
File without changes
@@ -0,0 +1,252 @@
1
+ import json
2
+ import logging
3
+ import subprocess
4
+ from typing import List
5
+
6
+ from fastapi import APIRouter
7
+
8
+ from recurvedata.exceptions import RecurveException
9
+ from recurvedata.server.schedulers.schemas import (
10
+ CreateDagRequest,
11
+ DeleteDagRequest,
12
+ RerunJobRunRequest,
13
+ RerunTaskRunRequest,
14
+ StartDevRunRequest,
15
+ TerminateTaskRunRequest,
16
+ TriggerJobRunRequest,
17
+ )
18
+ from recurvedata.server.schemas import ResponseError, ResponseModel
19
+ from recurvedata.utils.date_time import to_local_datetime
20
+
21
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# `router` is the object exported by this module; job and task endpoints are
# declared on their own routers and attached to it at the bottom of the file.
router = APIRouter(tags=["schedulers"])
job_router = APIRouter()
task_router = APIRouter()
26
+
27
+ # ------------------------------
28
+ # job APIs
29
+ # ------------------------------
30
+
31
+
32
async def _execute_scheduler_command(cmd: List[str], operation_name: str, job_id: int) -> ResponseModel:
    """
    Execute a scheduler command and return standardized response.

    The command is run in a worker thread: `subprocess.run` blocks until the
    child process exits, and calling it directly inside this coroutine would
    stall the event loop (and every other request) for that whole time.

    Args:
        cmd: Command to execute
        operation_name: Name of the operation for logging
        job_id: Job ID for error context

    Returns:
        `ResponseModel(ok=True)` when the command exits with status 0.
        Otherwise `ok=False` with an error built from a `RecurveException`
        carrying the job id. Failures are reported in the response, not raised.
    """
    # Local import so this block does not depend on the module's import list.
    import asyncio

    is_ok = True
    error = None

    try:
        result = await asyncio.to_thread(subprocess.run, cmd, capture_output=True, text=True)
        logger.info(f"{operation_name} {job_id} result: {result.stdout}")

        if result.returncode != 0:
            logger.error(f"{operation_name} {job_id} failed: {result.stderr}")
            is_ok = False
            error = ResponseError.from_recurve_exception(RecurveException(data={"job_id": job_id}))

    except Exception as e:
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"{operation_name} failed: {e}")
        is_ok = False
        error = ResponseError.from_recurve_exception(RecurveException(data={"job_id": job_id}))

    return ResponseModel(ok=is_ok, error=error)
59
+
60
+
61
@job_router.post("/{job_id}/create")
async def create_dag(*, job_id: int, body: CreateDagRequest) -> ResponseModel:
    """
    Create a DAG for a job: update it, then optionally activate it.

    The first failing step's response is returned as-is; activation is only
    attempted when `body.is_active` is set and the update succeeded.
    """
    steps = [(["recurve_scheduler", "update-dag", "--job_id", str(job_id)], "update dag")]
    if body.is_active:
        steps.append((["recurve_scheduler", "activate-dag", "--job_id", str(job_id)], "activate dag"))

    for command, operation in steps:
        outcome = await _execute_scheduler_command(command, operation, job_id)
        if not outcome.ok:
            return outcome

    logger.info(f"create dag {job_id} success")
    return ResponseModel(ok=True)
81
+
82
+
83
@job_router.post("/{job_id}/update")
async def update_dag(*, job_id: int) -> ResponseModel:
    """Run `recurve_scheduler update-dag` for the job and return the command outcome."""
    command = ["recurve_scheduler", "update-dag", "--job_id", str(job_id)]
    return await _execute_scheduler_command(command, "update dag", job_id)
88
+
89
+
90
@job_router.post("/{job_id}/activate")
async def activate_dag(*, job_id: int) -> ResponseModel:
    """Run `recurve_scheduler activate-dag` for the job and return the command outcome."""
    command = ["recurve_scheduler", "activate-dag", "--job_id", str(job_id)]
    return await _execute_scheduler_command(command, "activate dag", job_id)
95
+
96
+
97
@job_router.post("/{job_id}/deactivate")
async def deactivate_dag(*, job_id: int) -> ResponseModel:
    """Run `recurve_scheduler deactivate-dag` for the job and return the command outcome."""
    command = ["recurve_scheduler", "deactivate-dag", "--job_id", str(job_id)]
    return await _execute_scheduler_command(command, "deactivate dag", job_id)
102
+
103
+
104
@job_router.post("/{job_id}/delete")
async def delete_dag(*, job_id: int, body: DeleteDagRequest) -> ResponseModel:
    """Run `recurve_scheduler delete-dag` with the job id and the job name from the request body."""
    command = [
        "recurve_scheduler",
        "delete-dag",
        "--job_id",
        str(job_id),
        "--job_name",
        body.job_name,
    ]
    return await _execute_scheduler_command(command, "delete dag", job_id)
109
+
110
+
111
@job_router.post("/trigger-job-run")
async def trigger_job_run(
    *,
    body: TriggerJobRunRequest,
) -> ResponseModel:
    """
    Run `recurve_scheduler trigger-job-run` with arguments taken from the body.

    `--job_id` and `--execution_date` are always passed; the remaining options
    are appended only when the corresponding body field is truthy.
    """
    job_id = body.job_id
    cmd = ["recurve_scheduler", "trigger-job-run", "--job_id", str(job_id), "--execution_date", body.execution_date]

    # Boolean switches take no value on the command line.
    switches = (
        (body.include_past, "--include_past"),
        (body.include_future, "--include_future"),
    )
    for enabled, flag in switches:
        if enabled:
            cmd.append(flag)

    if body.run_type:
        cmd += ["--run_type", body.run_type]
    if body.conf:
        # The conf mapping is passed as a single JSON-encoded argument.
        cmd += ["--conf", json.dumps(body.conf)]

    return await _execute_scheduler_command(cmd, "trigger job run", job_id)
136
+
137
+
138
@job_router.post("/rerun-job")
async def rerun_job_run(
    *,
    body: RerunJobRunRequest,
) -> ResponseModel:
    """
    Run `recurve_scheduler rerun-job-run` with arguments taken from the body.

    Only `--job_id` is always passed; each optional value and the
    `--failed_only` switch are appended when the body field is truthy.
    """
    job_id = body.job_id
    cmd = ["recurve_scheduler", "rerun-job-run", "--job_id", str(job_id)]

    # Options that carry a value, in the order they are placed on the command line.
    valued_options = (
        ("--run_id", body.run_id),
        ("--min_execution_date", body.min_execution_date),
        ("--max_execution_date", body.max_execution_date),
    )
    for flag, value in valued_options:
        if value:
            cmd.extend((flag, value))

    if body.failed_only:
        cmd.append("--failed_only")

    return await _execute_scheduler_command(cmd, "rerun job run", job_id)
160
+
161
+
162
@job_router.post("/{job_id}/stop-dev-run")
async def stop_dev_run(*, job_id: int) -> ResponseModel:
    """Run `recurve_scheduler stop-dev-run` for the job and return the command outcome."""
    logger.info(f"start stop dev run job_id: {job_id}")
    command = ["recurve_scheduler", "stop-dev-run", "--job_id", str(job_id)]
    return await _execute_scheduler_command(command, "stop dev run", job_id)
168
+
169
+
170
@job_router.post("/{job_id}/start-dev-run")
async def start_dev_run(*, job_id: int, body: StartDevRunRequest) -> ResponseModel:
    """
    Run `recurve_scheduler start-dev-run` for the job.

    The requested execution date is passed through `to_local_datetime` and
    handed to the command in ISO format.
    """
    logger.info(f"start start dev run job_id: {job_id}")
    local_execution_date = to_local_datetime(body.execution_date)
    command = [
        "recurve_scheduler",
        "start-dev-run",
        "--job_id",
        str(job_id),
        "--execution_date",
        local_execution_date.isoformat(),
    ]
    return await _execute_scheduler_command(command, "start dev run", job_id)
186
+
187
+
188
+ # ------------------------------
189
+ # task APIs
190
+ # ------------------------------
191
+
192
+
193
@task_router.post("/rerun-task")
async def rerun_task_run(
    *,
    body: RerunTaskRunRequest,
) -> ResponseModel:
    """
    Run `recurve_scheduler rerun-task-run` with arguments taken from the body.

    `--job_id` and `--node_key` are always passed; optional values and boolean
    switches are appended only when the corresponding body field is truthy.
    """
    job_id = body.job_id
    cmd = ["recurve_scheduler", "rerun-task-run", "--job_id", str(job_id), "--node_key", body.node_key]

    # Options that carry a value come first, in this order.
    valued_options = (
        ("--run_id", body.run_id),
        ("--min_execution_date", body.min_execution_date),
        ("--max_execution_date", body.max_execution_date),
    )
    for flag, value in valued_options:
        if value:
            cmd.extend((flag, value))

    # Boolean switches follow and take no value.
    switches = (
        ("--include_upstream", body.include_upstream),
        ("--include_downstream", body.include_downstream),
        ("--failed_only", body.failed_only),
    )
    cmd.extend(flag for flag, enabled in switches if enabled)

    return await _execute_scheduler_command(cmd, "rerun task run", job_id)
226
+
227
+
228
@task_router.post("/terminate-task")
async def terminate_task_run(
    *,
    body: TerminateTaskRunRequest,
) -> ResponseModel:
    """Run `recurve_scheduler terminate-task-run` for the job, run and node named in the body."""
    job_id = body.job_id
    command = [
        "recurve_scheduler",
        "terminate-task-run",
        "--job_id",
        str(job_id),
        "--run_id",
        body.run_id,
        "--node_key",
        body.node_key,
    ]
    return await _execute_scheduler_command(command, "terminate task run", job_id)
248
+
249
+
250
# Attach the job and task endpoints to the exported router under their own prefixes.
router.include_router(job_router, prefix="/jobs")
router.include_router(task_router, prefix="/tasks")
@@ -0,0 +1,50 @@
1
+ from datetime import datetime
2
+ from typing import Any, Optional
3
+
4
+ from pydantic import BaseModel
5
+
6
+
7
class TriggerJobRunRequest(BaseModel):
    """Request body for triggering a job run."""

    job_id: int
    # Kept as a plain string; no date parsing is declared on this model.
    execution_date: str
    include_past: bool | None = False
    include_future: bool | None = False
    run_type: str | None = None
    conf: dict[str, Any] | None = None
14
+
15
+
16
class RerunJobRunRequest(BaseModel):
    """Request body for re-running a job run; only `job_id` is required."""

    job_id: int
    run_id: str | None = None
    # Execution-date bounds are plain strings; both are optional.
    min_execution_date: str | None = None
    max_execution_date: str | None = None
    failed_only: bool = False
22
+
23
+
24
class RerunTaskRunRequest(BaseModel):
    """Request body for re-running a task run; `job_id` and `node_key` are required."""

    job_id: int
    node_key: str
    run_id: str | None = None
    # Execution-date bounds are plain strings; both are optional.
    min_execution_date: str | None = None
    max_execution_date: str | None = None
    # Boolean options, all off by default.
    include_upstream: bool = False
    include_downstream: bool = False
    failed_only: bool = False
33
+
34
+
35
class TerminateTaskRunRequest(BaseModel):
    """Request body for terminating a task run; every field is required."""

    job_id: int
    run_id: str
    node_key: str
39
+
40
+
41
class DeleteDagRequest(BaseModel):
    """Request body for deleting a DAG."""

    # Name of the job whose DAG is deleted (required).
    job_name: str
43
+
44
+
45
class StartDevRunRequest(BaseModel):
    """Request body for starting a dev run."""

    # Validated as a datetime, unlike the string dates on the other request models.
    execution_date: datetime
47
+
48
+
49
class CreateDagRequest(BaseModel):
    """Request body for creating a DAG."""

    # Required flag; no default is declared.
    is_active: bool
@@ -0,0 +1,50 @@
1
+ import os
2
+ from traceback import format_exc
3
+ from typing import Any, Self
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from recurvedata.exceptions import RecurveException, WrapRecurveException
8
+ from recurvedata.executors.schemas import ConnectionItem
9
+
10
+
11
class ConnectionAndVariables(BaseModel):
    """Pairs a connection item with a dictionary of variables."""

    connection: ConnectionItem
    # Untyped dict: no key or value schema is enforced here.
    variables: dict
14
+
15
+
16
class ResponseError(BaseModel):
    """Serializable description of an error returned inside a `ResponseModel`."""

    code: str
    reason: str | None
    exception: str | None = None
    traceback: str | None = None
    data: dict | str | None = None

    @classmethod
    def from_recurve_exception(cls, recurve_exception: RecurveException) -> Self:
        """
        Build a `ResponseError` from a `RecurveException`.

        Args:
            recurve_exception: The exception to convert. Its `code` supplies the
                error code and message, its `data` (when truthy) is appended to
                the reason and copied to `data`.

        Returns:
            The populated error. `exception` is set only for a
            `WrapRecurveException`; `traceback` is set only when this method is
            called while an exception is being handled.
        """
        # Local import so this block does not depend on the module's import list.
        import sys

        if recurve_exception.data:
            reason = f"{recurve_exception.code.message} {recurve_exception.data}"
        else:
            reason = recurve_exception.code.message
        if isinstance(recurve_exception, WrapRecurveException):
            exception = str(recurve_exception.exception)
        else:
            exception = None

        # format_exc() describes the exception currently being handled. Outside
        # an `except` block it returns the meaningless text "NoneType: None",
        # so report no traceback in that case.
        tb_text = format_exc() if sys.exc_info()[0] is not None else None

        return cls(
            code=recurve_exception.code.code,
            reason=reason,
            exception=exception,
            traceback=tb_text,
            data=recurve_exception.data,
        )
40
+
41
+
42
class ResponseModel(BaseModel):
    """Standard response envelope: success flag, optional error, optional payload."""

    ok: bool
    error: ResponseError | None = None
    data: Any = None

    def model_dump_json_file(self, filename: str):
        """
        Write this model to `filename` as indented JSON.

        Missing parent directories are created first.

        Args:
            filename: Destination path. A bare file name (no directory part)
                is written to the current working directory.
        """
        parent_dir = os.path.dirname(filename)
        # dirname is "" for a bare file name, and os.makedirs("") raises
        # FileNotFoundError, so only create directories when there is one.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        # JSON is written as UTF-8 regardless of the platform default encoding.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(self.model_dump_json(indent=2))
@@ -0,0 +1,15 @@
1
+ from recurvedata.utils.helpers import extract_dict, first, get_env_id, md5hash, truncate_string, unescape_backslash
2
+ from recurvedata.utils.json import json_dumps, json_loads
3
+ from recurvedata.utils.log import init_logging
4
+
5
# Names re-exported from the helper, json and log submodules imported above.
__all__ = [
    "extract_dict",
    "unescape_backslash",
    "md5hash",
    "truncate_string",
    "get_env_id",
    "first",
    "json_dumps",
    "json_loads",
    "init_logging",
]
@@ -0,0 +1,61 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import functools
5
+ import os
6
+ import traceback
7
+ from typing import Any, Callable
8
+
9
+ import typer
10
+ from click import ClickException
11
+ from typer.models import CommandFunctionType
12
+
13
# True only when the RECURVE_TEST_MODE environment variable equals "true"
# (case-insensitive); unset or any other value means False. Read once at import.
RECURVE_TEST_MODE = os.getenv("RECURVE_TEST_MODE", "false").lower() == "true"
14
+
15
+
16
class RecurveTyper(typer.Typer):
    """
    Wrapper for Typer to support async functions and handle errors.
    """

    def command(self, name: str | None = None, *args, **kwargs) -> Callable[[CommandFunctionType], CommandFunctionType]:
        """
        Register a command, accepting both sync and async functions.

        Coroutine functions are wrapped so they run to completion with
        `asyncio.run`; every command is additionally wrapped with
        `with_cli_exception_handling` before being registered on the parent.

        Args:
            name: Command name, forwarded to `typer.Typer.command`.
            *args: Extra positional arguments forwarded unchanged.
            **kwargs: Extra keyword arguments forwarded unchanged.

        Returns:
            A decorator that registers the function it is applied to.
        """

        def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
            fn = func
            if asyncio.iscoroutinefunction(func):

                @functools.wraps(func)
                def sync_wrapper(*inner_args: Any, **inner_kwargs: Any) -> Any:
                    # Propagate the coroutine's result instead of dropping it.
                    return asyncio.run(func(*inner_args, **inner_kwargs))

                fn = sync_wrapper

            fn = with_cli_exception_handling(fn)

            # Explicit two-argument super(): the zero-argument form does not
            # work here because this nested function's first parameter is not `self`.
            return super(RecurveTyper, self).command(name, *args, **kwargs)(fn)

        return decorator
37
+
38
+
39
def exit_with_error(message: str, code: int = 1, **kwargs) -> None:
    """
    Print a styled error message, then exit by raising `typer.Exit`.

    Args:
        message: Text to print.
        code: Exit code carried by the raised `typer.Exit`.
        **kwargs: Styling options forwarded to `typer.secho`; the foreground
            colour defaults to red unless `fg` is given.
    """
    style = {"fg": typer.colors.RED, **kwargs}
    typer.secho(message, **style)
    raise typer.Exit(code)
46
+
47
+
48
def with_cli_exception_handling(fn):
    """
    Wrap `fn` so an unexpected exception prints its traceback and exits with an error.

    Typer/Click control-flow exceptions always propagate unchanged, and so does
    every exception when `RECURVE_TEST_MODE` is enabled.
    """
    # Exceptions Click/Typer use for their own control flow; never intercepted.
    passthrough = (typer.Exit, typer.Abort, ClickException)

    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except Exception as exc:
            if isinstance(exc, passthrough) or RECURVE_TEST_MODE:
                raise  # Re-raise untouched: CLI control flow, or test mode
            traceback.print_exc()
            exit_with_error("An exception occurred.")

    return wrapper