recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,172 @@
1
+ import asyncio
2
+ import contextlib
3
+ import datetime
4
+ import logging
5
+ import os
6
+ from typing import TYPE_CHECKING, Callable, Optional
7
+
8
+ import pendulum
9
+
10
+ from recurvedata.config import EXECUTOR_META_PATH
11
+ from recurvedata.error_codes import ERR
12
+ from recurvedata.exceptions import RecurveException, TimeoutException, WrapRecurveException
13
+ from recurvedata.executors.consts import VAR_CONVERT_STRING_FUNCS, VariableType
14
+ from recurvedata.utils import json_dumps, json_loads
15
+ from recurvedata.utils.timeout import timeout
16
+
17
+ if TYPE_CHECKING:
18
+ from recurvedata.executors.schemas import ResponseModel
19
+
20
+
21
def convert_var_value_from_string(var_type, var_value):
    """Coerce a string-encoded variable value into its declared type.

    Variable values coming from the front-end arrive as strings. A string is
    passed to the converter registered for ``var_type`` in
    ``VAR_CONVERT_STRING_FUNCS``; any non-string value is returned untouched.
    """
    if isinstance(var_value, str):
        converter = VAR_CONVERT_STRING_FUNCS[var_type]
        return converter(var_value)
    return var_value
30
+
31
+
32
def get_variable_type_by_value(value):
    """Infer the ``VariableType`` that best describes a Python value.

    The candidates are tested in order and the first ``isinstance`` match wins;
    anything unrecognised is reported as ``VariableType.STRING``.
    """
    # Order matters: bool is a subclass of int, so it has to be tested first.
    candidates = (
        (bool, VariableType.BOOLEAN),
        (int, VariableType.INT),
        (float, VariableType.FLOAT),
        (str, VariableType.STRING),
        # Use STRING for dicts for now (the JSON type errors out when keys are not str).
        (dict, VariableType.STRING),
        ((datetime.datetime, datetime.date, pendulum.DateTime, pendulum.Date), VariableType.DATETIME),
    )
    return next(
        (var_type for types, var_type in candidates if isinstance(value, types)),
        VariableType.STRING,
    )
45
+
46
+
47
def format_meta_file_path(job_id: int, node_key: str, execution_date: datetime.datetime) -> str:
    """Return the ``meta.json`` path for one node run, creating its directory.

    The directory is ``EXECUTOR_META_PATH/<job_id>/<node_key>/<execution_date ISO string>``
    and is created (with parents) if it does not exist yet.
    """
    meta_dir = os.path.join(EXECUTOR_META_PATH, str(job_id), node_key, execution_date.isoformat())
    os.makedirs(meta_dir, exist_ok=True)
    return os.path.join(meta_dir, "meta.json")
52
+
53
+
54
def update_meta_file(job_id: int, node_key: str, execution_date: datetime.datetime, meta: dict):
    """Serialize ``meta`` into the meta file of the given node run.

    Nothing is written (and no directory is created) when ``meta`` is empty or None.
    An existing meta file is overwritten.
    """
    if meta:
        target = format_meta_file_path(job_id, node_key, execution_date)
        with open(target, "w") as fp:
            fp.write(json_dumps(meta))
60
+
61
+
62
def read_meta_file(
    job_id: int, node_key: str, execution_date: datetime.datetime, delete_after_read: bool = False
) -> Optional[dict]:
    """Load the meta file of the given node run.

    Args:
        job_id: id of the job the node belongs to.
        node_key: key of the node.
        execution_date: execution date of the run.
        delete_after_read: when True, remove the file once it has been parsed.
            Removal is best-effort; failures are ignored.

    Returns:
        The parsed meta, or None when no meta file exists.

    Note:
        Resolving the path goes through ``format_meta_file_path``, which creates
        the run directory as a side effect even when there is nothing to read.
    """
    file_path = format_meta_file_path(job_id, node_key, execution_date)
    # Open first and handle absence, instead of checking existence beforehand:
    # another reader using delete_after_read may remove the file in between.
    try:
        f = open(file_path, "r")
    except FileNotFoundError:
        return None
    with f:
        meta = json_loads(f.read())
    if delete_after_read:
        with contextlib.suppress(OSError, TypeError, ValueError):
            os.unlink(file_path)
    return meta
74
+
75
+
76
def get_airflow_run_id():
    """Return the ``AIRFLOW_CTX_DAG_RUN_ID`` environment variable, or None if unset."""
    return os.getenv("AIRFLOW_CTX_DAG_RUN_ID")
78
+
79
+
80
def get_airflow_try_number():
    """Return the ``AIRFLOW_CTX_TRY_NUMBER`` environment variable (a string), or None if unset."""
    return os.getenv("AIRFLOW_CTX_TRY_NUMBER")
82
+
83
+
84
def get_recurve_node_key():
    """Return the ``RECURVE__NODE_KEY`` environment variable, or None if unset."""
    return os.getenv("RECURVE__NODE_KEY")
86
+
87
+
88
def run_with_result_handling(func: Callable = None, ttl: int = None, result_filename: str = None, *args, **kwargs):
    """Call ``func`` (optionally under a timeout) and wrap the outcome in a ResponseModel.

    Args:
        func: the callable to run; it receives ``*args`` and ``**kwargs``.
        ttl (int, optional): timeout in seconds. A falsy value disables the timeout.
        result_filename: file the result is dumped to as JSON. When omitted,
            the JSON is logged at INFO level instead.

    Returns:
        A ``ResponseModel``. On success ``ok`` is True and ``data`` holds the
        return value of ``func``. On any exception ``ok`` is False and ``error``
        describes it; exceptions that are not ``RecurveException`` are wrapped
        with ``ERR.UNKNOWN_ERROR`` first.
    """
    from recurvedata.executors.schemas import ResponseError, ResponseModel

    def invoke():
        return func(*args, **kwargs)

    def invoke_with_deadline():
        with timeout(ttl):
            return invoke()

    response = ResponseModel(ok=True)
    try:
        runner = invoke_with_deadline if ttl else invoke
        response.data = runner()
    except Exception as exc:
        response.ok = False
        wrapped = exc if isinstance(exc, RecurveException) else WrapRecurveException(ERR.UNKNOWN_ERROR, exc)
        response.error = ResponseError.from_recurve_exception(wrapped)

    if not result_filename:
        logging.info(response.model_dump_json(indent=2))
    else:
        response.model_dump_json_file(result_filename)
    return response
117
+
118
+
119
async def run_with_result_handling_v2(func: Callable = None, ttl: int = None, *args, **kwargs) -> "ResponseModel":
    """Async counterpart of ``run_with_result_handling``.

    The difference is the timeout logic: the signal-based timeout cannot work
    on FastAPI, so the deadline is enforced with ``asyncio.wait_for`` instead.

    Args:
        func: a coroutine function (awaited directly) or a synchronous callable
            (run in a worker thread via ``asyncio.to_thread``). It receives
            ``*args`` and ``**kwargs``.
        ttl: timeout in seconds. A falsy value disables the timeout.

    Returns:
        A ``ResponseModel``. On success ``ok`` is True and ``data`` holds the
        result. On any other exception ``ok`` is False and ``error`` describes
        it; exceptions that are not ``RecurveException`` are wrapped with
        ``ERR.UNKNOWN_ERROR`` first.

    Raises:
        TimeoutException: when the call does not finish within ``ttl`` seconds.
            Unlike other failures, a timeout is raised rather than returned.

    Note:
        Per the asyncio documentation, a thread started by ``asyncio.to_thread``
        cannot be interrupted: after a timeout a synchronous ``func`` keeps
        running in the background until it returns.
    """
    from recurvedata.executors.schemas import ResponseError, ResponseModel

    result = ResponseModel(ok=True)
    try:
        if asyncio.iscoroutinefunction(func):
            # func is a coroutine function: await it directly.
            awaitable = func(*args, **kwargs)
        else:
            # Otherwise run the synchronous function in a thread.
            awaitable = asyncio.to_thread(func, *args, **kwargs)
        result.data = await asyncio.wait_for(awaitable, timeout=ttl or None)
    except asyncio.TimeoutError as e:
        # Not every callable has __name__ (e.g. functools.partial); do not let
        # building the message hide the timeout behind an AttributeError.
        func_name = getattr(func, "__name__", repr(func))
        raise TimeoutException(f"Function {func_name} timed out after {ttl} seconds") from e
    except Exception as e:
        result.ok = False
        if not isinstance(e, RecurveException):
            e = WrapRecurveException(ERR.UNKNOWN_ERROR, e)
        result.error = ResponseError.from_recurve_exception(e)

    return result
153
+
154
+
155
def patch_pandas_mysql_connector_cext_missing():
    """Import ``mysql.connector`` early to work around a C Extension loading issue.

    When pandas is imported before mysql.connector, the MySQL Connector/Python
    C Extension may be missing, which can cause connection errors like:
    '2013: Lost connection to MySQL server during query' when compiling dbt Doris models.

    Calling this function before pandas is imported attempts to load
    mysql.connector first so that the C Extension is properly loaded.
    """
    # A missing connector is deliberately ignored here; the error will be
    # handled elsewhere if the connector is actually needed.
    with contextlib.suppress(ImportError):
        import mysql.connector  # noqa: F401
@@ -0,0 +1,11 @@
1
+ from recurvedata.filestorage._factory import Factory
2
+ from recurvedata.filestorage.interface import AbstractFileStorage, StorageType
3
+
4
# Storage backends known to the package, keyed by storage type. Values are
# dotted import paths of the implementing classes.
_BACKEND_PATHS = {
    StorageType.LOCAL: "recurvedata.filestorage.backends.local.LocalStorage",
    StorageType.OSS: "recurvedata.filestorage.backends.oss.OSSStorage",
}

# Package-wide factory used to build file storage instances.
factory = Factory[StorageType, AbstractFileStorage](_BACKEND_PATHS)

__all__ = ["AbstractFileStorage", "StorageType", "factory"]
@@ -0,0 +1,33 @@
1
+ from typing import Any, Generic, TypeVar, cast
2
+
3
+ from pydantic import ValidationError
4
+
5
+ from recurvedata.filestorage.interface import AbstractFileStorage, StorageType
6
+ from recurvedata.utils.imports import import_string
7
+
8
# Type parameters of Factory: the key identifying a backend, and the storage
# class that key resolves to.
_ST = TypeVar("_ST", bound=StorageType)
_SC = TypeVar("_SC", bound=AbstractFileStorage)


class Factory(Generic[_ST, _SC]):
    """Build storage backends from a registry of dotted import paths.

    Implementation classes are imported on demand (when a class is requested),
    not when the factory is constructed.
    """

    def __init__(self, implementations: dict[_ST, str]):
        """Store the mapping of storage type to the import path of its class."""
        self._implementations: dict[_ST, str] = implementations

    def get_supported_backends(self) -> list[_ST]:
        """Return the storage types this factory can create."""
        return list(self._implementations)

    def get_storage_class(self, type_: _ST | str) -> type[_SC]:
        """Import and return the storage class registered for ``type_``.

        Raises:
            KeyError: if ``type_`` is not registered.
        """
        return cast(type[_SC], import_string(self._implementations[type_]))

    def create(self, type_: _ST | str, options: dict[str, Any]) -> _SC:
        """Instantiate the backend registered for ``type_``.

        Args:
            type_: the storage type to create.
            options: keyword arguments forwarded to the class's ``from_params``.

        Raises:
            ValueError: if ``type_`` is not supported, or if ``options`` fail
                validation (the original ``ValidationError`` is chained as the cause).
        """
        if type_ not in self._implementations:
            raise ValueError(f"Unsupported storage backend: {type_}")

        storage_class = self.get_storage_class(type_)

        try:
            return storage_class.from_params(**options)
        except ValidationError as e:
            raise ValueError(f"Invalid configuration for {type_}: {e}") from e
File without changes
@@ -0,0 +1,45 @@
1
+ from abc import ABC, abstractmethod
2
+ from functools import cached_property
3
+
4
+ from fsspec.asyn import AsyncFileSystem
5
+
6
+ from recurvedata.filestorage.interface import AbstractFileStorage
7
+
8
+
9
class FSSpecAbstractStorage(AbstractFileStorage, ABC):
    """Base class for storages backed by an fsspec ``AsyncFileSystem``.

    Subclasses provide the filesystem through ``get_fs`` and may override
    ``normalize_path`` to map a storage-relative path to a filesystem path.
    Every operation normalizes its remote path before delegating to the
    filesystem's coroutine methods.
    """

    _fs: AsyncFileSystem

    @cached_property
    def _fs(self) -> AsyncFileSystem:
        """The filesystem instance, created on first access and then cached."""
        # TODO(liyangliang): we could consider involve a pooling mechanism here
        # see discussion with ChatGPT https://chat.openai.com/share/972e3bcc-0ebc-43b8-9a49-72f3dd7dc2b6
        return self.get_fs()

    @abstractmethod
    def get_fs(self) -> AsyncFileSystem:
        """Create the filesystem used by this storage."""
        ...

    def normalize_path(self, path: str) -> str:
        """Normalize a path to be used with the filesystem."""
        return path

    async def listdir(self, path: str) -> list[str]:
        """List the entries under ``path`` as a list of path strings."""
        # The default of ``detail`` differs between fsspec implementations
        # (the base class defaults to True, which yields dicts). Request plain
        # paths explicitly so every backend honours the ``list[str]`` contract.
        return await self._fs._ls(self.normalize_path(path), detail=False)

    async def write_bytes(self, path: str, content: bytes):
        """Write ``content`` to ``path``."""
        await self._fs._pipe_file(self.normalize_path(path), content)

    async def read_bytes(self, path: str) -> bytes:
        """Return the content of the file at ``path``."""
        return await self._fs._cat_file(self.normalize_path(path))

    async def put(self, local_path: str, remote_path: str):
        """Upload the local file ``local_path`` to ``remote_path``."""
        await self._fs._put_file(local_path, self.normalize_path(remote_path))

    async def get(self, remote_path: str, local_path: str):
        """Download ``remote_path`` to the local file ``local_path``."""
        await self._fs._get_file(self.normalize_path(remote_path), local_path)

    async def delete(self, path: str):
        """Remove ``path`` from the storage."""
        await self._fs._rm(self.normalize_path(path))

    async def exists(self, path: str) -> bool:
        """Return whether ``path`` exists in the storage."""
        return await self._fs._exists(self.normalize_path(path))
@@ -0,0 +1,67 @@
1
+ import datetime
2
+ import urllib.parse
3
+ from typing import Any
4
+
5
+ from jose import JWTError, jwt
6
+ from morefs.asyn_local import AsyncLocalFileSystem
7
+ from pydantic import SecretStr
8
+
9
+ from recurvedata.filestorage.backends.fsspec import FSSpecAbstractStorage
10
+ from recurvedata.filestorage.interface import StorageConfig, StorageType
11
+ from recurvedata.utils.date_time import utcnow
12
+
13
# JWT signing algorithm used for presigned-url tokens.
ALGORITHM = "HS256"
# SECURITY NOTE(review): this fallback key is hard-coded in the source, so any
# token signed with it can be forged by whoever has read this file. Deployments
# that rely on presigned urls should configure their own ``secret_key``.
_DEFAULT_SECRET_KEY = SecretStr("619805f2af666a623f37221ce8dfbec85ce9e83a16b20fe4a424078ed37f2a3a")


class LocalStorageConfig(StorageConfig):
    """Configuration of the local-filesystem storage backend."""

    # Directory that all storage paths are resolved against.
    root_dir: str = "/tmp"
    # Forwarded to the filesystem's ``auto_mkdir`` option.
    auto_mkdir: bool = True

    # below are required for generating presigned url
    secret_key: SecretStr = _DEFAULT_SECRET_KEY
    """
    The secret key to sign and verify the presigned url. Although it's better to
    use a more secure key, for simplicity, we assign a default value here.
    To generate a secure key, you can use the following code:

    ```python
    import secrets
    key = secrets.token_hex(32)
    print(key)
    ```
    """
    # Base url the presigned urls are built on. Optional: it is only needed for
    # presigned urls, so None must be an accepted value, not just the default.
    server_base_url: str | None = None
35
+
36
+
37
class LocalStorage(FSSpecAbstractStorage):
    """Storage backend that keeps files on the local filesystem under ``root_dir``.

    Presigned urls are emulated with a signed JWT carrying the path and an
    expiry; ``decode_presigned_url`` verifies such a token.
    """

    config_class = LocalStorageConfig
    config: LocalStorageConfig
    _fs: AsyncLocalFileSystem

    @classmethod
    def storage_type(cls) -> StorageType:
        """Return ``StorageType.LOCAL``."""
        return StorageType.LOCAL

    def get_fs(self) -> AsyncLocalFileSystem:
        """Create the async local filesystem, honouring ``auto_mkdir``."""
        return AsyncLocalFileSystem(auto_mkdir=self.config.auto_mkdir)

    def normalize_path(self, path: str) -> str:
        """Resolve ``path`` against the configured ``root_dir``."""
        return self.join_path(self.config.root_dir, path)

    def get_presigned_url(self, path: str, expiration: int = 1800, **kwargs) -> str:
        """Build a time-limited url for ``path``.

        Args:
            path: storage path the url grants access to.
            expiration: validity of the token in seconds.
            **kwargs: extra query-string parameters appended to the url.

        Returns:
            ``<server_base_url>/<path>?token=<jwt>[&...]`` with the path
            percent-encoded.

        Raises:
            ValueError: if ``secret_key`` or ``server_base_url`` is not configured.
        """
        if not all((self.config.secret_key, self.config.server_base_url)):
            raise ValueError("secret_key and server_base_url are required to generate presigned url")

        # The token carries the raw (unquoted) path; only the url is encoded.
        to_encode = {"path": path, "exp": utcnow() + datetime.timedelta(seconds=expiration)}
        encoded = jwt.encode(to_encode, self.config.secret_key.get_secret_value(), algorithm=ALGORITHM)
        base_url = str(self.config.server_base_url).rstrip("/")
        query_string = urllib.parse.urlencode({"token": encoded} | kwargs)
        # Percent-encode the path ("/" is kept) so characters such as spaces,
        # "?", "#" or "%" cannot truncate or corrupt the url.
        quoted_path = urllib.parse.quote(path)
        return f"{base_url}/{quoted_path}?{query_string}"

    def decode_presigned_url(self, token: str) -> dict[str, Any] | None:
        """Verify ``token`` and return its payload, or None if it is invalid."""
        try:
            return jwt.decode(token, self.config.secret_key.get_secret_value(), algorithms=[ALGORITHM])
        except JWTError:
            return None
@@ -0,0 +1,56 @@
1
+ from functools import cached_property
2
+
3
+ import oss2
4
+ from ossfs.async_oss import AioOSSFileSystem
5
+ from pydantic import ConfigDict
6
+
7
+ from recurvedata.filestorage.backends.fsspec import FSSpecAbstractStorage
8
+ from recurvedata.filestorage.interface import StorageConfig, StorageType
9
+
10
+
11
class OSSStorageConfig(StorageConfig):
    """Connection settings of the OSS storage backend."""

    # Unlike the base config, unknown keys are accepted here.
    model_config = ConfigDict(extra="allow")

    endpoint: str
    access_key_id: str
    access_key_secret: str
    bucket_name: str
    # Prefix inside the bucket that storage paths are resolved against.
    directory: str = ""
    security_token: str | None = None  # for sts token
20
+
21
+
22
class OSSStorage(FSSpecAbstractStorage):
    """Storage backend for OSS, rooted at ``<bucket_name>/<directory>``."""

    config_class = OSSStorageConfig
    config: OSSStorageConfig
    _fs: AioOSSFileSystem

    @classmethod
    def storage_type(cls) -> StorageType:
        """Return ``StorageType.OSS``."""
        return StorageType.OSS

    def get_fs(self) -> AioOSSFileSystem:
        """Create the async OSS filesystem from the configured credentials."""
        return AioOSSFileSystem(
            endpoint=self.config.endpoint,
            key=self.config.access_key_id,
            secret=self.config.access_key_secret,
            token=self.config.security_token,
        )

    def normalize_path(self, path: str) -> str:
        """Resolve ``path`` against the configured bucket and directory."""
        return self.join_path(self.config.bucket_name, self.config.directory, path)

    @cached_property
    def public_bucket(self) -> oss2.Bucket:
        """The public Bucket object, used for generating public download urls."""
        # Strip the "-internal" marker so signed urls point at the public
        # endpoint; endpoints without the marker are left unchanged.
        public_endpoint = self.config.endpoint.replace("-internal", "")
        if self.config.security_token:
            # Temporary (STS) credentials must be signed together with their
            # token, otherwise the signed url lacks the security token.
            auth = oss2.StsAuth(
                self.config.access_key_id, self.config.access_key_secret, self.config.security_token
            )
        else:
            auth = oss2.Auth(self.config.access_key_id, self.config.access_key_secret)
        return oss2.Bucket(auth, public_endpoint, self.config.bucket_name)

    def get_presigned_url(self, path: str, expiration: int = 1800, **kwargs) -> str:
        """Return a signed GET url for ``path`` that is valid for ``expiration`` seconds.

        ``**kwargs`` is accepted for interface compatibility but is not used.
        """
        # NOTE(review): this is passed as a request header to sign; if the intent
        # is to force a download, confirm whether the "response-content-disposition"
        # query parameter should be used instead.
        headers = {"content-disposition": "attachment"}
        return self.public_bucket.sign_url(
            "GET", self.join_path(self.config.directory, path), expiration, headers=headers, slash_safe=True
        )
@@ -0,0 +1,84 @@
1
+ import os
2
+ from abc import ABC, abstractmethod
3
+ from enum import Enum
4
+ from typing import ClassVar, Self
5
+
6
+ from pydantic import BaseModel, ConfigDict
7
+
8
+
9
class StorageType(str, Enum):
    """Identifiers of the available file storage backends.

    Members are real ``str`` instances, and ``str(member)`` yields the plain
    value (e.g. ``"oss"``) rather than ``"StorageType.OSS"``.
    """

    LOCAL = "local"
    S3 = "s3"
    OSS = "oss"

    def __str__(self) -> str:
        # The member's value is exactly its underlying string content.
        return self.value
16
+
17
+
18
class StorageConfig(BaseModel):
    """Base class for storage configurations."""

    # Strict by default: input keys that are not declared as fields fail
    # validation. A subclass can relax this by defining its own model_config.
    model_config = ConfigDict(extra="forbid")
22
+
23
+
24
class AbstractFileStorage(ABC):
    """Interface every file storage backend implements.

    A backend declares its configuration model through ``config_class`` and
    provides the async file operations plus presigned-URL generation below.
    """

    # Model used by ``from_params`` to validate raw keyword arguments.
    config_class: ClassVar[type[StorageConfig]] = StorageConfig
    config: StorageConfig

    def __init__(self, config: StorageConfig):
        """Store the already-validated ``config`` on the instance."""
        self.config = config

    @classmethod
    def from_params(cls, **kwargs) -> Self:
        """Build an instance from raw keyword arguments.

        The arguments are validated with ``config_class.model_validate`` and
        the resulting config object is passed to the constructor.
        """
        validated_config = cls.config_class.model_validate(kwargs)
        return cls(validated_config)

    @classmethod
    @abstractmethod
    def storage_type(cls) -> StorageType:
        """Tell which ``StorageType`` this backend implements."""
        ...

    @abstractmethod
    async def listdir(self, path: str) -> list[str]:
        """Return the entries found under the directory ``path``."""
        ...

    @abstractmethod
    async def write_bytes(self, path: str, content: bytes):
        """Store ``content`` at ``path``."""
        ...

    @abstractmethod
    async def read_bytes(self, path: str) -> bytes:
        """Return the bytes stored at ``path``."""
        ...

    @abstractmethod
    async def put(self, local_path: str, remote_path: str):
        """Upload the file at ``local_path`` to ``remote_path``."""
        ...

    @abstractmethod
    async def get(self, remote_path: str, local_path: str):
        """Download the file at ``remote_path`` to ``local_path``."""
        ...

    @abstractmethod
    async def delete(self, path: str):
        """Remove the file at ``path``."""
        ...

    @abstractmethod
    async def exists(self, path: str) -> bool:
        """Report whether a file exists at ``path``."""
        ...

    @abstractmethod
    def get_presigned_url(self, path: str, expiration: int = 1800, **kwargs) -> str:
        """Produce a presigned URL granting temporary access to ``path``."""
        ...

    @staticmethod
    def join_path(*parts: str) -> str:
        """Join ``parts`` into one normalized path.

        Uses ``os.path.join`` followed by ``os.path.normpath``, so the usual
        rules apply: an absolute component discards everything before it, and
        ``.``/``..`` segments are collapsed.
        """
        joined = os.path.join(*parts)
        return os.path.normpath(joined)
@@ -0,0 +1,10 @@
1
+ from recurvedata.operators.context import context # noqa
2
+ from recurvedata.operators.dbt_operator import DBTOperator
3
+ from recurvedata.operators.link_operator import LinkOperator
4
+ from recurvedata.operators.notify_operator import NotifyOperator
5
+ from recurvedata.operators.operator import get_operator_class # noqa
6
+ from recurvedata.operators.python_operator import PythonOperator
7
+ from recurvedata.operators.sensor_operator import SensorOperator
8
+ from recurvedata.operators.sql_operator import SQLOperator
9
+ from recurvedata.operators.transfer_operator import TransferOperator
10
+ from recurvedata.operators.web_init import init_operator_web
@@ -0,0 +1,28 @@
1
+ from typing import Any
2
+
3
+ import jsonschema
4
+
5
+ from recurvedata.core.translation import convert_lazy_string
6
+
7
+
8
class Configurable:
    """Mixin for classes that describe their configuration with a JSON schema."""

    enabled = True

    @classmethod
    def config_schema(cls) -> dict:
        """Return the JSON schema of the configuration; empty by default."""
        return dict()

    @classmethod
    def name(cls) -> str:
        """Return the class name, used as the identifier in ``to_dict``."""
        return cls.__name__

    @classmethod
    def to_dict(cls) -> dict:
        """Return a dict holding the class ``name`` and its ``config_schema``."""
        return dict(name=cls.name(), config_schema=cls.config_schema())

    @classmethod
    def validate(cls, configuration: dict[str, Any]) -> dict[str, Any]:
        """Validate ``configuration`` against the class schema.

        The schema is first passed through ``convert_lazy_string``
        (presumably resolving lazily translated strings; confirm against
        ``recurvedata.core.translation``) and then handed to
        ``jsonschema.validate``, which raises on a mismatch.

        Returns:
            The same ``configuration`` object, unchanged, when it is valid.
        """
        resolved_schema = convert_lazy_string(cls.config_schema())
        jsonschema.validate(configuration, resolved_schema)
        return configuration
@@ -0,0 +1,21 @@
1
+ import os
2
+
3
+ from recurvedata.operators.utils.fs import get_exist_path
4
+
5
+
6
class Config:  # todo: move to somewhere else
    """Static settings shared by the operators package."""

    DATA_ROOT = "/opt/recurve/worker_data"

    # Template for a pyenv environment's bin directory. The ``{pyenv}``
    # placeholder is filled in later with ``str.format``. The pyenv root is
    # whatever ``get_exist_path`` yields for the candidates (environment
    # override first, then ~/.pyenv), or /opt/pyenv when it yields nothing.
    # NOTE(review): presumably get_exist_path returns the first candidate that
    # exists and tolerates a None entry; confirm in operators.utils.fs.
    PYENV_BASE_BIN_PATH = os.path.join(
        get_exist_path([os.getenv("RECURVE__WORKER__PYENV__BASE"), "~/.pyenv"]) or "/opt/pyenv",
        "versions/{pyenv}/bin",
    )
    # Still a template: carries the same ``{pyenv}`` placeholder.
    PYENV_PYTHON_PATH = os.path.join(PYENV_BASE_BIN_PATH, "python")

    # Bin directory of the environment named "recurve_executor".
    RECURVE_EXECUTOR_PYENV_BIN_PATH = PYENV_BASE_BIN_PATH.format(pyenv="recurve_executor")

    REDIS_URL = "redis://localhost:6381/13"
18
+
19
+
20
+ # CONF is a global object used to look up configuration values.
21
+ CONF = Config()