recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,255 @@
1
+ import contextvars
2
+ import functools
3
+ import inspect
4
+ import os
5
+ import warnings
6
+ from typing import TYPE_CHECKING, Any, Callable
7
+
8
+ if TYPE_CHECKING:
9
+ from recurvedata.operators.task import BaseTask
10
+
11
+ try:
12
+ from fsspec.asyn import get_running_loop, sync # todo
13
+ except ImportError:
14
+ pass
15
+
16
+ from recurvedata.operators.operator import BaseOperator, _registry, get_operator_class
17
+
18
+
19
class Context(object):
    """
    Object used by both the Web side and the Worker side. It provides:

    1. Function registration.
       Before use, some functions have to be registered; they back
       ``config_schema`` and ``validate``:
         get_connection_names_by_type: return the connection names for a
             connection type. May be sync or async.
         get_connection_by_name: return the connection object for a
             connection name. May be sync or async. (todo)
    2. list_config_schemas
    3. get_supported_operators
    4. get_config_schema
    5. Functions needed while a Worker executes a task.

    sync/async calling
    Context supports both synchronous and asynchronous callers:
    1. Web calls are async, Worker calls are sync; the functions the Web side
       registers (get_connection_names_by_type, ...) are async, the ones the
       Worker side registers are sync.
    2. To keep async/await out of operators and keep operator code simple,
       every Operator is written synchronously: it only offers synchronous
       config_schema / validate / execute, and the context offers synchronous
       get_connection_names_by_type etc. for operators to call.
    3. The get_connection_names_by_type registered by Web is async while the
       one exposed by Context is sync, so Context converts the registered
       async function into a synchronous call.
    4. Operator.config_schema is synchronous while the Web side needs an
       async entry point, so Context.get_config_schema calls
       Operator.config_schema asynchronously.
    """

    def __init__(self):
        # Returns connection names for a connection type. May be sync or async.
        self._get_connection_names_by_type: Callable = None

        # Returns the connection object for a connection name. May be sync or async.
        self._get_connection_by_name: Callable = None

        self.current_project_id = contextvars.ContextVar("Recurve Project ID")

        self._pid = os.getpid()
        self._loop = None
        self.async_mode = False
        self._functions = {}
        # Initialised here so reading ``client`` before it is assigned yields
        # None instead of raising AttributeError.
        self._client = None

    def init_context(self, get_connection_names_by_type: Callable = None, get_connection_by_name: Callable = None):
        """
        Register the connection lookup callbacks.

        :param get_connection_names_by_type: returns connection names for a connection type;
            may be sync or async.
            Signature: get_connection_names_by_type(project_id, connection_type)
        :param get_connection_by_name: returns the connection object for a connection name;
            may be sync or async.
            Signature: get_connection_by_name(project_id, connection_name)
        """
        self._get_connection_names_by_type = get_connection_names_by_type
        self._get_connection_by_name = get_connection_by_name
        # async_mode is derived from the names callback only.
        self.async_mode = inspect.iscoroutinefunction(self._get_connection_names_by_type)

    @property
    def loop(self):
        """
        Return the event loop, resolving and caching it on first access.

        :raises RuntimeError: when accessed from a process other than the one
            that created this Context.
        """
        if self._pid != os.getpid():
            raise RuntimeError("This class is not fork-safe")
        if self._loop:
            return self._loop
        # ``get_running_loop`` comes from the optional ``fsspec.asyn`` import at
        # the top of the module; if that import failed the name is undefined here.
        # todo: asyncio.get_event_loop() / fsspec get_loop() were considered instead
        self._loop = get_running_loop()
        return self._loop

    def get_connection_names_by_type(self, connection_type: str) -> list[str]:
        """
        Return the connection names for the given connection type.

        On the Web side the registered callback is async; on the Worker side
        it is sync. Async callbacks are driven through ``sync`` on ``self.loop``.

        :param connection_type: connection type passed through to the callback
        :return: whatever the registered callback returns
        """
        project_id = self.current_project_id.get()
        if inspect.iscoroutinefunction(self._get_connection_names_by_type):
            return sync(self.loop, self._get_connection_names_by_type, project_id, connection_type)
        return self._get_connection_names_by_type(project_id, connection_type)

    def get_connection_by_name(self, connection_name: str):
        """
        Return the connection registered under ``connection_name`` for the current project.

        Mirrors ``get_connection_names_by_type``: async callbacks go through ``sync``.
        """
        project_id = self.current_project_id.get()
        if inspect.iscoroutinefunction(self._get_connection_by_name):
            return sync(self.loop, self._get_connection_by_name, project_id, connection_name)
        return self._get_connection_by_name(project_id, connection_name)

    def get_connection_choices_by_type(self, connection_type):
        """Deprecated alias of ``get_connection_names_by_type``; emits a DeprecationWarning."""
        warnings.warn(
            "This function is deprecated. Please use `get_connection_names_by_type`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.get_connection_names_by_type(connection_type)

    async def async_call_synchronous_func(self, func, *args):
        """
        Run the synchronous ``func(*args)`` in the loop's default executor and await its result.

        The current project id is read here and re-applied inside the executor
        call through ``contextvars_wrapper``.
        """
        project_id = self.current_project_id.get()
        loop = self.loop
        return await loop.run_in_executor(None, self.contextvars_wrapper(project_id, func), *args)

    def must_get_connection_by_name(self, connection_name: str):
        """
        Like ``get_connection_by_name`` but raise when nothing truthy is returned.

        :raises ValueError: if the lookup result is falsy
        """
        connection = self.get_connection_by_name(connection_name)
        if not connection:
            raise ValueError(f"connection {connection_name} not exists")
        return connection

    async def validate_operator_configuration(self, operator_name: str, configuration: dict, project_id: str):
        """
        Asynchronously run ``ui_validate`` of the named operator on ``configuration``.

        :raises ValueError: if no operator class is found for ``operator_name``
        """
        self.current_project_id.set(project_id)
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if not operator_cls:
            raise ValueError(f"no operator {operator_name}")
        return await self.async_call_synchronous_func(operator_cls.ui_validate, configuration)

    def validate_operator_configuration_synchronously(self, operator_name: str, configuration: dict, project_id: str):
        """
        Synchronous counterpart of ``validate_operator_configuration``.

        :raises ValueError: if no operator class is found for ``operator_name``
        """
        self.current_project_id.set(project_id)
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if not operator_cls:
            raise ValueError(f"no operator {operator_name}")
        if not self.async_mode:
            return operator_cls.ui_validate(configuration)
        # NOTE(review): ``sync`` is fsspec.asyn.sync, which presumably expects a
        # coroutine function, while the class docstring describes operator methods
        # as synchronous - confirm this branch behaves as intended.
        return sync(self.loop, operator_cls.ui_validate, configuration)

    @staticmethod
    def get_ds_name_field_values(operator_name: str, rendered_config: dict) -> list[str]:
        """
        Delegate to the named operator's ``get_ds_name_field_values``.

        :raises ValueError: if no operator class is found for ``operator_name``
        """
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if not operator_cls:
            raise ValueError(f"no operator {operator_name}")
        return operator_cls.get_ds_name_field_values(rendered_config)

    def contextvars_wrapper(self, project_id, func):
        """
        Wrap ``func`` so that ``current_project_id`` is set to ``project_id`` while it runs.

        The previous value is restored afterwards, including when ``func``
        raises, so a failure cannot leave the project id set for later calls.
        """

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            token = self.current_project_id.set(project_id)
            try:
                return func(*args, **kwargs)
            finally:
                self.current_project_id.reset(token)

        return wrapper

    async def get_config_schema(self, operator_name: str, project_id: str):
        """
        Return the operator's ``ui_config_schema`` result.

        :param operator_name: name used to look up the operator class
        :param project_id: stored in ``current_project_id`` before the lookup
        :return: the schema, or None when no operator class is found
            (unlike the synchronous variant, which raises)
        """
        self.current_project_id.set(project_id)
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if operator_cls:
            return await self.async_call_synchronous_func(operator_cls.ui_config_schema)
        return None

    def get_config_schema_synchronously(self, operator_name: str, project_id: str):
        """
        Synchronous counterpart of ``get_config_schema``.

        :raises ValueError: if no operator class is found for ``operator_name``
        """
        self.current_project_id.set(project_id)
        operator_cls: BaseOperator = get_operator_class(operator_name)
        if not operator_cls:
            raise ValueError(f"no operator {operator_name}")
        if not self.async_mode:
            return operator_cls.ui_config_schema()
        # NOTE(review): same concern as in validate_operator_configuration_synchronously -
        # confirm ``sync`` accepts this callable.
        return sync(self.loop, operator_cls.ui_config_schema)

    @staticmethod
    def get_supported_operators() -> list[str]:
        """Return the names of registered operators whose class has a truthy ``enabled``."""
        return [op_name for op_name, op_cls in _registry.items() if op_cls.enabled]

    async def list_config_schemas(self, project_id: str):
        """
        Return ``{"name", "config_schema"}`` entries for every registered operator.

        NOTE(review): this uses ``config_schema`` while the synchronous variant
        goes through ``ui_config_schema`` - confirm the difference is intended.
        """
        self.current_project_id.set(project_id)
        res_lst = []

        for operator_name, operator_cls in _registry.items():
            res_lst.append(
                {
                    "name": operator_name,
                    "config_schema": await self.async_call_synchronous_func(operator_cls.config_schema),
                }
            )
        return res_lst

    def list_config_schemas_synchronously(self, project_id: str):
        """Return ``{"name", "config_schema"}`` entries built with ``get_config_schema_synchronously``."""
        self.current_project_id.set(project_id)
        return [
            {
                "name": operator_name,
                "config_schema": self.get_config_schema_synchronously(operator_name, project_id),
            }
            for operator_name, _ in _registry.items()
        ]

    @property
    def client(self):
        """Client object attached to this context, or None if none was assigned."""
        return self._client

    @client.setter
    def client(self, client):
        self._client = client

    def register_function(self, name: str, function: Callable):
        """Register ``function`` under ``name``; a later registration replaces an earlier one."""
        self._functions[name] = function

    def init_task_instance_on_task_start(self, task: "BaseTask", *args, **kwargs) -> int:
        """
        Call the hook registered as ``init_task_instance_on_task_start``.

        :return: the hook's return value, or None when no hook is registered
        """
        func = self._functions.get("init_task_instance_on_task_start")
        if func:
            return func(task, *args, **kwargs)

    def update_task_instance_on_task_finish(
        self,
        task: "BaseTask",
        ti_id: int,
        task_status: str,
        meta: Any,
        error: Exception,
        error_stack: str,
        *args,
        **kwargs,
    ):
        """
        Call the hook registered as ``update_task_instance_on_task_finish``.

        :return: the hook's return value, or None when no hook is registered
        """
        func = self._functions.get("update_task_instance_on_task_finish")
        if func:
            return func(task, ti_id, task_status, meta, error, error_stack, *args, **kwargs)


# Module-level Context instance created at import time.
context = Context()
@@ -0,0 +1,2 @@
1
+ from recurvedata.operators.dbt_operator.model_pipeline_link_operator import LinkModelPipelineOperator
2
+ from recurvedata.operators.dbt_operator.operator import DBTOperator
@@ -0,0 +1,55 @@
1
+ from recurvedata.core.translation import _l
2
+ from recurvedata.operators.link_operator import LinkOperator
3
+
4
+
5
class LinkModelPipelineOperator(LinkOperator):
    """Link operator whose configuration points at a model pipeline and a workflow."""

    @classmethod
    def config_schema(cls) -> dict:
        """Return the JSON-schema style configuration for this operator.

        The front-end does not use this config schema for display.
        """

        def reference_field(title, description, **editor_options) -> dict:
            # Every entry is a string edited with CodeEditorWithReferencesField;
            # only the labels and the editor options differ.
            return {
                "type": "string",
                "title": title,
                "description": description,
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": editor_options,
            }

        # "variables" carries a default between title and description, so it is
        # spelled out instead of going through the helper.
        variables_field = {
            "type": "string",
            "title": _l("Variables"),
            "default": "{}",
            "description": _l("Variables in JSON format"),
            "ui:field": "CodeEditorWithReferencesField",
            "ui:options": {"type": "code", "lang": "json"},
        }

        properties = {
            "model_pipeline_id": reference_field(
                _l("Model Pipeline ID"), _l("Model Pipeline ID"), type="plain"
            ),
            "workflow_id": reference_field(_l("Workflow ID"), _l("Workflow ID"), type="plain"),
            "workflow_version": reference_field(
                _l("Workflow Version"), _l("Workflow Version"), type="plain"
            ),
            "variables": variables_field,
        }
        mandatory = ["model_pipeline_id", "workflow_id", "workflow_version"]

        return {"type": "object", "properties": properties, "required": mandatory}
@@ -0,0 +1,353 @@
1
+ import datetime
2
+ import logging
3
+ from dataclasses import dataclass
4
+ from typing import TYPE_CHECKING, Union
5
+
6
+ from recurvedata.core.translation import _l
7
+ from recurvedata.dbt.utils import parse_run_model_log
8
+ from recurvedata.exceptions import MaxRetriesExceededException
9
+ from recurvedata.operators.operator import BaseOperator
10
+ from recurvedata.operators.task import BaseTask
11
+ from recurvedata.utils.date_time import utcnow_aware
12
+ from recurvedata.utils.helpers import get_environment_variable
13
+
14
+ if TYPE_CHECKING:
15
+ from recurvedata.dbt.consts import DbtMaterialization
16
+ from recurvedata.dbt.schemas import PreviewResult
17
+ from recurvedata.dbt.service import DbtService
18
+
19
+ try:
20
+ from dbt.cli.main import dbtRunnerResult
21
+ except ImportError:
22
+ dbtRunnerResult = None
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
@dataclass
class TaskRuntimeException:
    """Carrier for an exception raised while a task runs, reportable as a failed result."""

    # The underlying error being reported.
    exception: Exception

    def to_dict(self):
        """Return a failure payload naming the wrapped exception's type and message."""
        wrapped = self.exception
        error_info = {
            "type": "TaskRuntimeException-" + type(wrapped).__name__,
            "message": str(wrapped),
        }
        return {"success": False, "exception": error_info}
39
+
40
+
41
+ @dataclass
42
+ class DbtResultConstructor:
43
+ project_id: int
44
+ model_name: str
45
+ materialization: "DbtMaterialization"
46
+ compiled_code: str = None
47
+
48
+ @staticmethod
49
+ def _construct_timing(action_name: str, start_time: datetime.datetime, end_time: datetime.datetime) -> list[dict]:
50
+ def _format_time(dt: datetime.datetime) -> str:
51
+ dt_utc = dt.astimezone(datetime.timezone.utc)
52
+ ds = dt_utc.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
53
+ return ds
54
+
55
+ return [
56
+ {"name": action_name, "started_at": _format_time(start_time), "completed_at": _format_time(end_time)},
57
+ ]
58
+
59
+ def construct_ephemeral_materialized_result(
60
+ self, materialized_result_dct: dict, start_time: datetime.datetime, end_time: datetime.datetime
61
+ ) -> dict:
62
+ """
63
+ For ephemeral materialization, the materialized_result_dct $result.results is empty.
64
+ CP relies on $result.results to show message and start/end time
65
+ """
66
+ if not materialized_result_dct["success"]:
67
+ return materialized_result_dct
68
+ result_dct = materialized_result_dct["result"]
69
+ if not result_dct:
70
+ return materialized_result_dct
71
+ sub_results: list[dict] = result_dct["results"]
72
+ if not sub_results:
73
+ result_dct["results"] = [
74
+ {
75
+ "unique_id": self.format_model_unique_id(),
76
+ "status": "success",
77
+ "timing": self._construct_timing("execute", start_time=start_time, end_time=end_time),
78
+ "message": "Ephemeral model compiled successfully",
79
+ "compiled_code": self.compiled_code,
80
+ }
81
+ ]
82
+ return materialized_result_dct
83
+
84
+ def format_model_unique_id(self) -> str:
85
+ return f"model.project_{self.project_id}.{self.model_name}"
86
+
87
+
88
class DbtTask(BaseTask):
    """Task that runs one dbt model, optionally its data tests, and reports the outcome."""

    def execute_impl(self):
        """Run the configured model and its data tests, then report the results.

        Steps, all performed while holding a lock keyed by project id and
        model name:

        1. Run the model via ``DbtService.run_model``.
        2. If no compiled SQL came back, fall back to the raw model SQL; if
           that is empty too, mark the materialization as failed.
        3. On a failed materialization, report it and raise.
        4. Otherwise run the data tests (unless ``dag.skip_data_tests``),
           report everything, and raise if the test result is unsuccessful.

        :raises Exception: when materialization fails or test cases fail/error.
        """
        from recurvedata.dbt.schemas import PreviewResult
        from recurvedata.dbt.service import DbtService
        from recurvedata.dbt.utils import format_var
        from recurvedata.utils.redis_lock import RedisLock

        # Same lookup the rest of the class uses (model_name, else entity_name).
        model_name = self.model_name

        lock = RedisLock(
            f"dbt_task_{self.dag.project_id}_{model_name}", auto_extend=True, expire=60, timeout=60 * 60 * 1
        )
        lock.acquire()

        try:
            service = DbtService(self.dag.project_id)
            service.prepare()
            model_id: int = int(self.rendered_config["entity_id"])
            var_str = format_var(service, self.get_template_context())

            materialize_start_time = utcnow_aware()
            full_refresh = self.dag.full_refresh_models

            model_run_result = service.run_model(model_name, var_str, full_refresh=full_refresh, include_run_log=True)
            compiled_code = model_run_result.compiled_sql
            materialized_result = model_run_result.result
            run_sql = model_run_result.run_sql
            run_log = model_run_result.run_log

            materialize_end_time = utcnow_aware()

            if not compiled_code:
                logger.info("compiled_code empty, use un-compiled sql")
                compiled_code = service.read_model_sql(model_name)

            if not compiled_code:
                # Neither compiled nor raw SQL is available: force a failed result.
                logger.info("compiled_code still empty, set materialized_result to failed")
                materialized_result.success = False
                materialized_result.exception = RuntimeError("Materialization failed due to empty compiled_code")
                materialized_result.result = None

            if not materialized_result.success:
                # Report the failure before raising so it is not lost.
                self.send_dbt_model_result(
                    service,
                    compiled_sql=compiled_code,
                    run_sql=run_sql,
                    run_log=run_log,
                    try_number=get_environment_variable("AIRFLOW_RETRY_NUMBER", int),
                    materialized_result=materialized_result,
                    materialize_start_time=materialize_start_time,
                    materialize_end_time=materialize_end_time,
                )
                raise Exception(f"run model {model_name} materialized failed")

            test_case_skipped = False
            if self.dag.skip_data_tests:
                logger.info("skip data tests")
                test_result = None
                test_case_sample_result = None
                test_case_skipped = True
            else:
                logger.info("run data tests")
                test_result = service.run_test(model_id, var_str)
                test_case_sample_result: dict[str, PreviewResult] = service.run_test_sample_data(test_result)
            self.send_dbt_model_result(
                service,
                compiled_sql=compiled_code,
                run_sql=run_sql,
                run_log=run_log,
                try_number=get_environment_variable("AIRFLOW_RETRY_NUMBER", int),
                materialized_result=materialized_result,
                test_case_result=test_result,
                test_case_sample_result=test_case_sample_result,
                materialize_start_time=materialize_start_time,
                materialize_end_time=materialize_end_time,
                test_case_skipped=test_case_skipped,
            )

            test_case_result_dct = self.format_test_case_result(test_result)
            if test_case_result_dct and not test_case_result_dct["success"]:
                raise Exception("Task Run failed due to Error / Failed test cases")
        except Exception as e:
            # Re-raise with the explicit cause cleared and implicit context
            # suppressed (`from None`), keeping the reported traceback short.
            raise e from None
        finally:
            lock.release()

    @staticmethod
    def format_materialized_result(
        project_id: int,
        model_name: str,
        materialization: Union["DbtMaterialization", str],
        compiled_code: str,
        materialized_result: Union["dbtRunnerResult", "TaskRuntimeException"],
        materialize_start_time: datetime.datetime = None,
        materialize_end_time: datetime.datetime = None,
    ) -> dict | None:
        """
        materialized: model, ephemeral, view, incremental

        Convert a materialization outcome into a plain dict.

        For ephemeral models a synthetic result entry is filled in via
        ``DbtResultConstructor``. A result that claims success but carries no
        per-node results is downgraded to ``success = False``.

        :return: the result dict, or ``None`` when ``materialized_result`` is falsy.
        """
        from recurvedata.dbt.consts import DbtMaterialization
        from recurvedata.dbt.utils import dbt_runner_result_to_dict

        if not materialized_result:
            return
        if isinstance(materialized_result, TaskRuntimeException):
            materialized_result_dct = materialized_result.to_dict()
        else:
            materialized_result_dct = dbt_runner_result_to_dict(materialized_result)

        if materialization == DbtMaterialization.EPHEMERAL:
            constructor = DbtResultConstructor(
                project_id=project_id,
                model_name=model_name,
                materialization=materialization,
                compiled_code=compiled_code,
            )
            materialized_result_dct = constructor.construct_ephemeral_materialized_result(
                materialized_result_dct, materialize_start_time, materialize_end_time
            )

        if materialized_result_dct["success"]:
            # `or {}` rather than a .get() default: the "result" key may be
            # present with a None value, which a default would not replace.
            results = (materialized_result_dct.get("result") or {}).get("results")
            if not results:
                # The selection criterion '' does not match any nodes
                materialized_result_dct["success"] = False

        return materialized_result_dct

    @staticmethod
    def format_test_case_result(test_case_result: Union["dbtRunnerResult", "TaskRuntimeException"]) -> dict | None:
        """Convert a test outcome into a plain dict, or ``None`` when there is none."""
        from recurvedata.dbt.utils import dbt_runner_result_to_dict

        if not test_case_result:
            return
        if isinstance(test_case_result, TaskRuntimeException):
            test_case_result_dct = test_case_result.to_dict()
        else:
            test_case_result_dct = dbt_runner_result_to_dict(test_case_result)
        return test_case_result_dct

    @property
    def model_name(self) -> str:
        """Model name from the rendered config: ``model_name``, falling back to ``entity_name``."""
        return self.rendered_config.get("model_name") or self.rendered_config.get("entity_name")

    @property
    def materialization(self) -> str | None:
        """The ``materialized`` value from the rendered config, or ``None`` if unset."""
        return self.rendered_config.get("materialized")

    def send_dbt_model_result(
        self,
        service: "DbtService",
        compiled_sql: str | None,
        try_number: int,
        run_sql: str | None = None,
        run_log: str | None = None,
        materialized_result: Union["dbtRunnerResult", "TaskRuntimeException"] = None,
        test_case_result: Union["dbtRunnerResult", "TaskRuntimeException"] = None,
        test_case_sample_result: dict[str, "PreviewResult"] = None,
        materialize_start_time: datetime.datetime = None,
        materialize_end_time: datetime.datetime = None,
        test_case_skipped: bool = False,
    ):
        """Format the run/test outcome and send it through ``service.client``.

        ``MaxRetriesExceededException`` from the client is logged and
        swallowed; any other exception propagates. Once the send attempt
        completes (or exhausts its retries), ``self.sent_dbt_model_result`` is
        set so ``on_execute_impl_error`` does not report a second time.
        """
        materialized_result_dct = self.format_materialized_result(
            self.dag.project_id,
            self.model_name,
            self.materialization,
            compiled_sql,
            materialized_result,
            materialize_start_time,
            materialize_end_time,
        )
        test_case_result_dct = self.format_test_case_result(test_case_result)

        if not compiled_sql:
            logger.info(f"compiled_sql empty, materialized_result_dct: {materialized_result_dct}")

        if test_case_sample_result:
            test_case_sample_result_dct = {
                unique_id: preview_obj.model_dump() for unique_id, preview_obj in test_case_sample_result.items()
            }
        else:
            test_case_sample_result_dct = None

        run_sql_log = parse_run_model_log(run_log)

        logger.info(f"debug: compiled sql: {compiled_sql}")
        logger.info(f"debug: run sql: {run_sql}")
        logger.info(f"debug: run_log: {run_log}")
        logger.info(f"debug: run_sql_log: {run_sql_log}")
        logger.info(f"debug: materialized_result_dct: {materialized_result_dct}")
        logger.info(f"debug: test_case_result_dct: {test_case_result_dct}")
        logger.info(f"debug: test_case_sample_result_dct: {test_case_sample_result_dct}")

        try:
            service.client.send_dbt_model_result(
                self.dag.id,
                self.node.node_key,
                compiled_sql,
                run_sql,
                run_sql_log=run_sql_log,
                raw_materialized_result=materialized_result_dct,
                raw_test_result=test_case_result_dct,
                test_case_sample_data=test_case_sample_result_dct,
                materialization=self.materialization,
                try_number=try_number,
                test_case_skipped=test_case_skipped,
            )
        except MaxRetriesExceededException as e:
            logger.exception(f"send_dbt_model_result failed, error: {e}")
        self.sent_dbt_model_result = True

    def on_execute_impl_error(self, err: Exception):
        """Report ``err`` as a failed materialization unless a result was already sent.

        NOTE(review): presumably invoked by the base task when ``execute_impl``
        raises -- confirm against ``BaseTask``.
        """
        from recurvedata.dbt.service import DbtService

        if getattr(self, "sent_dbt_model_result", False):
            return
        service = DbtService(self.dag.project_id)
        self.send_dbt_model_result(
            service,
            compiled_sql=None,
            try_number=get_environment_variable("AIRFLOW_RETRY_NUMBER", int),
            materialized_result=TaskRuntimeException(err),
            test_case_result=None,
        )
312
+
313
+
314
class DBTOperator(BaseOperator):
    """Operator definition whose task class is :class:`DbtTask`."""

    task_cls = DbtTask

    @classmethod
    def config_schema(cls) -> dict:
        """Return the schema dict for this operator: an entity name and a materialization."""
        entity_name_field = {
            "type": "string",
            "title": _l("Entity Name"),
            "description": _l("Entity Name"),
            "ui:field": "CodeEditorWithReferencesField",
            "ui:options": {
                "type": "plain",
            },
        }
        materialized_field = {  # for front-end display
            "type": "string",
            "title": _l("Materialized"),
            "default": "view",
            "enum": ["table", "view", "incremental", "ephemeral"],
            "enumNames": ["table", "view", "incremental", "ephemeral"],
        }

        schema = {
            "type": "object",
            "properties": {
                "entity_name": entity_name_field,
                "materialized": materialized_field,
            },
            "required": ["entity_name", "materialized"],
        }
        return schema

    @classmethod
    def validate(cls, configuration) -> dict:
        """Return ``configuration`` unchanged; no validation is performed here."""
        return configuration

    @classmethod
    def ui_config_to_config(cls, configuration: dict) -> dict:
        """Return the ``"source"`` entry of the UI configuration."""
        return configuration["source"]

    @classmethod
    def get_ds_name_field_values(cls, rendered_config: dict) -> list[str]:
        """Return an empty list; this operator reports no data-source name fields."""
        no_values: list[str] = []
        return no_values
@@ -0,0 +1 @@
1
+ from recurvedata.operators.link_operator.operator import LinkOperator