recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,97 @@
1
+ import os
2
+
3
+ from recurvedata.client import Client
4
+ from recurvedata.dbt.schemas import AnalyticsDatabaseConnectionAndVariable, DbtGzMd5
5
+ from recurvedata.executors.schemas import ConnectionItem
6
+ from recurvedata.executors.utils import get_airflow_run_id
7
+ from recurvedata.utils import get_env_id
8
+
9
+
10
class DbtClient(Client):
    """HTTP client for the ``/api/dbt/...`` endpoints.

    Every method adds the current environment id (``get_env_id()``) to the
    query parameters and delegates to ``self.request`` / ``self.request_file``
    (presumably provided by the ``Client`` base class -- confirm there).
    All calls are issued with ``retries=5``.
    """

    def fetch_project_gzip_md5(self, project_id: int) -> DbtGzMd5:
        """GET ``/api/dbt/project/{project_id}/md5`` and parse the reply as ``DbtGzMd5``.

        :param project_id: id of the dbt project; sent both in the path and as a query parameter
        """
        params = {"env_id": get_env_id(), "project_id": project_id}
        return self.request(
            "GET",
            path=f"/api/dbt/project/{project_id}/md5",
            response_model_class=DbtGzMd5,
            params=params,
            retries=5,
        )

    def fetch_project_gzip(self, project_id: int, file_name: str, client_md5: str | None = None):
        """GET ``/api/dbt/project/{project_id}/gz`` through ``request_file``.

        :param project_id: id of the dbt project; sent both in the path and as a query parameter
        :param file_name: forwarded unchanged to ``request_file``
            (presumably the local download target -- confirm against ``Client``)
        :param client_md5: md5 of the copy the client already holds. It is only
            sent when truthy; if it still matches on the server side the file
            is not downloaded again.
        """
        params = {"env_id": get_env_id(), "project_id": project_id}
        if client_md5:
            params["client_md5"] = client_md5
        return self.request_file(
            "GET",
            path=f"/api/dbt/project/{project_id}/gz",
            params=params,
            file_name=file_name,
            retries=5,
        )

    def send_dbt_model_result(
        self,
        job_id: int,
        node_key: str,
        compiled_sql: str,
        run_sql: str,
        run_sql_log: list[dict] | None,
        materialization: str,
        try_number: int,
        raw_materialized_result: dict | None = None,
        raw_test_result: dict | None = None,
        test_case_sample_data: dict[str, dict] | None = None,
        test_case_skipped: bool = False,
    ):
        """POST one model's result to ``/api/dbt/model_result/{job_id}/{node_key}``.

        The JSON body carries the arguments below plus ``data_interval_end``,
        which is read from the ``AIRFLOW_DATA_INTERVAL_END`` environment
        variable (``None`` when unset). The query string carries ``env_id``
        and the run id returned by ``get_airflow_run_id()``.

        Note that ``compiled_sql`` is sent under the key ``compiled_code``.
        The request uses ``timeout=10``.
        """
        payload = {
            "raw_materialized_result": raw_materialized_result,
            "raw_test_result": raw_test_result,
            "compiled_code": compiled_sql,
            "run_sql": run_sql,
            "run_sql_log": run_sql_log,
            "materialization": materialization,
            "test_case_sample_data": test_case_sample_data,
            "data_interval_end": os.environ.get("AIRFLOW_DATA_INTERVAL_END"),
            "try_number": try_number,
            "test_case_skipped": test_case_skipped,
        }
        params = {
            "env_id": get_env_id(),
            "run_id": get_airflow_run_id(),
        }
        return self.request(
            "POST",
            path=f"/api/dbt/model_result/{job_id}/{node_key}",
            params=params,
            json=payload,
            timeout=10,
            retries=5,
        )

    def get_connection(self, project_id: int) -> ConnectionItem:
        """GET ``/api/dbt/connection`` and parse the reply as ``ConnectionItem``.

        :param project_id: id of the dbt project, sent as a query parameter
        """
        params = {
            "env_id": get_env_id(),
            "project_id": project_id,
        }
        return self.request(
            "GET",
            path="/api/dbt/connection",
            response_model_class=ConnectionItem,
            params=params,
            retries=5,
        )

    def get_connection_and_variables(self, project_id: int) -> AnalyticsDatabaseConnectionAndVariable:
        """GET ``/api/dbt/connection_and_variable``.

        The reply is parsed as ``AnalyticsDatabaseConnectionAndVariable``.

        :param project_id: id of the dbt project, sent as a query parameter
        """
        params = {
            "env_id": get_env_id(),
            "project_id": project_id,
        }
        return self.request(
            "GET",
            path="/api/dbt/connection_and_variable",
            response_model_class=AnalyticsDatabaseConnectionAndVariable,
            params=params,
            retries=5,
        )
@@ -0,0 +1,99 @@
1
+ import os
2
+ import shutil
3
+ import sys
4
+ from dataclasses import dataclass
5
+ from enum import Enum
6
+
7
+ from recurvedata.config import RECURVE_DBT_HOME
8
+
9
# Location of the `dbt` executable: whatever is found on PATH, otherwise a
# file named `dbt` in the same directory as the running Python interpreter.
DBT_BIN_PATH = shutil.which("dbt")
if not DBT_BIN_PATH:
    DBT_BIN_PATH = os.path.join(os.path.dirname(sys.executable), "dbt")

# Profile name used when building dbt profile configuration.
DBT_PROFILE_KEY = "profile"

# Materialization assumed when a node's config does not specify one.
DEFAULT_MATERIALIZED = "view"
13
+
14
+
15
class DbtFileNames(str, Enum):
    """Names of files and directories found inside a dbt project.

    Members are ``str`` instances as well as enum members, so each one
    compares equal to its raw name.
    """

    # package management
    PACKAGES_FILE = "packages.yml"
    PACKAGE_LOCK_FILE = "package-lock.yml"
    # project / profile configuration
    DBT_PROJECT_YML_FILE = "dbt_project.yml"
    PROFILES_FILE = "profiles.yml"
    # generated artifact
    MANIFEST_FILE = "manifest.json"
    # directory holding installed packages
    DEPS_PACKAGE_DIR = "dbt_packages"
22
+
23
+
24
@dataclass
class DbtPath:
    """Filesystem locations derived for one dbt project under ``base_path``.

    The project directory name is ``project_<project_id>_env_<env_id>``, with
    ``_pipeline_<pipeline_id>`` appended when ``pipeline_id`` is truthy.
    """

    project_id: int
    env_id: int
    base_path: str = RECURVE_DBT_HOME
    # Optional: only used (when truthy) to give a pipeline its own directory.
    pipeline_id: int | None = None

    @property
    def project_gzip_file(self) -> str:
        """Path of the project archive: the project directory plus ``.tar.gz``."""
        return f"{self.project_dir}.tar.gz"

    @property
    def project_dir(self) -> str:
        """Project directory: ``base_path`` joined with ``simple_project_dir``."""
        return os.path.join(self.base_path, self.simple_project_dir)

    @property
    def simple_project_dir(self) -> str:
        """Directory name (no parent path) for this project/env[/pipeline]."""
        if self.pipeline_id:
            return f"project_{self.project_id}_env_{self.env_id}_pipeline_{self.pipeline_id}"
        return f"project_{self.project_id}_env_{self.env_id}"

    @property
    def profiles_path(self) -> str:
        """Path of ``profiles.yml`` inside the project directory."""
        return format_profiles_path(self.project_dir)

    @property
    def dbt_project_yml_path(self) -> str:
        """Path of ``dbt_project.yml`` inside the project directory."""
        return format_dbt_project_yml_path(self.project_dir)

    @property
    def project_name(self) -> str:
        """Project name used in the target paths: ``project_<project_id>``."""
        return f"project_{self.project_id}"

    def _target_sql_path(self, stage: str, model_name: str) -> str:
        """Return ``<project_dir>/target/<stage>/<project_name>/models/<model_name>.sql``."""
        return os.path.join(self.project_dir, "target", stage, self.project_name, "models", model_name + ".sql")

    def get_model_compiled_sql_path(self, model_name: str) -> str:
        """Path of the model's SQL file under ``target/compiled``."""
        return self._target_sql_path("compiled", model_name)

    def get_model_run_sql_path(self, model_name: str) -> str:
        """Path of the model's SQL file under ``target/run``."""
        return self._target_sql_path("run", model_name)

    def get_model_sql_path(self, model_name: str) -> str:
        """Path of the model's source SQL file under ``models``."""
        return os.path.join(self.project_dir, "models", model_name + ".sql")
65
+
66
+
67
def format_profiles_path(project_dir: str) -> str:
    """Return the path of ``profiles.yml`` directly inside ``project_dir``."""
    # Reuse the shared DbtFileNames constant instead of repeating the literal;
    # ``.value`` keeps the result a plain ``str`` even for an empty project_dir.
    return os.path.join(project_dir, DbtFileNames.PROFILES_FILE.value)
69
+
70
+
71
def format_dbt_project_yml_path(project_dir: str) -> str:
    """Return the path of ``dbt_project.yml`` directly inside ``project_dir``."""
    # Reuse the shared DbtFileNames constant instead of repeating the literal;
    # ``.value`` keeps the result a plain ``str`` even for an empty project_dir.
    return os.path.join(project_dir, DbtFileNames.DBT_PROJECT_YML_FILE.value)
73
+
74
+
75
def format_packages_yml_path(project_dir: str) -> str:
    """Return the path of ``packages.yml`` directly inside ``project_dir``."""
    # Reuse the shared DbtFileNames constant instead of repeating the literal;
    # ``.value`` keeps the result a plain ``str`` even for an empty project_dir.
    return os.path.join(project_dir, DbtFileNames.PACKAGES_FILE.value)
77
+
78
+
79
def format_package_lock_path(project_dir: str) -> str:
    """Return the path of ``package-lock.yml`` directly inside ``project_dir``."""
    # Reuse the shared DbtFileNames constant instead of repeating the literal;
    # ``.value`` keeps the result a plain ``str`` even for an empty project_dir.
    return os.path.join(project_dir, DbtFileNames.PACKAGE_LOCK_FILE.value)
81
+
82
+
83
def format_installed_packages_path(project_dir: str) -> str:
    """Return the path of the ``dbt_packages`` directory inside ``project_dir``."""
    # Reuse the shared DbtFileNames constant instead of repeating the literal;
    # ``.value`` keeps the result a plain ``str`` even for an empty project_dir.
    return os.path.join(project_dir, DbtFileNames.DEPS_PACKAGE_DIR.value)
85
+
86
+
87
class DbtMaterialization(str, Enum):
    """Materialization names as string-valued enum members.

    Members are ``str`` instances, so ``DbtMaterialization.VIEW == "view"``.
    """

    VIEW = "view"
    TABLE = "table"
    EPHEMERAL = "ephemeral"
    INCREMENTAL = "incremental"
92
+
93
+
94
# Project sub-directories and top-level files in the "overwrite" set.
# NOTE(review): how these are consumed is not visible here -- presumably they
# are replaced wholesale when a project is refreshed; confirm against usage.
OVERWRITE_DIRECTORIES = ["macros", "models", "tests"]
OVERWRITE_FILES = [
    "dbt_project.yml",
    "profiles.yml",
    "packages.yml",
]
@@ -0,0 +1,275 @@
1
+ import argparse
2
+ import os
3
+ import re
4
+ import shlex
5
+ import sys
6
+ from dataclasses import dataclass
7
+ from enum import Enum
8
+ from pathlib import Path
9
+ from typing import Generator, Optional, Self
10
+ from unittest.mock import MagicMock
11
+
12
+ from recurvedata.dbt.consts import (
13
+ DBT_BIN_PATH,
14
+ DBT_PROFILE_KEY,
15
+ DEFAULT_MATERIALIZED,
16
+ DbtFileNames,
17
+ format_dbt_project_yml_path,
18
+ format_profiles_path,
19
+ )
20
+ from recurvedata.dbt.schemas import DbtGraph, DbtOperatorNode, SingleModelLineage
21
+ from recurvedata.dbt.utils import (
22
+ VariableJSONEncoder,
23
+ ensure_manifest_json_exists,
24
+ extract_project_name,
25
+ run_deps_if_necessary,
26
+ )
27
+
28
+
29
+ def _mock_cosmos_airflow():
30
+ """
31
+ cosmos will import airflow internally,
32
+ in CP we don't have airflow environ.
33
+ Airflow use sqlalchemy 1.4, will have some conflict with CP env.
34
+ """
35
+ airflow_mock = MagicMock()
36
+
37
+ airflow_mock.version = "2.9"
38
+ airflow_mock.DAG = dict
39
+ airflow_mock.TaskGroup = dict
40
+ airflow_mock.BaseOperator = dict
41
+ sys.modules["airflow"] = airflow_mock
42
+ sys.modules["airflow.models"] = airflow_mock
43
+ sys.modules["airflow.models.dag"] = airflow_mock
44
+ sys.modules["airflow.models.baseoperator"] = airflow_mock
45
+ sys.modules["airflow.models.taskinstance"] = airflow_mock
46
+ sys.modules["airflow.utils"] = airflow_mock
47
+ sys.modules["airflow.utils.task_group"] = airflow_mock
48
+ sys.modules["airflow.utils.strings"] = airflow_mock
49
+ sys.modules["airflow.utils.session"] = airflow_mock
50
+ sys.modules["airflow.utils.operator_helpers"] = airflow_mock
51
+ sys.modules["airflow.utils.context"] = airflow_mock
52
+ sys.modules["airflow.version"] = airflow_mock
53
+ sys.modules["airflow.hooks"] = airflow_mock
54
+ sys.modules["airflow.hooks.base"] = airflow_mock
55
+ sys.modules["airflow.exceptions"] = airflow_mock
56
+ sys.modules["airflow.configuration"] = airflow_mock
57
+ sys.modules["airflow.io"] = airflow_mock
58
+ sys.modules["airflow.io.path"] = airflow_mock
59
+
60
+
61
+ try:
62
+ import airflow.models # noqa
63
+ except ImportError:
64
+ _mock_cosmos_airflow()
65
+
66
+ try:
67
+ from cosmos import ExecutionConfig, ProfileConfig, ProjectConfig, RenderConfig, settings
68
+ from cosmos.dbt.graph import DbtGraph as CosmosDbtGraph
69
+ from cosmos.dbt.graph import DbtNode as CosmosDbtNode
70
+ except ImportError:
71
+ CosmosDbtGraph = None
72
+ CosmosDbtNode = None
73
+
74
+
75
class NodeType(str, Enum):
    """Node kinds recognised here, matching the first dot-separated segment of a node's ``unique_id``."""

    TEST = "test"
    MODEL = "model"
78
+
79
+
80
+ @dataclass
81
+ class ParsedModel:
82
+ model_name: str
83
+ project_name: str
84
+ materialized: str
85
+
86
+ @classmethod
87
+ def is_test_node(cls, node: "CosmosDbtNode") -> bool:
88
+ node_id = node.unique_id
89
+ tmp_lst = node_id.split(".")
90
+ node_type = tmp_lst[0]
91
+ return node_type == NodeType.TEST
92
+
93
+ @classmethod
94
+ def extract_node_model_id(cls, node: "CosmosDbtNode") -> int | None:
95
+ if cls.is_test_node(node):
96
+ # todo: singular test not supported(not in model_properties.yml)
97
+ model_properties_filename: str = node.file_path.name
98
+ pat = re.compile(r"^model_(?P<model_id>\d+)_properties.yml")
99
+ mobj = pat.match(model_properties_filename)
100
+ return mobj and int(mobj.group("model_id"))
101
+ else:
102
+ # tags rely on CP generation
103
+ pat = re.compile(r"^model_(?P<model_id>\d+)$")
104
+ for tag in node.tags:
105
+ mobj = pat.match(tag)
106
+ if mobj:
107
+ return int(mobj.group("model_id"))
108
+
109
+ @classmethod
110
+ def from_cosmos_node(cls, node: "CosmosDbtNode") -> Optional[Self]:
111
+ node_id = node.unique_id
112
+ tmp_lst = node_id.split(".")
113
+ node_type = tmp_lst[0]
114
+ if node_type != NodeType.MODEL:
115
+ return
116
+ project_name = tmp_lst[1]
117
+ model_name = ".".join(tmp_lst[2:])
118
+ return cls(
119
+ model_name=model_name,
120
+ project_name=project_name,
121
+ materialized=node.config.get("materialized", DEFAULT_MATERIALIZED),
122
+ )
123
+
124
+ def to_node_config(self) -> dict:
125
+ return {
126
+ "source": {
127
+ "entity_name": self.model_name,
128
+ "materialized": self.materialized,
129
+ }
130
+ }
131
+
132
+ def is_current_project(self, current_project_name):
133
+ return self.project_name == current_project_name
134
+
135
+
136
+ def _extract_select_from_command(command: str) -> list[str]:
137
+ """
138
+ extract --select content from user input command
139
+ """
140
+
141
+ parser = argparse.ArgumentParser(description="Parse dbt build command")
142
+ parser.add_argument("-s", "--select", nargs="+")
143
+
144
+ args = shlex.split(command)
145
+
146
+ if len(args) > 2: # omit `recurve build`
147
+ args = args[2:]
148
+
149
+ parsed_args, unknown = parser.parse_known_args(args)
150
+ return parsed_args.select
151
+
152
+
153
+ def _prepare_os_env():
154
+ # todo(chenjingmeng): move to connectors
155
+ os.environ["DBT_USER"] = ""
156
+ os.environ["DBT_PASSWORD"] = ""
157
+
158
+
159
def _construct_cosmos_dag_graph(
    dbt_project_dir: str, dbt_profiles_path: str, dbt_project_yml_path: str, select: list[str], variables: dict
) -> "CosmosDbtGraph":
    """Assemble (but do not load) a cosmos ``DbtGraph`` for the given project.

    :param dbt_project_dir: dbt project directory
    :param dbt_profiles_path: path to the project's profiles file
    :param dbt_project_yml_path: path to the project's yml file; the project name is read from it
    :param select: selection passed to cosmos' ``RenderConfig``
    :param variables: dbt variables; each value goes through
        ``VariableJSONEncoder.format_var``. A falsy value results in ``dbt_vars=None``.
    :return: the constructed cosmos graph; the caller is responsible for ``load()``
    """
    _prepare_os_env()
    settings.enable_cache = False

    rendering = RenderConfig(select=select, dbt_project_path=dbt_project_dir, dbt_deps=False)

    # when extract model, it always using dev as env
    profile = ProfileConfig(
        profile_name=DBT_PROFILE_KEY,
        target_name="dev",
        profiles_yml_filepath=dbt_profiles_path,
    )

    project_name = extract_project_name(dbt_project_yml_path)
    if variables:
        formatted_vars = {name: VariableJSONEncoder.format_var(value) for name, value in variables.items()}
    else:
        formatted_vars = None
    project = ProjectConfig(
        dbt_project_path=dbt_project_dir,
        project_name=project_name,
        dbt_vars=formatted_vars,
    )
    # Point cosmos at the manifest under the project's target directory.
    project.manifest_path = Path(dbt_project_dir, "target", DbtFileNames.MANIFEST_FILE.value)

    execution = ExecutionConfig(
        dbt_executable_path=DBT_BIN_PATH,
        dbt_project_path=dbt_project_dir,
    )

    return CosmosDbtGraph(
        project=project,
        execution_config=execution,
        profile_config=profile,
        render_config=rendering,
    )
191
+
192
+
193
def extract_graph(
    dbt_project_dir: str, models: list[str] = None, model_cmd: str = None, variables: dict = None
) -> DbtGraph:
    """
    extract the models and model graph from model pipeline settings

    :param dbt_project_dir: dbt project directory; ``~`` is expanded and the path made absolute
    :param models: the models selected in the drop down list
    :param model_cmd: the command from the advanced mode; only consulted when ``models`` is falsy
    :param variables: dbt variables; when truthy, each value is passed through
        ``VariableJSONEncoder.format_var`` before the graph is built
    :return: the ``DbtGraph`` converted from the loaded cosmos graph
    """
    # An explicit model list wins; otherwise take --select from the command.
    select = models if models else _extract_select_from_command(model_cmd)

    if variables:
        variables = {name: VariableJSONEncoder.format_var(value) for name, value in variables.items()}

    dbt_project_dir = os.path.abspath(os.path.expanduser(dbt_project_dir))
    # Both helpers run before the graph is constructed.
    run_deps_if_necessary(dbt_project_dir)
    ensure_manifest_json_exists(dbt_project_dir)

    cosmos_graph = _construct_cosmos_dag_graph(
        dbt_project_dir,
        format_profiles_path(dbt_project_dir),
        format_dbt_project_yml_path(dbt_project_dir),
        select,
        variables,
    )
    cosmos_graph.load()

    return _cosmos_graph_2_dbt_graph(cosmos_graph)
220
+
221
+
222
def _cosmos_graph_2_dbt_graph(cosmos_graph: "CosmosDbtGraph") -> DbtGraph:
    """Convert a loaded cosmos graph into the ``DbtGraph`` schema.

    Only model nodes that belong to the graph's own project are emitted. An
    edge is added for every upstream of a model that is itself such a model
    and is present in ``filtered_nodes``. Upstreams also include the
    dependencies of tests that depend on more than one node and are attached
    to the model (matched by model id).

    :param cosmos_graph: cosmos graph whose ``filtered_nodes`` are populated
    :return: model names, lineage edges and operator nodes
    """
    # todo: forbid circular dependency
    graph: list[SingleModelLineage] = []
    extracted_models: list[str] = []
    nodes: list[DbtOperatorNode] = []
    project_name = cosmos_graph.project.project_name

    # Pass 1: collect, per model id, what its multi-dependency tests depend on.
    cross_model_test_dependency: dict[int, set[str]] = {}  # {model_id: depends_on_node_ids}
    for node in cosmos_graph.filtered_nodes.values():
        if not ParsedModel.is_test_node(node):
            continue
        if not (node.depends_on and len(node.depends_on) > 1):
            continue
        model_id = ParsedModel.extract_node_model_id(node)
        if model_id is None:
            # singular test
            continue
        # upstream ids may include the current model's own node id; it is
        # skipped later when edges are built
        cross_model_test_dependency.setdefault(model_id, set()).update(node.depends_on)

    # Pass 2: emit models of the current project and their lineage edges.
    for node_id, node in cosmos_graph.filtered_nodes.items():
        if ParsedModel.is_test_node(node):
            continue
        parsed_model = ParsedModel.from_cosmos_node(node)
        if not parsed_model:
            continue
        # todo(chenjingmeng): support package/dependency
        if not parsed_model.is_current_project(project_name):
            continue
        extracted_models.append(parsed_model.model_name)
        nodes.append(DbtOperatorNode(model_name=parsed_model.model_name, config=parsed_model.to_node_config()))

        node_model_id = ParsedModel.extract_node_model_id(node)
        upstream_node_ids = _dedup_list(node.depends_on + list(cross_model_test_dependency.get(node_model_id, set())))
        for upstream_node_id in upstream_node_ids:
            if upstream_node_id == node_id:
                continue
            upstream_node = cosmos_graph.filtered_nodes.get(upstream_node_id)
            if upstream_node is None:
                continue
            upstream_parsed = ParsedModel.from_cosmos_node(upstream_node)
            if upstream_parsed is None:
                # Upstream is not a model (from_cosmos_node returns None for
                # any other node type), so there is no model edge to record.
                continue
            if upstream_parsed.is_current_project(project_name):
                graph.append(
                    SingleModelLineage(
                        upstream_model_name=upstream_parsed.model_name, downstream_model_name=parsed_model.model_name
                    )
                )
    return DbtGraph(model_names=extracted_models, graph=graph, nodes=nodes)
272
+
273
+
274
+ def _dedup_list(lst: list | Generator) -> list:
275
+ return list(set(lst))
@@ -0,0 +1,18 @@
1
+ """
2
+ copied from recurve-server
3
+ """
4
+
5
+ from recurvedata.error_codes import BaseErrorCode
6
+
7
+
8
class ErrorCode(BaseErrorCode):
    # Every member is a (code, message) pair: an "A13xx" code string followed
    # by a short English description of the failure.

    # dbt model operations
    MODEL_COMPILE_FAILED = ("A1301", "Model compile failed")
    MODEL_PREVIEW_FAILED = ("A1302", "Model preview failed")
    DEPS_FAILED = ("A1303", "DBT deps failed")

    # "DP fetch ..." failures (project / connection / variable)
    DP_FETCH_PROJECT_FAILED = ("A1304", "DP fetch project failed")
    DP_FETCH_CONNECTION_FAILED = ("A1305", "DP fetch connection failed")
    DP_FETCH_VARIABLE_FAILED = ("A1306", "DP fetch variable failed")

    MODEL_RUN_FAILED = ("A1307", "Model run failed")


ERR = ErrorCode  # short alias for ErrorCode
@@ -0,0 +1,98 @@
1
+ from dbt.cli.main import dbtRunnerResult
2
+ from pydantic import BaseModel, ConfigDict, Field
3
+
4
+ from recurvedata.core.consts import TRACING_CONTEXT_KEY
5
+ from recurvedata.executors.schemas import ColumnItem, ConnectionItem, ResponseModel
6
+
7
+
8
class CompileResult(BaseModel):
    # Payload that carries compiled SQL; used as the ``data`` type of
    # CompileResponseWithError and BuildResponseWithError in this module.

    # Compiled SQL text; stays None when no value is supplied.
    compiled_sql: str | None = None
10
+
11
+
12
class PreviewResult(BaseModel):
    # Payload of a preview: the SQL plus the tabular result. All three fields
    # are required (none declares a default).

    # SQL text associated with the preview.
    compiled_sql: str
    # Column descriptions of the result.
    columns: list[ColumnItem]
    # Result values as a list of lists
    # (presumably one inner list per row -- TODO confirm against the producer).
    data: list[list]
16
+
17
+
18
class SingleModelLineage(BaseModel):
    # One lineage edge between two models: upstream -> downstream.

    # Name of the model that is depended upon.
    upstream_model_name: str
    # Name of the model that depends on the upstream one.
    downstream_model_name: str
    # Package the upstream model belongs to; None means the current project.
    upstream_package_name: str | None = None
22
+
23
+
24
class DbtOperatorNode(BaseModel):
    # A single model entry of a DbtGraph: the model name plus its config dict.

    # Name of the model this node stands for.
    model_name: str
    # Free-form configuration mapping for the node.
    config: dict
27
+
28
+
29
class DbtGraph(BaseModel):
    # Extracted dbt graph: model names, lineage edges and per-model nodes.

    # Names of the extracted models.
    model_names: list[str]
    # Lineage edges (upstream -> downstream) between models.
    graph: list[SingleModelLineage]
    # One operator node (name + config) per extracted model.
    nodes: list[DbtOperatorNode]
33
+
34
+
35
class DbtGzMd5(BaseModel):
    # Wrapper around a single MD5 string.
    # NOTE(review): judging by the class name this is the digest of a gzipped
    # dbt archive -- confirm against the code that produces it.

    md5: str
37
+
38
+
39
class AnalyticsDatabaseConnectionAndVariable(BaseModel):
    # Bundles a connection definition with a variables mapping.

    # Connection item describing the database connection.
    connection: ConnectionItem
    # Variables as a plain dict (key/value schema is not fixed here).
    variables: dict
42
+
43
+
44
class CompileResponseWithError(ResponseModel):
    # ResponseModel specialisation whose ``data`` is a CompileResult or None.
    # Any other fields come from ResponseModel, which is defined elsewhere.

    data: CompileResult | None
46
+
47
+
48
class PreviewResponseWithError(ResponseModel):
    # ResponseModel specialisation whose ``data`` is a PreviewResult or None.
    # Any other fields come from ResponseModel, which is defined elsewhere.

    data: PreviewResult | None
50
+
51
+
52
class CompilePayload(BaseModel):
    # Request payload for compiling a model's SQL.

    # populate_by_name: ``tracing_context`` may be supplied either under its
    # field name or under its alias (TRACING_CONTEXT_KEY).
    model_config = ConfigDict(populate_by_name=True)

    # Required fields.
    project_id: int
    sql: str
    alias: str

    # Optional switches, all off by default.
    force_regenerate_dir: bool = False
    validate_sql: bool = False

    # Optional tracing context string, aliased to TRACING_CONTEXT_KEY.
    tracing_context: str | None = Field(default=None, alias=TRACING_CONTEXT_KEY)
61
+
62
+
63
class PreviewPayload(BaseModel):
    # Request payload for previewing a model's SQL.

    # populate_by_name: ``tracing_context`` may be supplied either under its
    # field name or under its alias (TRACING_CONTEXT_KEY).
    model_config = ConfigDict(populate_by_name=True)

    # Required fields.
    sql: str
    project_id: int
    alias: str
    limit: int

    # Optional switches, all off by default.
    force_regenerate_dir: bool = False
    no_data: bool = False
    is_compiled: bool = False

    # Optional tracing context string, aliased to TRACING_CONTEXT_KEY.
    tracing_context: str | None = Field(default=None, alias=TRACING_CONTEXT_KEY)
74
+
75
+
76
class BuildPayload(BaseModel):
    # Request payload for building a named model.

    # populate_by_name: ``tracing_context`` may be supplied either under its
    # field name or under its alias (TRACING_CONTEXT_KEY).
    model_config = ConfigDict(populate_by_name=True)

    # Required fields.
    project_id: int
    model_name: str
    alias: str

    # Optional switches, all off by default.
    full_refresh: bool = False
    force_regenerate_dir: bool = False

    # Optional variables mapping; None when not provided.
    variables: dict | None = None

    # Optional tracing context string, aliased to TRACING_CONTEXT_KEY.
    tracing_context: str | None = Field(default=None, alias=TRACING_CONTEXT_KEY)
86
+
87
+
88
class BuildResponseWithError(ResponseModel):
    # ResponseModel specialisation whose ``data`` is a CompileResult or None.
    # Any other fields come from ResponseModel, which is defined elsewhere.

    data: CompileResult | None
90
+
91
+
92
class RunModelResult(BaseModel):
    # Outcome of running a model: the dbt runner result plus optional SQL/log text.

    # dbtRunnerResult is not a pydantic type, so arbitrary types must be
    # allowed for the ``result`` field to be accepted.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Compiled SQL text; None when not supplied.
    compiled_sql: str | None = None
    # Result object from dbt (required).
    result: dbtRunnerResult
    # SQL text of the run; None when not supplied.
    run_sql: str | None = None
    # Log text of the run; None when not supplied.
    run_log: str | None = None