recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of recurvedata-lib might be problematic.

Files changed (333)
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,451 @@
+ import contextlib
+ import io
+ import logging
+ import os
+ import shutil
+ import tempfile
+ from concurrent.futures import ThreadPoolExecutor
+ from dataclasses import dataclass
+ from functools import cached_property
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ from recurvedata.core.templating import Renderer
+ from recurvedata.core.tracing import Tracing
+ from recurvedata.dbt.client import DbtClient
+ from recurvedata.dbt.consts import OVERWRITE_DIRECTORIES, OVERWRITE_FILES, DbtPath
+ from recurvedata.dbt.cosmos_utils import extract_graph
+ from recurvedata.dbt.error_codes import ERR
+ from recurvedata.dbt.schemas import CompileResult, DbtGraph, PreviewResult, RunModelResult
+ from recurvedata.dbt.utils import change_directory, format_var, run_deps_if_necessary
+ from recurvedata.exceptions import WrapRecurveException, wrap_error
+ from recurvedata.utils.compression import tar_gzip_uncompress
+ from recurvedata.utils.date_time import now
+ from recurvedata.utils.files import calculate_md5
+ from recurvedata.utils.helpers import get_env_id
+
+ tracer = Tracing()
+
+ if TYPE_CHECKING:
+     from recurvedata.connectors.service import DataSourceWrapper
+ try:
+     import yaml
+     from dbt.cli.main import dbtRunnerResult
+     from dbt.contracts.results import RunResult
+ except ImportError:
+     dbtRunnerResult = RunResult = None
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class DbtService:
+     project_id: int
+     project_connection_name: str = None
+     force_regenerate_dir: bool = False
+     ds: "DataSourceWrapper" = None
+     need_fetch_variable: bool = False  # when compiling/previewing, variables must be fetched first
+     variables: dict = None  # used in compile/preview
+
+     @cached_property
+     def client(self):
+         return DbtClient()
+
+     @cached_property
+     def path(self):
+         return DbtPath(project_id=self.project_id, env_id=get_env_id())
+
+     @wrap_error(ERR.DP_FETCH_PROJECT_FAILED)
+     @tracer.create_span()
+     def fetch_project(self):
+         def _is_the_same_file(file1: str, file2: str) -> bool:
+             def __read_file(filename: str) -> bytes:
+                 with open(filename, "rb") as f:
+                     return f.read()
+
+             if os.path.exists(file1) != os.path.exists(file2):
+                 return False
+             if not os.path.exists(file1):
+                 # both files are missing, so there is nothing to compare or copy
+                 return True
+
+             return __read_file(file1) == __read_file(file2)
+
+         def _overwrite_from_gzip_dir(src_dir: str, dst_dir: str):
+             for sub_dir in OVERWRITE_DIRECTORIES:
+                 src_sub_dir = os.path.join(src_dir, sub_dir)
+                 for root, dirs, files in os.walk(src_sub_dir):
+                     dst_root = os.path.join(dst_dir, sub_dir, os.path.relpath(root, src_sub_dir))
+                     os.makedirs(dst_root, exist_ok=True)
+                     for tmp_file in files:
+                         src_file = os.path.join(root, tmp_file)
+                         dst_file = os.path.join(dst_root, tmp_file)
+                         if _is_the_same_file(src_file, dst_file):
+                             logger.info(f"skip {dst_file}")
+                             continue
+                         shutil.copy2(src_file, dst_file)
+                     for dst_file_dir in os.listdir(dst_root):
+                         if dst_file_dir not in dirs + files:
+                             dst_file_dir = os.path.join(dst_root, dst_file_dir)
+                             logger.info(f"remove {dst_file_dir}")
+                             if os.path.isdir(dst_file_dir):
+                                 shutil.rmtree(dst_file_dir, ignore_errors=True)
+                             else:
+                                 try:
+                                     os.remove(dst_file_dir)
+                                 except FileNotFoundError:
+                                     pass
+             for tmp_file in OVERWRITE_FILES:
+                 src_file = os.path.join(src_dir, tmp_file)
+                 dst_file = os.path.join(dst_dir, tmp_file)
+                 if _is_the_same_file(src_file, dst_file):
+                     logger.info(f"skip {dst_file}")
+                     continue
+                 shutil.copy2(src_file, dst_file)
+
+         logger.info(f"fetch dbt project: {self.project_id} -> {self.path.project_dir}")
+
+         os.makedirs(self.path.base_path, exist_ok=True)
+         logger.info(f"fetch dbt project: preparing 1 - base_path: {self.path.base_path}")
+
+         gzip_temp_dir: str = tempfile.mkdtemp(dir=self.path.base_path, prefix=f"_tmp_{self.path.simple_project_dir}")
+         gzip_file = f"{gzip_temp_dir}.tar.gz"
+         logger.info(
+             f"fetch dbt project: preparing 2 - simple_project_dir: {self.path.simple_project_dir} - gzip_file: {gzip_file}"
+         )
+
+         local_md5 = ""
+         if os.path.exists(self.path.project_dir):
+             if os.path.isfile(self.path.project_gzip_file):
+                 # the project exists, so calculate the MD5 of project_gzip_file = {self.project_dir}.tar.gz
+                 local_md5 = calculate_md5(self.path.project_gzip_file)
+                 logger.info(
+                     f"fetch dbt project: preparing 3 - current project_gzip_file: {self.path.project_gzip_file}, local_md5: {local_md5}"
+                 )
+             else:
+                 logger.info(
+                     f"fetch dbt project: preparing 3 - current project_gzip_file: {self.path.project_gzip_file} not exists"
+                 )
+         else:
+             logger.info(f"fetch dbt project: preparing 3 - current project_dir: {self.path.project_dir} not exists")
+
+         fetch_gzip_result = self.client.fetch_project_gzip(self.project_id, gzip_file, client_md5=local_md5)
+         logger.info(f"fetch dbt project: fetching - fetch_gzip_result: {fetch_gzip_result}")
+
+         if not fetch_gzip_result:
+             logger.info("fetch dbt project: md5 is the same, skip fetch project")
+             # delete unused empty temp dir
+             shutil.rmtree(gzip_temp_dir, ignore_errors=True)
+             return
+
+         tar_gzip_uncompress(gzip_file, gzip_temp_dir)
+
+         logger.info(f"uncompress {gzip_file} to {gzip_temp_dir} success")
+
+         os.makedirs(self.path.project_dir, exist_ok=True)
+         _overwrite_from_gzip_dir(gzip_temp_dir, self.path.project_dir)
+         shutil.move(gzip_file, self.path.project_gzip_file)
+         shutil.rmtree(gzip_temp_dir, ignore_errors=True)
+
+     @wrap_error(ERR.DP_FETCH_CONNECTION_FAILED)
+     def fetch_connection(self):
+         from recurvedata.connectors.service import get_datasource_by_config
+
+         logger.info("start fetch connection")
+         con_item = self.client.get_connection(self.project_id)
+         self.ds = get_datasource_by_config(
+             con_item.type, config=con_item.data, database=con_item.database, schema=con_item.database_schema
+         )
+         self.ds.recurve_connector.set_env_when_get_dbt_connection()
+
+     @wrap_error(ERR.DP_FETCH_CONNECTION_FAILED)
+     def fetch_connection_and_variables(self):
+         from recurvedata.connectors.service import get_datasource_by_config
+
+         logger.info("start fetch connection and variables")
+         item = self.client.get_connection_and_variables(self.project_id)
+         con_item = item.connection
+         logger.info("after fetch connection and variables")
+         self.ds = get_datasource_by_config(
+             con_item.type, config=con_item.data, database=con_item.database, schema=con_item.database_schema
+         )
+         os.environ["DBT_USER"] = self.ds.user or ""
+         os.environ["DBT_PASSWORD"] = self.ds.password or ""
+         self.variables = self.prepare_variables(item.variables)
+         logger.info("start process variables")
+         logger.info("after process variables")
+
+     def prepare_variables(self, variables: dict | None) -> dict:
+         from recurvedata.executors.executor import Executor
+
+         execution_date, schedule_interval = now(), "@daily"
+         processed_variables = Executor.process_variables(variables or {}, {}, execution_date, schedule_interval)
+         result_variables = Renderer().init_context(execution_date, schedule_interval)
+         result_variables.update(processed_variables)
+         return result_variables
+
+     @tracer.create_span()
+     def compile(self, model_name: str = None, inline_sql: str = None, validate_sql: bool = False) -> CompileResult:
+         logger.info(f"prepare to compile: model_name: {model_name}, inline_sql: {inline_sql}")
+         self.prepare()
+         compiled_sql = self._run_compile(model_name, inline_sql)
+         logger.info(f"compiled_sql is: {compiled_sql}")
+         if validate_sql:
+             self._run_preview(compiled_sql, limit=0)
+         return CompileResult(compiled_sql=compiled_sql)
+
+     def should_fetch_project(self) -> bool:
+         if self.force_regenerate_dir or not os.path.exists(self.path.project_dir):
+             return True
+         remote_md5 = self.client.fetch_project_gzip_md5(self.project_id).md5
+         local_md5 = calculate_md5(self.path.project_gzip_file)
+         if remote_md5 == local_md5:
+             logger.info("md5 is the same, skip fetch project")
+             return False
+         logger.info(f"remote md5 {remote_md5} vs local md5 {local_md5}")
+         return True
+
+     @tracer.create_span()
+     def prepare(self):
+         self.fetch_project()
+
+         if self.need_fetch_variable:
+             try:
+                 self.fetch_connection_and_variables()
+             except Exception:  # back compatible
+                 logger.exception("fetch_connection_and_variables fail")
+                 self.fetch_connection()
+         else:
+             self.fetch_connection()
+
+         self.run_dependency()
+
+     @wrap_error(ERR.DEPS_FAILED)
+     @tracer.create_span()
+     def run_dependency(self):
+         run_deps_if_necessary(self.path.project_dir)
+
+     @wrap_error(ERR.MODEL_COMPILE_FAILED)
+     @tracer.create_span()
+     def _run_compile(self, model_name: str = None, inline_sql: str = None) -> str:
+         if model_name:
+             cmds = ["compile", "--select", model_name]
+         elif inline_sql:
+             cmds = ["compile", "-d", "--inline", inline_sql]
+         else:
+             raise ValueError("model_name or inline_sql must be specified")
+
+         if self.variables:
+             dbt_vars = format_var(self, self.variables)
+             cmds += ["--vars", dbt_vars]
+
+         result, _ = self._run_dbt_cmds(cmds)
+         if result.success:
+             compiled_code = result.result.results[0].node.compiled_code
+             return compiled_code.strip()
+
+     def _run_dbt_cmds(self, cmds: list, raise_when_failed: bool = True) -> tuple["dbtRunnerResult", str]:
+         from dbt.cli.main import dbtRunner
+
+         logger.info(f"prepare run dbt cmds: {cmds}")
+         dbt = dbtRunner()
+
+         with change_directory(self.path.project_dir):
+             log_buffer = io.StringIO()
+             # Redirect stdout and stderr to the buffer
+             with contextlib.redirect_stdout(log_buffer), contextlib.redirect_stderr(log_buffer):
+                 result: "dbtRunnerResult" = dbt.invoke(cmds)
+
+         if raise_when_failed and not result.success:
+             if isinstance(result.exception, BaseException):
+                 raise result.exception
+             raise ValueError(str(result.exception))
+         logger.info(f"run dbt cmds finished: {cmds}")
+         return result, log_buffer.getvalue()
+
+     @tracer.create_span()
+     def preview(
+         self,
+         model_name: str = None,
+         inline_sql: str = None,
+         limit: int = 100,
+         no_data: bool = False,
+         is_compiled: bool = False,
+     ) -> "PreviewResult":
+         self.prepare()
+
+         if is_compiled:
+             compiled_sql = inline_sql
+             logger.info(f"sql is compiled: {compiled_sql}")
+         else:
+             compiled_sql = self._run_compile(model_name, inline_sql)
+             logger.info(f"compiled_sql is: {compiled_sql}")
+
+         if no_data:
+             limit = 0
+         return self._run_preview(compiled_sql, limit)
+
+     @wrap_error(ERR.MODEL_PREVIEW_FAILED)
+     def _run_preview(self, compiled_sql: str, limit: int = 100) -> "PreviewResult":
+         from recurvedata.executors.cli.connector import ConnectionService
+
+         con_service = ConnectionService()
+         try:
+             return con_service.preview_sql(self.ds, compiled_sql, limit)
+         except Exception as e:
+             raise WrapRecurveException(ERR.MODEL_PREVIEW_FAILED, e, data={"compiled_sql": compiled_sql})
+
+     def get_test_cases(self, model_name: str) -> list[str]:
+         cmds = ["ls", "--resource-type", "test", "--select", model_name]
+         result, _ = self._run_dbt_cmds(cmds)
+         if result.success:
+             return result.result
+
+     @wrap_error(ERR.MODEL_RUN_FAILED)
+     @tracer.create_span()
+     def _run_model(
+         self, model_name: str, dbt_vars: str = None, full_refresh: bool = False
+     ) -> tuple[str, "dbtRunnerResult"]:
+         run_model_result = self.run_model(model_name, dbt_vars, full_refresh)
+         compiled_sql = run_model_result.compiled_sql
+         res = run_model_result.result
+         if not res.success:
+             error_message = None
+             if res.result and res.result.results:
+                 # Case 1: Has results with error messages
+                 errors = [r.message for r in res.result.results if r.message]
+                 if errors:
+                     error_message = "\n".join(errors)
+             elif res.exception:
+                 # Case 2: Has exception
+                 error_message = str(res.exception)
+             else:
+                 # Case 3: No results and no exception
+                 error_message = "Unknown error occurred during model run"
+
+             raise WrapRecurveException(
+                 ERR.MODEL_RUN_FAILED,
+                 Exception(error_message),
+                 data={
+                     "compiled_sql": compiled_sql,
+                 },
+             )
+         return compiled_sql, res
+
+     @tracer.create_span()
+     def run_model(
+         self, model_name: str, dbt_vars: str = None, full_refresh: bool = False, include_run_log: bool = False
+     ) -> RunModelResult:
+         cmds = ["run", "--select", model_name]
+         if dbt_vars:
+             cmds.extend(["--vars", dbt_vars])
+         if full_refresh:
+             cmds.append("--full-refresh")
+
+         if include_run_log:
+             cmds.append("--debug")
+             cmds.extend(["--log-format", "json"])
+
+         res, run_log = self._run_dbt_cmds(cmds, raise_when_failed=False)
+
+         compiled_code = self._extract_compiled_code(model_name, res)
+         run_sql = self._get_model_run_sql(model_name)
+         return RunModelResult(compiled_sql=compiled_code, result=res, run_sql=run_sql, run_log=run_log)
+
+     def _extract_compiled_code(self, model_name: str, materialized_result: "dbtRunnerResult") -> str | None:
+         # partial-compile will not have compiled_sql in materialized_result
+         return self._extract_compiled_code_from_run_result(materialized_result) or self._get_model_compiled_sql(
+             model_name
+         )
+
+     @classmethod
+     def _extract_compiled_code_from_run_result(cls, materialized_result: "dbtRunnerResult") -> str | None:
+         if not materialized_result.result:
+             return
+         results = materialized_result.result.results
+         run_result: "RunResult" = results[0]
+         compiled_code = run_result.node.compiled_code
+         if compiled_code:
+             return compiled_code.strip()
+
+         return None
+
+     def _get_model_compiled_sql(self, model_name: str) -> str | None:
+         compiled_sql_path = Path(self.path.get_model_compiled_sql_path(model_name))
+         if compiled_sql_path.exists():
+             return compiled_sql_path.read_text()
+
+     def _get_model_run_sql(self, model_name: str) -> str | None:
+         run_sql_path = Path(self.path.get_model_run_sql_path(model_name))
+         if run_sql_path.exists():
+             return run_sql_path.read_text()
+
+     @tracer.create_span()
+     def run_test(self, model_id: int, dbt_vars: str = None) -> "dbtRunnerResult":
+         cmds = [
+             "test",
+             "--select",
+             f"tag:model_{model_id}",
+         ]
+         if dbt_vars:
+             cmds.extend(["--vars", dbt_vars])
+
+         res, _ = self._run_dbt_cmds(cmds, raise_when_failed=False)
+         return res
+
+     def extract_model_graph(self, models: list[str] = None, model_cmd: str = None) -> DbtGraph:
+         """
+         extract the models and model graph from model pipeline settings
+         :param models: the models selected in the drop down list
+         :param model_cmd: the command from the advanced mode
+         """
+
+         return extract_graph(self.path.project_dir, models, model_cmd)
+
+     def extract_var_from_dbt_project(self) -> dict:
+         with open(self.path.dbt_project_yml_path, "r") as file:
+             dbt_project_dct = yaml.safe_load(file)
+         return dbt_project_dct.get("vars", {})
+
+     def read_model_sql(self, model_name: str) -> str | None:
+         model_path = Path(self.path.get_model_sql_path(model_name))
+         if not model_path.exists():
+             return
+         return model_path.read_text()
+
+     @tracer.create_span()
+     def run_test_sample_data(self, dbt_test_result: "dbtRunnerResult") -> dict[str, PreviewResult]:
+         # todo: use dbt store-failure
+
+         from recurvedata.executors.cli.connector import ConnectionService
+
+         if not dbt_test_result.result:
+             return {}
+
+         result: dict[str, PreviewResult] = {}
+
+         con_service = ConnectionService()
+
+         def _run_single_test_case_sample_data(unique_id: str, sql: str):
+             try:
+                 data: PreviewResult = con_service.preview_sql(self.ds, sql, limit=100)
+             except Exception as e:
+                 logger.exception(f"run single test case {unique_id} fail: {e}, sql: {sql}")
+                 return
+             result[unique_id] = data
+
+         unique_id_2_sql = {
+             dbt_result.node.unique_id: dbt_result.node.compiled_code
+             for dbt_result in dbt_test_result.result.results
+             if dbt_result.node.compiled_code
+             # todo: if no failure, then skip fetching sample data
+         }
+         logger.debug(f"unique_id_2_sql: {unique_id_2_sql}")
+
+         with ThreadPoolExecutor(max_workers=10) as executor:
+             futures = {
+                 executor.submit(_run_single_test_case_sample_data, unique_id, sql): unique_id
+                 for unique_id, sql in unique_id_2_sql.items()
+             }
+
+             for future in futures:
+                 future.result()
+
+         return result
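
Taken together, DbtService.prepare() drives the whole flow: fetch the project tarball (skipped when the MD5 matches), resolve the connection and variables, then run dbt deps if needed. A minimal usage sketch, not part of the package, assuming a reachable Recurve backend for DbtClient; the project id and model name below are made up:

import logging

from recurvedata.dbt.service import DbtService

logging.basicConfig(level=logging.INFO)

service = DbtService(project_id=42, need_fetch_variable=True)

# compile() calls prepare() internally: fetch project, connection/variables, dbt deps
compiled = service.compile(model_name="stg_orders")
print(compiled.compiled_sql)

# preview() reuses the same pipeline and executes the compiled SQL with a row limit
preview = service.preview(model_name="stg_orders", limit=10)
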
@@ -0,0 +1,246 @@
+ import datetime
+ import json
+ import logging
+ import os
+ import re
+ from contextlib import contextmanager
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any
+
+ from recurvedata.dbt.consts import (
+     DbtFileNames,
+     format_installed_packages_path,
+     format_package_lock_path,
+     format_packages_yml_path,
+ )
+ from recurvedata.utils.files import FileLock
+
+ try:
+     import yaml
+     from dbt.cli.main import dbtRunnerResult
+     from dbt.contracts.results import RunExecutionResult, RunResultsArtifact
+     from dbt.exceptions import DbtRuntimeError
+ except ImportError:
+     dbtRunnerResult = None
+     DbtRuntimeError = None
+     RunExecutionResult = RunResultsArtifact = None
+
+ if TYPE_CHECKING:
+     from recurvedata.dbt.service import DbtService
+
+ logger = logging.getLogger(__name__)
+
+
+ @contextmanager
+ def change_directory(new_dir):  # todo(chenjingmeng): use dbt api instead of cli
+     """Context manager to change the current working directory temporarily."""
+     original_dir = os.getcwd()
+     os.chdir(new_dir)
+     try:
+         yield
+     finally:
+         os.chdir(original_dir)
+
+
+ def extract_project_name(project_yml: str) -> str:
+     with open(project_yml, "r") as file:
+         dbt_project = yaml.safe_load(file)
+     project_name = dbt_project.get("name")
+     return project_name
+
+
+ def run_dbt_cmds(project_dir: str, cmds: list) -> dbtRunnerResult:
+     from dbt.cli.main import dbtRunner
+
+     def _set_default_os_env():
+         os.environ.setdefault("DBT_USER", "")
+         os.environ.setdefault("DBT_PASSWORD", "")
+
+     _set_default_os_env()
+     dbt = dbtRunner()
+     logger.info(f"prepare run dbt cmds: {cmds}")
+     with change_directory(project_dir):
+         result: dbtRunnerResult = dbt.invoke(cmds)
+     return result
+
+
+ def dbt_runner_result_to_dict(result: dbtRunnerResult) -> dict:
+     def _exception_to_dict(exception: DbtRuntimeError | BaseException | None) -> dict | None:
+         if exception is None:
+             return None
+         if isinstance(exception, DbtRuntimeError):
+             return exception.data()
+         return {
+             "type": type(exception).__name__,
+             "message": str(exception),
+         }
+
+     def _result_to_dict(sub_result: RunExecutionResult | None) -> dict | None:
+         if isinstance(sub_result, RunExecutionResult):
+             res_dct = sub_result.to_dict(omit_none=False)
+             return _format_cp_result_dct(res_dct)
+
+     def _format_cp_result_dct(dbt_result_dct: dict) -> dict:
+         if not dbt_result_dct.get("results"):
+             return dbt_result_dct
+         results: list[dict] = dbt_result_dct["results"]
+         if results:
+             for sub_result in results:
+                 node_dct = sub_result.get("node", {})
+                 sub_result.update(node_dct)  # on CP, DBTTestResultDetails needs node data like unique_id to validate
+                 # todo: better to adjust cp pydantic schema
+         return dbt_result_dct
+
+     return {
+         "success": result.success,
+         "exception": _exception_to_dict(result.exception),
+         "result": _result_to_dict(result.result),
+     }
+
+
+ class VariableJSONEncoder(json.JSONEncoder):
+     def default(self, obj: Any):
+         return self.format_var(obj)
+
+     @classmethod
+     def format_var(cls, value: Any):
+         if value is None or isinstance(value, (int, bool, float)):
+             return value
+         elif isinstance(value, datetime.datetime):
+             return value.isoformat()
+         return str(value)
+
+
+ def format_var(service: "DbtService", variables: dict) -> str | None:
+     default_var_dct: dict = service.extract_var_from_dbt_project()
+     override_variables: dict = {
+         k: v for (k, v) in variables.items() if k not in default_var_dct or v != default_var_dct[k]
+     }
+     if not override_variables:
+         return
+     vars_string = json.dumps(override_variables, cls=VariableJSONEncoder)
+     return vars_string
+
+
+ def should_run_dependency(project_dir: str) -> bool:
+     packages_yml = Path(format_packages_yml_path(project_dir))
+     if not packages_yml.exists():
+         return False
+     if packages_yml.stat().st_size == 0:
+         return False
+     data: dict = read_yaml_file(str(packages_yml))
+     if not data.get("packages"):
+         return False
+     package_lock = Path(format_package_lock_path(project_dir))
+     if not package_lock.exists():
+         return True
+     packages_dir = Path(format_installed_packages_path(project_dir))
+     if not packages_dir.exists():
+         # maybe concurrency issue, causing the dbt_packages dir missing
+         return True
+     data: dict = read_yaml_file(str(package_lock))
+     pack_cnt = len(data.get("packages", []))
+     if pack_cnt > len(os.listdir(str(packages_dir))):
+         # previous concurrency issue
+         return True
+     if packages_yml.stat().st_mtime > package_lock.stat().st_mtime:
+         return True
+     return False
+
+
+ def read_yaml_file(filename: str) -> dict:
+     with open(filename, "r") as file:
+         return yaml.safe_load(file)
+
+
+ def run_deps_if_necessary(project_dir: str):
+     if not should_run_dependency(project_dir):
+         logger.info(f"skip deps on {project_dir}")
+         return
+
+     lock = FileLock(lock_file_path=Path(project_dir).with_suffix(".deps_lock"))
+     with lock:
+         if not should_run_dependency(project_dir):
+             logger.info(f"skip deps on {project_dir}")
+             return
+         res = run_dbt_cmds(
+             project_dir,
+             [
+                 "deps",
+             ],
+         )
+         if not res.success:
+             raise DbtRuntimeError(f"run deps failed on {project_dir}, {res.exception}")
+
+         lock_file = Path(format_package_lock_path(project_dir))
+         if lock_file.exists():
+             lock_file.touch()  # used in should_run_dependency
+         logger.info(f"deps on {project_dir} finish")
+
+
+ def ensure_manifest_json_exists(project_dir: str):
+     manifest_path = Path(project_dir) / "target" / DbtFileNames.MANIFEST_FILE.value
+     if manifest_path.exists():
+         return
+     run_dbt_cmds(project_dir, ["parse"])
+
+
+ def _has_error_log(log: dict) -> bool:
+     log_data = log.get('data')
+     if log_data:
+         return log_data.get('status') == 'error' or 'error' in log_data.get("base_msg", "")
+     return False
+
+
+ def _create_success_log(sql: str) -> dict:
+     return {
+         "sql": sql,
+         "status": 'success'
+     }
+
+
+ def _create_failed_log(sql: str) -> dict:
+     return {
+         "sql": sql,
+         "status": 'failed'
+     }
+
+
+ def parse_run_model_log(run_log: str) -> list[dict]:
+     if not run_log:
+         return []
+
+     run_sql_log = []
+     sql = None
+     for line in run_log.splitlines():
+         try:
+             log = json.loads(line)
+             log_data = log.get('data')
+
+             if log_data and 'sql' in log_data:
+                 # if we reach the next sql without having seen an error, the previous sql succeeded
+                 if sql is not None:
+                     run_sql_log.append(_create_success_log(sql))
+
+                 sql = log_data['sql']
+                 # remove /* ... */ comment blocks, then strip whitespace to get the bare SQL
+                 cleaned = re.sub(r"/\*.*?\*/", "", sql, flags=re.DOTALL)
+                 sql = cleaned.strip()
+
+             elif sql:
+                 # failed if status is error or base_msg contains the error keyword;
+                 # if the log has neither status nor base_msg, skip it
+                 if _has_error_log(log):
+                     run_sql_log.append(_create_failed_log(sql))
+                     sql = None
+
+         except json.JSONDecodeError:
+             logger.error("Skipping non-JSON line: %s", line)
+
+     # the last sql saw no error, mark it success
+     if sql is not None:
+         run_sql_log.append(_create_success_log(sql))
+
+     return run_sql_log
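
parse_run_model_log pairs with DbtService.run_model(include_run_log=True) in service.py above: dbt is invoked with --debug --log-format json, the captured output becomes run_log, and each JSON line whose data carries a sql key opens a statement that is marked failed only when a later line reports an error. A small illustration with fabricated log lines (real dbt log records carry more fields):

import json

from recurvedata.dbt.utils import parse_run_model_log

run_log = "\n".join([
    json.dumps({"data": {"sql": "/* model a */ create table a as select 1"}}),
    json.dumps({"data": {"sql": "/* model b */ create table b as select x"}}),
    json.dumps({"data": {"status": "error", "base_msg": "error: column x not found"}}),
])

print(parse_run_model_log(run_log))
# [{'sql': 'create table a as select 1', 'status': 'success'},
#  {'sql': 'create table b as select x', 'status': 'failed'}]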