recurvedata-lib 0.1.487 (recurvedata_lib-0.1.487-py2.py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of recurvedata-lib might be problematic.

Files changed (333)
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
recurvedata/pigeon/loader/base.py
@@ -0,0 +1,83 @@
+ import os
+
+ from recurvedata.pigeon.schema import Schema
+ from recurvedata.pigeon.utils import LoggingMixin, fs, sql
+
+
+ class BaseLoader(LoggingMixin):
+     def __init__(self, *args, **kwargs):
+         pass
+
+     def before_execute(self):
+         pass
+
+     def after_execute(self):
+         pass
+
+     def execute(self):
+         self.before_execute()
+         self.execute_impl()
+         self.after_execute()
+
+     def execute_impl(self):
+         raise NotImplementedError("execute_impl must be implemented by subclass")
+
+
+ class CSVToDBAPIMixin(object):
+     @property
+     def schema_filename(self) -> str:
+         return fs.schema_filename(self.filename)
+
+     def _prepare_target_table(self):
+         # add schema for azure data warehouse
+         if self.connector.has_table(table=self.table, schema=getattr(self, "schema", None)):
+             return
+
+         self.logger.info("table not found, try to create it")
+         ddl = self._infer_create_table_ddl()
+         if not ddl:
+             raise ValueError("table not found, create_table_ddl is required")
+         ddl = ddl.strip().rstrip(";")
+         self.logger.info("create table ddl: %s\n", ddl)
+         with self.connector.cursor() as cursor:
+             cursor.execute(ddl)
+
+     def _infer_create_table_ddl(self):
+         if not self.create_table_ddl:
+             # infer by schema
+             schema_file = self.schema_filename
+             self.logger.info("infer ddl by schema file %s", schema_file)
+             return self._generate_ddl_from_schema(schema_file)
+
+         if "CREATE TABLE" in self.create_table_ddl.upper():
+             self.logger.info("self.create_table_ddl contains `CREATE TABLE`, use it")
+             create_table_ddl = self.create_table_ddl
+
+             # Safely get schema, defaulting to None if not present
+             schema = getattr(self, "schema", None)
+             if not schema:
+                 return create_table_ddl
+
+             self.logger.info(f"add schema {schema} to create table ddl")
+             return sql.add_schema_to_create_table(create_table_ddl, schema, self.connector.quote_identifier)
+
+         if os.path.isfile(self.create_table_ddl):
+             self.logger.info("self.create_table_ddl is a filename, treat it as schema file")
+             return self._generate_ddl_from_schema(self.create_table_ddl)
+         return None
+
+     def _generate_ddl_from_schema(self, schema_file):
+         if not os.path.exists(schema_file):
+             self.logger.error("file not exists, not able to infer DDL")
+             return None
+
+         try:
+             schema = Schema.load(schema_file)
+         except Exception:
+             self.logger.exception("failed to load schema from %s", schema_file)
+             return None
+
+         ddl_options = getattr(self, "ddl_options", {})
+         table_name = getattr(self, "full_table_name", self.table)
+         ddl = self.connector.generate_create_table_ddl(table_name, schema, **ddl_options)
+         return ddl
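
For orientation, a minimal sketch of the contract this module defines (the subclass below is hypothetical, not part of the package): concrete loaders implement execute_impl(), callers go through execute(), which brackets it with the before/after hooks, and CSVToDBAPIMixin additionally expects the subclass to provide self.connector, self.table, self.filename and self.create_table_ddl.

    from recurvedata.pigeon.loader.base import BaseLoader

    class NoOpLoader(BaseLoader):
        # execute() runs before_execute -> execute_impl -> after_execute
        def execute_impl(self):
            self.logger.info("nothing to load")  # self.logger comes from LoggingMixin

    NoOpLoader().execute()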
recurvedata/pigeon/loader/csv_to_azure_synapse.py
@@ -0,0 +1,214 @@
+ from typing import TYPE_CHECKING, List, Union
+
+ from recurvedata.pigeon import const
+ from recurvedata.pigeon.loader.base import BaseLoader, CSVToDBAPIMixin
+ from recurvedata.pigeon.utils import ensure_query_list, ensure_str_list, fs
+
+ if TYPE_CHECKING:
+     from recurvedata.pigeon.connector.azure_synapse import AzureSynapseConnector
+
+ allowed_modes = (
+     const.LOAD_OVERWRITE,
+     const.LOAD_MERGE,
+     const.LOAD_APPEND
+ )
+
+
+ class CSVToAzureSynapseLoader(BaseLoader, CSVToDBAPIMixin):
+     def __init__(
+         self,
+         table: str,
+         filename: str,
+         azure_synapse_connector: 'AzureSynapseConnector' = None,
+         schema: str = None,
+         create_table_ddl: str = None,
+         mode: str = const.LOAD_MERGE,
+         primary_keys: Union[str, List[str]] = None,
+         columns: Union[str, List[str]] = None,
+         compress: bool = True,
+         delete_file: bool = True,
+         dedup: bool = False,
+         dedup_uniq_keys: Union[str, List[str]] = None,
+         dedup_orderby: Union[str, List[str]] = None,
+         pre_queries: Union[str, List[str]] = None,
+         post_queries: Union[str, List[str]] = None,
+         *args, **kwargs
+     ):
+         if '.' in table:
+             self.schema, self.table = table.split('.')
+         else:
+             self.schema = schema or 'dbo'
+             self.table = table
+
+         # determine table name of target table and staging table
+         self.compress = compress
+         self.filename = filename  # full file path
+
+         self.azure_synapse = azure_synapse_connector  # TODO: use default
+
+         # determine table ddl stuff
+         self.create_table_ddl = create_table_ddl
+
+         # merge stuff
+         if mode not in allowed_modes:
+             raise ValueError(f'mode should be one of ({allowed_modes})')
+
+         self.mode = mode
+         self.primary_keys = ensure_str_list(primary_keys)
+         self.columns = ensure_str_list(columns)
+
+         # dedup stuff
+         self.dedup = dedup
+         self.dedup_uniq_keys = ensure_str_list(dedup_uniq_keys)
+         self.dedup_orderby = dedup_orderby
+         if self.dedup and not self.dedup_uniq_keys:
+             raise ValueError('dedup_uniq_keys should not be empty if dedup is true')
+
+         self.pre_queries = ensure_query_list(pre_queries) or []
+         self.post_queries = ensure_query_list(post_queries) or []
+
+         self.delete_file = delete_file
+
+         super().__init__()
+
+     def execute_impl(self):
+         if fs.is_file_empty(self.filename):
+             self.logger.error('file not exists or has no content. %s', self.filename)
+             fs.remove_files_safely(fs.schema_filename(self.filename))
+             return
+         self._prepare_target_table()
+         self._prepare_staging_table()
+         self._merge_into_target_table()
+
+         # do cleaning things
+         if self.delete_file:
+             self.logger.info('delete local file %s', self.filename)
+             fs.remove_files_safely(self.filename)
+             fs.remove_files_safely(fs.schema_filename(self.filename))
+
+     @property
+     def connector(self):
+         return self.azure_synapse
+
+     @property
+     def staging_table(self):
+         return f'{self.table}_staging'
+
+     @property
+     def full_staging_table_name(self):
+         return f'{self.schema}.{self.staging_table}'
+
+     @property
+     def full_table_name(self):
+         return f'{self.schema}.{self.table}'
+
+     @property
+     def quoted_full_staging_table(self):
+         return self.connector.quote_identifier(self.full_staging_table_name)
+
+     @property
+     def quoted_full_table_name(self):
+         return self.connector.quote_identifier(self.full_table_name)
+
+     def _prepare_staging_table(self):
+         schema, table = self.full_staging_table_name.split('.')
+         drop = self._make_drop_table_query(schema, table)
+         ddl = f'SELECT TOP 0 * INTO {self.quoted_full_staging_table} FROM {self.quoted_full_table_name}'
+         self.connector.execute([drop, ddl])
+
+         self.logger.info(f'load {self.filename} into staging table {self.full_staging_table_name}')
+         self.connector.load_csv(table=self.full_staging_table_name,
+                                 filename=self.filename,
+                                 columns=self.columns)
+
+         if self.dedup:
+             dedup_query = self._construct_dedup_query()
+             self.connector.execute(dedup_query, autocommit=False, commit_on_close=True)
+
+     def _construct_dedup_query(self):
+         partitions_cols = []
+         for col in self.dedup_uniq_keys:
+             partitions_cols.append(self.connector.quote_identifier(col))
+         partition_by = ', '.join(partitions_cols)
+         # keep the column names as a list; joining them into one string here
+         # would make the quoting below iterate over characters instead of columns
+         columns = self.azure_synapse.get_columns(schema=self.schema, table=self.staging_table)
+         tmp_table = f'{self.full_staging_table_name}_tmp'
+         quoted_tmp_table = self.connector.quote_identifier(tmp_table)
+         quoted_bak_table = self.connector.quote_identifier(f'{self.staging_table}_bak')
+
+         queries = f"""
+         {self._make_drop_table_query(self.schema, tmp_table)};
+
+         CREATE TABLE {quoted_tmp_table} AS
+         SELECT {', '.join(self.connector.quote_identifier(x) for x in columns)}
+         FROM (
+             SELECT *, ROW_NUMBER() OVER (PARTITION BY {partition_by} ORDER BY {self.dedup_orderby}) rn
+             FROM {self.quoted_full_staging_table}
+         ) AS t
+         WHERE rn = 1;
+
+         RENAME OBJECT {self.quoted_full_staging_table} TO {quoted_bak_table};
+         RENAME OBJECT {quoted_tmp_table} TO {self.staging_table};
+         DROP TABLE {quoted_bak_table};
+         """
+         return queries
+
+     def _make_drop_table_query(self, schema: str, table: str) -> str:
+         if '.' in table:
+             schema, table = table.split('.')
+         if not schema:
+             schema = self.schema
+         full_table = f'{schema}.{table}'
+         query = f'''
+         IF EXISTS (
+             SELECT * FROM sys.tables
+             WHERE schema_name(schema_id) = '{schema}' AND name = '{table}'
+         )
+         DROP TABLE {self.connector.quote_identifier(full_table)}
+         '''
+         return query
+
+     def _merge_into_target_table(self):
+         target = self.quoted_full_table_name
+         staging = self.quoted_full_staging_table
+
+         append_sql = f'INSERT INTO {target} SELECT * FROM {staging}'
+         if self.mode == const.LOAD_OVERWRITE:
+             queries = [f'TRUNCATE TABLE {target}', append_sql]
+         elif self.mode == const.LOAD_MERGE:
+             queries = self._ingest_by_merging()
+         else:
+             # APPEND
+             queries = [append_sql]
+
+         queries.append(f'DROP TABLE {staging}')
+
+         queries = self.pre_queries + queries + self.post_queries
+         self.logger.info('running Azure Synapse queries...')
+         self.connector.execute(queries, autocommit=True, commit_on_close=True)
+         self.logger.info('done.')
+
+     def _ingest_by_merging(self):
+         merge_table = f'{self.full_table_name}_merge'
+         quote = self.connector.quote_identifier
+         join = ' AND '.join([f'a.{quote(x)} = b.{quote(x)}' for x in self.primary_keys])
+
+         drop_merge_table = self._make_drop_table_query(self.schema, merge_table)
+         # TODO: maybe use the builtin MERGE by T-SQL?
+         queries = f"""
+         {drop_merge_table};
+
+         CREATE TABLE {quote(merge_table)} WITH (DISTRIBUTION = ROUND_ROBIN)
+         AS
+         SELECT a.*
+         FROM {self.quoted_full_table_name} AS a
+         LEFT JOIN {self.quoted_full_staging_table} AS b ON {join}
+         WHERE b.{quote(self.primary_keys[0])} IS NULL
+         UNION ALL
+         SELECT * FROM {self.quoted_full_staging_table};
+
+         TRUNCATE TABLE {self.quoted_full_table_name};
+         INSERT INTO {self.quoted_full_table_name} SELECT * FROM {quote(merge_table)};
+
+         {drop_merge_table};
+         """
+         return queries.split(';')
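
As a rough usage sketch (not from the package; the connector construction and all identifiers below are assumed), the loader is driven the same way as the other CSV loaders:

    from recurvedata.pigeon import const
    from recurvedata.pigeon.loader.csv_to_azure_synapse import CSVToAzureSynapseLoader

    connector = ...  # an already-configured AzureSynapseConnector, built elsewhere
    loader = CSVToAzureSynapseLoader(
        table="dbo.fact_sales",          # hypothetical schema-qualified target
        filename="/tmp/fact_sales.csv",  # hypothetical dump produced upstream
        azure_synapse_connector=connector,
        mode=const.LOAD_MERGE,
        primary_keys=["sale_id"],
        dedup=True,
        dedup_uniq_keys=["sale_id"],
        dedup_orderby="updated_at DESC",
    )
    loader.execute()

Note that the merge path rebuilds the target with CTAS plus TRUNCATE/INSERT rather than T-SQL MERGE, as the TODO in _ingest_by_merging acknowledges.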
recurvedata/pigeon/loader/csv_to_clickhouse.py
@@ -0,0 +1,152 @@
+ from recurvedata.pigeon import const
+ from recurvedata.pigeon.connector import new_clickhouse_connector
+ from recurvedata.pigeon.csv import CSV
+ from recurvedata.pigeon.loader.base import BaseLoader, CSVToDBAPIMixin
+ from recurvedata.pigeon.utils import ensure_query_list, ensure_str_list, fs
+ from recurvedata.pigeon.utils.sql import bak_table_of, reconcile_table_of, staging_table_of
+
+ allowed_modes = (const.LOAD_OVERWRITE, const.LOAD_MERGE, const.LOAD_APPEND)
+
+
+ class CSVToClickHouseLoader(BaseLoader, CSVToDBAPIMixin):
+     def __init__(
+         self,
+         database,
+         table,
+         filename,
+         connector=None,
+         create_table_ddl=None,
+         mode=const.LOAD_OVERWRITE,
+         primary_keys=None,
+         skiprows=0,
+         using_insert=False,
+         insert_batch_size=10000,
+         insert_concurrency=1,
+         delete_file=False,
+         table_engine="Log",
+         pre_queries=None,
+         post_queries=None,
+         native=False,
+     ):
+         self.database = database
+         self.table = table
+
+         if isinstance(filename, CSV):
+             filename = filename.path
+         self.filename = filename
+         self.csvfile = CSV(self.filename)
+
+         if connector is None:
+             connector = new_clickhouse_connector(database=self.database, native=native)
+         else:
+             connector.database = self.database
+         self.connector = connector
+
+         self.create_table_ddl = create_table_ddl
+         self.ddl_options = {"ENGINE": table_engine}
+
+         if mode not in allowed_modes:
+             raise ValueError("mode should be one of ({})".format(allowed_modes))
+
+         self.mode = mode
+         self.primary_keys = ensure_str_list(primary_keys)
+         if self.mode == const.LOAD_MERGE and not self.primary_keys:
+             raise ValueError("primary_keys should not be empty in mode {}".format(const.LOAD_MERGE))
+
+         # self.columns = columns or self.csvfile.header
+         # self.skiprows = int(skiprows or self.csvfile.has_header)
+         self.skiprows = int(skiprows)
+         self.using_insert = using_insert
+         self.insert_batch_size = insert_batch_size
+         self.insert_concurrency = insert_concurrency
+         self.delete_file = delete_file
+
+         self.pre_queries = ensure_query_list(pre_queries) or []
+         self.post_queries = ensure_query_list(post_queries) or []
+
+         super().__init__()
+
+     @property
+     def staging_table(self):
+         return staging_table_of(self.table)
+
+     def execute_impl(self):
+         if fs.is_file_empty(self.filename):
+             self.logger.error("file not exists or has no content. %s", self.filename)
+             fs.remove_files_safely(fs.schema_filename(self.filename))
+             return
+         self._prepare_target_table()
+         self._prepare_staging_table()
+         self._load_to_staging()
+         self._merge_into_target_table()
+
+         # do cleaning things
+         if self.delete_file:
+             self.logger.info("delete local file %s", self.filename)
+             fs.remove_files_safely(self.filename)
+             fs.remove_files_safely(fs.schema_filename(self.filename))
+
+     def _prepare_staging_table(self):
+         queries = """
+         DROP TABLE IF EXISTS {staging};
+         CREATE TABLE {staging} AS {table};
+         """.format(
+             staging=self.staging_table, table=self.table
+         )
+         self.connector.execute(queries, autocommit=True)
+
+     def _load_to_staging(self):
+         self.connector.load_csv(
+             table=self.staging_table,
+             filename=self.csvfile.path,
+             lineterminator=self.csvfile.dialect.lineterminator,
+             skiprows=self.skiprows,
+             null_values=("NULL", r"\N"),
+             using_insert=self.using_insert,
+             batch_size=self.insert_batch_size,
+             concurrency=self.insert_concurrency,
+         )
+
+     def _merge_into_target_table(self):
+         queries = []
+         if self.mode == const.LOAD_MERGE:
+             queries.extend(self._ingest_by_merging())
+         elif self.mode == const.LOAD_OVERWRITE:
+             bak_table = bak_table_of(self.table)
+             queries.append(f"DROP TABLE IF EXISTS {bak_table}")
+             queries.append(f"RENAME TABLE {self.table} TO {bak_table}")
+             queries.append(f"RENAME TABLE {self.staging_table} TO {self.table}")
+             queries.append(f"DROP TABLE IF EXISTS {bak_table}")
+         else:
+             queries.append(f"INSERT INTO {self.table} SELECT * FROM {self.staging_table}")
+             queries.append(f"DROP TABLE {self.staging_table}")
+
+         queries = self.pre_queries + queries + self.post_queries
+         self.connector.execute(queries)
+
+     def _ingest_by_merging(self):
+         reconcile = reconcile_table_of(self.table)
+         bak = bak_table_of(self.table)
+         using = ", ".join(self.primary_keys)
+         sql = """
+         DROP TABLE IF EXISTS {reconcile};
+         CREATE TABLE {reconcile} AS {table};
+
+         INSERT INTO {reconcile}
+         SELECT * FROM {table} WHERE NOT ({using}) IN (SELECT {using} FROM {staging})
+         UNION ALL
+         SELECT * FROM {staging};
+
+         RENAME TABLE {table} TO {bak};
+         RENAME TABLE {reconcile} TO {table};
+         DROP TABLE IF EXISTS {bak};
+         DROP TABLE {staging};
+         """.format(
+             reconcile=self.connector.quote_identifier(reconcile),
+             table=self.connector.quote_identifier(self.table),
+             staging=self.connector.quote_identifier(self.staging_table),
+             bak=self.connector.quote_identifier(bak),
+             using=using,
+         )
+         queries = sql.split(";")
+         return queries
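
A minimal driving sketch, assuming a datasource configuration that new_clickhouse_connector can resolve (all paths and names below are hypothetical):

    from recurvedata.pigeon import const
    from recurvedata.pigeon.loader.csv_to_clickhouse import CSVToClickHouseLoader

    loader = CSVToClickHouseLoader(
        database="analytics",        # hypothetical
        table="events",              # hypothetical
        filename="/tmp/events.csv",  # a sibling schema file can drive DDL inference
        mode=const.LOAD_MERGE,
        primary_keys=["event_id"],
        native=True,  # presumably selects the native-protocol connector (clickhouse_native)
    )
    loader.execute()

The overwrite path rotates tables with RENAME TABLE through a _bak intermediate instead of truncating in place, so the old data stays readable until the staging copy is renamed in.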
recurvedata/pigeon/loader/csv_to_doris.py
@@ -0,0 +1,215 @@
+ from typing import TYPE_CHECKING, Any, List, Optional
+
+ from recurvedata.pigeon import const
+ from recurvedata.pigeon.loader.csv_to_mysql import CSVToMySQLLoader
+ from recurvedata.pigeon.utils import md5hash
+ from recurvedata.pigeon.utils.sql import bak_table_of, reconcile_table_of, staging_table_of
+
+ if TYPE_CHECKING:
+     from recurvedata.pigeon.connector.doris import DorisConnector
+
+ allowed_modes = (const.LOAD_OVERWRITE, const.LOAD_MERGE, const.LOAD_APPEND)
+
+
+ class CSVToDorisLoader(CSVToMySQLLoader):
+     def __init__(
+         self,
+         database: str,
+         table: str,
+         filename: str,
+         connector: Optional["DorisConnector"] = None,
+         create_table_ddl: Optional[str] = None,
+         mode: str = const.LOAD_OVERWRITE,
+         primary_keys: Optional[List[str]] = None,
+         skiprows: int = 0,
+         columns: Optional[List[str]] = None,
+         using_insert: bool = False,
+         insert_batch_size: int = 1000,
+         insert_concurrency: int = 1,
+         delete_file: bool = False,
+         pre_queries: Optional[List[str]] = None,
+         post_queries: Optional[List[str]] = None,
+         load_strict_mode: bool = False,
+         max_filter_ratio: float = 0,
+         *args: Any,
+         **kwargs: Any,
+     ):
+         if not connector:
+             raise ValueError(f"connector is required for {self.__class__.__name__}")
+         self.load_strict_mode: bool = load_strict_mode
+         connector.load_strict_mode = load_strict_mode
+         self.logger.info(f"load_strict_mode: {load_strict_mode}")
+         connector.max_filter_ratio = max_filter_ratio
+         self.logger.info(f"max_filter_ratio: {max_filter_ratio}")
+         table_suffix: str = md5hash(filename)[:6]
+         self.__staging_table: str = staging_table_of(table) + "_" + table_suffix
+         self.__reconcile_table: str = reconcile_table_of(table) + "_" + table_suffix
+         self.__bak_table: str = bak_table_of(table) + "_" + table_suffix
+
+         # Check table name length
+         if any(
+             [
+                 len(self.__staging_table) > 64,
+                 len(self.__reconcile_table) > 64,
+                 len(self.__bak_table) > 64,
+             ]
+         ):
+             self.logger.error(
+                 f"table name {self.__staging_table}'s length: {len(self.__staging_table)}\n"
+                 f"table name {self.__reconcile_table}'s length: {len(self.__reconcile_table)}\n"
+                 f"table name {self.__bak_table}'s length: {len(self.__bak_table)}\n"
+             )
+             raise ValueError("length of intermediate table name is greater than 64!")
+
+         super().__init__(
+             database=database,
+             table=table,
+             filename=filename,
+             connector=connector,
+             create_table_ddl=create_table_ddl,
+             mode=mode,
+             primary_keys=primary_keys,
+             skiprows=skiprows,
+             columns=columns,
+             using_insert=using_insert,
+             insert_batch_size=insert_batch_size,
+             insert_concurrency=insert_concurrency,
+             delete_file=delete_file,
+             pre_queries=pre_queries,
+             post_queries=post_queries,
+             *args,
+             **kwargs,
+         )
+
+     @property
+     def staging_table(self) -> str:
+         """
+         Override method to return table name with random suffix
+         """
+         return self.__staging_table
+
+     def _merge_into_target_table(self) -> None:
+         queries = []
+         if self.mode == const.LOAD_MERGE:
+             queries.extend(self._ingest_by_merging())
+         elif self.mode == const.LOAD_OVERWRITE:
+             bak_table = self.__bak_table
+             queries.extend(
+                 [
+                     f"DROP TABLE IF EXISTS {bak_table}",
+                     f"ALTER TABLE {self.table} RENAME {bak_table}",
+                     f"ALTER TABLE {self.staging_table} RENAME {self.table}",
+                     f"DROP TABLE IF EXISTS {bak_table}",
+                 ]
+             )
+         else:  # APPEND mode
+             queries.append(f"INSERT INTO {self.table} SELECT * FROM {self.staging_table}")
+             self.connector.execute(self.pre_queries + queries, autocommit=True, commit_on_close=False)
+
+             queries.clear()
+             queries.append(f"DROP TABLE {self.staging_table}")
+             self.connector.execute(queries + self.post_queries, autocommit=True, commit_on_close=False)
+             return
+
+         queries = self.pre_queries + queries + self.post_queries
+         self.logger.info("running MySQL queries within a transaction")
+         self.connector.execute(queries, autocommit=False, commit_on_close=True)
+
+     def _ingest_by_merging(self) -> List[str]:
+         """Merge with deduplication based on primary keys using Doris-compatible syntax"""
+         # First, deduplicate staging table based on primary keys using window function
+         pk_columns = ", ".join(self.primary_keys)
+
+         # Get all columns from staging table (excluding the rn column we'll add)
+         cols = self.connector.get_columns(self.staging_table)
+         cols_str = ", ".join(self.connector.quote_identifier(x) for x in cols)
+
+         # Create a temporary table with deduplicated data
+         tmp_table = f"{self.staging_table}_dedup"
+         dedup_sql = f"""
+         DROP TABLE IF EXISTS {tmp_table};
+         CREATE TABLE {tmp_table} LIKE {self.staging_table};
+         INSERT INTO {tmp_table}
+         SELECT {cols_str} FROM (
+             SELECT *, ROW_NUMBER() OVER(PARTITION BY {pk_columns} ORDER BY {pk_columns}) AS rn
+             FROM {self.staging_table}
+         ) t
+         WHERE rn = 1;
+         """
+
+         # Replace staging table with deduplicated data
+         replace_sql = f"""
+         DROP TABLE {self.staging_table};
+         ALTER TABLE {tmp_table} RENAME {self.staging_table};
+         """
+
+         # Simple merge: backup target table, then merge and deduplicate in one step
+         bak = self.__bak_table
+         table = self.connector.quote_identifier(self.table)
+         staging = self.connector.quote_identifier(self.staging_table)
+         bak = self.connector.quote_identifier(bak)
+
+         # Simple and efficient merge: backup + merge + deduplicate in one operation
+         merge_sql = f"""
+         -- Backup target table
+         DROP TABLE IF EXISTS {bak};
+         ALTER TABLE {table} RENAME {bak};
+
+         -- Create new target table and insert deduplicated merged data in one step
+         CREATE TABLE {table} AS
+         SELECT {cols_str} FROM (
+             SELECT *, ROW_NUMBER() OVER(PARTITION BY {pk_columns} ORDER BY {pk_columns}) AS rn
+             FROM (
+                 SELECT * FROM {bak}
+                 UNION ALL
+                 SELECT * FROM {staging}
+             ) combined
+         ) t WHERE rn = 1;
+
+         -- Clean up
+         DROP TABLE {bak};
+         DROP TABLE {staging};
+         """
+
+         return [dedup_sql, replace_sql, merge_sql]
+
+     def execute(self) -> None:
+         """
+         Override method to implement exception handling
+         """
+         self.before_execute()
+         try:
+             self.execute_impl()
+         except Exception as e:
+             self.handle_exception()
+             raise e
+         self.after_execute()
+
+     def handle_exception(self) -> None:
+         """
+         Ensure safe cleanup of all intermediate tables after an exception occurs
+         """
+         qry_exists_sql = """
+         SELECT 1 FROM information_schema.tables
+         WHERE table_schema = '{database}' AND table_name = '{table}';
+         """
+         is_table_exists = self.connector.fetchall(qry_exists_sql.format(database=self.database, table=self.table))
+         is_bak_exists = self.connector.fetchall(qry_exists_sql.format(database=self.database, table=self.__bak_table))
+
+         if is_table_exists:
+             # Directly clean up intermediate tables
+             queries = [
+                 f"DROP TABLE IF EXISTS {self.__bak_table}",
+                 f"DROP TABLE IF EXISTS {self.__staging_table}",
+                 f"DROP TABLE IF EXISTS {self.__reconcile_table}",
+             ]
+         elif is_bak_exists:
+             # Restore from backup table
+             queries = [
+                 f"ALTER TABLE {self.__bak_table} RENAME {self.table}",
+                 f"DROP TABLE IF EXISTS {self.__staging_table}",
+                 f"DROP TABLE IF EXISTS {self.__reconcile_table}",
+             ]
+         else:
+             queries = [f"DROP TABLE IF EXISTS {self.__staging_table}", f"DROP TABLE IF EXISTS {self.__reconcile_table}"]
+         self.connector.execute(queries, autocommit=False, commit_on_close=True)
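
A hedged usage sketch (connector construction and all identifiers below are assumed, not from the package):

    from recurvedata.pigeon import const
    from recurvedata.pigeon.loader.csv_to_doris import CSVToDorisLoader

    connector = ...  # a configured DorisConnector; the loader raises without one
    loader = CSVToDorisLoader(
        database="dw",               # hypothetical
        table="orders",              # hypothetical
        filename="/tmp/orders.csv",  # hypothetical
        connector=connector,
        mode=const.LOAD_MERGE,
        primary_keys=["order_id"],
        max_filter_ratio=0.01,  # forwarded to the connector as load tolerance
    )
    loader.execute()  # on failure, handle_exception() restores or drops the suffixed tables

The md5-derived table suffix keeps concurrent loads into the same target from colliding on shared _staging/_reconcile/_bak names, at the cost of the 64-character name check in __init__.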