recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,469 @@
1
+ import itertools
2
+ from abc import ABC, abstractmethod
3
+ from dataclasses import dataclass
4
+ from functools import cached_property, wraps
5
+ from io import StringIO
6
+ from typing import Any, Optional
7
+
8
+ import sqlalchemy
9
+ import sqlalchemy.sql.schema
10
+ import sqlglot
11
+ from sqlalchemy import create_engine, insert, inspect
12
+ from sqlalchemy import text as sqlalchemy_text
13
+ from sqlalchemy.engine.reflection import Inspector
14
+ from sqlalchemy.engine.url import URL
15
+ from sqlalchemy.schema import CreateTable, MetaData
16
+ from sqlalchemy.sql.compiler import DDLCompiler
17
+ from sqlglot import exp
18
+
19
+ from recurvedata.connectors.base import RecurveConnectorBase
20
+ from recurvedata.connectors.const import ENV_VAR_DBT_PASSWORD, ENV_VAR_DBT_USER, set_env_dbt_password, set_env_dbt_user
21
+ from recurvedata.consts import ConnectionCategory
22
+ from recurvedata.utils.imports import MockModule
23
+
24
+ # Lazy imports for non-simple dependencies
25
+ try:
26
+ import pandas as pd
27
+ import sqlparse
28
+ import sshtunnel
29
+ from paramiko import RSAKey
30
+ except ImportError:
31
+ pd = MockModule("pandas")
32
+ sqlparse = MockModule("sqlparse")
33
+ sshtunnel = MockModule("sshtunnel")
34
+ RSAKey = MockModule("paramiko.RSAKey")
35
+
36
# ``text`` is what ``DBAPIBase.execute`` wraps every statement in before
# handing it to ``Connection.execute``.  Under SQLAlchemy 2.x it is the real
# ``sqlalchemy.text``; under 1.x it is an identity function, so the raw SQL
# string is passed through unchanged.
#
# The major version is compared numerically: comparing the version *strings*
# would order "10.0" before "2".
if int(sqlalchemy.__version__.split(".", 1)[0]) >= 2:
    text = sqlalchemy_text
else:

    def text(v):
        """Return ``v`` unchanged (pass-through used with SQLAlchemy 1.x)."""
        return v
42
+
43
+
44
class DBAPIABC(ABC):
    """
    Abstract interface for SQL database connectors.

    Only signatures are declared here; every member is abstract and has to be
    supplied by a concrete connector.
    """

    @property
    @abstractmethod
    def sqlalchemy_url(self) -> URL:
        """SQLAlchemy URL used to reach the database."""

    @property
    @abstractmethod
    def test_query(self) -> str:
        """SQL statement used to probe the connection."""

    @abstractmethod
    def test_connection(self):
        """Check that the database can be reached."""

    @property
    @abstractmethod
    def connect_args(self) -> Optional[dict]:
        """Extra connection arguments, if any."""

    @abstractmethod
    def connect(self):
        """Open a connection (or engine) to the database."""

    @abstractmethod
    def execute(self, query: str):
        """Run ``query`` against the database."""

    @property  # todo: cache
    @abstractmethod
    def inspector(self) -> Inspector:
        """SQLAlchemy inspector for the database."""

    @abstractmethod
    def has_table(self, table, database=None):
        """Tell whether ``table`` exists, optionally within ``database``."""

    @abstractmethod
    def get_columns(self, table: str, database=None):
        """Describe the columns of ``table``."""

    @abstractmethod
    def _reflect_table(self, table: str, database=None, engine=None) -> sqlalchemy.sql.schema.Table:
        """Reflect ``table`` into a SQLAlchemy ``Table`` object."""

    @abstractmethod
    def generate_ddl(self, table: str, database=None):
        """Produce the DDL for ``table``."""

    @abstractmethod
    def fetchall(self, query: str):
        """Run ``query`` and return all rows."""

    @abstractmethod
    def fetchmany(self, query: str, size=None):
        """Run ``query`` and return a batch of rows; ``size`` selects the batch size."""

    @abstractmethod
    def fetchone(self, query: str):
        """Run ``query`` and return a single row."""

    @abstractmethod
    def get_pandas_df(self, query: str, parameters=None, **kwargs):
        """Run ``query`` and return the result as a pandas DataFrame."""

    # def commit(self):
    #     raise NotImplementedError

    @abstractmethod
    def insert(self, table: str, data: list[dict], database: str = None):
        """Insert the rows in ``data`` into ``table``."""
115
+
116
+
117
def with_ssh_tunnel(func):
    """
    Decorator that runs a connector method inside the connector's SSH tunnel.

    The wrapped method's ``self`` must expose an ``ssh_tunnel`` attribute:

    * falsy (no tunnel configured): ``func`` is called directly;
    * a tunnel that is already active: ``func`` is called directly and the
      tunnel is left running, because whoever opened it (typically an
      enclosing decorated method) is responsible for closing it;
    * otherwise the tunnel is entered as a context manager around the call
      and stopped afterwards, even if ``func`` raises.

    :param func: the method to wrap; receives ``self`` plus the original arguments
    :return: the wrapping function, carrying ``func``'s metadata
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        tunnel = self.ssh_tunnel
        # Re-entrancy guard: without the ``is_active`` check, a decorated method
        # that calls another decorated method would have its tunnel stopped by
        # the inner call while the outer call still needs it.
        if not tunnel or getattr(tunnel, "is_active", False):
            return func(self, *args, **kwargs)
        try:
            with tunnel:
                return func(self, *args, **kwargs)
        finally:
            # Explicit stop kept from the original implementation; it also
            # covers the case where entering the tunnel itself fails.
            tunnel.stop()

    return wrapper
134
+
135
+
136
class DBAPIBase(RecurveConnectorBase):
    """
    Shared SQLAlchemy-based implementation for database connectors.

    Each operation builds a fresh engine through :meth:`connect`.  Methods
    decorated with ``with_ssh_tunnel`` run inside the SSH tunnel described by
    the ``ssh_tunnel`` entry of ``conf`` when such an entry is present.

    NOTE(review): ``host``, ``port``, ``user``, ``password``, ``database``,
    ``conf`` and ``connection_type`` are read here but not defined in this
    class; presumably they are provided by ``RecurveConnectorBase`` -- confirm.
    """

    SYSTEM_DATABASES = []
    setup_extras_require = ["sqlalchemy", "sshtunnel", "paramiko"]
    driver = ""
    config_schema = {}
    category = [
        ConnectionCategory.DATABASE,
    ]
    column_type_mapping = {}
    # Common data types supported by all connectors
    # each connector can add its own types
    available_column_types = [
        "smallint",
        "int",
        "bigint",
        "float",
        "double",
        "decimal",
        "date",
        "timestamp",
        "char",
        "varchar",
        "json",
    ]

    def __init__(self, conf, *args, **kwargs):
        """
        Normalise ``conf`` and delegate to the base class.

        An empty-string password is replaced with ``None``.  Note that this
        rewrites the ``conf`` mapping passed in by the caller.
        """
        if conf.get("password") == "":
            conf["password"] = None
        super().__init__(conf, *args, **kwargs)

    @property
    def sqlalchemy_url(self):
        """
        Build the SQLAlchemy URL for this connection.

        When an SSH tunnel exists and is active, the tunnel's local bind
        host/port replace the configured host/port.
        """
        host, port = self.host, self.port
        tunnel = self.ssh_tunnel
        if tunnel and tunnel.is_active:
            host, port = tunnel.local_bind_host, tunnel.local_bind_port

        # Numeric major-version check (a string comparison would misorder "10" and "2").
        # On 2.x the ``query`` argument is passed explicitly.
        if int(sqlalchemy.__version__.split(".", 1)[0]) >= 2:
            return URL(self.driver, self.user, self.password, host, port, self.database, query={})
        return URL(self.driver, self.user, self.password, host, port, self.database)

    @property
    def test_query(self):
        """SQL statement executed by :meth:`test_connection`."""
        return "select 1"

    @cached_property
    @with_ssh_tunnel
    def type_code_mapping(self) -> dict:
        """
        type_code from sqlalchemy's cursor.description -> database's dialect data type name

        Not implemented in the base class.
        """
        raise NotImplementedError

    def sqlalchemy_column_type_code_to_name(self, type_code: Any, size: int | None = None) -> str:
        """
        since cursor.description return type code only, we need to map it to dialect data type name

        Not implemented in the base class.

        :param type_code: an object returned by cursor.description
        :param size: optional size; unused here
        :return: a string of column type name, in lower case
        """
        raise NotImplementedError

    def test_connection(self):
        """Execute :attr:`test_query`; any failure propagates as an exception."""
        self.execute(self.test_query)

    @property
    def connect_args(self):
        """Extra ``connect_args`` handed to ``create_engine`` (empty by default)."""
        return {}

    def connect(self):
        """
        Create and return a new SQLAlchemy engine for :attr:`sqlalchemy_url`.

        A new engine is created on every call; callers are expected to
        dispose of it when done.
        """
        engine = create_engine(
            self.sqlalchemy_url,
            max_overflow=0,  # todo: add to const
            pool_recycle=10 * 60,  # todo: add to const
            connect_args=self.connect_args,
            echo=False,
        )
        return engine  # todo: thread safe? use session to wrap?

    @with_ssh_tunnel
    def execute(self, query: str | list[str]):
        """
        Execute one or more SQL statements.

        ``query`` may be a single string or a list of strings; each string is
        split into individual statements with ``sqlparse.split`` and the
        statements are executed in order on one connection.

        :param query: SQL text, or a list of SQL texts
        """
        if isinstance(query, list):
            queries = list(itertools.chain.from_iterable(map(sqlparse.split, query)))
        else:
            queries = sqlparse.split(query)

        engine = self.connect()
        try:
            with engine.connect() as con:
                for q in queries:
                    con.execute(text(q))
                # SQLAlchemy 2.x connections are "commit as you go": without an
                # explicit commit the work is rolled back when the connection
                # closes.  Connections that expose no ``commit`` are left alone.
                commit = getattr(con, "commit", None)
                if callable(commit):
                    commit()
        finally:
            # Release the pool created by connect(); it is never reused.
            engine.dispose()

    @property
    def inspector(self) -> Inspector:
        """Return a SQLAlchemy inspector bound to a newly created engine."""
        engine = self.connect()
        inspector: Inspector = inspect(engine)
        return inspector

    @with_ssh_tunnel
    def has_table(self, table, database=None):
        """Tell whether ``table`` exists in ``database`` (default: ``self.database``)."""
        database = database or self.database
        return self.inspector.has_table(table, schema=database)

    def _extract_column_name(self, column_type):
        """Return the ``__visit_name__`` of a SQLAlchemy column type object."""
        return column_type.__visit_name__

    @with_ssh_tunnel
    def get_columns(self, table: str, database: str = None) -> list[dict]:
        """
        Return the inspector's column descriptions for ``table``.

        Each description's ``"type"`` entry is replaced by the lower-cased
        type name produced by :meth:`_extract_column_name`.

        :param table: table name
        :param database: schema to look in (default: ``self.database``)
        :return: list of column description dicts
        """
        database = database or self.database
        column_dcts = self.inspector.get_columns(table, schema=database)
        for dct in column_dcts:
            dct["type"] = self._extract_column_name(dct["type"]).lower()
        return column_dcts

    @staticmethod
    def format_key(key):
        """Wrap ``key`` in backticks, first stripping any it already carries at the ends."""
        key = key.strip("`")
        return f"`{key}`"

    def _reflect_table(self, table, database=None, engine=None) -> sqlalchemy.sql.schema.Table:
        """
        Reflect ``table`` from the database and return its ``Table`` object.

        :param table: name of the table to reflect
        :param database: schema passed to ``MetaData.reflect``
        :param engine: engine to reflect with; a new one is created when omitted
        :return: the reflected table
        """
        if not engine:
            engine = self.connect()
        meta = MetaData()
        meta.reflect(
            bind=engine,
            schema=database,
            only=[
                table,
            ],
        )
        # Reflection can also load tables referenced through foreign keys, and
        # ``sorted_tables`` orders by dependency, so its first entry may be a
        # referenced table rather than the requested one.  Select by name.
        candidates = [t for t in meta.tables.values() if t.name == table]
        for candidate in candidates:
            if candidate.schema == database:
                return candidate
        if candidates:
            return candidates[0]
        # Fall back to the previous behaviour if no name matched.
        return meta.sorted_tables[0]

    @with_ssh_tunnel
    def generate_ddl(self, table, database=None):
        """Reflect ``table`` and return its ``CREATE TABLE`` statement as a string."""
        engine = self.connect()
        table = self._reflect_table(table, database=database, engine=engine)
        ddl: DDLCompiler = CreateTable(table).compile(engine)
        return ddl.string

    def _fetch_with_cursor(self, query, fetch):
        """
        Run ``query`` on a raw DBAPI cursor and return ``fetch(cursor)``.

        The raw connection is closed and the engine disposed even when the
        query fails.  Callers are responsible for the SSH tunnel.
        """
        engine = self.connect()
        try:
            connection = engine.raw_connection()
            try:
                with connection.cursor() as cursor:
                    cursor.execute(query)
                    return fetch(cursor)
            finally:
                connection.close()
        finally:
            engine.dispose()

    @with_ssh_tunnel
    def fetchall(self, query):
        """Execute ``query`` and return ``cursor.fetchall()``."""
        return self._fetch_with_cursor(query, lambda cursor: cursor.fetchall())

    @with_ssh_tunnel
    def fetchmany(self, query, size=None):
        """Execute ``query`` and return ``cursor.fetchmany(size=size)``."""
        return self._fetch_with_cursor(query, lambda cursor: cursor.fetchmany(size=size))

    @with_ssh_tunnel
    def fetchone(self, query):
        """Execute ``query`` and return ``cursor.fetchone()``."""
        return self._fetch_with_cursor(query, lambda cursor: cursor.fetchone())

    @with_ssh_tunnel
    def get_pandas_df(self, query, parameters=None, **kwargs):
        """
        Run ``query`` through ``pandas.read_sql_query`` and return the DataFrame.

        :param query: SQL to execute
        :param parameters: forwarded as ``params``
        :param kwargs: forwarded to ``pandas.read_sql_query``
        """
        engine = self.connect()
        try:
            return pd.read_sql_query(sql=query, con=engine, params=parameters, **kwargs)
        finally:
            engine.dispose()

    # def commit(self):
    #     raise NotImplementedError

    @with_ssh_tunnel
    def insert(self, table: str, data: list[dict], database: str = None):
        """
        Insert ``data`` (one dict per row) into ``table``.

        :param table: target table name; it is reflected from the database
        :param data: rows to insert; nothing is done when empty
        :param database: schema containing the table
        """
        if not data:
            # An empty parameter list would be executed as a parameterless
            # INSERT instead of a no-op.
            return
        engine = self.connect()
        try:
            target = self._reflect_table(table, database=database, engine=engine)
            # ``begin()`` commits on success and rolls back on error, so the
            # rows are persisted regardless of the SQLAlchemy major version.
            with engine.begin() as conn:
                conn.execute(insert(target), data)
        finally:
            engine.dispose()

    @with_ssh_tunnel
    def get_databases(self):
        """Return schema names whose lower-cased form is not in ``SYSTEM_DATABASES``."""
        return [d for d in self.inspector.get_schema_names() if d.lower() not in self.SYSTEM_DATABASES]

    @with_ssh_tunnel
    def get_tables(self, database: str = None):
        """Return the table names in ``database`` (default: ``self.database``)."""
        database = database or self.database
        return self.inspector.get_table_names(database)

    @with_ssh_tunnel
    def get_views(self, database: str = None):
        """Return the view names in ``database`` (default: ``self.database``)."""
        database = database or self.database
        return self.inspector.get_view_names(database)

    def _init_ssh_tunnel(self):
        """
        init a ssh tunnel based on self.ssh_tunnel_config

        :return: an ``SSHTunnelForwarder`` targeting ``(self.host, self.port)``,
            or ``None`` when no tunnel is configured
        """

        def _init_private_key(config: SSHTunnelConfig):
            # The key text may arrive with literal "\n" sequences; turn them
            # into real newlines before parsing.  Returns None without a key.
            if config.private_key_str:
                pk_str = config.private_key_str.replace("\\n", "\n")
                return RSAKey.from_private_key(StringIO(pk_str), password=config.private_key_passphrase)

        tunnel_config = self.ssh_tunnel_config
        if not tunnel_config:
            return

        tunnel = sshtunnel.SSHTunnelForwarder(
            ssh_address_or_host=(tunnel_config.host, tunnel_config.port),
            ssh_username=tunnel_config.user,
            ssh_password=tunnel_config.password,
            ssh_pkey=_init_private_key(tunnel_config),
            remote_bind_address=(self.host, self.port),
        )

        return tunnel

    @property
    def ssh_tunnel(self):
        """
        Return the SSH tunnel for this connector, or ``None`` if none is configured.

        The tunnel object is created on first access and cached on the instance.
        """
        tunnel_config = self.ssh_tunnel_config
        if not tunnel_config:
            return
        if not hasattr(self, "_ssh_tunnel"):
            self._ssh_tunnel = self._init_ssh_tunnel()
        return self._ssh_tunnel

    @property
    def ssh_tunnel_config(self) -> Optional["SSHTunnelConfig"]:
        """
        Build an ``SSHTunnelConfig`` from ``conf["ssh_tunnel"]``.

        Returns ``None`` when the entry is missing, empty, or has no ``host``.
        """
        ssh_config = self.conf.get("ssh_tunnel", {})
        if not (ssh_config and ssh_config.get("host")):
            return
        return SSHTunnelConfig(**ssh_config)

    @classmethod
    def get_sql_operator_types(cls):
        """Return a one-element list holding this connector's ``connection_type``."""
        return [
            cls.connection_type,
        ]

    def convert_config_to_dbt_profile(self, database: str, schema: str = None) -> dict:
        """
        Build a dbt profile dict for this connection.

        User and password are the ``ENV_VAR_DBT_USER`` / ``ENV_VAR_DBT_PASSWORD``
        constants rather than the actual credentials.

        :param database: value for the profile's ``schema`` (default: ``self.database``)
        :param schema: accepted but not used by this implementation
        """
        return {
            "server": self.host,
            "port": self.port,
            "user": ENV_VAR_DBT_USER,
            "password": ENV_VAR_DBT_PASSWORD,
            "schema": database or self.database,
            "type": self.connection_type,
            "threads": 10,
        }

    def set_env_when_get_dbt_connection(self):
        """Pass the user and password (empty string when unset) to the dbt env setters."""
        set_env_dbt_user(self.user or "")
        set_env_dbt_password(self.password or "")

    @classmethod
    def _sqlglot_dialect(cls):
        """Return the sqlglot dialect name for this connector (``None`` if it has no type)."""
        # dialect impala -> hive, cuz there is no dialect 'impala' in sqlglot
        return "hive" if cls.connection_type == "impala" else (cls.connection_type or None)

    @classmethod
    def order_sql(cls, sql: str, orders: list[dict[str, str]] = None, return_sql: bool = True):
        """
        order the sql by the orders

        The query is wrapped as a subquery of ``SELECT *`` and the ``ORDER BY``
        is applied to the outer select.

        :param sql: the query to wrap
        :param orders: dicts with a ``"field"`` and an ``"order"`` key
        :param return_sql: return a SQL string when true, otherwise the sqlglot expression
        """
        dialect = cls._sqlglot_dialect()
        # Parse the SQL query
        parsed = sqlglot.parse_one(sql, read=dialect)
        # since some sql dialects have special identifier, we need to use the dialect to generate the clean sql
        clean_sql = parsed.sql(dialect=dialect, comments=False)
        # Wrap the entire query with a subquery
        alias = "_recurve_limit_subquery"
        subquery = exp.Subquery(this=clean_sql, alias=alias)

        # Outer SELECT over the subquery; ordering is attached below
        outer_select = exp.select("*").from_(subquery)
        if orders:
            order_clauses = []
            for order in orders:
                if cls.connection_type in ["postgres", "redshift"]:
                    # These two connection types get a double-quoted field name.
                    field_expr = f'{alias}."{order["field"]}"'
                else:
                    field_expr = exp.Column(this=order["field"], table=alias)
                    field_expr = field_expr.sql(dialect=dialect)

                order_clauses.append(f'{field_expr} {order["order"]}')

            order_stmt = ", ".join(order_clauses)
            outer_select = outer_select.order_by(order_stmt)

        return outer_select.sql(dialect=dialect) if return_sql else outer_select

    @classmethod
    def limit_sql(cls, sql: str, limit: int = 100, orders: list[dict[str, str]] = None, offset: int = 0) -> str:
        """
        used for preview, parse sql and wrap sql with limit.
        no validation on sql.
        If the sql is DML, then execute it will raise an error.

        :param sql: the query to wrap
        :param limit: maximum number of rows
        :param orders: forwarded to :meth:`order_sql`
        :param offset: rows to skip; no OFFSET is added when 0
        """
        dialect = cls._sqlglot_dialect()

        outer_select = cls.order_sql(sql, orders, return_sql=False)

        if offset:
            outer_select = outer_select.offset(offset)

        outer_select = outer_select.limit(limit)

        return outer_select.sql(dialect=dialect)

    @classmethod
    def count_sql(cls, sql: str) -> str:
        """
        used for preview, parse sql and wrap sql with count.
        no validation on sql.
        If the sql is DML, then execute it will raise an error.
        """
        return f"SELECT COUNT(1) FROM ({sql}) AS cnt_subquery"
460
+
461
+
462
@dataclass
class SSHTunnelConfig:
    """Connection settings for an SSH tunnel; only host, port and user are required."""

    host: str
    port: int
    user: str
    password: Optional[str] = None
    private_key_str: Optional[str] = None  # the private key text itself, not a file name
    private_key_passphrase: Optional[str] = None  # passphrase of the private key
@@ -0,0 +1,66 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ from recurvedata.connectors.base import RecurveConnectorBase
4
+
5
+
6
class FileConnectorABC(ABC):
    """
    Abstract interface for connectors that expose file-style operations.

    Every method below is abstract; concrete connectors must implement all
    of them before they can be instantiated.
    """

    @abstractmethod
    def exists(self, key) -> bool:
        """Tell whether ``key`` exists."""

    @abstractmethod
    def stat(self, key):
        """Return the implementation's status information for ``key``."""

    @abstractmethod
    def test_connection(self):
        """Check that the connection works."""

    @abstractmethod
    def get(self, key, local_file):
        """Fetch ``key`` into ``local_file``."""

    @abstractmethod
    def put(self, local_file, object_store_key):
        """Store ``local_file`` under ``object_store_key``."""

    @abstractmethod
    def delete(self, key):
        """Remove ``key``."""

    @abstractmethod
    def ls(self, key):
        """List what is found under ``key``."""
34
+
35
+
36
class FileConnectorMixin(FileConnectorABC, RecurveConnectorBase):
    """
    Implements the ``FileConnectorABC`` operations by forwarding each call
    to ``self.connector``.

    Subclasses provide the backend object by overriding ``init_connection``.
    """

    def __init__(self, conf: dict, *args, **kwargs):
        # conf is stored before the backend is built so that
        # init_connection may rely on self.conf.
        self.conf = conf
        self.connector = self.init_connection(conf)

    def init_connection(self, conf):
        """Hook that builds the backend from ``conf``; must be overridden."""
        raise NotImplementedError

    def exists(self, key) -> bool:
        """Forward to the backend's ``exists``."""
        backend = self.connector
        return backend.exists(key)

    def stat(self, key):
        """Forward to the backend's ``stat``."""
        backend = self.connector
        return backend.stat(key)

    def mkdir(self, key):
        """Forward to the backend's ``mkdir``."""
        backend = self.connector
        return backend.mkdir(key)

    def test_connection(self):
        """List ``"/"`` on the backend; nothing is returned, errors propagate."""
        root = "/"
        self.connector.ls(root)

    def get(self, key, local_file):
        """Forward to the backend's ``get``."""
        backend = self.connector
        return backend.get(key, local_file)

    def put(self, local_file, object_store_key):
        """Forward to the backend's ``put``."""
        backend = self.connector
        return backend.put(local_file, object_store_key)

    def delete(self, key):
        """Forward to the backend's ``rm``."""
        backend = self.connector
        return backend.rm(key)

    def ls(self, key):
        """Forward to the backend's ``ls``."""
        backend = self.connector
        return backend.ls(key)
@@ -0,0 +1,40 @@
1
+ from recurvedata.connectors.base import RecurveConnectorBase
2
+ from recurvedata.connectors.fs import FileConnectorABC
3
+ from recurvedata.consts import ConnectionCategory
4
+
5
+
6
class FTPMixin(RecurveConnectorBase, FileConnectorABC):
    """
    Storage-category mixin that implements the ``FileConnectorABC``
    operations by forwarding each call to ``self.connector``.

    Subclasses provide the backend object by overriding ``init_connection``.
    """

    category = [ConnectionCategory.STORAGE]

    def __init__(self, conf: dict, *args, **kwargs):
        # conf is stored before the backend is built so that
        # init_connection may rely on self.conf.
        self.conf = conf
        self.connector = self.init_connection(conf)

    def init_connection(self, conf):
        """Hook that builds the backend from ``conf``; must be overridden."""
        raise NotImplementedError

    def exists(self, key) -> bool:
        """Forward to the backend's ``exists``."""
        backend = self.connector
        return backend.exists(key)

    def stat(self, key):
        """Forward to the backend's ``stat``."""
        backend = self.connector
        return backend.stat(key)

    def mkdir(self, key):
        """Forward to the backend's ``mkdir``."""
        backend = self.connector
        return backend.mkdir(key)

    def test_connection(self):
        """List ``"/"`` on the backend; nothing is returned, errors propagate."""
        root = "/"
        self.connector.ls(root)

    def get(self, key, local_file):
        """Forward to the backend's ``get``."""
        backend = self.connector
        return backend.get(key, local_file)

    def put(self, local_file, object_store_key):
        """Forward to the backend's ``put``."""
        backend = self.connector
        return backend.put(local_file, object_store_key)

    def delete(self, key):
        """Forward to the backend's ``rm``."""
        backend = self.connector
        return backend.rm(key)

    def ls(self, key):
        """Forward to the backend's ``ls``."""
        backend = self.connector
        return backend.ls(key)
@@ -0,0 +1,60 @@
1
+ from recurvedata.connectors.base import RecurveConnectorBase
2
+ from recurvedata.connectors.fs import FileConnectorABC
3
+ from recurvedata.consts import ConnectionCategory
4
+
5
+
6
class ObjectStoreMixin(RecurveConnectorBase, FileConnectorABC):
    """
    Storage-category mixin that implements the ``FileConnectorABC``
    operations on top of ``self.connector``.

    Every key is first passed through ``bucket_key`` so that the configured
    bucket (if any) is prepended before the backend is called.
    Subclasses provide the backend object by overriding ``init_connection``.
    """

    category = [ConnectionCategory.STORAGE]

    def __init__(self, conf: dict, *args, **kwargs):
        super().__init__(conf, *args, **kwargs)
        self.connector = self.init_connection(conf)

    def init_connection(self, conf):
        """Hook that builds the backend from ``conf``; must be overridden."""
        raise NotImplementedError

    def exists(self, key) -> bool:
        """Forward the bucket-qualified key to the backend's ``exists``."""
        full_key = self.bucket_key(key)
        return self.connector.exists(full_key)

    def stat(self, key):
        """Forward the bucket-qualified key to the backend's ``stat``."""
        full_key = self.bucket_key(key)
        return self.connector.stat(full_key)

    def mkdir(self, key):
        """Forward the bucket-qualified key to the backend's ``mkdir``."""
        full_key = self.bucket_key(key)
        return self.connector.mkdir(full_key)

    def test_connection(self):
        """List the bucket-qualified ``"/"``; nothing is returned, errors propagate."""
        root_key = self.bucket_key("/")
        self.connector.ls(root_key)

    def get(self, key, local_file):
        """Forward the bucket-qualified key and ``local_file`` to the backend's ``get``."""
        full_key = self.bucket_key(key)
        return self.connector.get(full_key, local_file)

    def put(self, local_file, object_store_key):
        """Forward ``local_file`` and the bucket-qualified target key to the backend's ``put``."""
        remote_key = self.bucket_key(object_store_key)
        return self.connector.put(local_file, remote_key)

    def delete(self, key):
        """Forward the bucket-qualified key to the backend's ``rm``."""
        full_key = self.bucket_key(key)
        return self.connector.rm(full_key)

    def ls(self, key):
        """Forward the bucket-qualified key to the backend's ``ls``."""
        full_key = self.bucket_key(key)
        return self.connector.ls(full_key)

    @property
    def bucket(self):
        """The ``"bucket"`` entry of ``self.conf`` (``None`` when absent)."""
        return self.conf.get("bucket")

    def bucket_key(self, key):
        """
        Prefix ``key`` with the configured bucket.

        Without a bucket the key is returned untouched. Otherwise bucket and
        key are joined with exactly one ``/`` inserted only when the key does
        not already start with one.
        """
        bucket_name = self.bucket
        if not bucket_name:
            return key
        separator = "" if key.startswith("/") else "/"
        return f"{bucket_name}{separator}{key}"
59
+
60
+ # todo: delete by prefix