recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,315 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ from enum import Enum
5
+
6
+ from recurvedata.connectors.base import RecurveConnectorBase
7
+ from recurvedata.connectors.proxy import HttpProxyMixin
8
+ from recurvedata.core.translation import _l
9
+
10
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

DEFAULT_TIMEOUT = 10  # in seconds

# TODO(chenjingmeng): generate this list with scripts/gen_const.py instead of
# maintaining it by hand.
# Names of connection-config fields treated as secrets (per the constant's
# name). Dotted entries such as "key_dict.private_key" presumably address a
# nested key -- confirm against the code that consumes this list.
ALL_CONNECTION_SECRET_WORDS = [
    "account_key",
    "api_key",
    "api_secret_key",
    "app_secret",
    "blob_options.sas_token",
    "password",
    "private_key",
    "sas_token",
    "secret_access_key",
    "secret_key",
    "key_dict.private_key",
    "client_secret",
    "access_key_secret",
]
30
+
31
+
32
+ def _format_connector_module_name(connection_type: str) -> str:
33
+ """
34
+ connection_type can be ui_connection_type
35
+ """
36
+ connection_type = connection_type.replace(" ", "_").lower()
37
+ return f"recurvedata.connectors.connectors.{connection_type}"
38
+
39
+
40
# Connection types regarded as DB-API style sources. DataSourceWrapper.is_dbapi
# (recurvedata/connectors/datasource.py) tests membership in this list.
DBAPI_TYPES = [
    "azure_synapse",
    "bigquery",
    "clickhouse",
    "elasticsearch",
    "hive",
    "impala",
    "mongodb",
    "mssql",
    "mysql",
    "phoenix",
    "postgres",
    "redis",
    "redshift",
    "starrocks",
    "tidb",
    "doris",
    "microsoft_fabric",
]
# Explicit connection-type -> connector-module overrides. get_module_name()
# consults this mapping first and only falls back to the naming convention of
# _format_connector_module_name() when the key is absent. Keys are matched
# exactly (case-sensitive dict lookup), so each accepted spelling needs its
# own entry.
CONNECTION_TYPE_MODULE_MAPPING = {  # todo(chenjingmeng): auto generated
    "Tencent COS": "recurvedata.connectors.connectors.tencent_cos",
    "cos": "recurvedata.connectors.connectors.tencent_cos",
    "Elastic Search": "recurvedata.connectors.connectors.es",
    "elasticsearch": "recurvedata.connectors.connectors.es",
    "Ding Talk": "recurvedata.connectors.connectors.dingtalk",
    "feishu_bot": "recurvedata.connectors.connectors.feishu",
    "SelectDB(Doris)": "recurvedata.connectors.connectors.doris",
    "azure_mssql": "recurvedata.connectors.connectors.mssql",
    "google_bigquery": "recurvedata.connectors.connectors.bigquery",
    "Google BigQuery": "recurvedata.connectors.connectors.bigquery",
    "BigQuery": "recurvedata.connectors.connectors.bigquery",
    "PostgreSQL": "recurvedata.connectors.connectors.postgres",
    "MongoDB": "recurvedata.connectors.connectors.mongo",
    "mongodb": "recurvedata.connectors.connectors.mongo",
    "Microsoft SQL Server": "recurvedata.connectors.connectors.mssql",
    "selectdb(doris)": "recurvedata.connectors.connectors.doris",
    "apache impala": "recurvedata.connectors.connectors.impala",
    "Apache Impala": "recurvedata.connectors.connectors.impala",
    "Aliyun OSS": "recurvedata.connectors.connectors.oss",
}
80
+
81
+
82
def get_module_name(connection_type: str) -> str:
    """
    Resolve the connector module path for a connection type.

    An explicit entry in CONNECTION_TYPE_MODULE_MAPPING wins; any other
    connection type is turned into a module path by naming convention.
    """
    mapped_module = CONNECTION_TYPE_MODULE_MAPPING.get(connection_type)
    if mapped_module is None:
        return _format_connector_module_name(connection_type)
    return mapped_module
86
+
87
+
88
# Connection types listed for the SQL operator (per the constant's name).
SQL_OPERATOR_TYPES = [
    "azure_mssql",
    "azure_synapse",
    "bigquery",
    "clickhouse",
    "google_bigquery",
    "hive",
    "impala",
    "mssql",
    "mysql",
    "phoenix",
    "postgres",
    "redshift",
    "starrocks",
    "tidb",
    "doris",
    "microsoft_fabric",
]
# Connection types listed as usable with juice sync (per the constant's name).
JUICE_SYNC_ABLE_DBAPI_TYPES = ["azure_blob", "cos", "google_cloud_storage", "oss", "s3", "sftp"]

# This constant is maintained by hand; see
# "https://docs.getdbt.com/docs/supported-data-platforms".
# Open question from the author: is there a web API this could be fetched from?
DBT_SUPPORTED_TYPES = [
    # Officially trusted adapters; these database / data warehouse
    # connectors seem to be more robust.
    # ------------------------------------------------
    "spark",
    "azure_synapse",
    "bigquery",
    "postgres",
    "redshift",
    # connectors not implemented yet
    "alloy_db",
    "athena",
    "databricks",
    "dremio",
    "glue",
    "materialize",
    "microsoft_fabric",
    "oracle_autonomous_database",
    "snowflake",
    "starburst",
    "teradata",
    # --------------------------------------------------
    # community maintained
    # --------------------------------------------------
    "mysql",
    "starrocks",
    "clickhouse",
    "doris",
    "tidb",
    "hive",
    "impala",
    # connectors not implemented yet
    "duckdb",
    "exasol_analytics",
    "extrica",
    "ibm_db2",
    "infer",
    "iomete",
    "mindsdb",
    "risingwave",
    "rockset",
    "single_store",
    "sql_server",
    "sqlite",
    "timescaledb",
    "upsolver",
    "vertica",
    "databend_cloud",
    "yellowbrick",
]

# Also maintained by hand; see https://cube.dev/docs/product/configuration/data-sources
CUBE_SUPPORTED_TYPES = [
    "doris",
    "postgres",
    "bigquery",
    "starrocks",
    "mysql",
]
169
+
170
+
171
class ProcessDBMixin(object):
    """Connection-type specific hooks that normalise a raw connection config.

    Every hook first runs ``RecurveConnectorBase.preprocess_conf`` on the
    config and then applies one type-specific adjustment. The hooks are
    registered per connection type in
    ``CONNECTION_TYPE_PREPROCESS_CONF_MAPPING``.
    """

    @staticmethod
    def _preprocess_conf_with_proxy_check(data: dict) -> dict:
        """Run the base preprocessing and drop a proxy setting that fails its check.

        Shared implementation of the BigQuery, Google Cloud Storage, Google
        Service Account, OSS and S3 hooks, which all need exactly this logic.

        :param data: raw connection config
        :return: the preprocessed config; ``data["proxies"]`` is reset to
            ``None`` when proxies are configured but
            ``HttpProxyMixin.check_proxy`` does not accept them
        """
        data = RecurveConnectorBase.preprocess_conf(data)
        proxies = data.get("proxies")
        if proxies and not HttpProxyMixin.check_proxy(proxies):
            # Fall back to a direct connection instead of failing later on.
            logger.warning(f"proxies {proxies} is not available, use direct connect")
            data["proxies"] = None
        return data

    @staticmethod
    def auth_preprocess_conf(data):
        """Preprocess an ``auth`` config, decoding ``extra`` when it is a JSON string.

        :param data: raw connection config
        :return: the preprocessed config, with ``data["extra"]`` replaced by
            the decoded object when it was a non-empty string
        :raises json.JSONDecodeError: if ``extra`` is a string but not valid JSON
        """
        data = RecurveConnectorBase.preprocess_conf(data)
        json_data = data.get("extra")
        if json_data and isinstance(json_data, str):
            data["extra"] = json.loads(json_data)
        return data

    @classmethod
    def bigquery_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess a BigQuery config; unavailable proxies are reset to ``None``."""
        return cls._preprocess_conf_with_proxy_check(data)

    @classmethod
    def google_cloud_storage_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess a Google Cloud Storage config; unavailable proxies are reset to ``None``."""
        return cls._preprocess_conf_with_proxy_check(data)

    @classmethod
    def google_service_account_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess a Google Service Account config; unavailable proxies are reset to ``None``."""
        return cls._preprocess_conf_with_proxy_check(data)

    @classmethod
    def oss_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess an OSS config; unavailable proxies are reset to ``None``."""
        return cls._preprocess_conf_with_proxy_check(data)

    @staticmethod
    def other_preprocess_conf(data):
        """Preprocess an ``other`` config, unwrapping a JSON-encoded ``data`` field.

        :param data: raw connection config
        :return: the object decoded from ``data["data"]`` when that field is a
            non-empty string (it replaces the whole config); otherwise the
            preprocessed config itself
        :raises json.JSONDecodeError: if ``data["data"]`` is a string but not valid JSON
        """
        data = RecurveConnectorBase.preprocess_conf(data)
        json_data = data.get("data")
        if json_data and isinstance(json_data, str):
            return json.loads(json_data)
        return data

    @classmethod
    def s3_preprocess_conf(cls, data: dict) -> dict:
        """Preprocess an S3 config; unavailable proxies are reset to ``None``."""
        return cls._preprocess_conf_with_proxy_check(data)

    @staticmethod
    def spark_preprocess_conf(data):
        """Preprocess a Spark config, decoding ``execution_config["conf"]`` from JSON.

        :param data: raw connection config
        :return: the preprocessed config; when ``execution_config["conf"]`` was
            a non-empty string it is replaced in place by the decoded object
        :raises json.JSONDecodeError: if that string is not valid JSON
        """
        data = RecurveConnectorBase.preprocess_conf(data)
        execution_config = data.get("execution_config")
        if execution_config:
            execution_config_conf = execution_config.get("conf")
            if execution_config_conf and isinstance(execution_config_conf, str):
                execution_config["conf"] = json.loads(execution_config_conf)
        return data
244
+
245
+
246
# Dispatch table used by preprocess_conf(): connection type -> hook that
# normalises its config. Each type is registered under two spellings because
# the lookup is an exact, case-sensitive dict lookup. Types that are not
# listed fall back to RecurveConnectorBase.preprocess_conf.
CONNECTION_TYPE_PREPROCESS_CONF_MAPPING = {
    "auth": ProcessDBMixin.auth_preprocess_conf,
    "Auth": ProcessDBMixin.auth_preprocess_conf,
    "bigquery": ProcessDBMixin.bigquery_preprocess_conf,
    "BigQuery": ProcessDBMixin.bigquery_preprocess_conf,
    "google_cloud_storage": ProcessDBMixin.google_cloud_storage_preprocess_conf,
    "Google Cloud Storage": ProcessDBMixin.google_cloud_storage_preprocess_conf,
    "google_service_account": ProcessDBMixin.google_service_account_preprocess_conf,
    "Google Service Account": ProcessDBMixin.google_service_account_preprocess_conf,
    "oss": ProcessDBMixin.oss_preprocess_conf,
    "OSS": ProcessDBMixin.oss_preprocess_conf,
    "s3": ProcessDBMixin.s3_preprocess_conf,
    "S3": ProcessDBMixin.s3_preprocess_conf,
    "spark": ProcessDBMixin.spark_preprocess_conf,
    "Spark": ProcessDBMixin.spark_preprocess_conf,
}
262
+
263
+
264
+ # end of the auto-generated section
265
+
266
+
267
def preprocess_conf(connection_type: str, data: dict):
    """
    Normalise a connection config with the hook registered for its type.

    Connection types without an entry in
    CONNECTION_TYPE_PREPROCESS_CONF_MAPPING are handled by
    RecurveConnectorBase.preprocess_conf.
    """
    fallback_hook = RecurveConnectorBase.preprocess_conf
    hook = CONNECTION_TYPE_PREPROCESS_CONF_MAPPING.get(connection_type, fallback_hook)
    processed = hook(data)
    return processed
270
+
271
+
272
# JSON-schema style description of the SSH tunnel settings. Titles and
# descriptions are wrapped in _l() (imported from recurvedata.core.translation).
# NOTE(review): "order" presumably drives the field order in the UI and
# "secret" lists the fields to be treated as secrets -- confirm against the
# code that consumes this schema.
SSH_TUNNEL_CONFIG_SCHEMA = {
    "type": "object",
    "title": _l("SSH Tunnel Configuration"),
    "description": _l("Configuration for establishing an SSH tunnel connection"),
    "properties": {
        "host": {"type": "string", "title": _l("Host Address")},
        "user": {"type": "string", "title": _l("Username")},
        "port": {
            "type": "number",
            "title": _l("Port Number"),
            "default": 22,
        },
        "password": {"type": "string", "title": _l("Password")},
        "private_key_str": {
            "type": "string",
            "title": _l("SSH Private Key"),
            "description": _l("Private key content for SSH key-based authentication"),
        },
        "private_key_passphrase": {
            "type": "string",
            "title": _l("SSH Private Key Passphrase"),
            "description": _l("Passphrase to decrypt the SSH private key if encrypted"),
        },
    },
    "order": ["host", "user", "port", "password", "private_key_str", "private_key_passphrase"],
    "secret": ["password", "private_key_str", "private_key_passphrase"],
}
299
+
300
+
301
class LoadMode(str, Enum):
    """Load modes; members subclass ``str``, so they compare equal to their names."""

    # NOTE(review): presumably replaces the existing target data -- confirm against the loaders.
    OVERWRITE = "OVERWRITE"
    # NOTE(review): presumably adds to the existing target data -- confirm against the loaders.
    APPEND = "APPEND"
304
+
305
+
306
# dbt template expressions that read the credentials from the environment
# variables set by set_env_dbt_user() / set_env_dbt_password().
# The expressions use double quotes inside on purpose: after a YAML dump a
# single quote becomes '', which causes a dbt error.
ENV_VAR_DBT_USER = '{{ env_var("DBT_USER") }}'
ENV_VAR_DBT_PASSWORD = '{{ env_var("DBT_PASSWORD") }}'
308
+
309
+
310
def set_env_dbt_user(user_name: str):
    """Publish the dbt user name to this process through the DBT_USER environment variable."""
    os.environ.update({"DBT_USER": user_name})
312
+
313
+
314
def set_env_dbt_password(password: str):
    """Publish the dbt password to this process through the DBT_PASSWORD environment variable."""
    os.environ.update({"DBT_PASSWORD": password})
@@ -0,0 +1,189 @@
1
+ """
2
+ Pigeon used to call this concept a DataSource.
3
+ A similar class is wrapped here for now;
4
+ it will be merged into Base later.
5
+ """
6
+
7
+ import copy
8
+ from dataclasses import dataclass
9
+ from typing import Optional
10
+
11
+ from recurvedata.connectors.const import DBAPI_TYPES, preprocess_conf
12
+
13
+
14
@dataclass
class DataSourceBase:
    """A connection config together with its connection type.

    On construction the config is normalised with ``preprocess_conf``, legacy
    pigeon keywords are copied to their Recurve names, and ``extra`` is set to
    a deep copy of the resulting config (whatever was passed as ``extra`` is
    overwritten).
    """

    connection_type: str
    data: dict
    name: str = ""
    extra: dict = None

    def __post_init__(self):
        normalised = preprocess_conf(self.connection_type, self.data)
        self.data = normalised
        self.process_pigeon_keyword()
        self.extra = copy.deepcopy(self.data)

    def process_pigeon_keyword(self):
        """
        OneFlow uses pigeon's keywords, some of which differ from Recurve's.
        For historical reasons the database rows were copied over from OneFlow
        as-is, without converting the keywords, so both sets of keywords exist
        in the Recurve database.
        Here values stored under an old pigeon keyword are copied to the new
        Recurve keyword (the old key is kept; an existing Recurve key wins).
        :return:
        """
        from recurvedata.connectors.pigeon import DataSource as PigeonDataSource

        renames: dict = PigeonDataSource.PIGEON_KEYWORD_MAPPING.get(self.connection_type, {})
        for new_key, legacy_key in renames.items():
            if legacy_key and new_key not in self.data and legacy_key in self.data:
                self.data[new_key] = self.data[legacy_key]

    @property
    def recurve_connector_cls(self):
        """Connector class registered for this connection type."""
        from recurvedata.connectors._register import get_connection_class

        connector_cls = get_connection_class(self.connection_type)
        return connector_cls

    @property
    def recurve_connector(self):
        """
        Recurve connector instance (as opposed to the pigeon connector).
        :return:
        """
        connector_cls = self.recurve_connector_cls
        if connector_cls:
            return connector_cls(self.data)
        raise ValueError(f"Unknown connection type: {self.connection_type}")

    def juice_sync_path(self, path: str) -> tuple[str, str]:
        """
        Return the paths used in juice sync.
        The first return value is the path with a secret key,
        and the second return value is the path without a secret key, intended for display purposes
        """
        if self.recurve_connector_cls:
            return self.recurve_connector.juice_sync_path(path)
        raise ValueError(f"{self.connection_type} is not juice sync able")
69
+
70
+
71
class DataSource(DataSourceBase):
    """
    In pigeon a DataSource is the same thing as a Connection; both names are kept here.
    """

    @property
    def connector(self):
        """
        For now keep the former OneFlow behaviour and return a pigeon Connector
        object; connection types pigeon does not support get a Recurve connector.
        """
        from recurvedata.connectors.pigeon import DataSource as PigeonDataSource

        if not PigeonDataSource.is_support_connection_type(self.connection_type):
            connector_cls = self.recurve_connector_cls
            if connector_cls:
                return connector_cls(self.data)
            raise ValueError(f"Unknown connection type: {self.connection_type}")

        legacy_source = PigeonDataSource(connection_type=self.connection_type, name=self.name, data=self.data)
        return legacy_source.connector

    def create_engine(self):
        """Delegate to ``create_engine()`` of the connector."""
        active_connector = self.connector
        return active_connector.create_engine()

    @property
    def host(self):
        """Value stored under "url", else under "host", else ``None``."""
        return next((self.data[key] for key in ("url", "host") if key in self.data), None)

    @property
    def ds_type(self):
        # kept for compatibility with oneflow lineage
        return self.connection_type

    @property
    def database(self):
        # used in postgres load
        return self.data.get("database")

    @property
    def port(self):
        # used in email load
        return self.data.get("port")

    @property
    def password(self):
        # used in email load
        return self.data.get("password")

    @property
    def user(self):
        # used in email load
        return self.data.get("user")
125
+
126
+
127
class DataSourceWrapper(object):
    """Wrap a DataSource and expose only the necessary read-only functionality.

    Besides the explicit properties, every key of the config (``data``) can be
    read as an attribute through ``__getattr__``.
    """

    def __init__(self, ds: "DataSource"):
        # Stored name-mangled as ``_DataSourceWrapper__ds``.
        self.__ds = ds

    @property
    def name(self) -> str:
        return self.__ds.name

    @property
    def ds_type(self) -> str:
        return self.__ds.connection_type

    @property
    def host(self) -> Optional[str]:
        return self.__ds.host

    @property
    def database(self) -> Optional[str]:
        return self.__ds.database

    @property
    def user(self) -> Optional[str]:
        return self.__ds.user

    @property
    def password(self) -> Optional[str]:
        # In the original design ds_wrapper did not expose the password,
        # but it ended up being needed in many places.
        return self.__ds.password

    @property
    def port(self) -> Optional[int]:
        return self.__ds.port

    @property
    def is_dbapi(self) -> bool:
        return self.ds_type in DBAPI_TYPES

    @property
    def connector(self):
        return self.__ds.connector

    @property
    def recurve_connector(self):
        return self.__ds.recurve_connector

    @property
    def data(self) -> dict:
        """Config of the wrapped datasource.

        For the "other" connection type the value stored under the "data" key
        is returned when present, otherwise the config itself.
        """
        if self.ds_type == "other":
            return self.__ds.data.get("data", self.__ds.data)
        return self.__ds.data

    @property
    def extra(self) -> dict:
        """``extra`` of the wrapped datasource, unwrapped like ``data`` for "other"."""
        if self.ds_type == "other":
            return self.__ds.extra.get("data", self.__ds.extra)
        return self.__ds.extra

    def __getattr__(self, name):
        """Fall back to the config: ``wrapper.<key>`` returns ``wrapper.data[<key>]``.

        Only called when normal attribute lookup has failed.

        :raises AttributeError: if ``name`` is not a key of ``data`` either
        """
        # copy / pickle (and a bare ``__new__``) probe attributes such as
        # ``__setstate__`` on an instance whose wrapped datasource is not set
        # yet. Reading ``self.data`` then would look up the missing ``__ds``,
        # re-enter ``__getattr__`` and end in a RecursionError, so the config
        # is only consulted once the wrapped datasource exists.
        if "_DataSourceWrapper__ds" in self.__dict__:
            config = self.data
            if name in config:
                return config[name]
        return super().__getattribute__(name)  # raises AttributeError