recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333)
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,210 @@
1
+ from functools import cached_property
2
+
3
try:
    import re

    from pyhive.sqlalchemy_hive import HiveDialect, exc
    from sqlalchemy import text

    def _get_table_columns(self, connection, table_name, schema):
        """Return the rows of ``DESCRIBE <table>``, raising if the table is missing.

        Replacement for ``HiveDialect._get_table_columns`` (patched in below,
        written against pyhive==0.6.5) that also understands the column naming
        of Impala's ``DESCRIBE`` output.

        Args:
            connection: SQLAlchemy connection used to run the statement.
            table_name: Unescaped table name (no backticks).
            schema: Optional schema name; prefixed to the table name when truthy.

        Returns:
            The list of rows fetched from the ``DESCRIBE`` statement.

        Raises:
            exc.NoSuchTableError: When the server reports the table as missing,
                either through an ``OperationalError`` or through a single
                result row carrying a "does not exist" message.
            exc.OperationalError: Any other operational error, re-raised as is.
        """
        full_table = table_name
        if schema:
            full_table = schema + "." + table_name
        # TODO using TGetColumnsReq hangs after sending TFetchResultsReq.
        # Using DESCRIBE works but is uglier.
        try:
            # This needs the table name to be unescaped (no backticks).
            rows = connection.execute(text("DESCRIBE {}".format(full_table))).fetchall()
        except exc.OperationalError as e:
            # Does the table exist?
            regex_fmt = r"TExecuteStatementResp.*SemanticException.*Table not found {}"
            regex = regex_fmt.format(re.escape(full_table))
            if re.search(regex, e.args[0]):
                # Keep the driver error attached as the explicit cause.
                raise exc.NoSuchTableError(full_table) from e
            raise

        # Hive is stupid: this is what I get from DESCRIBE some_schema.does_not_exist
        regex = r"Table .* does not exist"
        if len(rows) == 1:
            # recurvedata changed
            row = rows[0]
            # Newer SQLAlchemy rows expose their keys through ``_mapping``; the
            # legacy ``Row.keys()`` is only used when ``_mapping`` is absent.
            mapping = getattr(row, "_mapping", None)
            keys = mapping.keys() if mapping is not None else row.keys()
            # Hive names the first column ``col_name``; Impala names it ``name``.
            message = row.col_name if "name" not in keys else row.name
            if re.match(regex, message):
                raise exc.NoSuchTableError(full_table)
            # recurvedata changed finish pyhive==0.6.5
        return rows

    HiveDialect._get_table_columns = _get_table_columns
except ImportError:
    # PyHive / SQLAlchemy are optional extras; without them there is nothing to patch.
    pass
43
+
44
+ from recurvedata.connectors._register import register_connector_class
45
+ from recurvedata.connectors.const import ENV_VAR_DBT_PASSWORD, ENV_VAR_DBT_USER
46
+ from recurvedata.connectors.dbapi import DBAPIBase, with_ssh_tunnel
47
+ from recurvedata.consts import ConnectionCategory, ConnectorGroup
48
+ from recurvedata.core.translation import _l
49
+
50
CONNECTION_TYPE = "impala"
UI_CONNECTION_TYPE = "Apache Impala"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class ImpalaConnector(DBAPIBase):
    """Connector definition for Apache Impala, built on ``DBAPIBase``.

    Declares the connection metadata and JSON-style config schema, and
    provides table/view/column listing plus a dbt profile for Impala.
    """

    SYSTEM_DATABASES = [
        "information_schema",
        "_impala_builtins",
    ]
    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    setup_extras_require = ["PyHive", "thrift-sasl"]
    driver = "hive"  # TODO: reuse the Hive driver for now
    category = [ConnectionCategory.WAREHOUSE]
    group = [ConnectorGroup.DESTINATION]

    config_schema = {
        "type": "object",
        "properties": {
            "host": {
                "type": "string",
                "title": _l("Host Address"),
                "default": "127.0.0.1",
            },
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 21050,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "database": {
                "type": "string",
                "title": _l("Database"),
                "description": _l("The name of the database to connect to"),
                "default": "default",
            },
            "hdfs_options": {
                "type": "object",
                "title": _l("HDFS Configuration"),
                "description": _l("Configuration options for HDFS connection"),
                "properties": {
                    "host": {
                        "type": "string",
                        "title": _l("Host"),
                        "description": _l("HDFS namenode hostname or IP address"),
                    },
                    "port": {
                        "type": "number",
                        "title": _l("Port Number"),
                        "description": _l("HDFS namenode port number"),
                        "default": 50070,
                    },
                    "user": {"type": "string", "title": _l("Username")},
                },
                "order": ["host", "port", "user"],
            },
            "auth_mechanism": {
                "type": "string",
                "title": _l("Authentication Mechanism"),
                "description": _l("Impala authentication mechanism (e.g. PLAIN, GSSAPI, LDAP)"),
                "default": "PLAIN",
            },
            "auth": {
                "type": "string",
                "title": _l("Authentication Type"),
                "default": "LDAP",
            },
            "use_http_transport": {
                "type": "boolean",
                "title": _l("Use HTTP Transport"),
                "default": True,
            },
            "use_ssl": {
                "type": "boolean",
                "title": _l("Use SSL"),
                "default": True,
            },
            "http_path": {
                "type": "string",
                "title": _l("HTTP Path"),
                "default": "",
            },
        },
        "order": [
            "host",
            "port",
            "user",
            "password",
            "database",
            "hdfs_options",
            "auth",
            "auth_mechanism",
            "use_http_transport",
            "use_ssl",
            "http_path",
        ],
        "required": ["host", "port"],
        "secret": ["password"],
    }

    @property
    def connect_args(self):
        """Return the ``auth`` connect argument derived from the credentials.

        * no user and no password -> ``{"auth": "NOSASL"}``
        * a password is set, or ``auth`` equals ``"LDAP"`` -> ``{"auth": "LDAP"}``
        * anything else -> ``{}``
        """
        if not self.password and not self.user:
            return {"auth": "NOSASL"}
        # Crude handling for now: any password implies LDAP. (TODO: refine.)
        if self.password or self.auth == "LDAP":
            return {"auth": "LDAP"}
        return {}

    # generate_ddl todo: stored as parquet

    def _extract_column_name(self, column_type):
        """Return the ``__visit_name__`` of a column type.

        For a ``type_decorator`` the visit name of the wrapped ``impl`` type is
        returned instead.
        """
        visit_type = column_type.__visit_name__
        if visit_type == "type_decorator":
            return column_type.impl.__visit_name__
        return visit_type

    @with_ssh_tunnel
    def get_tables(self, database: str | None = None):
        """List table names via ``SHOW TABLES IN <database>``.

        Args:
            database: Database to inspect; falls back to ``self.database``.

        Returns:
            The first column of every result row.
        """
        # NOTE(review): the database name is interpolated into the statement
        # unquoted — confirm callers never pass untrusted input here.
        database = database or self.database
        result = self.fetchall(f"SHOW TABLES IN {database}")
        return [r[0] for r in result]

    @with_ssh_tunnel
    def get_views(self, database: str | None = None):
        """List view names via ``SHOW VIEWS IN <database>``.

        Args:
            database: Database to inspect; falls back to ``self.database``.

        Returns:
            The first column of every result row.
        """
        database = database or self.database
        result = self.fetchall(f"SHOW VIEWS IN {database}")
        return [r[0] for r in result]

    def get_columns(self, table, database=None):
        """Return column descriptions with ``type`` reduced to a lowercase name.

        Args:
            table: Table name; passed through ``self.format_key``.
            database: Database/schema name; falls back to ``self.database``.

        Returns:
            A list of new dicts mirroring the inspector's column dicts, with
            the ``type`` entry replaced by the lowercase visit name of the type.
        """
        database = database or self.database
        column_dcts = self.inspector.get_columns(self.format_key(table), schema=self.format_key(database))
        # Build fresh dicts instead of overwriting ``type`` in place: the
        # inspector may hand back objects it keeps cached, and replacing the
        # type object with a string there would break a later lookup.
        return [{**dct, "type": self._extract_column_name(dct["type"]).lower()} for dct in column_dcts]

    def convert_config_to_dbt_profile(self, database: str, schema: str = None) -> dict:
        """Build the dbt profile dict for this connection.

        Args:
            database: Value used for the profile's ``schema``; falls back to
                ``self.database`` when falsy.
            schema: Accepted but not used by this implementation.

        Returns:
            The profile dict. ``username`` and ``password`` are set to the
            ``ENV_VAR_DBT_USER`` / ``ENV_VAR_DBT_PASSWORD`` constants rather
            than to the connector's own credentials.
        """
        return {
            "type": "impala",
            "host": self.host,
            "http_path": self.http_path,
            "port": self.port,
            "auth_type": self.auth.lower(),
            "use_http_transport": self.use_http_transport,
            "use_ssl": self.use_ssl,
            "username": ENV_VAR_DBT_USER,
            "password": ENV_VAR_DBT_PASSWORD,
            "schema": database or self.database,
            "threads": 10,
        }

    @cached_property
    @with_ssh_tunnel
    def type_code_mapping(self) -> dict:
        """Return the type-code mapping; always an empty dict for Impala."""
        return {}

    def sqlalchemy_column_type_code_to_name(self, code: str, size: int | None = None) -> str:
        """Return ``code`` lowercased; ``size`` is ignored."""
        return code.lower()
@@ -0,0 +1,51 @@
1
+ from recurvedata.connectors._register import register_connector_class
2
+ from recurvedata.connectors.base import RecurveConnectorBase
3
+ from recurvedata.utils.imports import MockModule
4
+
5
+ try:
6
+ import jenkins
7
+ except ImportError:
8
+ jenkins = MockModule("jenkins")
9
+
10
CONNECTION_TYPE = "jenkins"
UI_CONNECTION_TYPE = "Jenkins"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class Jenkins(RecurveConnectorBase):
    """Connector wrapping a ``jenkins.Jenkins`` client built from ``conf``."""

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    setup_extras_require = ["python-jenkins"]

    config_schema = {
        "type": "object",
        "properties": {
            "url": {"type": "string", "title": "url"},
            "user": {"type": "string", "title": "Username"},
            "password": {"type": "string", "title": "Password"},
        },
        "order": ["url", "user", "password"],
        "required": ["url", "user", "password"],
        "secret": ["password"],
    }

    def __init__(self, conf: dict, *args, **kwargs):
        """Initialise the base connector, keep ``conf`` and build the client.

        Args:
            conf: Connection settings; ``url``, ``user`` and ``password`` are read.
        """
        super().__init__(conf, *args, **kwargs)
        self.conf = conf
        self.connector = self.init_connection(conf)

    def init_connection(self, conf):
        """Create the ``jenkins.Jenkins`` client from ``conf``.

        Raises:
            KeyError: If ``url``, ``user`` or ``password`` is missing from ``conf``.
        """
        client_kwargs = {
            "url": conf["url"],
            "username": conf["user"],
            "password": conf["password"],
        }
        return jenkins.Jenkins(**client_kwargs)

    def test_connection(self):
        """Check the connection by calling ``get_whoami()`` on the client."""
        self.connector.get_whoami()
@@ -0,0 +1,89 @@
1
+ from smtplib import SMTP, SMTP_SSL
2
+
3
+ from recurvedata.connectors._register import register_connector_class
4
+ from recurvedata.connectors.base import RecurveConnectorBase
5
+ from recurvedata.core.translation import _l
6
+
7
CONNECTION_TYPE = "mail"
UI_CONNECTION_TYPE = "Mail"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class Mail(RecurveConnectorBase):
    """SMTP connector that opens (and optionally logs into) a mail server.

    The connection is attempted at construction time; a failure is stored in
    ``connector_err`` instead of being raised, and surfaces from
    ``test_connection``.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE

    config_schema = {
        "type": "object",
        "properties": {
            "host": {"type": "string", "title": _l("SMTP Server Address")},
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 465,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "reply_to": {
                "type": "string",
                "title": _l("Reply-To Address"),
                "description": _l("Email address that recipients will reply to"),
            },
            "mail_from": {
                "type": "string",
                "title": _l("From Address"),
                "description": _l("Email address that appears in the From field"),
            },
            "ssl": {
                "type": "boolean",
                "title": _l("Use SSL/TLS"),
                "description": _l("Enable SSL/TLS encryption for secure email transmission"),
                "default": True,
            },
            "timeout": {
                "type": "number",
                "title": _l("Connection Timeout"),
                "description": _l("Maximum time in seconds to wait for server connection"),
                "default": 180,
            },
        },
        "order": [
            "host",
            "port",
            "user",
            "password",
            "reply_to",
            "mail_from",
            "ssl",
            "timeout",
        ],
        "required": ["host", "port", "user", "password"],
        "secret": ["password"],
    }

    def __init__(self, conf: dict, *args, **kwargs):
        """Store ``conf`` and try to open the SMTP connection.

        Args:
            conf: Connection settings matching ``config_schema``.
        """
        # NOTE(review): the base-class __init__ is not invoked here — confirm
        # that skipping it is intentional.
        self.conf = conf
        self.connector, self.connector_err = self.init_connection(conf)

    def init_connection(self, conf: dict):
        """Open an SMTP connection and log in when credentials are present.

        Args:
            conf: Connection settings; ``host`` and ``port`` are required,
                ``ssl``, ``timeout``, ``user`` and ``password`` are optional.

        Returns:
            ``(smtp, None)`` on success, or ``(None, error)`` when connecting
            or logging in raised an exception.
        """
        smtp = None
        try:
            # NOTE(review): a missing "ssl" key selects plain SMTP although the
            # schema declares a default of True — confirm defaults are applied
            # to ``conf`` before it reaches this point.
            smtp_class = SMTP if not conf.get("ssl") else SMTP_SSL
            smtp = smtp_class(host=conf["host"], port=conf["port"], timeout=conf.get("timeout", 180))
            if conf.get("user") and conf.get("password"):
                smtp.login(conf["user"], conf["password"])
            return smtp, None
        except Exception as e:
            # The socket may already be open when login() fails; release it
            # rather than leaking it, since the caller only receives the error.
            if smtp is not None:
                try:
                    smtp.close()
                except OSError:
                    pass
            return None, e

    def test_connection(self):
        """Verify the SMTP connection with a ``NOOP`` command.

        Raises:
            ValueError: If the initial connection failed, no connection exists,
                the ``NOOP`` call raised, or the reply code is not 250.
        """
        if self.connector_err:
            raise ValueError(f"Failed to connect: {self.connector_err}")
        if not self.connector:
            raise ValueError("SMTP connection not initialized")

        try:
            res = self.connector.noop()
        except Exception as e:
            raise ValueError(f"SMTP connection test failed: {str(e)}") from e
        if res[0] != 250:
            # Message text is kept identical to what callers previously saw.
            raise ValueError(f"SMTP connection test failed: SMTP server returned unexpected response: {res}")
@@ -0,0 +1,284 @@
1
+ import datetime
2
+ import logging
3
+ import uuid
4
+ from enum import StrEnum
5
+ from typing import Any
6
+
7
+ from sqlalchemy.engine import URL
8
+
9
+ from recurvedata.connectors._register import register_connector_class
10
+ from recurvedata.connectors.connectors.mssql import MssqlConnector
11
+ from recurvedata.consts import ConnectionCategory
12
+ from recurvedata.core.translation import _l
13
+
14
+ CONNECTION_TYPE = "microsoft_fabric"
15
+ UI_CONNECTION_TYPE = "Microsoft Fabric"
16
+
17
+
18
+ class AuthMethod(StrEnum):
19
+ """Microsoft Fabric authentication methods"""
20
+
21
+ SERVICE_PRINCIPAL = "ActiveDirectoryServicePrincipal"
22
+
23
+
24
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class MsFabricConnector(MssqlConnector):
    """Connector for Microsoft Fabric warehouses, built on the MSSQL connector.

    Authentication always uses an Azure AD service principal: ``client_id`` and
    ``client_secret`` are sent as the username/password of the connection URL
    and ``tenant_id`` is passed as an extra query parameter.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    category = [ConnectionCategory.WAREHOUSE]

    # Names treated as system objects.
    # NOTE(review): presumably consumed by the parent connector to hide these
    # from listings - confirm against MssqlConnector.
    SYSTEM_DATABASES = [
        "information_schema",
        "sys",
        "db_owner",
        "db_datareader",
        "db_datawriter",
        "db_ddladmin",
        "db_securityadmin",
        "db_accessadmin",
        "db_backupoperator",
        "db_denydatawriter",
        "db_denydatareader",
        "_rsc",
        "queryinsights",
        "guest",
    ]

    # Column types offered for this connector.
    available_column_types = [
        "bit",
        "smallint",
        "int",
        "bigint",
        "decimal",
        "numeric",
        "float",
        "real",
        "date",
        "time",
        "datetime2",
        "char",
        "varchar",
        "varbinary",
        "uniqueidentifier",
    ]

    # Generic type family -> concrete column types belonging to it.
    column_type_mapping = {
        "integer": ["smallint", "int", "bigint"],
        "float": ["real", "float"],
        "datetime": ["datetime2"],
        "string": ["char", "varchar"],
        "binary": ["varbinary", "uniqueidentifier"],
    }

    # JSON-schema style description of the connection form.
    config_schema = {
        "type": "object",
        "properties": {
            "host": {
                "type": "string",
                "title": _l("Host Address"),
                "default": "127.0.0.1",
            },
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 1433,
            },
            "authentication": {
                "type": "string",
                "title": _l("Authentication Method"),
                "default": "ActiveDirectoryServicePrincipal",
                "ui:options": {
                    "disabled": True,
                },
            },
            "tenant_id": {
                "type": "string",
                "title": _l("Tenant ID"),
            },
            "client_id": {
                "type": "string",
                "title": _l("Client ID"),
            },
            "client_secret": {
                "type": "string",
                "title": _l("Client Secret"),
            },
            "database": {
                "type": "string",
                "title": _l("Database Name"),
            },
            "odbc_driver": {
                "type": "string",
                "title": _l("ODBC Driver"),
                "default": "ODBC Driver 18 for SQL Server",
                "ui:options": {
                    "disabled": True,
                },
            },
            "encrypt": {
                "type": "boolean",
                "title": _l("Encrypt Connection"),
                "description": _l("Whether to encrypt the connection"),
                "default": True,
            },
            "trust_server_certificate": {
                "type": "boolean",
                "title": _l("Trust Server Certificate"),
                "default": True,
            },
            "blob_options": {
                "type": "object",
                "title": _l("Azure Blob Storage Options"),
                "properties": {
                    "account_name": {"type": "string", "title": _l("Storage Account Name")},
                    "endpoint_suffix": {"type": "string", "title": _l("Endpoint Suffix")},
                    "container_name": {"type": "string", "title": _l("Container Name")},
                    "sas_token": {"type": "string", "title": _l("SAS Token")},
                },
                "order": ["account_name", "endpoint_suffix", "container_name", "sas_token"],
            },
        },
        "order": [
            "host",
            "port",
            "authentication",
            "tenant_id",
            "client_id",
            "client_secret",
            "database",
            "odbc_driver",
            "encrypt",
            "trust_server_certificate",
            "blob_options",
        ],
        "required": ["host", "port", "tenant_id", "client_id", "client_secret", "database"],
        "secret": ["client_secret", "blob_options.sas_token"],
    }

    # Integer size -> column type. The two smallest buckets both resolve to
    # "smallint": "tinyint" is not part of ``available_column_types`` (nor of
    # the supported types listed in ``sqlalchemy_column_type_code_to_name``),
    # so it must never be produced. The original range keys are kept.
    INT_TYPE_MAPPING = {
        range(0, 4): "smallint",
        range(4, 6): "smallint",
        range(6, 11): "int",
        range(11, 20): "bigint",
    }

    # Python type -> default column type.
    BASE_TYPE_MAPPING = {
        bool: "bit",
        float: "float",
        datetime.datetime: "datetime2",
        str: "varchar",
        bytes: "varbinary",
        uuid.UUID: "uniqueidentifier",
    }

    @property
    def sqlalchemy_url(self):
        """Build the SQLAlchemy URL for a service-principal connection.

        Returns:
            A ``sqlalchemy.engine.URL`` whose username/password are the
            configured ``client_id``/``client_secret`` and whose query carries
            the ODBC driver, authentication mode, encryption flags and tenant.

        Raises:
            KeyError: If ``tenant_id``, ``client_id``, ``client_secret``,
                ``host``, ``port`` or ``database`` is missing from ``self.conf``.
        """
        query = {
            "driver": self.odbc_driver,
            "authentication": AuthMethod.SERVICE_PRINCIPAL.value,
            # Both flags default to enabled when absent from the configuration.
            "Encrypt": "yes" if self.conf.get("encrypt", True) else "no",
            "TrustServerCertificate": "yes" if self.conf.get("trust_server_certificate", True) else "no",
            "Tenant Id": self.conf["tenant_id"],
        }

        # URL.create() is the supported public constructor; calling URL(...)
        # directly is deprecated and bypasses argument validation.
        url = URL.create(
            drivername=self.driver,
            username=self.conf["client_id"],
            password=self.conf["client_secret"],
            host=self.conf["host"],
            port=self.conf["port"],
            database=self.conf["database"],
            query=query,
        )

        # Debug logging (the password is masked in the rendered URL)
        logging.info("Connection URL (without credentials): %s", url.render_as_string(hide_password=True))
        return url

    def convert_config_to_dbt_profile(self, database: str, schema: str | None = None) -> dict[str, Any]:
        """Translate this connection's settings into a dbt ``fabric`` profile target.

        Args:
            database: Database to target; falls back to ``self.database`` when falsy.
            schema: Schema to target; falls back to ``database`` and then to
                ``self.database`` when falsy.

        Returns:
            A dict with the keys of a dbt profile target using service-principal
            authentication.
        """
        return {
            "server": self.conf["host"],
            "port": self.conf["port"],
            "client_id": self.conf["client_id"],
            "client_secret": self.conf["client_secret"],
            "database": database or self.database,
            "type": "fabric",
            "authentication": "ServicePrincipal",
            "tenant_id": self.conf["tenant_id"],
            "schema": schema or database or self.database,
            "driver": self.odbc_driver,
        }

    def sqlalchemy_column_type_code_to_name(self, type_code: Any, size: int | None = None) -> str:
        """
        Map SQL Server/Microsoft Fabric type codes to their corresponding type names.
        Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types

        Microsoft Fabric supports a subset of T-SQL data types, including:
        - Exact numerics: bit, smallint, int, bigint, decimal/numeric
        - Approximate numerics: float, real
        - Date and time: date, time, datetime2
        - Character strings: char, varchar
        - Binary strings: varbinary, uniqueidentifier

        Args:
            type_code: Either a Python type (e.g. ``int``) or a value whose
                type is used for the lookup.
            size: Optional size; only consulted for integers, where it selects
                the integer width via ``INT_TYPE_MAPPING``.

        Returns:
            The column type name; ``"varchar"`` for anything unrecognised
            (including ``None``).
        """
        # A type is used directly; for any other non-None value use its type.
        if isinstance(type_code, type):
            py_type = type_code
        elif type_code is not None:
            py_type = type(type_code)
        else:
            py_type = None

        type_mapping = {
            bool: "bit",
            int: "int",
            float: "float",
            datetime.datetime: "datetime2",
            str: "varchar",
            bytes: "varbinary",
            uuid.UUID: "uniqueidentifier",
        }

        base_type = type_mapping.get(py_type)
        if base_type is None:
            return "varchar"

        # Exact identity check: bool is a subclass of int but must stay "bit".
        if py_type is int and size is not None:
            for size_range, type_name in self.INT_TYPE_MAPPING.items():
                if size in size_range:
                    return type_name
        # Integers whose size is outside every range fall back to plain "int".
        return base_type

    def set_env_when_get_dbt_connection(self):
        """Do nothing.

        NOTE(review): presumably overrides a hook of the parent connector so
        that no environment is prepared for dbt - confirm against MssqlConnector.
        """
        pass

    @classmethod
    def limit_sql(
        cls, sql: str, limit: int = 100, orders: list[dict[str, str]] | None = None, offset: int | None = None
    ) -> str:
        """Add pagination to SQL query for Microsoft Fabric.

        Args:
            sql: The SQL query to add limit to
            limit: Maximum number of rows to return
            orders: List of order by clauses
            offset: Number of rows to skip

        Returns:
            SQL query with pagination
        """
        # dbt model with `ephemeral` will automatically add `__dbt__cte__` to the query
        # which is not supported by Microsoft Fabric
        # so we need to handle it separately
        if "__dbt__cte__" not in sql and not sql.upper().strip().startswith("WITH"):
            return super().limit_sql(sql, limit, orders, offset)

        # NOTE(review): if order_sql appends an ORDER BY to `sql`, that clause
        # ends up inside the CTE below - confirm this is accepted by the server.
        sql = cls.order_sql(sql, orders)

        # Build final query with CTE
        sub_query_name = "_recurve_limit_subquery"
        base_sql = f"WITH {sub_query_name} AS ({sql})"

        if offset:
            # OFFSET/FETCH is part of the ORDER BY clause, so ORDER BY has to
            # follow the outer SELECT, not sit between the CTE and the SELECT.
            return (
                f"{base_sql} SELECT * FROM {sub_query_name} "
                f"ORDER BY (SELECT NULL) OFFSET {offset} ROWS FETCH NEXT {limit} ROWS ONLY"
            )

        return f"{base_sql} SELECT TOP {limit} * FROM {sub_query_name}"