recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333)
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,36 @@
1
+ from sqlalchemy.engine import URL
2
+
3
+ from recurvedata.connectors._register import register_connector_class
4
+ from recurvedata.connectors.dbapi import DBAPIBase
5
+ from recurvedata.consts import ConnectorGroup
6
+
7
CONNECTION_TYPE = "phoenix"
UI_CONNECTION_TYPE = "HBase Phoenix"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class PhoenixConnector(DBAPIBase):
    """DB-API connector registered under the "phoenix" / "HBase Phoenix" types.

    The connection is described only by a host and a port; no credentials or
    database name are part of the configuration schema.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    setup_extras_require = ["sqlalchemy-phoenix"]
    driver = "phoenix"

    config_schema = {
        "type": "object",
        "properties": {
            "host": {"type": "string", "title": "Host Address"},
            "port": {
                "type": "number",
                "title": "Port Number",
                "default": 8765,
            },
        },
        "order": ["host", "port"],
        "required": ["host", "port"],
        "secret": [],
    }

    @property
    def sqlalchemy_url(self):
        """Return the SQLAlchemy URL built from ``driver``, ``host`` and ``port``."""
        # SQLAlchemy 1.4 deprecated calling ``URL(...)`` directly, and 2.0 turns
        # URL into a named tuple that requires every field, so the partial
        # positional/keyword call fails there. ``URL.create`` is the supported
        # factory; fall back to the constructor on releases that predate it.
        build_url = getattr(URL, "create", URL)
        return build_url(self.driver, host=self.host, port=self.port)
@@ -0,0 +1,230 @@
1
+ from functools import cached_property
2
+ from typing import Any
3
+
4
+ from recurvedata.connectors._register import register_connector_class
5
+ from recurvedata.connectors.const import ENV_VAR_DBT_PASSWORD, ENV_VAR_DBT_USER
6
+ from recurvedata.connectors.datasource import DataSourceWrapper
7
+ from recurvedata.connectors.dbapi import DBAPIBase, with_ssh_tunnel
8
+ from recurvedata.consts import ConnectorGroup
9
+ from recurvedata.core.translation import _l
10
+
11
CONNECTION_TYPE = "postgres"
UI_CONNECTION_TYPE = "PostgreSQL"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class PostgresConnector(DBAPIBase):
    """DB-API connector registered under the "postgres" / "PostgreSQL" types.

    Besides the declarative connection schema, it provides column
    introspection through the ``pg_catalog`` tables, a type-OID to type-name
    lookup, and helpers that render the connection as a dbt profile or a cube
    configuration.
    """

    SYSTEM_DATABASES = [
        "information_schema",
        "pg_catalog",
        "pg_global",
        "pg_statistic",
        "pg_toast",
        "pg_temp_1",
        "pg_temp_2",
        "pg_toast_temp_1",
        "pg_toast_temp_2",
        "pg_type",
    ]
    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    setup_extras_require = ["psycopg2-binary"]
    driver = "postgresql"
    config_schema = {
        "type": "object",
        "properties": {
            "host": {
                "type": "string",
                "title": _l("Host Address"),
                "default": "127.0.0.1",
            },
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 5432,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "database": {
                "type": "string",
                "title": _l("Database Name"),
                "description": _l("The name of the database to connect to"),
            },
        },
        "order": ["host", "port", "user", "password", "database"],
        "required": [
            "host",
            "user",
            "password",
        ],
        "secret": ["password"],
    }

    column_type_mapping = {
        "integer": ["int2", "int4", "int8", "serial", "bigserial", "smallserial"],
        "float": ["float4", "float8", "real", "numeric", "decimal"],
        "datetime": ["timestamptz"],
        "time": ["timetz"],
        "binary": ["bytea"],
        "string": ["uuid"],
        "json": ["jsonb"],
    }

    # Extend base types with PostgreSQL specific types
    available_column_types = DBAPIBase.available_column_types + [
        # Numeric types
        "int2",  # alias for smallint
        "integer",
        "int4",  # alias for integer
        "int8",  # alias for bigint
        "real",
        "float4",  # alias for real
        "double precision",
        "float8",  # alias for double precision
        "serial",
        "serial4",  # alias for serial
        "bigserial",
        "serial8",  # alias for bigserial
        "smallserial",
        "serial2",  # alias for smallserial
        "money",
        "numeric",
        # Character types
        "text",
        "bpchar",  # blank-padded char
        "character",
        "character varying",
        # Date/Time types
        "timestamp without time zone",
        "timestamp with time zone",
        "timestamptz",  # alias for timestamp with time zone
        "time",
        "time without time zone",
        "time with time zone",
        "timetz",  # alias for time with time zone
        "interval",
        # Boolean type
        "boolean",
        "bool",  # alias for boolean
        # Geometric types
        "point",
        "line",
        "lseg",
        "box",
        "path",
        "polygon",
        "circle",
        # Network address types
        "cidr",
        "inet",
        "macaddr",
        "macaddr8",
        # Binary data
        "bytea",
        # UUID type
        "uuid",
        # JSON types
        "jsonb",
        # XML type
        "xml",
        # Bit string
        "bit",
        "bit varying",
        "varbit",
        # Text search
        "tsvector",
        "tsquery",
        # Range types
        "int4range",
        "int8range",
        "numrange",
        "tsrange",
        "tstzrange",
        "daterange",
    ]

    @staticmethod
    def _quote_literal(value: Any) -> str:
        """Render *value* as a PostgreSQL escape-string literal (``E'...'``).

        The ``E`` prefix makes backslash handling independent of the server's
        ``standard_conforming_strings`` setting, so escaping both backslashes
        and single quotes is sufficient to keep the value inside the literal.
        """
        escaped = str(value).replace("\\", "\\\\").replace("'", "''")
        return f"E'{escaped}'"

    def convert_config_to_dbt_profile(self, database: str, schema: str = None) -> dict:
        """Build a dbt profile target for this connection.

        Args:
            database: Database name; falls back to ``self.database`` when falsy.
            schema: Target schema; defaults to ``dbt_<database>`` when falsy.

        Returns:
            A dict with host, port, dbname, schema and type. ``user`` and
            ``password`` are set to the ``ENV_VAR_DBT_USER`` /
            ``ENV_VAR_DBT_PASSWORD`` constants rather than to values held by
            this connector.
        """
        return {
            "host": self.host,
            "port": self.port,
            "user": ENV_VAR_DBT_USER,
            "password": ENV_VAR_DBT_PASSWORD,
            "dbname": database or self.database,
            "schema": schema or f"dbt_{database or self.database}",
            "type": self.connection_type,
        }

    @with_ssh_tunnel
    def get_columns(self, table: str, database: str = None) -> list:
        """Return column metadata for *table*, read from the ``pg_catalog`` tables.

        Args:
            table: Relation name, matched against ``pg_class.relname``.
            database: Matched against ``pg_namespace.nspname``; falls back to
                ``self.database`` when falsy.

        Returns:
            One dict per live, user-visible column, ordered by attribute
            number, with the keys ``name``, ``type`` (lower-cased), ``nullable``,
            ``default`` and ``comment``.
        """
        database = database or self.database
        # The names are embedded as string literals, so they must be escaped:
        # a quote inside a name would otherwise terminate the literal and either
        # break the statement or let the remainder run as SQL.
        namespace_literal = self._quote_literal(database)
        table_literal = self._quote_literal(table)
        query = f"""
            WITH table_info AS (
                SELECT c.oid AS table_oid
                FROM pg_class c
                JOIN pg_namespace n ON c.relnamespace = n.oid
                WHERE n.nspname = {namespace_literal}
                AND c.relname = {table_literal}
            )
            SELECT
                a.attname AS column_name,
                t.typname AS data_type,
                NOT a.attnotnull AS nullable,
                pg_get_expr(ad.adbin, ad.adrelid) AS "default",
                col_description(a.attrelid, a.attnum) AS comment
            FROM pg_attribute a
            JOIN table_info ti ON a.attrelid = ti.table_oid
            JOIN pg_type t ON a.atttypid = t.oid
            LEFT JOIN pg_attrdef ad ON a.attrelid = ad.adrelid AND a.attnum = ad.adnum
            WHERE a.attnum > 0
            AND NOT a.attisdropped
            ORDER BY a.attnum;
        """
        return [
            {
                "name": row[0],
                "type": row[1].lower() if row[1] else "",
                "nullable": row[2],
                "default": row[3],
                "comment": row[4],
            }
            for row in self.fetchall(query)
        ]

    @cached_property
    @with_ssh_tunnel
    def type_code_mapping(self) -> dict:
        """Map type OIDs to type names as listed in ``pg_type``.

        The lookup is best-effort: if the query fails, the failure is logged
        and an empty mapping is returned so callers fall back to the driver's
        static type table. The result (including an empty one) is cached on
        the instance by ``cached_property``.
        """
        try:
            rv = self.fetchall("SELECT oid, typname FROM pg_type")
            return {row[0]: row[1] for row in rv if row[1]}
        except Exception:
            # Keep the best-effort contract, but leave a trace instead of
            # silently returning None from a property annotated as ``dict``.
            import logging

            logging.getLogger(__name__).warning(
                "failed to load type codes from pg_type; falling back to driver types",
                exc_info=True,
            )
            return {}

    def sqlalchemy_column_type_code_to_name(self, type_code: Any, size: int | None = None) -> str:
        """Translate a cursor type code into a column type name.

        Resolution order: the server-side ``type_code_mapping``, then
        psycopg2's ``string_types`` table (accepted only when the lower-cased
        name is in ``available_column_types``), then ``"varchar"``.

        Args:
            type_code: Type code to resolve.
            size: Accepted for interface compatibility; not used here.
        """
        if self.type_code_mapping:
            type_name = self.type_code_mapping.get(type_code)
            if type_name:
                return type_name

        # Imported lazily so the module loads without psycopg2 installed.
        import psycopg2.extensions

        pg_type = psycopg2.extensions.string_types.get(type_code)
        pg_type_name = pg_type.name.lower() if pg_type else None
        if pg_type_name in self.available_column_types:
            return pg_type_name
        return "varchar"

    def convert_config_to_cube_config(
        self, database: str, schema: str = None, datasource: DataSourceWrapper = None
    ) -> dict:
        """Build a cube data-source configuration for this connection.

        Args:
            database: Database name; falls back to ``self.database`` when falsy.
            schema: Accepted but not used in the returned configuration.
            datasource: Supplies ``user`` and ``password``. It defaults to
                ``None`` but is dereferenced unconditionally, so callers must
                pass it.
        """
        return {
            "type": "postgres",
            "host": self.host,
            "port": self.port,
            "user": datasource.user,
            "password": datasource.password,
            "database": database or self.database,
        }
@@ -0,0 +1,50 @@
1
+ from recurvedata.connectors._register import register_connector_class
2
+ from recurvedata.connectors.base import RecurveConnectorBase
3
+ from recurvedata.consts import ConnectorGroup
4
+ from recurvedata.core.translation import _l
5
+
6
CONNECTION_TYPE = "python"
UI_CONNECTION_TYPE = "Python"

# Interpreter versions offered by the connection form; the first one is the default.
_PYTHON_VERSIONS = ("3.11.9", "3.10.14")


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class Python(RecurveConnectorBase):
    """Connector describing a Python runtime: version, virtualenv name, requirements.

    ``test_required`` is False and ``test_connection`` does nothing.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE

    group = [ConnectorGroup.DESTINATION]
    test_required = False
    config_schema = dict(
        type="object",
        properties={
            "python_version": {
                "type": "string",
                "title": _l("Python Version"),
                # Display names are the version strings themselves.
                "enum": list(_PYTHON_VERSIONS),
                "enumNames": list(_PYTHON_VERSIONS),
                "default": _PYTHON_VERSIONS[0],
            },
            "pyenv": {
                "type": "string",
                "title": _l("Python Virtual Environment Name"),
                "default": "recurve_executor",
            },
            "requirements": {
                "type": "string",
                "title": _l("Python Package Requirements"),
                "description": _l(
                    "List of Python packages and versions to install, the same format as requirements.txt"
                ),
                "ui:options": {
                    "type": "textarea",
                    "rows": 10,
                },
            },
        },
        order=["python_version", "pyenv", "requirements"],
        required=["python_version", "pyenv"],
        secret=[],
    )

    def test_connection(self):
        """Do nothing; this connector has no connection to test."""
        return None
@@ -0,0 +1,187 @@
1
+ from functools import cached_property
2
+ from typing import Any
3
+
4
+ from recurvedata.connectors._register import register_connector_class
5
+ from recurvedata.connectors.const import ENV_VAR_DBT_PASSWORD, ENV_VAR_DBT_USER
6
+ from recurvedata.connectors.datasource import DataSourceWrapper
7
+ from recurvedata.connectors.dbapi import DBAPIBase, with_ssh_tunnel
8
+ from recurvedata.consts import ConnectorGroup
9
+ from recurvedata.core.translation import _l
10
+
11
CONNECTION_TYPE = "redshift"
UI_CONNECTION_TYPE = "Redshift"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class RedshiftConnector(DBAPIBase):
    """DB-API connector registered under the "redshift" / "Redshift" types.

    Besides the declarative connection schema (including optional S3
    credentials), it provides column introspection through the ``pg_catalog``
    tables, a type-OID to type-name lookup, and helpers that render the
    connection as a dbt profile or a cube configuration.
    """

    SYSTEM_DATABASES = ["information_schema", "pg_catalog", "public", "temporary"]
    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    driver = "redshift+psycopg2"
    setup_extras_require = ["psycopg2-binary", "sqlalchemy_redshift==0.8.15+recurve"]
    config_schema = {
        "type": "object",
        "properties": {
            "host": {"type": "string", "title": _l("Host Address")},
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 5439,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "database": {
                "type": "string",
                "title": _l("Database Name"),
                "description": _l("The name of the database to connect to"),
            },
            "s3_options": {
                "type": "object",
                "title": _l("S3 Configuration"),
                "description": _l("AWS S3 credentials for data loading and unloading"),
                "properties": {
                    "access_key_id": {"type": "string", "title": _l("AWS Access Key ID")},
                    "secret_access_key": {"type": "string", "title": _l("AWS Secret Access Key")},
                    "region": {"type": "string", "title": _l("AWS Region")},
                },
                "order": ["access_key_id", "secret_access_key", "region"],
            },
        },
        "order": ["host", "port", "user", "password", "database", "s3_options"],
        "required": ["host", "port"],
        "secret": ["password"],
    }

    # All supported Redshift data types based on official documentation
    available_column_types = [
        # Numeric types
        "smallint",
        "int2",
        "integer",
        "int",
        "int4",
        "bigint",
        "int8",
        "decimal",
        "numeric",
        "real",
        "float",
        "float4",
        "double precision",
        "float8",
        # Character types
        "char",
        "character",
        "nchar",
        "bpchar",
        "varchar",
        "character varying",
        "nvarchar",
        "text",
        # Datetime types
        "date",
        "timestamp",
        "timestamptz",
        "timestamp with time zone",
        "timestamp without time zone",
        "time",
        "timetz",
        "time with time zone",
        "time without time zone",
        # Boolean type
        "boolean",
        "bool",
        # Special types
        "super",
        "hllsketch",
        "geometry",
        "geography",
        "varbyte",
    ]

    @staticmethod
    def _quote_literal(value: Any) -> str:
        """Render *value* as a quoted SQL string literal.

        Single quotes are doubled and backslashes are doubled as well.
        NOTE(review): the backslash doubling assumes Redshift processes
        backslash escapes inside plain string literals - confirm against the
        target cluster. If it does not, the only effect is that a name
        containing a backslash will not match.
        """
        escaped = str(value).replace("\\", "\\\\").replace("'", "''")
        return f"'{escaped}'"

    def convert_config_to_dbt_profile(self, database: str, schema: str = None) -> dict:
        """Build a dbt profile target for this connection.

        Args:
            database: Database name; falls back to ``self.database`` when
                falsy, matching ``convert_config_to_cube_config``.
            schema: Target schema, passed through unchanged.

        Returns:
            A dict with host, port, dbname, schema, type and a fixed
            ``threads`` value of 10. ``user`` and ``password`` are set to the
            ``ENV_VAR_DBT_USER`` / ``ENV_VAR_DBT_PASSWORD`` constants rather
            than to values held by this connector.
        """
        return {
            "host": self.host,
            "port": self.port,
            "user": ENV_VAR_DBT_USER,
            "password": ENV_VAR_DBT_PASSWORD,
            "dbname": database or self.database,
            "schema": schema,
            "type": self.connection_type,
            "threads": 10,
        }

    @with_ssh_tunnel
    def get_columns(self, table: str, database=None) -> list:
        """Return column metadata for *table*, read from the ``pg_catalog`` tables.

        Args:
            table: Relation name, matched against ``pg_class.relname``.
            database: Matched against ``pg_namespace.nspname``; falls back to
                ``self.database`` when falsy.

        Returns:
            One dict per live, user-visible column, ordered by attribute
            number, with the keys ``name``, ``type`` (lower-cased), ``nullable``,
            ``default`` and ``comment``.
        """
        database = database or self.database
        # The names are embedded as string literals, so they must be escaped:
        # a quote inside a name would otherwise terminate the literal and either
        # break the statement or let the remainder run as SQL.
        namespace_literal = self._quote_literal(database)
        table_literal = self._quote_literal(table)
        query = f"""
            WITH table_info AS (
                SELECT c.oid AS table_oid
                FROM pg_class c
                JOIN pg_namespace n ON c.relnamespace = n.oid
                WHERE n.nspname = {namespace_literal}
                AND c.relname = {table_literal}
            )
            SELECT
                a.attname AS column_name,
                t.typname AS data_type,
                NOT a.attnotnull AS nullable,
                pg_get_expr(ad.adbin, ad.adrelid) AS "default",
                col_description(a.attrelid, a.attnum) AS comment
            FROM pg_attribute a
            JOIN table_info ti ON a.attrelid = ti.table_oid
            JOIN pg_type t ON a.atttypid = t.oid
            LEFT JOIN pg_attrdef ad ON a.attrelid = ad.adrelid AND a.attnum = ad.adnum
            WHERE a.attnum > 0
            AND NOT a.attisdropped
            ORDER BY a.attnum;
        """
        return [
            {
                "name": row[0],
                "type": row[1].lower() if row[1] else "",
                "nullable": row[2],
                "default": row[3],
                "comment": row[4],
            }
            for row in self.fetchall(query)
        ]

    @cached_property
    @with_ssh_tunnel
    def type_code_mapping(self) -> dict:
        """Map type OIDs to type names as listed in ``pg_catalog.pg_type``.

        The lookup is best-effort: if the query fails, the failure is logged
        and an empty mapping is returned so callers fall back to the driver's
        static type table. The result (including an empty one) is cached on
        the instance by ``cached_property``.
        """
        try:
            rv = self.fetchall("SELECT oid, typname FROM pg_catalog.pg_type")
            return {row[0]: row[1] for row in rv if row[1]}
        except Exception:
            # Keep the best-effort contract, but leave a trace instead of
            # silently returning None from a property annotated as ``dict``.
            import logging

            logging.getLogger(__name__).warning(
                "failed to load type codes from pg_catalog.pg_type; falling back to driver types",
                exc_info=True,
            )
            return {}

    def sqlalchemy_column_type_code_to_name(self, type_code: Any, size: int | None = None) -> str:
        """Translate a cursor type code into a column type name.

        Resolution order: the server-side ``type_code_mapping``, then
        psycopg2's ``string_types`` table (accepted only when the lower-cased
        name is in ``available_column_types``), then ``"varchar"``.

        Args:
            type_code: Type code to resolve.
            size: Accepted for interface compatibility; not used here.
        """
        if self.type_code_mapping:
            type_name = self.type_code_mapping.get(type_code)
            if type_name:
                return type_name

        # Imported lazily so the module loads without psycopg2 installed.
        import psycopg2.extensions

        pg_type = psycopg2.extensions.string_types.get(type_code)
        pg_type_name = pg_type.name.lower() if pg_type else None
        if pg_type_name in self.available_column_types:
            return pg_type_name
        return "varchar"

    def convert_config_to_cube_config(
        self, database: str, schema: str = None, datasource: DataSourceWrapper = None
    ) -> dict:
        """Build a cube data-source configuration for this connection.

        Args:
            database: Database name; falls back to ``self.database`` when falsy.
            schema: Accepted but not used in the returned configuration.
            datasource: Supplies ``user`` and ``password``. It defaults to
                ``None`` but is dereferenced unconditionally, so callers must
                pass it.
        """
        return {
            "type": "redshift",
            "host": self.host,
            "port": self.port,
            "user": datasource.user,
            "password": datasource.password,
            "database": database or self.database,
        }
@@ -0,0 +1,93 @@
1
+ import logging
2
+ import os
3
+
4
+ from recurvedata.connectors._register import register_connector_class
5
+ from recurvedata.connectors.object_store import ObjectStoreMixin
6
+ from recurvedata.connectors.proxy import HTTP_PROXY_CONFIG_SCHEMA, HttpProxyMixin
7
+ from recurvedata.connectors.utils import juice_sync_process_special_character_within_secret
8
+ from recurvedata.consts import ConnectorGroup
9
+ from recurvedata.core.translation import _l
10
+
11
+ try:
12
+ import boto3
13
+ from botocore.config import Config
14
+ from s3fs import S3FileSystem
15
+ except ImportError:
16
+ S3FileSystem = None
17
+
18
+ CONNECTION_TYPE = "s3"
19
+ UI_CONNECTION_TYPE = "AWS S3"
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class S3(HttpProxyMixin, ObjectStoreMixin):
    """AWS S3 connector.

    File-system access goes through ``s3fs.S3FileSystem``; the connection
    test uses a ``boto3`` client.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    setup_extras_require = ["fsspec[s3]", "s3fs>=2021.08", "boto3"]

    config_schema = {
        "type": "object",
        "properties": {
            "access_key_id": {"type": "string", "title": _l("AWS Access Key ID")},
            "secret_access_key": {"type": "string", "title": _l("AWS Secret Access Key")},
            "region": {"type": "string", "title": _l("AWS Region")},
            "bucket": {"type": "string", "title": _l("Bucket Name")},
            "proxies": HTTP_PROXY_CONFIG_SCHEMA["proxies"],
        },
        "order": ["access_key_id", "secret_access_key", "region", "bucket", "proxies"],
        "required": ["access_key_id", "secret_access_key"],
        "secret": ["secret_access_key"],
    }

    def init_connection(self, conf) -> S3FileSystem:
        """Create an ``S3FileSystem`` from ``conf`` and keep it on ``self.connector``.

        :param conf: mapping holding ``access_key_id`` and
            ``secret_access_key``; a non-empty ``region`` is forwarded to the
            client as ``region_name``.
        :return: the newly created file system object.
        """
        # todo: proxy
        with self._init_proxy_manager():
            client_kwargs = {}
            region = conf.get("region")
            if region:
                client_kwargs["region_name"] = region
            con = S3FileSystem(
                key=conf["access_key_id"],
                secret=conf["secret_access_key"],
                client_kwargs=client_kwargs,
            )
            self.connector = con
            return con

    def test_connection(self):
        """Check the credentials by issuing one cheap S3 request.

        Lists at most one object of the configured bucket, or lists the
        buckets when no bucket is configured. Any error raised by the client
        propagates to the caller.
        """
        with self._init_proxy_manager():
            # Lazy logging arguments: the message is only formatted when the
            # record is actually emitted.
            logger.info(
                "test s3 connection with bucket %s and region %s, proxy: %s",
                self.bucket,
                self.region,
                os.environ.get("http_proxy"),
            )

            session = boto3.Session(
                aws_access_key_id=self.access_key_id,
                aws_secret_access_key=self.secret_access_key,
                region_name=self.region,
            )
            # Same limit (in seconds) for connecting and for reading.
            timeout = 30
            s3_client = session.client("s3", config=Config(connect_timeout=timeout, read_timeout=timeout))
            if self.bucket:
                s3_client.list_objects_v2(Bucket=self.bucket, MaxKeys=1)
            else:
                s3_client.list_buckets()

    @property
    def endpoint(self):
        """Regional endpoint host name, ``s3.<region>.amazonaws.com``.

        :raises ValueError: if no region is configured.
        """
        if not self.region:
            raise ValueError("there is no region for endpoint")
        return f"s3.{self.region}.amazonaws.com"

    juice_sync_able = True

    def juice_sync_path(self, path: str) -> "tuple[str, str]":
        """Build the juice sync locations for ``path``.

        :param path: the s3 path, note that path is not started with bucket
        :return: ``(path_with_secret, path_without_secret)`` - the same
            ``s3://`` location with and without the embedded credentials.
        :raises ValueError: if the connection has no bucket (or, through
            ``endpoint``, no region).
        """
        if not self.bucket:
            raise ValueError("the connection bucket cannot be empty in juice sync")
        secret_part = f"{self.access_key_id}:{self.secret_access_key}"
        secret_part = juice_sync_process_special_character_within_secret(secret_part)
        endpoint_path_part = f'{self.bucket}.{self.endpoint}/{path.lstrip("/")}'
        path_with_secret = f"s3://{secret_part}@{endpoint_path_part}"
        path_without_secret = f"s3://{endpoint_path_part}"
        return path_with_secret, path_without_secret
@@ -0,0 +1,87 @@
1
+ import os
2
+
3
+ from recurvedata.consts import ConnectorGroup
4
+
5
+ try:
6
+ from fsspec.implementations.sftp import SFTPFileSystem
7
+ except ImportError:
8
+ SFTPFileSystem = None
9
+
10
+ from recurvedata.connectors._register import register_connector_class
11
+ from recurvedata.connectors.ftp import FTPMixin
12
+ from recurvedata.core.translation import _l
13
+
14
+ CONNECTION_TYPE = "sftp"
15
+ UI_CONNECTION_TYPE = "SFTP"
16
+
17
+
18
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class SFTP(FTPMixin):
    """SFTP connector backed by fsspec's ``SFTPFileSystem``."""

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    setup_extras_require = ["fsspec[sftp]", "paramiko"]

    config_schema = {
        "type": "object",
        "properties": {
            "host": {
                "type": "string",
                "title": _l("Host Address"),
                "default": "127.0.0.1",
            },
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 22,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "private_key_path": {"type": "string", "title": _l("Private Key File Path")},
        },
        "order": ["host", "port", "user", "password", "private_key_path"],
        "required": ["host", "port"],
        "secret": ["password"],
    }

    def _build_ssh_kwargs(self) -> dict:
        """Build the keyword arguments passed to ``SFTPFileSystem``.

        When ``private_key_path`` is configured, the key file is loaded as an
        RSA key, with the configured password used as its passphrase.

        :return: dict with ``username``, ``password``, ``port`` and ``pkey``
            (``None`` when no private key is configured).
        """
        import paramiko

        pkey = None
        pk_path = self.conf.get("private_key_path")
        if pk_path:
            # Allow "~"-relative key locations.
            pk_path = os.path.expanduser(pk_path)
            pkey = paramiko.RSAKey.from_private_key_file(pk_path, password=self.conf.get("password"))
        return {
            "username": self.conf["user"],
            "password": self.conf.get("password"),
            "port": self.conf["port"],
            "pkey": pkey,
        }

    def init_connection(self, conf) -> SFTPFileSystem:
        """Create an ``SFTPFileSystem`` for ``conf["host"]`` and keep it on ``self.connector``.

        :param conf: mapping holding at least ``host``; the remaining
            settings are read from ``self.conf`` by ``_build_ssh_kwargs``.
        :return: the newly created file system object.
        """
        con = SFTPFileSystem(host=conf["host"], **self._build_ssh_kwargs())
        self.connector = con
        return con

    def test_connection(self):
        """Check the connection by listing the current remote directory."""
        self.connector.ls(".")

    juice_sync_able = True

    def juice_sync_path(self, path: str) -> "tuple[str, str]":
        """Build the juice sync locations for ``path``.

        Only password authentication is supported here for now.

        :param path: remote path, appended verbatim after ``host:port``.
        :return: ``(secret_path, non_secret_path)`` - the location with the
            URL-quoted password, and the same location with the password
            masked.
        :raises ValueError: if no password is configured.
        """
        from urllib.parse import quote

        username = self.conf["user"]
        password = self.conf.get("password")
        if password is None:
            # ``password`` is optional in the schema; fail with a clear
            # message instead of a bare KeyError or a TypeError from quote().
            raise ValueError("the connection password is required in juice sync")
        password = quote(password)
        port = self.conf["port"]
        host = self.conf["host"]
        # tmp only allow password
        secret_path = f"{username}:{password}@{host}:{port}{path}"
        non_secret_path = f"{username}:********@{host}:{port}{path}"
        return secret_path, non_secret_path