recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,49 @@
1
+ import json
2
+
3
+ from recurvedata.connectors._register import register_connector_class
4
+ from recurvedata.connectors.base import RecurveConnectorBase
5
+ from recurvedata.consts import ConnectorGroup
6
+ from recurvedata.core.translation import _l
7
+
8
# Names handed to ``register_connector_class`` for this connector: the
# internal type key and the label used as the UI connection type.
CONNECTION_TYPE = "generic"
UI_CONNECTION_TYPE = "Generic"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class GenericConnector(RecurveConnectorBase):
    """Connector that stores host credentials plus free-form JSON settings.

    The connector performs no connectivity check of its own:
    ``test_required`` is ``False`` and ``test_connection`` does nothing.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    test_required = False

    # Form schema: four required connection fields, an optional timeout and an
    # optional ``custom`` field that is edited as JSON text.
    config_schema = {
        "type": "object",
        "properties": {
            "host": {"type": "string", "title": _l("Host")},
            "port": {"type": "integer", "title": _l("Port")},
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "timeout": {"type": "integer", "title": _l("Timeout (seconds)"), "default": 30},
            "custom": {
                "type": "string",
                "title": _l("Custom Configuration"),
                "description": _l("Custom configuration parameters in JSON format"),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "code", "lang": "json"},
            },
        },
        "order": ["host", "port", "user", "password", "timeout", "custom"],
        "required": ["host", "port", "user", "password"],
        "secret": ["password"],
    }

    def test_connection(self):
        """Do nothing; there is no generic way to probe the target."""
        return None

    @staticmethod
    def preprocess_conf(data):
        """Run the base preprocessing, then decode the ``custom`` JSON text.

        :param data: raw connector configuration mapping.
        :return: the mapping returned by ``RecurveConnectorBase.preprocess_conf``,
            with ``custom`` replaced by its parsed JSON value when it was a
            non-empty string.
        :raises json.JSONDecodeError: if ``custom`` is a string that is not valid JSON.
        """
        conf = RecurveConnectorBase.preprocess_conf(data)
        custom_text = conf.get("custom")
        # Leave missing/empty values and already-decoded objects untouched.
        if not custom_text or not isinstance(custom_text, str):
            return conf
        conf["custom"] = json.loads(custom_text)
        return conf
@@ -0,0 +1,115 @@
1
+ from recurvedata.connectors._register import register_connector_class
2
+ from recurvedata.connectors.object_store import ObjectStoreMixin
3
+ from recurvedata.connectors.proxy import HTTP_PROXY_CONFIG_SCHEMA, HttpProxyMixin
4
+ from recurvedata.consts import ConnectorGroup
5
+ from recurvedata.core.translation import _l
6
+
7
+ CONNECTION_TYPE = "google_cloud_storage"
8
+ UI_CONNECTION_TYPE = "Google Cloud Storage"
9
+
10
+
11
+ @register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
12
+ class GoogleCloudStorage(HttpProxyMixin, ObjectStoreMixin):
13
+ connection_type = CONNECTION_TYPE
14
+ ui_connection_type = UI_CONNECTION_TYPE
15
+ setup_extras_require = []
16
+ group = [ConnectorGroup.DESTINATION]
17
+ test_required = False
18
+
19
+ config_schema = {
20
+ "type": "object",
21
+ "properties": {
22
+ "key_dict": {
23
+ "type": "object",
24
+ "title": _l("Service Account Key"),
25
+ "description": _l("Google Cloud service account key credentials"),
26
+ "properties": {
27
+ "type": {
28
+ "type": "string",
29
+ "title": _l("Account Type"),
30
+ "default": "service_account",
31
+ },
32
+ "project_id": {"type": "string", "title": _l("Google Cloud Project ID")},
33
+ "private_key_id": {"type": "string", "title": _l("Google Auth Private Key ID")},
34
+ "private_key": {
35
+ "type": "string",
36
+ "title": _l("Google Auth Private Key"),
37
+ "ui:options": {"type": "textarea"},
38
+ },
39
+ "client_email": {"type": "string", "title": _l("Service Account Email")},
40
+ "client_id": {"type": "string", "title": _l("Google OAuth Client ID")},
41
+ "auth_uri": {
42
+ "type": "string",
43
+ "title": _l("Google OAuth Auth URI"),
44
+ "default": "https://accounts.google.com/o/oauth2/auth",
45
+ },
46
+ "token_uri": {
47
+ "type": "string",
48
+ "title": _l("Google OAuth Token URI"),
49
+ "default": "https://oauth2.googleapis.com/token",
50
+ },
51
+ "auth_provider_x509_cert_url": {
52
+ "type": "string",
53
+ "title": _l("Google OAuth Certificate URL (Auth Provider)"),
54
+ "default": "https://www.googleapis.com/oauth2/v1/certs",
55
+ },
56
+ "client_x509_cert_url": {
57
+ "type": "string",
58
+ "title": _l("Google OAuth Certificate URL (Client)"),
59
+ "default": "https://www.googleapis.com/robot/v1/metadata/x509/recurvedata-gcs%40brand-portal-prod.iam.gserviceaccount.com",
60
+ },
61
+ },
62
+ "order": [
63
+ "type",
64
+ "project_id",
65
+ "private_key_id",
66
+ "private_key",
67
+ "client_email",
68
+ "client_id",
69
+ "auth_uri",
70
+ "token_uri",
71
+ "auth_provider_x509_cert_url",
72
+ "client_x509_cert_url",
73
+ ],
74
+ "required": [
75
+ "type",
76
+ "project_id",
77
+ "private_key_id",
78
+ "private_key",
79
+ "client_id",
80
+ ],
81
+ "secret": [
82
+ "private_key",
83
+ ],
84
+ },
85
+ "bucket": {
86
+ "type": "string",
87
+ "title": _l("Bucket Name"),
88
+ "description": _l("Name of the Google Cloud Storage bucket"),
89
+ },
90
+ "proxies": HTTP_PROXY_CONFIG_SCHEMA["proxies"],
91
+ },
92
+ "order": [
93
+ "key_dict",
94
+ "bucket",
95
+ "proxies",
96
+ ],
97
+ "required": [
98
+ "key_dict",
99
+ ],
100
+ "secret": [
101
+ "key_dict.private_key",
102
+ ],
103
+ }
104
+
105
+ def init_connection(self, conf):
106
+ self.connector = None # todo
107
+
108
+ def test_connection(self):
109
+ # todo
110
+ pass
111
+
112
+ juice_sync_able = True
113
+
114
+ def juice_sync_path(self, path: str) -> str:
115
+ return f"gcs://{path}" # todo
@@ -0,0 +1,225 @@
1
+ import re
2
+
3
+ from recurvedata.consts import ConnectionCategory, ConnectorGroup
4
+
5
+ try:
6
+ from pandas import DataFrame
7
+ except ImportError:
8
+ pass
9
+
10
+ from recurvedata.connectors._register import register_connector_class
11
+ from recurvedata.connectors.base import RecurveConnectorBase
12
+ from recurvedata.connectors.const import LoadMode
13
+ from recurvedata.connectors.proxy import HTTP_PROXY_CONFIG_SCHEMA, HttpProxyMixin
14
+ from recurvedata.core.translation import _l
15
+
16
# Names handed to ``register_connector_class`` for this connector: the
# internal type key and the label used as the UI connection type.
CONNECTION_TYPE = "google_service_account"
UI_CONNECTION_TYPE = "Google Service Account"

# Optional dependencies. Each package is guarded on its own so that a missing
# one (gspread is deliberately not part of ``setup_extras_require``) does not
# also hide the packages that *are* installed.
try:
    from google.oauth2 import service_account
except ImportError:
    pass

try:
    import pandas as pd
except ImportError:
    pass

try:
    import gspread  # noqa
    from gspread.worksheet import Worksheet  # noqa
except ImportError:
    pass


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class GoogleServiceAccount(HttpProxyMixin, RecurveConnectorBase):
    """Google service-account credentials plus Google Sheets read/write helpers.

    Credential handling needs ``google-auth``; the sheet helpers additionally
    need ``gspread`` and ``pandas``. Calls that talk to Google run inside
    ``self._init_proxy_manager()``.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    category = [ConnectionCategory.SERVICE]
    setup_extras_require = [
        "google-auth",
    ]
    # gspread is not added to setup_extras_require for now; it is added to the
    # operator's setup instead.
    default_timeout = 120

    config_schema = {
        "type": "object",
        "properties": {
            "project_id": {"type": "string", "title": _l("Google Cloud Project ID")},
            "private_key_id": {"type": "string", "title": _l("Google Auth Private Key ID")},
            "private_key": {
                "type": "string",
                "title": _l("Google Auth Private Key"),
                "ui:options": {"type": "textarea"},
            },
            "client_email": {"type": "string", "title": _l("Service Account Email")},
            "client_id": {"type": "string", "title": _l("Google OAuth Client ID")},
            "auth_uri": {
                "type": "string",
                "title": _l("Google OAuth Auth URI"),
                "default": "https://accounts.google.com/o/oauth2/auth",
            },
            "token_uri": {
                "type": "string",
                "title": _l("Google OAuth Token URI"),
                "default": "https://oauth2.googleapis.com/token",
            },
            "auth_provider_x509_cert_url": {
                "type": "string",
                "title": _l("Google OAuth Certificate URL (Auth Provider)"),
                "default": "https://www.googleapis.com/oauth2/v1/certs",
            },
            # NOTE(review): this default names one specific service account;
            # presumably a leftover placeholder - confirm.
            "client_x509_cert_url": {
                "type": "string",
                "title": _l("Google OAuth Certificate URL (Client)"),
                "default": "https://www.googleapis.com/robot/v1/metadata/x509/recurvedata-gcs%40brand-portal-prod.iam.gserviceaccount.com",
            },
            "universe_domain": {
                "type": "string",
                "title": _l("Universe Domain"),
                "default": "googleapis.com",
            },
            "proxies": HTTP_PROXY_CONFIG_SCHEMA["proxies"],
        },
        "order": [
            "project_id",
            "private_key_id",
            "private_key",
            "client_email",
            "client_id",
            "auth_uri",
            "token_uri",
            "auth_provider_x509_cert_url",
            "client_x509_cert_url",
            "universe_domain",
            "proxies",
        ],
        "required": [
            "project_id",
            "private_key_id",
            "private_key",
            "client_email",
        ],
        "secret": [
            "private_key",
        ],
    }

    def init_credential_key_dict(self):
        """Build the service-account info dict from this connector's attributes.

        The private key is normalised with ``_convert_private_key`` and the
        normalised value is stored back on the instance. ``client_id`` is only
        included when it is set.

        :return: dict shaped like a service-account key file.
        """
        self.private_key = self._convert_private_key(self.private_key)
        _key_dict = {
            "type": "service_account",
            "project_id": self.project_id,
            "private_key_id": self.private_key_id,
            "private_key": self.private_key,
            "client_email": self.client_email,
            "auth_uri": self.auth_uri,
            "token_uri": self.token_uri,
            "auth_provider_x509_cert_url": self.auth_provider_x509_cert_url,
            "client_x509_cert_url": self.client_x509_cert_url,
            "universe_domain": self.universe_domain,
        }
        if self.client_id:
            _key_dict["client_id"] = self.client_id
        return _key_dict

    def init_credential(self):
        """Create google-auth service-account credentials from the key dict."""
        return service_account.Credentials.from_service_account_info(info=self.init_credential_key_dict())

    @staticmethod
    def _convert_private_key(private_key: str):
        """Turn escaped newlines in ``private_key`` into real newlines."""
        # Depending on how the JSON was formatted, it may contain
        # escaped newlines. Convert those to actual newlines.
        # First pass: a backslash followed by a real newline.
        private_key = private_key.replace("\\\n", "\n")
        # Second pass: the two-character sequence backslash + "n".
        return private_key.replace("\\n", "\n")

    def test_connection(self):
        """Build the credentials once; no request is sent to Google."""
        # No real validation for now. If the private key is malformed, building
        # the credentials seems to raise an error.
        with self._init_proxy_manager():
            self.init_credential()

    def get_sheet(self, url: str, sheet_gid: int = None):
        """Open the spreadsheet at ``url`` and return one of its worksheets.

        :param url: spreadsheet URL.
        :param sheet_gid: id of the wanted worksheet. When omitted, or when no
            worksheet has this id, the first worksheet is returned.
        :return: the matching worksheet, else the first worksheet, else
            ``None`` when the spreadsheet reports no worksheets.
        """
        with self._init_proxy_manager():
            gc = gspread.service_account_from_dict(self.init_credential_key_dict())
            gc.set_timeout(self.default_timeout)
            sheets = gc.open_by_url(url).worksheets()
            if sheet_gid is not None:
                for sheet in sheets:
                    if sheet.id == sheet_gid:
                        return sheet
            return sheets[0] if sheets else None

    def read_sheet_to_df(
        self, sheet: "Worksheet", cell_range: str = None, columns: list[str] = None, dataframe_kwargs: dict = None
    ) -> "pd.DataFrame":
        """Read a worksheet (or a range of it) into a DataFrame.

        :param sheet: worksheet to read.
        :param cell_range: range to read, e.g. ``'A1:B5'``. When omitted the
            whole sheet is read.
        :param columns: column names, e.g. ``['col1', 'col2', 'col3']``. When
            omitted the first fetched row is used as the header.
        :param dataframe_kwargs: extra keyword arguments for ``pandas.DataFrame``.
        :return: the fetched values as a DataFrame; an empty DataFrame when
            nothing was fetched and no ``columns`` were given.
        """
        with self._init_proxy_manager():  # todo: use wrapper
            if not cell_range:
                data = sheet.get_all_values()
            else:
                data = sheet.get(cell_range)
        dataframe_kwargs = dataframe_kwargs or {}
        if columns:
            return pd.DataFrame(data, columns=columns, **dataframe_kwargs)
        if not data:
            # Nothing fetched, so there is no header row to take names from.
            return pd.DataFrame(**dataframe_kwargs)
        header, *rows = data
        return pd.DataFrame(rows, columns=header, **dataframe_kwargs)

    @staticmethod
    def parse_sheet_url(url: str) -> "tuple[str, int | None]":
        """Split a sheet URL into the spreadsheet id and the worksheet gid.

        :param url: https://docs.google.com/spreadsheets/d/118WyiPGFQ3ni7Gp6oNhZtkc9wEmAPfqAWynvP2ufgPk/edit#gid=1996978628
        :return: ("118WyiPGFQ3ni7Gp6oNhZtkc9wEmAPfqAWynvP2ufgPk", 1996978628);
            the gid is an ``int``, or ``None`` when the URL has no ``gid=`` part.
        """
        from gspread.utils import extract_id_from_url

        spread_sheet_id = extract_id_from_url(url)
        gid_mobj = re.search(r"gid=(?P<gid>\d+)", url)
        sheet_id = int(gid_mobj.group("gid")) if gid_mobj else None
        return spread_sheet_id, sheet_id

    def load_df_to_sheet(self, df: "DataFrame", sheet: "Worksheet", mode: str, **kwargs):
        """Write a DataFrame to a worksheet.

        Args:
            df (DataFrame): data to write; its column names become the header.
            sheet (Worksheet): target worksheet.
            mode (str): OVERWRITE/APPEND
            **kwargs: forwarded to ``load_values_to_sheet``.
        """
        headers = df.columns.values.tolist()
        values = df.values.tolist()
        with self._init_proxy_manager():
            self.load_values_to_sheet(headers, values, sheet, mode, **kwargs)

    @staticmethod
    def load_values_to_sheet(headers: list[str], values: list[list], sheet: "Worksheet", mode: str, **kwargs):
        """Write rows to a worksheet in the given load mode.

        ``LoadMode.OVERWRITE`` clears the sheet and writes ``headers`` followed
        by ``values``. ``LoadMode.APPEND`` inserts ``values`` (without the
        header) after the last row that currently holds values.

        Args:
            headers (list[str]): header row, written in OVERWRITE mode only.
            values (list[list]): data rows.
            sheet (Worksheet): target worksheet.
            mode (str): OVERWRITE/APPEND
            **kwargs: forwarded to ``sheet.update`` / ``sheet.insert_rows``.
        """
        # NOTE(review): any other mode is silently ignored - confirm whether
        # that should be reported to the caller instead.
        if mode == LoadMode.OVERWRITE:
            sheet.clear()
            sheet.update([headers, *values], **kwargs)
        elif mode == LoadMode.APPEND:
            if not values:
                # Nothing to append; skip the read and the empty insert request.
                return
            existing_rows = sheet.get_all_values()
            next_row = len(existing_rows) + 1
            sheet.insert_rows(values, row=next_row, **kwargs)
@@ -0,0 +1,207 @@
1
+ from dataclasses import dataclass
2
+ from typing import Optional, Type
3
+
4
+ from sqlalchemy.engine.url import URL
5
+
6
+ from recurvedata.connectors import get_connection_class
7
+ from recurvedata.connectors._register import register_connector_class
8
+ from recurvedata.connectors.connectors.mysql import CONNECTION_TYPE as MYSQL_CONNECTION_TYPE
9
+ from recurvedata.connectors.connectors.postgres import CONNECTION_TYPE as POSTGRES_CONNECTION_TYPE
10
+ from recurvedata.connectors.dbapi import DBAPIBase
11
+ from recurvedata.consts import ConnectionCategory, ConnectorGroup
12
+ from recurvedata.core.translation import _l
13
+
14
# Control characters used as delimiters in Hive text data
# (presumably Hive's default text-format separators; confirm against the table DDL in use).
HIVE_FIELD_DELIMITER = "\x01"
HIVE_ARRAY_DELIMITER = "\x02"
HIVE_MAP_ITEM_DELIMITER = "\x02"  # same character as the array delimiter
HIVE_MAP_KV_DELIMITER = "\x03"
# Two-character marker (backslash followed by N) representing NULL.
HIVE_NULL = r"\N"

# Internal connection type key and the name shown in the UI.
CONNECTION_TYPE = "hive"
UI_CONNECTION_TYPE = "Apache Hive"
22
+
23
+
24
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class HiveConnector(DBAPIBase):
    """Connector for Apache Hive.

    Registered under both the internal connection type (``"hive"``) and the
    UI name (``"Apache Hive"``). Declares the configuration schema for the
    connection form and builds the SQLAlchemy URL using the ``hive`` driver.
    """

    SYSTEM_DATABASES = [
        "information_schema",
    ]
    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    setup_extras_require = ["PyHive", "thrift-sasl"]
    driver = "hive"
    # Connection types accepted for the metastore backing database.
    valid_metastore_types = [
        MYSQL_CONNECTION_TYPE,
        POSTGRES_CONNECTION_TYPE,
    ]
    category = [ConnectionCategory.WAREHOUSE]
    group = [ConnectorGroup.DESTINATION]

    config_schema = {
        "type": "object",
        "properties": {
            "host": {
                "type": "string",
                "title": _l("Host Address"),
                "default": "127.0.0.1",
            },
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 10000,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "database": {
                "type": "string",
                "title": _l("Database Name"),
                "description": _l("The name of the database to connect to"),
                "default": "default",
            },
            "hdfs_options": {
                "type": "object",
                "title": _l("HDFS Options"),
                "description": _l("Configuration options for HDFS connection"),
                "properties": {
                    "host": {
                        "type": "string",
                        "title": _l("Host Address"),
                        "description": _l("HDFS namenode hostname or IP address"),
                    },
                    "port": {
                        "type": "number",
                        "title": _l("Port Number"),
                        "description": _l("HDFS namenode port number"),
                        "default": 50070,
                    },
                    "user": {"type": "string", "title": _l("Username")},
                    "staging_folder": {
                        "type": "string",
                        "title": _l("Transfer Staging Folder"),
                        "description": _l("Temporary HDFS directory path for data transfer staging"),
                        "default": "/tmp/recurve",
                    },
                },
                "order": ["host", "port", "user", "staging_folder"],
            },
            "auth": {
                "type": "string",
                "title": _l("Authentication Type"),
                "default": "LDAP",
            },
            "hive_conf": {
                "type": "object",
                "title": _l("Hive Execute Configurations"),
                "description": _l("Additional Hive execution parameters"),
                "properties": {
                    "spark.yarn.queue": {
                        "type": "string",
                        "title": _l("Hive On Spark Queue"),
                        "description": _l("YARN queue name for Spark execution"),
                    },
                    "tez.queue.name": {
                        "type": "string",
                        "title": _l("Hive On Tez Queue"),
                        "description": _l("YARN queue name for Tez execution"),
                    },
                },
                "order": ["spark.yarn.queue", "tez.queue.name"],
            },
            # 'metastore': {
            #     'title': 'Hive Metastore Config',
            #     'type': 'object',
            #     'properties': {
            #         'type': {
            #             'type': 'string',
            #             'title': 'Metastore Type',
            #             'default': MYSQL_CONNECTION_TYPE,
            #         },
            #         'host': {
            #             'type': 'string',
            #             'title': 'Metastore Host Address',
            #         },
            #         'user': {
            #             'type': 'string',
            #             'title': 'Metastore User Name',
            #         },
            #         'password': {
            #             'type': 'string',
            #             'title': 'Metastore Password',
            #         },
            #         'database': {
            #             'type': 'string',
            #             'title': 'Metastore Database Name',
            #         },
            #         'port': {
            #             'type': 'number',
            #             'title': 'Metastore Port Number',
            #         },
            #     },
            #     "order": ['host', 'port', 'user', 'password', 'database'],
            #     'secret': ['password'],
            # },
            # 'ssh_tunnel': SSH_TUNNEL_CONFIG_SCHEMA,
        },
        "order": [
            "host",
            "port",
            "user",
            "password",
            "database",
            "hdfs_options",
            "auth",
            "hive_conf",
        ],
        "required": ["host", "port"],
        "secret": ["password"],
    }

    @property
    def connect_args(self):
        """Extra connection arguments: the authentication type to use.

        Uses the configured ``auth`` value so it agrees with the ``auth``
        query parameter built in ``sqlalchemy_url``; falls back to ``"LDAP"``
        (the schema default) when no value is configured.
        """
        return {"auth": getattr(self, "auth", None) or "LDAP"}

    # generate_ddl todo: stored as parquet

    def _extract_column_name(self, column_type):
        """Return the ``__visit_name__`` of ``column_type``.

        When the visit name is ``"type_decorator"`` the visit name of the
        wrapped ``column_type.impl`` is returned instead.
        """
        visit_type = column_type.__visit_name__
        if visit_type == "type_decorator":
            return column_type.impl.__visit_name__
        return visit_type

    @property
    def metastore_connector(self) -> Optional[DBAPIBase]:
        """Connector for the metastore database, or ``None`` when ``self.metastore`` is falsy.

        The metastore settings are unpacked into a ``MetastoreConfig`` and the
        connector is created with this connection's ``ssh_tunnel`` config.
        """
        if not self.metastore:
            return None
        metastore_config = MetastoreConfig(**self.metastore)
        return metastore_config.get_connector(self.conf.get("ssh_tunnel"))

    @property
    def sqlalchemy_url(self):
        """SQLAlchemy URL for this connection.

        When an SSH tunnel is configured and active, the tunnel's local bind
        host/port replace the configured host/port. The authentication type is
        passed as the ``auth`` query parameter.
        """
        host, port = self.host, self.port
        if self.ssh_tunnel and self.ssh_tunnel.is_active:
            host, port = self.ssh_tunnel.local_bind_host, self.ssh_tunnel.local_bind_port

        # NOTE(review): SQLAlchemy 1.4+ documents URL.create() as the public constructor;
        # confirm the supported SQLAlchemy versions before switching.
        return URL(self.driver, self.user, self.password, host, port, self.database, query={"auth": self.auth})
185
+
186
+
187
@dataclass
class MetastoreConfig:
    """Connection settings for a Hive metastore database."""

    # Connection type key used to look up the connector class.
    type: str
    host: str
    user: str
    password: str
    database: str
    port: int

    def get_connector(self, ssh_tunnel_config: Optional[dict]) -> DBAPIBase:
        """Instantiate the connector class registered for ``self.type``.

        :param ssh_tunnel_config: SSH tunnel settings passed through as the
            ``ssh_tunnel`` entry of the connector's ``conf`` (may be ``None``).
        :return: a connector built from this config's host, user, password,
            database and port.
        """
        connector_cls: Type[DBAPIBase] = get_connection_class(self.type)
        connector_conf = {
            "host": self.host,
            "user": self.user,
            "password": self.password,
            "database": self.database,
            "port": self.port,
            "ssh_tunnel": ssh_tunnel_config,
        }
        return connector_cls(conf=connector_conf)