recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
File without changes
@@ -0,0 +1,30 @@
1
+ from recurvedata.connectors._register import register_connector_class
2
+ from recurvedata.connectors.base import RecurveConnectorBase
3
+ from recurvedata.consts import ConnectorGroup
4
+ from recurvedata.core.translation import _l
5
+
6
+ CONNECTION_TYPE = "aliyun_access_key"
7
+ UI_CONNECTION_TYPE = "Aliyun Access Key"
8
+
9
+
10
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class AliyunAccessKeyConnector(RecurveConnectorBase):
    """Declarative connector holding an Aliyun endpoint and access-key pair.

    The class only describes its configuration form; it opens no connection
    and its ``test_connection`` is a no-op.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    # Consumed by the surrounding framework (not visible here); presumably
    # tells it that a connectivity test is optional -- TODO confirm.
    test_required = False

    # JSON-schema-like description of the configuration form.
    config_schema = {
        "type": "object",
        "properties": {
            "endpoint": {
                "type": "string",
                "title": _l("Endpoint"),
            },
            "access_key_id": {
                "type": "string",
                "title": _l("Access Key ID"),
            },
            "access_key_secret": {
                "type": "string",
                "title": _l("Access Key Secret"),
            },
        },
        "order": [
            "endpoint",
            "access_key_id",
            "access_key_secret",
        ],
        "required": [
            "endpoint",
            "access_key_id",
            "access_key_secret",
        ],
        # Only the secret half of the key pair is listed as secret.
        "secret": [
            "access_key_secret",
        ],
    }

    def test_connection(self):
        """Intentionally do nothing: there is no endpoint to probe here."""
        return None
@@ -0,0 +1,44 @@
1
+ import json
2
+
3
+ from recurvedata.connectors._register import register_connector_class
4
+ from recurvedata.connectors.base import RecurveConnectorBase
5
+ from recurvedata.core.translation import _l
6
+
7
+ CONNECTION_TYPE = "auth"
8
+ UI_CONNECTION_TYPE = "Auth"
9
+
10
+
11
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class Auth(RecurveConnectorBase):
    """Generic host / user / password credential holder.

    Besides the basic fields, an ``extra`` text area accepts a JSON document
    that ``preprocess_conf`` decodes into a Python object.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE

    # JSON-schema-like description of the configuration form.
    config_schema = {
        "type": "object",
        "properties": {
            "host": {
                "type": "string",
                "title": _l("Host Address"),
            },
            "user": {
                "type": "string",
                "title": _l("Username"),
            },
            "password": {
                "type": "string",
                "title": _l("Password"),
            },
            "port": {
                "type": "number",
                "title": _l("Port Number"),
            },
            "extra": {
                "type": "string",
                "title": _l("Additional Configuration"),
                "description": _l("Additional configuration parameters in JSON format"),
                "ui:options": {"type": "textarea"},
            },
        },
        "order": [
            "host",
            "user",
            "password",
            "port",
            "extra",
        ],
        "required": [
            "host",
        ],
        "secret": [
            "password",
        ],
    }

    def test_connection(self):
        """Intentionally do nothing: this connector has nothing to probe."""
        return None

    @staticmethod
    def preprocess_conf(data):
        """Run the base preprocessing, then decode a JSON-string ``extra``.

        Args:
            data: Raw configuration mapping.

        Returns:
            The mapping returned by ``RecurveConnectorBase.preprocess_conf``,
            with ``extra`` replaced by its decoded value when it was a
            non-empty string.

        Raises:
            json.JSONDecodeError: If ``extra`` is a non-empty string that is
                not valid JSON.
        """
        data = RecurveConnectorBase.preprocess_conf(data)
        raw_extra = data.get("extra")
        # Leave missing, empty, or already-decoded values untouched.
        if not raw_extra or not isinstance(raw_extra, str):
            return data
        data["extra"] = json.loads(raw_extra)
        return data
@@ -0,0 +1,89 @@
1
+ try:
2
+ from adlfs import AzureBlobFileSystem
3
+ except ImportError:
4
+ AzureBlobFileSystem = None
5
+
6
+ from recurvedata.connectors._register import register_connector_class
7
+ from recurvedata.connectors.object_store import ObjectStoreMixin
8
+ from recurvedata.consts import ConnectorGroup
9
+ from recurvedata.core.translation import _l
10
+
11
+ CONNECTION_TYPE = "azure_blob"
12
+ UI_CONNECTION_TYPE = "Azure Blob Storage"
13
+
14
+
15
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class AzureBlob(ObjectStoreMixin):
    """Azure Blob Storage connector backed by ``adlfs.AzureBlobFileSystem``.

    Authentication comes either from an explicit ``connection_string`` or
    from a connection string assembled out of the individual fields
    (account name, SAS token or account key, endpoint suffix).
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    setup_extras_require = [
        "fsspec[adl]",
    ]
    group = [ConnectorGroup.DESTINATION]

    # JSON-schema-like description of the configuration form.
    # NOTE(review): ``connection_string`` can embed credentials but is not
    # listed under "secret"; left unchanged here to avoid altering how
    # existing configurations are stored -- confirm whether it should be.
    config_schema = {
        "type": "object",
        "properties": {
            "connection_string": {
                "type": "string",
                "title": _l("Connection String"),
                "description": _l("Azure Storage connection string containing authentication details"),
            },
            "account_name": {"type": "string", "title": _l("Storage Account Name")},
            "account_key": {"type": "string", "title": _l("Account Access Key")},
            "sas_token": {"type": "string", "title": _l("SAS Token")},
            "container": {"type": "string", "title": _l("Container Name")},
            "endpoint_suffix": {
                "type": "string",
                "title": _l("Endpoint Suffix"),
                "description": _l("Storage endpoint suffix (e.g. core.windows.net)"),
            },
        },
        "order": ["connection_string", "account_name", "account_key", "sas_token", "container", "endpoint_suffix"],
        "required": [],
        "secret": ["account_key", "sas_token"],
    }

    juice_sync_able = True

    def init_connection(self, conf) -> AzureBlobFileSystem:
        """Create the filesystem client and remember it on ``self.connector``.

        Args:
            conf: Configuration mapping holding the optional
                ``account_name``, ``account_key``, ``sas_token`` and
                ``container`` entries.

        Returns:
            The newly created ``AzureBlobFileSystem``.

        Raises:
            ImportError: If the optional ``adlfs`` dependency is missing.
        """
        if AzureBlobFileSystem is None:
            # The module-level import is optional; fail with an actionable
            # message instead of "'NoneType' object is not callable".
            raise ImportError("adlfs is required for Azure Blob Storage connections; install 'fsspec[adl]'")
        con = AzureBlobFileSystem(
            connection_string=self.connection_string,
            account_name=conf.get("account_name"),
            account_key=conf.get("account_key"),
            sas_token=conf.get("sas_token"),
            container_name=conf.get("container"),
        )
        self.connector = con
        return con

    @property
    def connection_string(self):
        """Return the configured connection string, or assemble one.

        An explicit ``connection_string`` in the configuration wins.
        Otherwise the string is built from the individual fields. A SAS
        token takes precedence; the account key is used only when no SAS
        token is configured, so configurations that already worked with a
        SAS token keep behaving the same.
        """
        if self.conf.get("connection_string"):
            return self.conf["connection_string"]
        parts = [
            "DefaultEndpointsProtocol=https",
        ]
        if self.account_name:
            parts.append(f"AccountName={self.account_name}")
        if self.endpoint_suffix:
            parts.append(f"EndpointSuffix={self.endpoint_suffix}")
        if self.sas_token:
            parts.append(f"SharedAccessSignature={self.sas_token}")
        else:
            # The assembled string is always handed to the client, so the
            # account key has to be part of it to take effect.
            account_key = self.conf.get("account_key")
            if account_key:
                parts.append(f"AccountKey={account_key}")
        return ";".join(parts)

    @property
    def bucket(self):
        """Return the configured container name (``None`` when unset)."""
        return self.conf.get("container")

    def test_connection(self):
        """Probe the container; re-raise any failure.

        If ``exists`` fails with an authorization error, a listing is tried
        instead; a successful listing counts as a working connection.
        """
        try:
            self.connector.exists(self.bucket_key(""))
        except Exception as e:
            if "This request is not authorized to perform this operation" in str(e):
                self.connector.ls(self.bucket_key(""))  # todo: return_glob?
                return
            raise

    def juice_sync_path(self, path: str) -> str:
        """Return ``path`` prefixed with the ``wasb://`` scheme."""
        return f"wasb://{path}"  # todo
@@ -0,0 +1,79 @@
1
+ from sqlalchemy.engine import URL
2
+
3
+ from recurvedata.connectors._register import register_connector_class
4
+ from recurvedata.connectors.dbapi import DBAPIBase
5
+ from recurvedata.consts import ConnectionCategory, ConnectorGroup
6
+ from recurvedata.core.translation import _l
7
+
8
+ CONNECTION_TYPE = "azure_synapse"
9
+ UI_CONNECTION_TYPE = "Azure Synapse"
10
+
11
+
12
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class SynapseConnector(DBAPIBase):
    """Azure Synapse connector using SQLAlchemy's ``mssql+pyodbc`` driver."""

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    setup_extras_require = ["pyodbc"]
    driver = "mssql+pyodbc"

    category = [
        ConnectionCategory.WAREHOUSE,
    ]
    group = [ConnectorGroup.DESTINATION]

    # JSON-schema-like description of the configuration form.
    config_schema = {
        "type": "object",
        "properties": {
            "host": {"type": "string", "title": _l("Host Address")},
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 1433,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "database": {"type": "string", "title": _l("Database Name")},
            "odbc_driver": {
                "type": "string",
                "title": _l("ODBC Driver"),
                "default": "ODBC Driver 17 for SQL Server",
            },
            "blob_options": {
                "type": "object",
                "title": _l("Azure Blob Storage Options"),
                "properties": {
                    "account_name": {"type": "string", "title": _l("Storage Account Name")},
                    "sas_token": {"type": "string", "title": _l("SAS Token")},
                },
                "order": ["account_name", "sas_token"],
            },
        },
        "order": ["host", "port", "user", "password", "database", "odbc_driver", "blob_options"],
        "required": ["host", "port"],
        "secret": ["password", "blob_options.sas_token"],
    }

    @property
    def odbc_driver(self):
        """Return the ODBC driver name, falling back to the schema default.

        ``odbc_driver`` is not a required field, so a missing or empty value
        resolves to the default declared in ``config_schema`` instead of
        raising ``KeyError``.
        """
        default_driver = self.config_schema["properties"]["odbc_driver"]["default"]
        return self.conf.get("odbc_driver") or default_driver

    @property
    def sqlalchemy_url(self):
        """Build the SQLAlchemy URL for this connection.

        ``URL.create`` is used because calling ``URL(...)`` directly is
        deprecated. Autocommit is requested through the query string.
        """
        return URL.create(
            self.driver,
            username=self.user,
            password=self.password,
            host=self.host,
            port=self.port,
            database=self.database,
            query={"driver": self.odbc_driver, "autocommit": "True"},
        )

    def connect(self):
        """Return the engine produced by the base class, unchanged."""
        # TODO: decide whether the engine should also be switched to
        # isolation_level="AUTOCOMMIT" via execution_options.
        engine = super().connect()
        return engine
@@ -0,0 +1,359 @@
1
+ import base64
2
+ import json
3
+ import os
4
+ from functools import cached_property
5
+ from typing import Any
6
+
7
+ from sqlalchemy import create_engine
8
+ from sqlalchemy.engine import URL
9
+
10
+ from recurvedata.connectors._register import register_connector_class
11
+ from recurvedata.connectors.datasource import DataSourceWrapper
12
+ from recurvedata.connectors.dbapi import DBAPIBase, with_ssh_tunnel
13
+ from recurvedata.connectors.proxy import HTTP_PROXY_CONFIG_SCHEMA, HttpProxyMixin
14
+ from recurvedata.consts import ConnectionCategory, ConnectorGroup
15
+ from recurvedata.core.translation import _l
16
+
17
+ CONNECTION_TYPE = "bigquery"
18
+ UI_CONNECTION_TYPE = "Google BigQuery"
19
+
20
+
21
+ @register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
22
+ class BigQueryConnector(HttpProxyMixin, DBAPIBase):
23
+ setup_extras_require = ["sqlalchemy-bigquery"]
24
+ connection_type = CONNECTION_TYPE
25
+ ui_connection_type = UI_CONNECTION_TYPE
26
+ driver = "bigquery"
27
+ category = [
28
+ ConnectionCategory.WAREHOUSE,
29
+ ConnectionCategory.DATABASE,
30
+ ]
31
+ group = [ConnectorGroup.DESTINATION]
32
+
33
+ # All supported BigQuery data types and their aliases based on official documentation
34
+ available_column_types = [
35
+ # Integer types
36
+ "int64", # 64-bit integer
37
+ "integer", # alias for int64
38
+ "int", # alias for int64
39
+ "smallint", # alias for int64
40
+ "bigint", # alias for int64
41
+ "tinyint", # alias for int64
42
+ "byteint", # alias for int64
43
+ # Floating-point types
44
+ "float64", # 64-bit floating point
45
+ # Decimal types
46
+ "numeric", # exact numeric values
47
+ "decimal", # alias for numeric
48
+ "bignumeric", # high-precision decimal values
49
+ "bigdecimal", # alias for bignumeric
50
+ # String type
51
+ "string", # variable-length character data
52
+ # Boolean type
53
+ "bool", # true or false
54
+ "boolean", # alias for bool
55
+ # Bytes type
56
+ "bytes", # variable-length binary data
57
+ # Date/Time types
58
+ "date", # calendar date
59
+ "datetime", # date and time
60
+ "time", # time of day
61
+ "timestamp", # absolute point in time
62
+ # Complex types
63
+ "array", # ordered list of values
64
+ "struct", # container of ordered fields
65
+ "json",
66
+ # Geography type
67
+ "geography", # spatial data type
68
+ # Interval type
69
+ "interval", # time intervals
70
+ # Range types
71
+ "range",
72
+ ]
73
+
74
+ config_schema = {
75
+ "type": "object",
76
+ "properties": {
77
+ "type": {
78
+ "type": "string",
79
+ "title": _l("Account Type"),
80
+ "default": "service_account",
81
+ },
82
+ "project_id": {"type": "string", "title": _l("Google Cloud Project ID")},
83
+ "private_key_id": {"type": "string", "title": _l("Google Auth Private Key ID")},
84
+ "private_key": {"type": "string", "title": _l("Google Auth Private Key")},
85
+ "client_id": {"type": "string", "title": _l("Google OAuth Client ID")},
86
+ "client_email": {"type": "string", "title": _l("Service Account Email")},
87
+ "token_uri": {
88
+ "type": "string",
89
+ "title": _l("Google OAuth Token URI"),
90
+ "default": "https://oauth2.googleapis.com/token",
91
+ },
92
+ "proxies": HTTP_PROXY_CONFIG_SCHEMA["proxies"],
93
+ },
94
+ "order": [
95
+ "type",
96
+ "project_id",
97
+ "private_key_id",
98
+ "private_key",
99
+ "client_id",
100
+ "client_email",
101
+ "token_uri",
102
+ "proxies",
103
+ ],
104
+ "required": ["type", "project_id", "private_key", "private_key_id", "client_email", "token_uri"],
105
+ "secret": ["private_key"],
106
+ }
107
+
108
@property
def sqlalchemy_url(self):
    """Build the SQLAlchemy URL for this BigQuery connection.

    ``URL.create`` replaces the deprecated direct ``URL(...)`` call.
    """
    # NOTE(review): the project id is passed as the second positional
    # argument, exactly as before; in ``URL.create`` that slot is
    # ``username``. Confirm whether ``host=self.project_id`` was intended.
    return URL.create(self.driver, self.project_id)
111
+
112
def build_credentials(self, used_in_write_file: bool = False) -> dict:
    """Assemble the service-account credential mapping.

    Args:
        used_in_write_file: Forwarded to ``_convert_private_key``; when
            true, validation of the converted key is skipped.

    Returns:
        A dict with ``type``, ``project_id``, ``private_key_id``,
        ``private_key`` (converted), ``client_email`` and ``token_uri``.

    Raises:
        ValueError: If the converted key fails ``validate_private_key``
            (only checked when ``used_in_write_file`` is false).
    """
    import logging

    def _shape(key):
        # Describe a key without revealing any of its content: private-key
        # material must never be written to logs.
        if not key:
            return "None"
        return f"length={len(key)}, has_pem_header={'-----BEGIN' in key}, line_breaks={key.count(chr(10))}"

    converted_key = self._convert_private_key(self.private_key, used_in_write_file=used_in_write_file)

    # Validate the converted private key unless it is destined for a file.
    if not used_in_write_file and not self.validate_private_key(converted_key):
        logger = logging.getLogger(__name__)
        logger.error("Private key validation failed after conversion")
        logger.error("Original key shape: %s", _shape(self.private_key))
        logger.error("Converted key shape: %s", _shape(converted_key))
        raise ValueError("Private key format is invalid after conversion. Please check the private key format.")

    return {
        "type": self.type,
        "project_id": self.project_id,
        "private_key_id": self.private_key_id,
        "private_key": converted_key,
        "client_email": self.client_email,
        "token_uri": self.token_uri,
    }
132
+
133
def build_credentials_base64(self, used_in_write_file: bool = False) -> str:
    """Return the credential mapping as base64-encoded JSON text.

    Args:
        used_in_write_file: Forwarded to ``build_credentials``.

    Returns:
        The UTF-8 JSON serialization of the credentials, base64-encoded
        and decoded back to ``str``.

    Raises:
        Exception: Any error from building or serializing the credentials
            is logged and re-raised unchanged.
    """
    import logging

    try:
        credentials_info = self.build_credentials(used_in_write_file=used_in_write_file)
        return base64.b64encode(json.dumps(credentials_info).encode("utf-8")).decode("utf-8")
    except Exception as e:
        logger = logging.getLogger(__name__)
        logger.error("Failed to build credentials: %s", e)
        # Log only whether a key is present and how long it is; the key
        # content itself is secret and must not reach the logs.
        logger.error(
            "Private key present: %s, length: %d",
            bool(self.private_key),
            len(self.private_key or ""),
        )
        raise
144
+
145
@staticmethod
def _convert_private_key(private_key: str, used_in_write_file: bool = False) -> str:
    """
    Convert a private key from various escape formats to proper PEM format.

    Handles any depth of backslash-escaped newlines that can accumulate during
    transmission/storage, and re-wraps keys that arrive as a single line.

    :param private_key: the stored key; falsy values are returned unchanged.
    :param used_in_write_file: when true, only surrounding whitespace is
        stripped and the key is otherwise returned as-is.
    :return: the normalised key text.
    """
    if not private_key:
        return private_key

    # Remove any leading/trailing whitespace
    private_key = private_key.strip()

    # If used in write file, return as-is to preserve formatting
    if used_in_write_file:
        return private_key

    # Collapse every extra level of escaping (\\\\n -> \\n) until only
    # single-escaped newlines remain. A fixed number of passes would leave
    # stray backslashes behind for keys escaped three or more times.
    while "\\\\n" in private_key:
        private_key = private_key.replace("\\\\n", "\\n")

    # Turn the remaining literal "\n" sequences into real newlines.
    private_key = private_key.replace("\\n", "\n")

    begin_marker = "-----BEGIN PRIVATE KEY-----"
    end_marker = "-----END PRIVATE KEY-----"

    # Edge case: the key has the BEGIN marker but no newline at all, i.e. it
    # was flattened onto one line. Rebuild the standard PEM layout.
    if begin_marker in private_key and "\n" not in private_key:
        private_key = private_key.replace(begin_marker, begin_marker + "\n")
        private_key = private_key.replace(end_marker, "\n" + end_marker)

        lines = private_key.split("\n")
        if len(lines) >= 2:
            begin_line = lines[0]
            end_line = lines[-1]
            # Drop whitespace inside the body: flattened keys often have their
            # newlines replaced by spaces, which must not end up in the
            # re-wrapped base64 payload.
            key_content = "".join("".join(lines[1:-1]).split())

            # Standard PEM bodies are wrapped at 64 characters per line.
            wrapped = [key_content[i:i + 64] for i in range(0, len(key_content), 64)]
            private_key = "\n".join([begin_line, *wrapped, end_line])

    return private_key
194
+
195
def validate_private_key(self, private_key: str) -> bool:
    """
    Report whether ``private_key`` looks like, and parses as, a PEM private key.

    The key must be non-empty, contain both PRIVATE KEY markers and at least
    one real newline, and be loadable (without password) by the
    ``cryptography`` PEM loader. Any exception raised along the way, including
    a failure to import ``cryptography``, yields ``False``.
    """
    required_parts = (
        "-----BEGIN PRIVATE KEY-----",
        "-----END PRIVATE KEY-----",
        "\n",
    )
    try:
        if not private_key:
            return False

        # Basic PEM structure: both markers plus at least one newline.
        if any(part not in private_key for part in required_parts):
            return False

        # Try to parse with cryptography library (same as Google uses)
        from cryptography.hazmat.primitives import serialization

        serialization.load_pem_private_key(private_key.encode("utf-8"), password=None)
    except Exception:
        return False
    return True
228
+
229
def init_proxy(self):
    """
    Export the configured proxies as ``http_proxy`` / ``https_proxy`` env vars.

    Runs at most once per instance: the first call sets ``_proxy_inited`` and
    later calls return immediately. Nothing is exported when ``self.proxies``
    is empty, and a scheme that is missing from it (or has an empty value) is
    skipped instead of raising.
    """
    if hasattr(self, "_proxy_inited"):
        return
    self._proxy_inited = True
    if not self.proxies:
        return

    for scheme in ("http", "https"):
        # A config may define only one of the two schemes; indexing directly
        # would raise KeyError, and assigning None to os.environ a TypeError.
        proxy_url = self.proxies.get(scheme)
        if proxy_url:
            os.environ[f"{scheme}_proxy"] = proxy_url
    # todo: grpc proxy
239
+
240
def connect(self):
    """
    Create a SQLAlchemy engine for this BigQuery project.

    Proxy env vars are initialised first. The connection URL is
    ``bigquery://<project_id>`` with two query parameters: the URL-quoted
    base64 credentials and a ``list_tables_page_size`` of 100.
    """
    self.init_proxy()

    from urllib.parse import quote_plus

    project_url = f"bigquery://{self.project_id}"

    # BigQuery-specific settings travel as query parameters on the URL.
    encoded_credentials = quote_plus(self.build_credentials_base64())
    connection_url = (
        f"{project_url}?credentials_base64={encoded_credentials}&list_tables_page_size=100"
    )

    # todo: thread safe? use session to wrap?
    return create_engine(
        connection_url,
        arraysize=1000,
        max_overflow=0,  # todo: add to const
        pool_recycle=10 * 60,  # todo: add to const
        echo=True,  # todo
    )
267
+
268
@with_ssh_tunnel
def fetchall(self, query):
    """
    Execute ``query`` on a raw DB-API connection and return all rows.

    Overrides the base implementation to avoid the cursor's context manager,
    because google.cloud.bigquery.dbapi cursors do not support it. Cursor and
    connection are closed in ``finally`` blocks so they are released even when
    executing or fetching raises.

    :param query: SQL text passed to ``cursor.execute``.
    :return: whatever ``cursor.fetchall()`` returns.
    """
    engine = self.connect()
    connection = engine.raw_connection()
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(query)
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()
282
+
283
@classmethod
def get_sql_operator_types(cls):
    """Return the SQL operator type names: the connection type plus the pigeon alias."""
    pigeon_type = "google_bigquery"
    return [cls.connection_type, pigeon_type]
287
+
288
@with_ssh_tunnel
def get_tables(self, database: str = None) -> list[str]:
    """
    List table names from the parent implementation without any dotted prefix.

    Only the part after the last ``.`` of each returned name is kept.
    """
    qualified_names: list[str] = super().get_tables(database)
    # inspector will return jaffle_shop.table_name format
    return [name.rsplit(".", 1)[-1] for name in qualified_names]
296
+
297
def convert_config_to_dbt_profile(self, database: str, schema: str = None) -> dict:
    """
    Build the dbt profile mapping for this connection.

    Uses the ``service-account-json`` method with the credentials produced by
    ``build_credentials(used_in_write_file=True)``. ``schema`` is accepted but
    not used; an empty ``database`` falls back to the dataset ``"stripe"``.
    """
    dataset = database if database else "stripe"  # todo: tmp
    profile = dict(
        method="service-account-json",
        project=self.project_id,
        dataset=dataset,
        type=self.connection_type,
        threads=10,
        keyfile_json=self.build_credentials(used_in_write_file=True),
        timeout_seconds=60,
        priority="interactive",
        retries=1,
    )
    return profile
309
+
310
def set_env_when_get_dbt_connection(self):
    """Intentionally a no-op for this connector; returns ``None``."""
    return None
312
+
313
@with_ssh_tunnel
def get_columns(self, table: str, database: str = None) -> list:
    """
    Return column metadata for ``table`` from INFORMATION_SCHEMA.COLUMNS.

    :param table: table name to look up.
    :param database: dataset to query; defaults to ``self.database``.
    :return: list of dicts with ``name``, ``type`` (lower-cased, or ``None``),
        ``nullable`` (``True`` when ``is_nullable`` is ``"YES"``), ``default``
        and an empty ``comment``.
    """
    database = database or self.database

    def _escape(value) -> str:
        # BigQuery string literals and backtick-quoted identifiers share the
        # same backslash escape sequences, so one routine covers both. This
        # keeps a quote or backtick in a name from terminating the
        # literal/identifier and altering the statement.
        return str(value).replace("\\", "\\\\").replace("'", "\\'").replace("`", "\\`")

    query = f"""
        SELECT
            column_name,
            data_type,
            is_nullable,
            column_default
        FROM
            `{self.project_id}.{_escape(database)}.INFORMATION_SCHEMA.COLUMNS`
        WHERE
            table_name = '{_escape(table)}'
    """
    result = self.fetchall(query)
    return [
        {
            "name": row[0],
            "type": row[1].lower() if row[1] else None,
            "nullable": row[2] == "YES",
            "default": row[3],
            "comment": "",
        }
        for row in result
    ]
340
+
341
@cached_property
@with_ssh_tunnel
def type_code_mapping(self) -> dict:
    """
    Map upper-cased type names to the entries of ``available_column_types``.

    type_code from sqlalchemy's cursor.description -> database's dialect data type name
    """
    mapping = {}
    for type_name in self.available_column_types:
        mapping[type_name.upper()] = type_name
    return mapping
348
+
349
+ def sqlalchemy_column_type_code_to_name(self, type_code: Any, size: int | None = None) -> str:
350
+ # values refer to https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
351
+ # values of this mapping must be in available_column_types, or just default as string
352
+ return type_code.lower()
353
+
354
def convert_config_to_cube_config(self, database: str, schema: str = None, ds: DataSourceWrapper = None) -> dict:
    """
    Build the Cube data-source config for this connection.

    ``database``, ``schema`` and ``ds`` are accepted but not used; the result
    carries the project id and the base64 credentials built with
    ``used_in_write_file=True``.
    """
    cube_config = {"type": "bigquery", "projectId": self.project_id}
    cube_config["credentials"] = self.build_credentials_base64(used_in_write_file=True)
    return cube_config