recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,115 @@
1
+ import logging
2
+ import os
3
+
4
+ import jsonschema
5
+
6
+ try:
7
+ from recurvedata.pigeon.utils import fs
8
+ except ImportError:
9
+ pass
10
+
11
+ from recurvedata.core.translation import _l
12
+ from recurvedata.operators.transfer_operator.task import LoadTask
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class AzureBlobStorageLoadTask(LoadTask):
    """Load task that uploads the dumped local file to Azure Blob Storage.

    The target container is taken from the task configuration or, when it is
    empty there, from the ``container`` entry of the connection's ``extra``
    settings. ``validate`` rejects configurations where neither is set.
    """

    ds_name_fields = ("data_source_name",)
    ds_types = ("azure_blob",)
    should_write_header = True
    worker_install_require = ["pigeon"]

    def execute_impl(self, *args, **kwargs):
        """Optionally compress ``self.filename`` and upload it as a blob.

        Nothing is uploaded when ``fs.is_file_empty`` reports the file as
        empty. After a successful upload both the source file and the
        (possibly compressed) uploaded file are handed to
        ``fs.remove_files_safely``.

        Returns:
            None.
        """
        if fs.is_file_empty(self.filename):
            logger.warning("file %s not exists or has no content, skip.", self.filename)
            return

        azure_blob_ds = self.must_get_connection_by_name(self.config["data_source_name"])
        azure_blob = azure_blob_ds.connector
        rendered = self.rendered_config

        # "container", "blob" and "overwrite" are not listed under "required"
        # in config_schema, so they may be absent: read them with .get()
        # instead of subscripting, which would raise KeyError.
        config_container = rendered.get("container") or azure_blob_ds.extra.get("container")
        # NOTE: a check that the container exists (guarded by an
        # ``auto_create_container`` option) is currently disabled.

        # compress or not
        compress_mode = rendered["compress_mode"]
        file_upload, _ = self.compress_file(filename=self.filename, compress_mode=compress_mode)

        # Fall back to the local file's base name when no blob name is configured.
        blob_name = rendered.get("blob") or os.path.basename(self.filename)
        logger.info("uploading %s to %s/%s...", file_upload, config_container, blob_name)
        azure_blob.upload(
            blob_name=blob_name,
            # Same default as the "overwrite" property in config_schema.
            overwrite=rendered.get("overwrite", True),
            local_file_path=file_upload,
            container_name=config_container,
        )
        fs.remove_files_safely([self.filename, file_upload])

    @classmethod
    def validate(cls, configuration):
        """Validate ``configuration`` and make sure a container can be resolved.

        Args:
            configuration: the raw task configuration mapping.

        Returns:
            The value returned by the parent class' ``validate``.

        Raises:
            jsonschema.ValidationError: when neither the configuration nor the
                connection's ``extra`` settings provide a container.
        """
        config = super().validate(configuration)
        must_get_by_name = cls.must_get_connection_by_name

        if not config.get("container"):
            azure_blob = must_get_by_name(configuration["data_source_name"])
            if not azure_blob.extra.get("container"):
                # ensure container
                raise jsonschema.ValidationError(message="Unknown Container", path=("container",))
        return config

    @classmethod
    def config_schema(cls):
        """Return the JSON-schema style dict describing this task's configuration form."""
        schema = {
            "type": "object",
            "properties": {
                "data_source_name": {
                    "type": "string",
                    "title": _l("Azure Blob Connection"),
                    "ui:field": "ProjectConnectionSelectorField",
                    "ui:options": {
                        "supportTypes": cls.ds_types,
                    },
                },
                "container": {
                    "type": "string",
                    "title": _l("Container"),
                    "description": _l("Container name, required if not set in data source"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "blob": {
                    "type": "string",
                    "title": _l("Blob Name"),
                    "description": _l("Blob name in the container. Jinja templating is supported."),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "compress_mode": {
                    "type": "string",
                    "title": _l("Compression Method"),
                    "description": _l("Compress file before uploading using specified method"),
                    "enum": ["None", "Gzip", "Zip", "Bzip2"],
                    "enumNames": ["None", "Gzip", "Zip", "Bzip2"],
                    "default": "None",
                },
                "overwrite": {
                    "type": "boolean",
                    "title": _l("Overwrite Existing"),
                    "description": _l("Whether to overwrite if target object already exists"),
                    "default": True,
                },
            },
            "required": ["compress_mode", "data_source_name"],
        }
        return schema
@@ -0,0 +1,90 @@
1
+ import copy
2
+
3
+ try:
4
+ from recurvedata.pigeon.loader.csv_to_azure_synapse import CSVToAzureSynapseLoader
5
+ except ImportError:
6
+ pass
7
+
8
+ from recurvedata.core.translation import _l
9
+ from recurvedata.operators.transfer_operator import utils
10
+ from recurvedata.operators.transfer_operator.task import LoadTask
11
+
12
+
13
class AzureSynapseLoadTask(LoadTask):
    """Load task that passes the dumped file to ``CSVToAzureSynapseLoader``."""

    ds_name_fields = ("azure_synapse_data_source_name",)
    ds_types = ("azure_synapse",)
    default_dumper_handler_options = {}
    worker_install_require = ["pigeon"]

    def execute_impl(self, *args, **kwargs):
        """Build the loader options from the rendered config and run the loader.

        Returns:
            Whatever ``CSVToAzureSynapseLoader.execute`` returns.
        """
        connection = self.must_get_connection_by_name(self.config["azure_synapse_data_source_name"])

        # Every rendered option except the connection name is forwarded to the loader.
        loader_kwargs = self.rendered_config.copy()
        loader_kwargs.pop("azure_synapse_data_source_name", None)
        loader_kwargs.update(
            filename=self.filename,
            azure_synapse_connector=connection.connector,
            delete_file=True,
            compress=True,
        )
        return CSVToAzureSynapseLoader(**loader_kwargs).execute()

    @classmethod
    def config_schema(cls):
        """Return the configuration form schema, extended with ``utils.LOAD_COMMON``."""
        properties = {
            "azure_synapse_data_source_name": {
                "type": "string",
                "title": _l("Azure Synapse Connection"),
                "ui:field": "ProjectConnectionSelectorField",
                "ui:options": {"supportTypes": cls.ds_types},
            },
            "schema": {
                "type": "string",
                "title": _l("Database Schema"),
                "description": _l("Schema name in Azure Synapse database"),
                "default": "dbo",
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "plain"},
            },
            "table": {
                "type": "string",
                "title": _l("Target Table"),
                "description": _l("Name of the table to load data into"),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "plain"},
            },
            "create_table_ddl": {
                "type": "string",
                "title": _l("Table Creation SQL"),
                "description": _l(
                    "SQL statement to create the target table if it doesn't exist. See "
                    "<a target='_blank' href='https://learn.microsoft.com/en-us/sql/t-sql/statements/create-table-azure-sql-data-warehouse'>"
                    "Azure Synapse Docs</a> for syntax."
                ),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "code", "lang": "sql", "sqlLang": "sql"},
            },
        }
        # Merge a private copy of the shared options so the module-level
        # LOAD_COMMON dict is never shared with the returned schema.
        properties.update(copy.deepcopy(utils.LOAD_COMMON))

        return {
            "type": "object",
            "properties": properties,
            "required": ["azure_synapse_data_source_name", "table", "mode"],
        }
@@ -0,0 +1,167 @@
1
+ import csv
2
+
3
+ try:
4
+ from recurvedata.pigeon.loader.csv_to_clickhouse import CSVToClickHouseLoader
5
+ except ImportError:
6
+ pass
7
+
8
+ from recurvedata.core.translation import _l
9
+ from recurvedata.operators.transfer_operator import const
10
+ from recurvedata.operators.transfer_operator.task import LoadTask
11
+ from recurvedata.operators.transfer_operator.utils import allowed_modes
12
+
13
+
14
class ClickHouseLoadTask(LoadTask):
    """Load task that passes the dumped file to ``CSVToClickHouseLoader``."""

    ds_name_fields = ("data_source_name",)
    ds_types = ("clickhouse",)
    default_dumper_handler_options = {
        "null": r"\N",
        "quoting": csv.QUOTE_MINIMAL,
    }
    worker_install_require = ["pigeon[clickhouse]"]

    def execute_impl(self, *args, **kwargs):
        """Build the loader options from the rendered config and run the loader.

        Returns:
            Whatever ``CSVToClickHouseLoader.execute`` returns.
        """
        connection = self.must_get_connection_by_name(self.config["data_source_name"])

        # Every rendered option except the connection name is forwarded to the loader.
        loader_kwargs = self.rendered_config.copy()
        loader_kwargs.pop("data_source_name", None)
        loader_kwargs.update(
            filename=self.filename,
            connector=connection.connector,
            delete_file=True,
        )
        return CSVToClickHouseLoader(**loader_kwargs).execute()

    @classmethod
    def config_schema(cls):
        """Return the JSON-schema style dict describing this task's configuration form."""
        properties = {
            "data_source_name": {
                "type": "string",
                "title": _l("ClickHouse Connection"),
                "ui:field": "ProjectConnectionSelectorField",
                "ui:options": {"supportTypes": cls.ds_types},
            },
            "database": {
                "type": "string",
                "title": _l("Target Database"),
                "description": _l("Name of the database to load data into"),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "plain"},
            },
            "table": {
                "type": "string",
                "title": _l("Target Table"),
                "description": _l("Name of the table to load data into"),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "plain"},
            },
            "create_table_ddl": {
                "type": "string",
                "title": _l("Table Creation SQL"),
                "description": _l(
                    "SQL statement to create the target table if it doesn't exist. See "
                    "<a target='_blank' href='https://clickhouse.com/docs/en/sql-reference/statements/create/table'>"
                    "ClickHouse Docs</a> for syntax."
                ),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "code", "lang": "sql", "sqlLang": "sql"},
            },
            "table_engine": {
                "type": "string",
                "title": _l("Table Engine"),
                "description": _l(
                    "Storage engine for the target table. Ignored if Table Creation SQL is provided. See "
                    "<a target='_blank' href='https://clickhouse.com/docs/en/engines/table-engines'>"
                    "ClickHouse Docs</a> for options."
                ),
                "default": "Log",
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "plain"},
            },
            "mode": {
                "type": "string",
                "title": _l("Load Mode"),
                "description": _l("How to handle existing data in the target table"),
                "enum": list(allowed_modes),
                "enumNames": list(allowed_modes),
                "default": const.LOAD_OVERWRITE,
            },
            "primary_keys": {
                # Only shown in the form when the selected mode is MERGE.
                "ui:hidden": '{{parentFormData.mode !== "MERGE"}}',
                "type": "string",
                "title": _l("Primary Keys"),
                "description": _l(
                    "Comma-separated list of columns used for deduplication in MERGE mode. "
                    "Should be primary or unique key columns."
                ),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "plain"},
            },
            # NOTE: a boolean "using_insert" option (default False) is disabled
            # here. Its description said data is imported with
            # `clickhouse-client` by default, falling back to batched INSERT
            # statements on error; the two insert_* options below used to be
            # hidden unless it was enabled.
            "insert_batch_size": {
                "type": "number",
                "ui:options": {"controls": False},
                "title": _l("Batch Size"),
                "description": _l("Number of rows to insert in each batch"),
                "default": 10000,
                "minimum": 1000,
                "maximum": 100000,
            },
            "insert_concurrency": {
                "type": "number",
                "ui:options": {"controls": False},
                "title": _l("Concurrent Inserts"),
                "default": 1,
                "minimum": 1,
                "maximum": 5,
                "description": _l("Number of parallel insert operations"),
            },
            # NOTE: "pre_queries" / "post_queries" options (SQL run before /
            # after loading new data, multiple statements separated by `;`)
            # are disabled here as well.
        }

        return {
            "type": "object",
            "properties": properties,
            "required": ["data_source_name", "database", "table", "insert_batch_size"],
        }
@@ -0,0 +1,164 @@
1
+ try:
2
+ from recurvedata.pigeon.loader.csv_to_doris import CSVToDorisLoader
3
+ except ImportError:
4
+ pass
5
+
6
+ from typing import TYPE_CHECKING, Any, List, Tuple
7
+
8
+ from recurvedata.core.translation import _l
9
+ from recurvedata.operators.transfer_operator import const
10
+ from recurvedata.operators.transfer_operator.task import LoadTask
11
+ from recurvedata.operators.transfer_operator.utils import allowed_modes
12
+
13
+ if TYPE_CHECKING:
14
+ from recurvedata.connectors.pigeon import DataSource
15
+
16
+
17
class DorisLoadTask(LoadTask):
    """Load step of the transfer operator that writes a CSV file into Doris.

    The task looks up the configured connection, combines the rendered task
    configuration with a handful of loader-specific options and hands the
    result to ``CSVToDorisLoader``.
    """

    # NOTE(review): presumably the config keys that hold connection names and
    # the connection types this task accepts -- confirm against LoadTask.
    ds_name_fields: Tuple[str] = ("data_source_name",)
    ds_types: Tuple[str] = ("doris",)

    # CSV formatting options. NOTE(review): presumably consumed by the dump
    # side so the produced file matches what the loader parses -- confirm.
    default_dumper_handler_options = {
        "null": r"\N",
        "lineterminator": "\n",
        "escapechar": "'",
        "doublequote": False,
    }

    # Extra requirement strings declared for this task.
    worker_install_require: List[str] = ["pigeon[doris]"]

    def execute_impl(self, *args, **kwargs) -> Any:
        """Load the CSV file at ``self.filename`` into a Doris table.

        Returns:
            Whatever ``CSVToDorisLoader.execute()`` returns.
        """
        connection: "DataSource" = self.must_get_connection_by_name(self.config["data_source_name"])

        # Start from the rendered configuration; the connection name is not
        # forwarded to the loader.
        options: dict = self.rendered_config.copy()
        options.pop("data_source_name", None)

        # Loader-specific values override anything with the same key in the
        # rendered configuration. Strict mode and the filter ratio are read
        # from the raw (unrendered) config, with defaults when absent.
        options.update(
            filename=self.filename,
            connector=connection.connector,
            delete_file=True,  # the CSV file is removed once loaded
            load_strict_mode=self.config.get("load_strict_mode", False),
            max_filter_ratio=self.config.get("max_filter_ratio", 0),
            database=connection.database,
        )

        return CSVToDorisLoader(**options).execute()

    @classmethod
    def config_schema(cls) -> dict[str, Any]:
        """Return the JSON-schema style description of this task's form.

        Fields are declared below in the order they appear under
        ``properties``; only ``data_source_name`` and ``table`` are required.
        """
        # Visibility expressions shared by the stream-load-only and the
        # INSERT-only fields.
        hidden_when_insert = "{{parentFormData.using_insert}}"
        hidden_unless_insert = "{{!parentFormData.using_insert}}"

        connection_field = {
            "type": "string",
            "title": _l("Doris Connection"),
            "ui:field": "ProjectConnectionSelectorField",
            "ui:options": {
                "supportTypes": cls.ds_types,
            },
        }

        table_field = {
            "type": "string",
            "title": _l("Target Table"),
            "description": _l("Name of the table to load data into"),
            "ui:field": "CodeEditorWithReferencesField",
            "ui:options": {
                "type": "plain",
            },
        }

        ddl_field = {
            "type": "string",
            "title": _l("Table Creation SQL"),
            "description": _l(
                "SQL statement to create the target table if it doesn't exist. See "
                "<a target='_blank' href='https://doris.apache.org/docs/sql-manual/sql-statements/table-and-view/table/CREATE-TABLE'>"
                "Doris Docs</a> for syntax."
            ),
            "ui:field": "CodeEditorWithReferencesField",
            "ui:options": {
                "type": "code",
                "lang": "sql",
                "sqlLang": "mysql",
            },
        }

        mode_field = {
            "type": "string",
            "title": _l("Load Mode"),
            "description": _l("How to handle existing data in the target table"),
            "enum": list(allowed_modes),
            "default": const.LOAD_OVERWRITE,
        }

        # Only shown when the selected mode is MERGE.
        primary_keys_field = {
            "ui:hidden": '{{parentFormData.mode !== "MERGE"}}',
            "type": "string",
            "title": _l("Primary Keys"),
            "description": _l(
                "Comma-separated list of columns used for deduplication in MERGE mode. "
                "Should be primary or unique key columns."
            ),
            "ui:field": "CodeEditorWithReferencesField",
            "ui:options": {
                "type": "plain",
            },
        }

        # Always hidden in the form; defaults to stream load.
        using_insert_field = {
            "type": "boolean",
            "title": _l("Use INSERT Mode"),
            "description": _l("By default Stream Load is used. Enable to use INSERT statements instead."),
            "default": False,
            "ui:hidden": True,
        }

        strict_mode_field = {
            "type": "boolean",
            "title": _l("Enable Strict Mode"),
            "default": False,
            "description": _l(
                "When enabled, validates that data matches target table schema before loading. "
                "Raises error if validation fails."
            ),
            "ui:hidden": hidden_when_insert,
        }

        batch_size_field = {
            "ui:hidden": hidden_unless_insert,
            "type": "number",
            "ui:options": {"controls": False},
            "title": _l("Batch Size"),
            "default": 500,
            "minimum": 1,
            "maximum": 2000,
            "description": _l("Number of rows to insert in each batch"),
        }

        concurrency_field = {
            "ui:hidden": hidden_unless_insert,
            "type": "number",
            "ui:options": {"controls": False},
            "title": _l("Concurrent Inserts"),
            "default": 1,
            "minimum": 1,
            "maximum": 10,
            "description": _l("Number of parallel insert operations"),
        }

        filter_ratio_field = {
            "type": "number",
            "ui:options": {"controls": False},
            "title": _l("Max Filter Ratio"),
            "default": 0,
            "minimum": 0,
            "maximum": 1,
            "description": _l(
                "The maximum tolerated ratio of filterable (e.g., non-compliant) data. "
                "Default is zero tolerance. Value range: 0~1. "
                "If the error rate during import exceeds this value, the import will fail."
            ),
            "ui:hidden": hidden_when_insert,
        }

        return {
            "type": "object",
            "properties": {
                "data_source_name": connection_field,
                "table": table_field,
                "create_table_ddl": ddl_field,
                "mode": mode_field,
                "primary_keys": primary_keys_field,
                "using_insert": using_insert_field,
                "load_strict_mode": strict_mode_field,
                "insert_batch_size": batch_size_field,
                "insert_concurrency": concurrency_field,
                "max_filter_ratio": filter_ratio_field,
            },
            "required": ["data_source_name", "table"],
        }