recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,188 @@
1
+ import os
2
+
3
+ try:
4
+ from recurvedata.pigeon.utils import fs
5
+ except ImportError:
6
+ pass
7
+
8
+ from recurvedata.core.translation import _l
9
+ from recurvedata.operators.transfer_operator.task import LoadTask
10
+ from recurvedata.utils.email_util import send_email
11
+
12
# Default HTML body for the outgoing email. It is used verbatim as the
# "default" of the "html" field in EmailLoadTask.config_schema(), so the
# markup (inline styles, wrapper divs, the blank line) must be preserved
# exactly as-is.
_default_html = """<div><includetail><div style="font:Verdana normal 14px;color:#000;">
<div style="position:relative;"><blockquote style="margin-Top: 0px; margin-Bottom: 0px; margin-Left: 0.5em">
<div class="FoxDiv20190108121908737768">
<div id="mailContentContainer" style=" font-size: 14px; padding: 0px; height: auto; min-height: auto ; ">
<div>Dear all,</div>
<div><br></div>

<div>————————</div>
<div>RecurveData Automatic Reporting</div>
</div>
</div></blockquote>
</div></div>"""
24
+
25
+
26
class EmailLoadTask(LoadTask):
    """Send the task's output file as an email attachment over SMTP.

    SMTP connection settings come from a ``mail``-type data source; message
    fields (subject, recipients, body, attachment name, compression) come
    from the rendered task config. The local output file is removed after a
    successful send.
    """

    ds_name_fields = ("data_source_name",)
    ds_types = ("mail",)
    worker_install_require = ["pigeon"]

    def execute_impl(self, *args, **kwargs):
        load_options = self.rendered_config
        ds = self.must_get_connection_by_name(load_options["data_source_name"])
        smtp_config = {
            "host": ds.host,
            "port": ds.port,
            "ssl": ds.extra.get("ssl", True),
            "user": ds.user,
            "password": ds.password,
            "timeout": ds.extra.get("timeout", 60),
        }

        remove_files = [self.filename]
        filename = load_options.get("filename")
        default_file_ext = ".csv"
        if filename and "." not in filename:
            # No extension configured: append the default extension to the
            # configured name (was a broken literal that discarded the name).
            filename = f"{filename}{default_file_ext}"
        if filename and self.filename:
            # Compress the local file and attach it under the requested name.
            uncompress_filename = filename
            if filename.endswith((".zip", ".gz")):
                uncompress_filename = ".".join(filename.split(".")[:-1])
            new_filename = os.path.join(os.path.dirname(self.filename), uncompress_filename)
            os.rename(self.filename, new_filename)
            compress_mode = load_options["compress_mode"]
            file_upload, ext = self.compress_file(filename=new_filename, compress_mode=compress_mode)
            if compress_mode != "None" and not load_options["filename"].endswith(ext):
                # Keep the attachment name in sync with the compression suffix,
                # mirroring FileBrowserLoadTask's remote_filename handling.
                filename = f"{filename}{ext}"

            files = {filename: file_upload}
            remove_files = [new_filename, file_upload]
        else:
            files = None

        ok = send_email(
            mail_to=self.parse_email_list(load_options["mail_to"]),
            subject=load_options["subject"],
            html=load_options["html"],
            cc=self.parse_email_list(load_options.get("cc")),
            bcc=self.parse_email_list(load_options.get("bcc")),
            files=files,
            mail_from=load_options["mail_from"],
            reply_to=load_options.get("reply_to"),
            smtp_config=smtp_config,
        )
        assert ok, "Failed to send email"
        # Only clean up after a successful send so a failed run keeps its files.
        fs.remove_files_safely(remove_files)

    @staticmethod
    def parse_email_list(obj, separator=";"):
        """Split a separator-delimited address string into a list.

        Returns None for empty/None input so optional fields pass through.
        """
        if not obj:
            return None
        return obj.split(separator)

    @classmethod
    def config_schema(cls):
        """Return the JSON schema describing this task's config fields."""
        schema = {
            "type": "object",
            "properties": {
                "data_source_name": {
                    "type": "string",
                    "title": _l("SMTP Server"),
                    "ui:field": "ProjectConnectionSelectorField",
                    "ui:options": {
                        "supportTypes": cls.ds_types,
                    },
                },
                "subject": {
                    "type": "string",
                    "title": _l("Email Subject"),
                    "description": _l("Subject line of the email"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "mail_to": {
                    "type": "string",
                    "title": _l("Recipients"),
                    "description": _l("Email recipients (separate multiple addresses with semicolons)"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "mail_from": {
                    "type": "string",
                    "title": _l("Sender Name"),
                    "default": "RecurveData SERVICE",
                    "description": _l("Display name that appears as the email sender"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "cc": {
                    "type": "string",
                    "title": _l("CC Recipients"),
                    "description": _l("Carbon copy recipients (separate multiple addresses with semicolons)"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "bcc": {
                    "type": "string",
                    "title": _l("BCC Recipients"),
                    "description": _l("Blind carbon copy recipients (separate multiple addresses with semicolons)"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "html": {
                    "type": "string",
                    "title": _l("Email Body"),
                    "description": _l("HTML content of the email body."),
                    "default": _default_html,
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "code",
                        "lang": "python",
                    },
                },
                "filename": {
                    "type": "string",
                    "title": _l("Attachment Name"),
                    "description": _l(
                        "Name of the email attachment. Supports template variables. Leave empty for no attachment. Default extension is .csv if none specified."
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "compress_mode": {
                    "type": "string",
                    "title": _l("Compression Method"),
                    "enum": ["None", "Gzip", "Zip"],
                    "enumNames": ["None", "Gzip", "Zip"],
                    "default": "None",
                    "description": _l("Compression method for attachments"),
                },
                "reply_to": {
                    "type": "string",
                    "title": _l("Reply-To Address"),
                    "description": _l("Email address that will receive replies to this email"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
            },
            "required": ["data_source_name", "subject", "mail_to", "mail_from", "html"],
        }
        return schema
@@ -0,0 +1,86 @@
1
+ try:
2
+ from recurvedata.pigeon.loader.csv_to_es import CSVToElasticSearchLoader
3
+ except ImportError:
4
+ pass
5
+
6
+ from recurvedata.core.translation import _l
7
+ from recurvedata.operators.transfer_operator.task import LoadTask
8
+ from recurvedata.utils import extract_dict
9
+
10
+
11
class ElasticSearchLoadTask(LoadTask):
    """Load the dumped CSV file into an Elasticsearch index via pigeon."""

    ds_name_fields = ("data_source_name",)
    ds_types = ("elasticsearch",)
    should_write_header = True
    worker_install_require = ["pigeon[elasticsearch]"]

    def execute_impl(self, *args, **kwargs):
        # Resolve the target connection, then forward only the loader-relevant
        # options from the rendered config.
        connection = self.must_get_connection_by_name(self.config["data_source_name"])
        loader_kwargs = extract_dict(self.rendered_config, keys=["index", "doc_type", "id_field", "generate_id"])
        loader_kwargs["connector"] = connection.connector
        loader_kwargs["filename"] = self.filename
        loader_kwargs["delete_file"] = True
        return CSVToElasticSearchLoader(**loader_kwargs).execute()

    @classmethod
    def config_schema(cls):
        """Return the JSON schema describing this task's config fields."""
        return {
            "type": "object",
            "properties": {
                "data_source_name": {
                    "type": "string",
                    "title": _l("Elasticsearch Data Source"),
                    "ui:field": "ProjectConnectionSelectorField",
                    "ui:options": {"supportTypes": cls.ds_types},
                },
                "index": {
                    "type": "string",
                    "title": _l("Elasticsearch Index"),
                    "description": _l("Name of the Elasticsearch index to load data into"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {"type": "plain"},
                },
                "doc_type": {
                    "type": "string",
                    "title": _l("Document Type"),
                    "description": _l("Type of document to create in Elasticsearch"),
                    "default": "_doc",
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {"type": "plain"},
                },
                "id_field": {
                    "type": "string",
                    "title": _l("Document ID Field"),
                    "description": _l(
                        "Field from the input data to use as the document ID. Leave empty to auto-generate IDs"
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {"type": "plain"},
                },
                "generate_id": {
                    "type": "boolean",
                    "title": _l("Generate Document IDs"),
                    "description": _l(
                        "Automatically generate unique document IDs based on record content. Takes precedence over ID Field if both are specified"
                    ),
                    "default": False,
                },
            },
            "required": ["data_source_name", "index"],
        }
@@ -0,0 +1,151 @@
1
+ import datetime
2
+ import logging
3
+ import mimetypes
4
+ import os
5
+ import urllib.parse
6
+
7
+ import requests
8
+
9
+ try:
10
+ from recurvedata.pigeon.utils import fs, trim_suffix
11
+ except ImportError:
12
+ pass
13
+
14
+ from recurvedata.operators.transfer_operator.task import LoadTask
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class FileBrowserLoadTask(LoadTask):
    """Upload the task's output file to a FileBrowser server, optionally compressed."""

    ds_name_fields = ("data_source_name",)
    ds_types = ("filebrowser",)
    should_write_header = True
    worker_install_require = ["requests", "pigeon"]

    def execute_impl(self, *args, **kwargs):
        conf = self.rendered_config
        ds = self.must_get_connection_by_name(conf["data_source_name"])
        file_to_upload = self.filename
        remote_filename = conf["filename"]

        cm = conf["compress_method"]
        if cm != "None":
            logger.info("compressing file using %s...", cm)
            if cm == "Gzip":
                ext = ".gz"
                file_to_upload = fs.gzip_compress(self.filename, using_cmd=True)
            elif cm == "Zip":
                ext = ".zip"
                # Name the archive member after the remote file, minus the suffix.
                arcname = trim_suffix(os.path.basename(remote_filename), ext)
                file_to_upload = fs.zip_compress(self.filename, using_cmd=False, arcname=arcname)
            else:
                # Guarded by the schema enum; defensive only.
                raise ValueError(f"compress method {cm} is not supported")

            # Keep the remote name consistent with the compression suffix.
            if not remote_filename.endswith(ext):
                remote_filename = f"{remote_filename}{ext}"

        client = FileBrowserClient(ds.host, ds.user, ds.password)

        remote_filename = os.path.join(conf["directory"], remote_filename)
        logger.info("uploading %s to %s", file_to_upload, remote_filename)
        client.upload(file_to_upload, remote_filename, override=True)
        # Remove both the raw dump and the (possibly distinct) compressed file.
        fs.remove_files_safely([self.filename, file_to_upload])

    @classmethod
    def config_schema(cls):
        """Return the JSON schema describing this task's config fields."""
        return {
            "type": "object",
            "properties": {
                "data_source_name": {
                    "type": "string",
                    "title": "FileBrowser Data Source",
                    "ui:field": "ProjectConnectionSelectorField",
                    "ui:options": {"supportTypes": cls.ds_types},
                },
                "directory": {
                    "type": "string",
                    "title": "Directory",
                    "description": "要上传到的文件夹",
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {"type": "plain"},
                },
                "filename": {
                    "type": "string",
                    "title": "Filename",
                    "description": "上传后的文件名,支持模板变量",
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {"type": "plain"},
                },
                "compress_method": {
                    "type": "string",
                    "title": "Compress Method",
                    "enum": ["None", "Gzip", "Zip"],
                    "enumNames": ["None", "Gzip", "Zip"],
                    "default": "None",
                    "description": "文件的压缩方式,默认不压缩。如果选择了压缩,会在文件名加上相应的后缀。",
                },
            },
            "required": ["data_source_name", "directory", "filename", "compress_method"],
        }
101
+
102
+
103
class FileBrowserClient(object):
    """Minimal REST client for a FileBrowser server (login + file upload)."""

    # Tokens are actually valid for 2 hours by default; keep only 1 hour here
    # as a safety margin before re-authenticating.
    TOKEN_AGE = datetime.timedelta(seconds=1 * 60 * 60)

    def __init__(self, host, username, password):
        self.host = host
        self.username = username
        self.password = password
        self._session = requests.Session()
        self._token = None
        # Epoch start: forces a login on first token access.
        self._token_expires_at = datetime.datetime.fromtimestamp(0)

    def _request(self, method, url, params=None, data=None, json=None, auth=True, **kwargs):
        """Issue an HTTP request against the server; raise on non-2xx status."""
        target = urllib.parse.urljoin(self.host, url)
        headers = {"X-Auth": self.token} if auth else {}
        headers.update(kwargs.pop("headers", {}))
        response = self._session.request(
            method, target, params=params, data=data, json=json, headers=headers, **kwargs
        )
        response.raise_for_status()
        return response

    @property
    def token(self):
        """Return a cached auth token, logging in again once it has expired."""
        if self._token is None or self._token_expires_at <= datetime.datetime.now():
            self._token = self.login()
            self._token_expires_at = datetime.datetime.now() + self.TOKEN_AGE
        return self._token

    def login(self):
        """Authenticate and return the raw token string from the response body."""
        credentials = {
            "username": self.username,
            "password": self.password,
            "recaptcha": "",
        }
        return self._request("POST", "/api/login", json=credentials, auth=False).text

    def upload(self, local_filename, remote_filename, override=True):
        """Stream a local file to the given remote path on the server."""
        headers = {}
        content_type, _ = mimetypes.guess_type(local_filename)
        if content_type:
            headers = {"Content-Type": content_type}
        params = {"override": override and "true" or "false"}
        url = f"/api/resources/{urllib.parse.quote(remote_filename)}"
        with open(local_filename, "rb") as f:
            self._request("POST", url, params=params, data=f, headers=headers)
@@ -0,0 +1,19 @@
1
+ import logging
2
+
3
+ from recurvedata.operators.transfer_operator.load_task_sftp import SFTPLoadTask
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class FTPLoadTask(SFTPLoadTask):
    """Load task targeting plain FTP connections; reuses the SFTP implementation."""

    ds_name_fields = ("data_source_name",)
    ds_types = ("ftp",)

    @staticmethod
    def ensure_directory_exists(ds, conf):
        """Best-effort: create ``conf["directory"]`` remotely when listing it fails."""
        connector = ds.connector
        directory = conf["directory"]
        try:
            connector.list_dir(directory)
        except OSError:
            # A failed listing most likely means the directory is missing.
            logger.warning("failed to list directory %s, maybe not exists, try to make it", directory)
            connector.makedir(directory)
@@ -0,0 +1,90 @@
1
+ import copy
2
+
3
+ try:
4
+ from recurvedata.pigeon.loader.csv_to_google_bigquery import CSVToGoogleBigqueryLoader
5
+ except ImportError:
6
+ pass
7
+
8
+ from recurvedata.core.translation import _l
9
+ from recurvedata.operators.transfer_operator import utils
10
+ from recurvedata.operators.transfer_operator.task import LoadTask
11
+
12
+
13
class GoogleBigqueryLoadTask(LoadTask):
    """Load a local CSV file into a Google BigQuery table."""

    ds_name_fields = ("google_bigquery_data_source_name",)
    ds_types = ("bigquery",)
    default_dumper_handler_options = {}
    worker_install_require = ["pigeon[google_bigquery]"]

    def execute_impl(self, *args, **kwargs):
        """Assemble loader options from the rendered config and run the CSV loader."""
        ds = self.must_get_connection_by_name(self.config["google_bigquery_data_source_name"])
        options = self.rendered_config.copy()
        # The connection name is resolved above; the loader does not accept it.
        options.pop("google_bigquery_data_source_name", None)
        options.update(
            filename=self.filename,
            google_bigquery_connector=ds.connector,
            delete_file=True,
            # The connection's "database" field maps to the BigQuery dataset.
            dataset=ds.data.get("database"),
        )
        loader = CSVToGoogleBigqueryLoader(**options)
        return loader.execute()

    @classmethod
    def config_schema(cls):
        """JSON schema describing this task's configuration form."""
        properties = {
            "google_bigquery_data_source_name": {
                "type": "string",
                "title": _l("BigQuery Connection"),
                "description": _l("Select the BigQuery connection to use"),
                "ui:field": "ProjectConnectionSelectorField",
                "ui:options": {
                    "supportTypes": cls.ds_types,
                },
            },
            "table": {
                "type": "string",
                "title": _l("Target Table"),
                "description": _l("Name of the table to load data into"),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {
                    "type": "plain",
                },
            },
            "create_table_ddl": {
                "type": "string",
                "title": _l("Table Creation SQL"),
                "description": _l("SQL statement to create the destination table if it doesn't exist"),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {
                    "type": "code",
                    "lang": "sql",
                    "sqlLang": "sql",
                },
            },
        }
        # Mix in the shared load options ("mode" etc.); deep-copied so the
        # shared template is never mutated.
        properties.update(copy.deepcopy(utils.LOAD_COMMON))
        # Deduplication is not supported by the BigQuery loader.
        for unsupported in ("dedup", "dedup_uniq_keys", "dedup_orderby"):
            properties.pop(unsupported, None)
        return {
            "type": "object",
            "properties": properties,
            "required": ["google_bigquery_data_source_name", "table", "mode"],
        }
@@ -0,0 +1,127 @@
1
+ import logging
2
+ import os
3
+
4
+ try:
5
+ from recurvedata.pigeon.connector.google_cloud_storage import GoogleCloudStorageConnector
6
+ except ImportError:
7
+ pass
8
+
9
+ from recurvedata.core.translation import _l
10
+ from recurvedata.operators.transfer_operator.task import LoadTask
11
+ from recurvedata.utils import extract_dict
12
+ from recurvedata.utils.files import is_file_empty, remove_files_safely
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class GoogleCloudStorageLoadTask(LoadTask):
    """Upload the dumped file to a Google Cloud Storage bucket, optionally compressed."""

    ds_name_fields = ("data_source_name",)
    ds_types = ("google_cloud_storage",)
    should_write_header = True
    worker_install_require = ["pigeon[google_bigquery]"]

    def execute_impl(self, *args, **kwargs):
        """Compress (if configured) and upload the local file, then clean up."""
        if is_file_empty(self.filename):
            logger.warning("file %s not exists or has no content, skip.", self.filename)
            return

        ds = self.must_get_connection_by_name(self.config["data_source_name"])
        options = self.rendered_config.copy()
        # A bucket set on the task config wins over the one on the connection.
        bucket = options.get("bucket_name") or ds.extra.get("bucket")

        connector_keys = ["key_path", "key_dict", "project", "proxies", "bucket_name"]
        gcs = GoogleCloudStorageConnector(**extract_dict(ds.extra, connector_keys))

        # Compress the file before uploading. When the key names a concrete
        # object (no trailing slash), compress into a file named after it so
        # the archive member/object name matches the key.
        compress_mode = options["compress_mode"]
        key = options["key"]
        if compress_mode != "None" and not key.endswith("/"):
            target = os.path.join(os.path.dirname(self.filename), os.path.basename(key))
        else:
            target = None
        upload_file, ext = self.compress_file(
            filename=self.filename, target_filename=target, compress_mode=compress_mode
        )
        # Append the compression suffix to the key unless it is a folder or
        # already carries the suffix.
        if compress_mode != "None" and not key.endswith(("/", ext)):
            key = f"{key}{ext}"

        # Translate the key into the upload() key/folder parameters.
        upload_conf = {
            "bucket_name": bucket,
            "filename": upload_file,
            "overwrite": options["overwrite"],
        }
        if key.endswith("/"):
            upload_conf["folder"] = key
        elif key:
            upload_conf["key"] = key
        else:
            upload_conf["key"] = os.path.basename(upload_file)

        if ext == ".zip":
            # NOTE(review): presumably compress_file returns the zip's name
            # without the ".zip" suffix, hence the append — confirm upstream.
            upload_conf["filename"] += ext

        logger.info("uploading...")
        logger.info(upload_conf)
        gcs.upload(**upload_conf)
        return remove_files_safely([self.filename, upload_file])

    @classmethod
    def config_schema(cls):
        """JSON schema describing this task's configuration form."""
        properties = {
            "data_source_name": {
                "type": "string",
                "title": _l("GCS Connection"),
                "ui:field": "ProjectConnectionSelectorField",
                "ui:options": {
                    "supportTypes": cls.ds_types,
                },
            },
            "bucket_name": {
                "type": "string",
                "title": _l("GCS Bucket Name"),
                "description": _l(
                    "The name of the Google Cloud Storage bucket to upload files to. This is required if not already configured in the data source connection."
                ),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {
                    "type": "plain",
                },
            },
            "key": {
                "type": "string",
                "title": _l("Upload Path"),
                "description": _l(
                    "Target path in the bucket. Can be an object key or folder path (ending with /). "
                    "Supports Jinja templating."
                ),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {
                    "type": "plain",
                },
            },
            "compress_mode": {
                "type": "string",
                "title": _l("Compression Method"),
                "description": _l("Compress file before uploading using specified method"),
                "enum": ["None", "Gzip", "Zip"],
                "enumNames": ["None", "Gzip", "Zip"],
                "default": "None",
            },
            "overwrite": {
                "type": "boolean",
                "title": _l("Overwrite Existing"),
                "description": _l("Whether to overwrite if target object already exists"),
                "default": True,
            },
        }
        return {
            "type": "object",
            "properties": properties,
            "required": ["compress_mode", "data_source_name", "key"],
        }