recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from recurvedata.pigeon.utils import fs
|
|
5
|
+
except ImportError:
|
|
6
|
+
pass
|
|
7
|
+
|
|
8
|
+
from recurvedata.core.translation import _l
|
|
9
|
+
from recurvedata.operators.transfer_operator.task import LoadTask
|
|
10
|
+
from recurvedata.utils.email_util import send_email
|
|
11
|
+
|
|
12
|
+
_default_html = """<div><includetail><div style="font:Verdana normal 14px;color:#000;">
|
|
13
|
+
<div style="position:relative;"><blockquote style="margin-Top: 0px; margin-Bottom: 0px; margin-Left: 0.5em">
|
|
14
|
+
<div class="FoxDiv20190108121908737768">
|
|
15
|
+
<div id="mailContentContainer" style=" font-size: 14px; padding: 0px; height: auto; min-height: auto ; ">
|
|
16
|
+
<div>Dear all,</div>
|
|
17
|
+
<div><br></div>
|
|
18
|
+
|
|
19
|
+
<div>————————</div>
|
|
20
|
+
<div>RecurveData Automatic Reporting</div>
|
|
21
|
+
</div>
|
|
22
|
+
</div></blockquote>
|
|
23
|
+
</div></div>"""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class EmailLoadTask(LoadTask):
    """Send the dumped file as an email attachment via an SMTP connection.

    SMTP server settings are resolved from the connection named by the
    ``data_source_name`` config field; subject, body, recipients and the
    optional attachment name come from the (template-rendered) task config.
    """

    ds_name_fields = ("data_source_name",)
    ds_types = ("mail",)
    worker_install_require = ["pigeon"]

    def execute_impl(self, *args, **kwargs):
        """Build the SMTP config, optionally compress the attachment, send the mail.

        Raises:
            AssertionError: if ``send_email`` reports a failure.
        """
        load_options = self.rendered_config
        ds = self.must_get_connection_by_name(load_options["data_source_name"])
        smtp_config = {
            "host": ds.host,
            "port": ds.port,
            # ssl/timeout are optional connection extras with sensible defaults
            "ssl": ds.extra.get("ssl", True),
            "user": ds.user,
            "password": ds.password,
            "timeout": ds.extra.get("timeout", 60),
        }

        remove_files = [self.filename]
        filename = load_options.get("filename")
        default_file_ext = ".csv"
        if filename and "." not in filename:
            # No extension supplied: append the default instead of discarding
            # the user-configured attachment name.
            filename = f"{filename}{default_file_ext}"
        if filename and self.filename:
            # Compress the attachment file if requested.
            uncompress_filename = filename
            if filename.endswith((".zip", ".gz")):
                # Strip the archive suffix; compress_file re-adds it below.
                uncompress_filename = ".".join(filename.split(".")[:-1])
            new_filename = os.path.join(os.path.dirname(self.filename), uncompress_filename)
            os.rename(self.filename, new_filename)
            compress_mode = load_options["compress_mode"]
            file_upload, ext = self.compress_file(filename=new_filename, compress_mode=compress_mode)
            if compress_mode != "None" and not load_options["filename"].endswith(ext):
                # Keep the attachment name in sync with the compression suffix.
                filename = f"{filename}{ext}"

            files = {filename: file_upload}
            remove_files = [new_filename, file_upload]
        else:
            # No attachment requested (or nothing was produced upstream).
            files = None

        ok = send_email(
            mail_to=self.parse_email_list(load_options["mail_to"]),
            subject=load_options["subject"],
            html=load_options["html"],
            cc=self.parse_email_list(load_options.get("cc")),
            bcc=self.parse_email_list(load_options.get("bcc")),
            files=files,
            mail_from=load_options["mail_from"],
            reply_to=load_options.get("reply_to"),
            smtp_config=smtp_config,
        )
        assert ok, "Failed to send email"
        fs.remove_files_safely(remove_files)

    @staticmethod
    def parse_email_list(obj, separator=";"):
        """Split a separator-delimited address string into a list.

        Returns ``None`` for empty/falsy input so optional fields (cc/bcc)
        pass through unchanged to ``send_email``.
        """
        if not obj:
            return None
        return obj.split(separator)

    @classmethod
    def config_schema(cls):
        """Return the JSON schema describing this task's configuration form."""
        schema = {
            "type": "object",
            "properties": {
                "data_source_name": {
                    "type": "string",
                    "title": _l("SMTP Server"),
                    "ui:field": "ProjectConnectionSelectorField",
                    "ui:options": {
                        "supportTypes": cls.ds_types,
                    },
                },
                "subject": {
                    "type": "string",
                    "title": _l("Email Subject"),
                    "description": _l("Subject line of the email"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "mail_to": {
                    "type": "string",
                    "title": _l("Recipients"),
                    "description": _l("Email recipients (separate multiple addresses with semicolons)"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "mail_from": {
                    "type": "string",
                    "title": _l("Sender Name"),
                    "default": "RecurveData SERVICE",
                    "description": _l("Display name that appears as the email sender"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "cc": {
                    "type": "string",
                    "title": _l("CC Recipients"),
                    "description": _l("Carbon copy recipients (separate multiple addresses with semicolons)"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "bcc": {
                    "type": "string",
                    "title": _l("BCC Recipients"),
                    "description": _l("Blind carbon copy recipients (separate multiple addresses with semicolons)"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "html": {
                    "type": "string",
                    "title": _l("Email Body"),
                    "description": _l("HTML content of the email body."),
                    "default": _default_html,
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "code",
                        "lang": "python",
                    },
                },
                "filename": {
                    "type": "string",
                    "title": _l("Attachment Name"),
                    "description": _l(
                        "Name of the email attachment. Supports template variables. Leave empty for no attachment. Default extension is .csv if none specified."
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "compress_mode": {
                    "type": "string",
                    "title": _l("Compression Method"),
                    "enum": ["None", "Gzip", "Zip"],
                    "enumNames": ["None", "Gzip", "Zip"],
                    "default": "None",
                    "description": _l("Compression method for attachments"),
                },
                "reply_to": {
                    "type": "string",
                    "title": _l("Reply-To Address"),
                    "description": _l("Email address that will receive replies to this email"),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
            },
            "required": ["data_source_name", "subject", "mail_to", "mail_from", "html"],
        }
        return schema
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
try:
|
|
2
|
+
from recurvedata.pigeon.loader.csv_to_es import CSVToElasticSearchLoader
|
|
3
|
+
except ImportError:
|
|
4
|
+
pass
|
|
5
|
+
|
|
6
|
+
from recurvedata.core.translation import _l
|
|
7
|
+
from recurvedata.operators.transfer_operator.task import LoadTask
|
|
8
|
+
from recurvedata.utils import extract_dict
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ElasticSearchLoadTask(LoadTask):
    """Load a previously dumped CSV file into an Elasticsearch index.

    Delegates the actual bulk loading to ``CSVToElasticSearchLoader`` and
    deletes the source file once loading completes.
    """

    ds_name_fields = ("data_source_name",)
    ds_types = ("elasticsearch",)
    should_write_header = True
    worker_install_require = ["pigeon[elasticsearch]"]

    def execute_impl(self, *args, **kwargs):
        """Run the CSV-to-Elasticsearch loader and return its result."""
        connection = self.must_get_connection_by_name(self.config["data_source_name"])
        loader_kwargs = extract_dict(self.rendered_config, keys=["index", "doc_type", "id_field", "generate_id"])
        loader_kwargs["connector"] = connection.connector
        loader_kwargs["filename"] = self.filename
        loader_kwargs["delete_file"] = True
        return CSVToElasticSearchLoader(**loader_kwargs).execute()

    @classmethod
    def config_schema(cls):
        """Return the JSON schema for this task's configuration form."""
        properties = {
            "data_source_name": {
                "type": "string",
                "title": _l("Elasticsearch Data Source"),
                "ui:field": "ProjectConnectionSelectorField",
                "ui:options": {
                    "supportTypes": cls.ds_types,
                },
            },
            "index": {
                "type": "string",
                "title": _l("Elasticsearch Index"),
                "description": _l("Name of the Elasticsearch index to load data into"),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {
                    "type": "plain",
                },
            },
            "doc_type": {
                "type": "string",
                "title": _l("Document Type"),
                "description": _l("Type of document to create in Elasticsearch"),
                "default": "_doc",
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {
                    "type": "plain",
                },
            },
            "id_field": {
                "type": "string",
                "title": _l("Document ID Field"),
                "description": _l("Field from the input data to use as the document ID. Leave empty to auto-generate IDs"),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {
                    "type": "plain",
                },
            },
            "generate_id": {
                "type": "boolean",
                "title": _l("Generate Document IDs"),
                "description": _l("Automatically generate unique document IDs based on record content. Takes precedence over ID Field if both are specified"),
                "default": False,
            },
        }
        return {
            "type": "object",
            "properties": properties,
            "required": ["data_source_name", "index"],
        }
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import logging
|
|
3
|
+
import mimetypes
|
|
4
|
+
import os
|
|
5
|
+
import urllib.parse
|
|
6
|
+
|
|
7
|
+
import requests
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from recurvedata.pigeon.utils import fs, trim_suffix
|
|
11
|
+
except ImportError:
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
from recurvedata.operators.transfer_operator.task import LoadTask
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class FileBrowserLoadTask(LoadTask):
    """Upload the dumped file (optionally compressed) to a FileBrowser server.

    The target server and credentials come from the connection named by the
    ``data_source_name`` config field; the destination directory/filename and
    the compression method come from the (template-rendered) task config.
    """

    ds_name_fields = ("data_source_name",)
    ds_types = ("filebrowser",)
    should_write_header = True
    worker_install_require = ["requests", "pigeon"]

    def execute_impl(self, *args, **kwargs):
        """Optionally compress the local file, then upload it and clean up."""
        conf = self.rendered_config
        ds = self.must_get_connection_by_name(conf["data_source_name"])
        upload_path = self.filename
        target_name = conf["filename"]

        method = conf["compress_method"]
        if method != "None":
            logger.info("compressing file using %s...", method)
            if method == "Gzip":
                suffix = ".gz"
                upload_path = fs.gzip_compress(self.filename, using_cmd=True)
            elif method == "Zip":
                suffix = ".zip"
                # Name the archive member after the target file, minus the suffix.
                arcname = trim_suffix(os.path.basename(target_name), suffix)
                upload_path = fs.zip_compress(self.filename, using_cmd=False, arcname=arcname)
            else:
                # Defensive: the schema enum should make this unreachable.
                raise ValueError(f"compress method {method} is not supported")

            if not target_name.endswith(suffix):
                target_name = f"{target_name}{suffix}"

        client = FileBrowserClient(ds.host, ds.user, ds.password)

        target_name = os.path.join(conf["directory"], target_name)
        logger.info("uploading %s to %s", upload_path, target_name)
        client.upload(upload_path, target_name, override=True)
        fs.remove_files_safely([self.filename, upload_path])

    @classmethod
    def config_schema(cls):
        """Return the JSON schema describing this task's configuration form."""
        schema = {
            "type": "object",
            "properties": {
                "data_source_name": {
                    "type": "string",
                    "title": "FileBrowser Data Source",
                    "ui:field": "ProjectConnectionSelectorField",
                    "ui:options": {
                        "supportTypes": cls.ds_types,
                    },
                },
                "directory": {
                    "type": "string",
                    "title": "Directory",
                    "description": "要上传到的文件夹",
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "filename": {
                    "type": "string",
                    "title": "Filename",
                    "description": "上传后的文件名,支持模板变量",
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "compress_method": {
                    "type": "string",
                    "title": "Compress Method",
                    "enum": ["None", "Gzip", "Zip"],
                    "enumNames": ["None", "Gzip", "Zip"],
                    "default": "None",
                    "description": "文件的压缩方式,默认不压缩。如果选择了压缩,会在文件名加上相应的后缀。",
                },
            },
            "required": ["data_source_name", "directory", "filename", "compress_method"],
        }
        return schema
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class FileBrowserClient(object):
    """Minimal REST client for a FileBrowser server: login plus file upload.

    Tokens are cached and refreshed lazily via the ``token`` property.
    """

    # The server's tokens are actually valid for 2 hours by default; we only
    # trust them for 1 hour to leave plenty of margin.
    TOKEN_AGE = datetime.timedelta(hours=1)

    def __init__(self, host, username, password):
        self.host = host
        self.username = username
        self.password = password
        self._session = requests.Session()
        self._token = None
        # Epoch start => the first token access always triggers a login.
        self._token_expires_at = datetime.datetime.fromtimestamp(0)

    def _request(self, method, url, params=None, data=None, json=None, auth=True, **kwargs):
        """Issue an HTTP request against the server; raises on non-2xx status."""
        target = urllib.parse.urljoin(self.host, url)
        headers = {"X-Auth": self.token} if auth else {}
        headers.update(kwargs.pop("headers", {}))
        response = self._session.request(
            method, target, params=params, data=data, json=json, headers=headers, **kwargs
        )
        response.raise_for_status()
        return response

    @property
    def token(self):
        """Return a cached auth token, logging in again once it has expired."""
        if self._token is None or self._token_expires_at <= datetime.datetime.now():
            self._token = self.login()
            self._token_expires_at = datetime.datetime.now() + self.TOKEN_AGE
        return self._token

    def login(self):
        """Authenticate against /api/login and return the raw token text."""
        payload = {
            "username": self.username,
            "password": self.password,
            "recaptcha": "",
        }
        response = self._request("POST", "/api/login", json=payload, auth=False)
        return response.text

    def upload(self, local_filename, remote_filename, override=True):
        """Stream a local file to the given remote path on the server."""
        content_type, _ = mimetypes.guess_type(local_filename)
        headers = {"Content-Type": content_type} if content_type else {}
        params = {"override": "true" if override else "false"}
        url = f"/api/resources/{urllib.parse.quote(remote_filename)}"
        with open(local_filename, "rb") as handle:
            self._request("POST", url, params=params, data=handle, headers=headers)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from recurvedata.operators.transfer_operator.load_task_sftp import SFTPLoadTask
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FTPLoadTask(SFTPLoadTask):
    """Load task that uploads files to a plain FTP server.

    Reuses the SFTP load flow; only the connection type and the
    directory-preparation step differ.
    """

    ds_name_fields = ("data_source_name",)
    ds_types = ("ftp",)

    @staticmethod
    def ensure_directory_exists(ds, conf):
        """Create the target directory on the FTP server if listing it fails."""
        connector = ds.connector
        target = conf["directory"]
        try:
            connector.list_dir(target)
        except OSError:
            # A failed listing most likely means the directory is missing.
            logger.warning("failed to list directory %s, maybe not exists, try to make it", target)
            connector.makedir(target)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from recurvedata.pigeon.loader.csv_to_google_bigquery import CSVToGoogleBigqueryLoader
|
|
5
|
+
except ImportError:
|
|
6
|
+
pass
|
|
7
|
+
|
|
8
|
+
from recurvedata.core.translation import _l
|
|
9
|
+
from recurvedata.operators.transfer_operator import utils
|
|
10
|
+
from recurvedata.operators.transfer_operator.task import LoadTask
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GoogleBigqueryLoadTask(LoadTask):
    """Load a local CSV file into a Google BigQuery table."""

    ds_name_fields = ("google_bigquery_data_source_name",)
    ds_types = ("bigquery",)
    default_dumper_handler_options = {}
    worker_install_require = ["pigeon[google_bigquery]"]

    def execute_impl(self, *args, **kwargs):
        """Resolve the BigQuery connection and run the CSV-to-BigQuery loader."""
        ds = self.must_get_connection_by_name(self.config["google_bigquery_data_source_name"])
        options = self.rendered_config.copy()
        # The connection name is task metadata, not a loader option.
        options.pop("google_bigquery_data_source_name", None)
        # Loader options derived from the task context rather than the config.
        options["filename"] = self.filename
        options["google_bigquery_connector"] = ds.connector
        options["delete_file"] = True
        options["dataset"] = ds.data.get("database")
        return CSVToGoogleBigqueryLoader(**options).execute()

    @classmethod
    def config_schema(cls):
        """Build the JSON schema describing this task's configuration form."""
        connection_field = {
            "type": "string",
            "title": _l("BigQuery Connection"),
            "description": _l("Select the BigQuery connection to use"),
            "ui:field": "ProjectConnectionSelectorField",
            "ui:options": {
                "supportTypes": cls.ds_types,
            },
        }
        table_field = {
            "type": "string",
            "title": _l("Target Table"),
            "description": _l("Name of the table to load data into"),
            "ui:field": "CodeEditorWithReferencesField",
            "ui:options": {
                "type": "plain",
            },
        }
        ddl_field = {
            "type": "string",
            "title": _l("Table Creation SQL"),
            "description": _l("SQL statement to create the destination table if it doesn't exist"),
            "ui:field": "CodeEditorWithReferencesField",
            "ui:options": {
                "type": "code",
                "lang": "sql",
                "sqlLang": "sql",
            },
        }
        properties = {
            "google_bigquery_data_source_name": connection_field,
            "table": table_field,
            "create_table_ddl": ddl_field,
        }
        # Mix in the shared load options, then drop the dedup ones that this
        # load path does not support.
        properties.update(copy.deepcopy(utils.LOAD_COMMON))
        for unsupported in ("dedup", "dedup_uniq_keys", "dedup_orderby"):
            properties.pop(unsupported, None)
        return {
            "type": "object",
            "properties": properties,
            "required": ["google_bigquery_data_source_name", "table", "mode"],
        }
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
from recurvedata.pigeon.connector.google_cloud_storage import GoogleCloudStorageConnector
|
|
6
|
+
except ImportError:
|
|
7
|
+
pass
|
|
8
|
+
|
|
9
|
+
from recurvedata.core.translation import _l
|
|
10
|
+
from recurvedata.operators.transfer_operator.task import LoadTask
|
|
11
|
+
from recurvedata.utils import extract_dict
|
|
12
|
+
from recurvedata.utils.files import is_file_empty, remove_files_safely
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class GoogleCloudStorageLoadTask(LoadTask):
    """Load task that uploads the produced file to Google Cloud Storage.

    The target bucket comes from the task config (``bucket_name``) or, as a
    fallback, from the connection's extra settings. The ``key`` field selects
    either an object key or a folder (trailing ``/``).
    """

    ds_name_fields = ("data_source_name",)
    ds_types = ("google_cloud_storage",)
    should_write_header = True
    # NOTE(review): the extra is named "google_bigquery" although this is a GCS
    # task — presumably the GCS connector ships in that extra; confirm.
    worker_install_require = ["pigeon[google_bigquery]"]

    def execute_impl(self, *args, **kwargs):
        """Optionally compress the dumped file, upload it to GCS, then clean up."""
        if is_file_empty(self.filename):
            logger.warning("file %s not exists or has no content, skip.", self.filename)
            return

        ds = self.must_get_connection_by_name(self.config["data_source_name"])

        load_options = self.rendered_config.copy()
        # Config-level bucket wins over the bucket stored on the connection.
        ds_extra_bucket = ds.extra.get("bucket")
        config_bucket = load_options.get("bucket_name")
        bucket_upload = config_bucket if config_bucket else ds_extra_bucket

        opt_keys = ["key_path", "key_dict", "project", "proxies", "bucket_name"]
        gcs = GoogleCloudStorageConnector(**extract_dict(ds.extra, opt_keys))

        # File compression. When compressing to a concrete object key (not a
        # folder), compress into a file named after the key so the uploaded
        # object name matches.
        compress_mode = load_options["compress_mode"]
        if compress_mode != "None" and not load_options["key"].endswith(("/",)):
            target_filename = os.path.join(os.path.dirname(self.filename), os.path.basename(load_options["key"]))
        else:
            target_filename = None
        file_upload, ext = self.compress_file(
            filename=self.filename, target_filename=target_filename, compress_mode=compress_mode
        )
        # Append the compression suffix to the key unless it is a folder or
        # already carries the suffix.
        if compress_mode != "None" and not load_options["key"].endswith(("/", ext)):
            load_options["key"] = f"{load_options['key']}{ext}"

        # Derive the `key`/`folder` arguments expected by the upload method
        # from the content of `key`.
        upload_conf = {
            "bucket_name": bucket_upload,
            "filename": file_upload,
            "overwrite": load_options["overwrite"],
        }
        if load_options["key"].endswith("/"):
            upload_conf.update({"folder": load_options["key"]})
        elif load_options["key"]:
            upload_conf.update({"key": load_options["key"]})
        else:
            # Empty key: fall back to the local file's basename as object key.
            upload_conf.update({"key": os.path.basename(file_upload)})

        # NOTE(review): for zip, compress_file appears to return the path
        # without the ".zip" suffix, hence the manual append — confirm against
        # compress_file's implementation.
        if ext == ".zip":
            upload_conf["filename"] += ext

        logger.info("uploading...")
        logger.info(upload_conf)
        gcs.upload(**upload_conf)
        # Remove both the original dump and the compressed artifact.
        return remove_files_safely([self.filename, file_upload])

    @classmethod
    def config_schema(cls):
        """Build the JSON schema describing this task's configuration form."""
        # get_choices_by_type = cls.get_connection_names_by_type
        # dss = get_choices_by_type(cls.ds_types)
        schema = {
            "type": "object",
            "properties": {
                "data_source_name": {
                    "type": "string",
                    "title": _l("GCS Connection"),
                    "ui:field": "ProjectConnectionSelectorField",
                    "ui:options": {
                        "supportTypes": cls.ds_types,
                    },
                    # 'default': cls.first_or_default(dss, ''),
                },
                "bucket_name": {
                    "type": "string",
                    "title": _l("GCS Bucket Name"),
                    "description": _l(
                        "The name of the Google Cloud Storage bucket to upload files to. This is required if not already configured in the data source connection."
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "key": {
                    "type": "string",
                    "title": _l("Upload Path"),
                    "description": _l(
                        "Target path in the bucket. Can be an object key or folder path (ending with /). "
                        "Supports Jinja templating."
                    ),
                    "ui:field": "CodeEditorWithReferencesField",
                    "ui:options": {
                        "type": "plain",
                    },
                },
                "compress_mode": {
                    "type": "string",
                    "title": _l("Compression Method"),
                    "description": _l("Compress file before uploading using specified method"),
                    "enum": ["None", "Gzip", "Zip"],
                    "enumNames": ["None", "Gzip", "Zip"],
                    "default": "None",
                },
                "overwrite": {
                    "type": "boolean",
                    "title": _l("Overwrite Existing"),
                    "description": _l("Whether to overwrite if target object already exists"),
                    "default": True,
                },
            },
            "required": ["compress_mode", "data_source_name", "key"],
        }
        return schema
|