recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of recurvedata-lib might be problematic.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
recurvedata/operators/transfer_operator/load_task_redshift.py
@@ -0,0 +1,95 @@
+import copy
+
+try:
+    from recurvedata.pigeon.loader.csv_to_redshift import CSVToRedshiftLoader
+except ImportError:
+    pass
+
+from recurvedata.core.translation import _l
+from recurvedata.operators.transfer_operator import utils
+from recurvedata.operators.transfer_operator.task import LoadTask
+
+
+class RedshiftLoadTask(LoadTask):
+    ds_name_fields = ("redshift_data_source_name",)
+    ds_types = ("redshift",)
+    worker_install_require = ["pigeon[redshift]"]
+
+    def execute_impl(self, *args, **kwargs):
+        redshift_ds = self.must_get_connection_by_name(self.config["redshift_data_source_name"])
+
+        load_options = self.rendered_config.copy()
+        for k in ["redshift_data_source_name"]:
+            load_options.pop(k, None)
+
+        load_options.update(
+            {
+                "filename": self.filename,
+                "redshift_connector": redshift_ds.connector,
+                "delete_file": True,
+                "database": redshift_ds.database,
+                "schema": redshift_ds.data.get("schema"),
+            }
+        )
+        loader = CSVToRedshiftLoader(**load_options)
+        return loader.execute()
+
+    @classmethod
+    def config_schema(cls):
+        # get_choices_by_type = cls.get_connection_names_by_type
+        # dss = get_choices_by_type(cls.ds_types)
+        schema = {
+            "type": "object",
+            "properties": {
+                "redshift_data_source_name": {
+                    "type": "string",
+                    "title": _l("Redshift Connection"),
+                    "ui:field": "ProjectConnectionSelectorField",
+                    "ui:options": {
+                        "supportTypes": cls.ds_types,
+                    },
+                    # 'default': cls.first_or_default(dss, ''),
+                },
+                # "database": {
+                #     "type": "string",
+                #     "title": "Database",
+                #     "ui:field": "CodeEditorWithReferencesField",
+                #     "ui:options": {
+                #         "type": "plain",
+                #     },
+                # },
+                # "schema": {
+                #     "type": "string",
+                #     "title": "Schema",
+                #     "default": "public",
+                #     "ui:field": "CodeEditorWithReferencesField",
+                #     "ui:options": {
+                #         "type": "plain",
+                #     },
+                # },
+                "table": {
+                    "type": "string",
+                    "title": _l("Target Table"),
+                    "description": _l("Name of the table to load data into"),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+                "create_table_ddl": {
+                    "type": "string",
+                    "title": _l("Table Creation SQL"),
+                    "description": _l("SQL statement to create the target table if it doesn't exist"),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "code",
+                        "lang": "sql",
+                        "sqlLang": "redshift",
+                    },
+                },
+            },
+            "required": ["redshift_data_source_name", "table", "mode"],
+        }
+        properties_schema = schema["properties"]
+        properties_schema.update(copy.deepcopy(utils.LOAD_COMMON))
+        return schema
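For orientation, a minimal task configuration satisfying the schema above might look like the sketch below. The connection name, table, and DDL are placeholders, and the exact values accepted for the required "mode" key come from utils.LOAD_COMMON, which is outside this hunk.

    config = {
        "redshift_data_source_name": "warehouse_redshift",  # placeholder connection name
        "table": "daily_orders",
        "create_table_ddl": "CREATE TABLE IF NOT EXISTS daily_orders (id INT, amount DECIMAL(10, 2))",
        "mode": "OVERWRITE",  # assumed value; the allowed modes are defined in utils.LOAD_COMMON
    }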
recurvedata/operators/transfer_operator/load_task_s3.py
@@ -0,0 +1,150 @@
+import json
+import logging
+import os
+
+import jsonschema
+
+try:
+    from recurvedata.pigeon.connector import new_s3_connector
+    from recurvedata.pigeon.utils import fs
+except ImportError:
+    pass
+
+from recurvedata.core.translation import _l
+from recurvedata.operators.transfer_operator.task import LoadTask
+
+logger = logging.getLogger(__name__)
+
+
+class S3LoadTask(LoadTask):
+    ds_name_fields = ("data_source_name",)
+    ds_types = ("s3",)
+    should_write_header = True
+    worker_install_require = ["pigeon"]
+
+    def execute_impl(self, *args, **kwargs):
+        if fs.is_file_empty(self.filename):
+            logger.warning("file %s does not exist or is empty, skipping.", self.filename)
+            return
+
+        s3_ds = self.must_get_connection_by_name(self.config["data_source_name"])
+
+        load_options = self.rendered_config.copy()
+        ds_extra_bucket = s3_ds.extra.get("bucket")
+        config_bucket = load_options.get("bucket_name")
+        bucket_upload = config_bucket if config_bucket else ds_extra_bucket
+
+        # compress the file if requested
+        compress_mode = load_options["compress_mode"]
+        file_upload, ext = self.compress_file(filename=self.filename, compress_mode=compress_mode)
+        if compress_mode != "None" and not load_options["key"].endswith(("/", ext)):
+            load_options["key"] = f"{load_options['key']}{ext}"
+
+        # S3 connector configuration
+        s3_conf = s3_ds.extra.copy()
+        if load_options.get("proxies"):
+            s3_conf["proxies"] = json.loads(load_options["proxies"])
+
+        # create the connector; unless auto bucket creation is enabled, a missing bucket is an error
+        s3 = new_s3_connector(conf=s3_conf)
+        if not load_options.get("auto_create_bucket") and config_bucket:
+            if not s3.has_bucket(bucket_name=config_bucket):
+                raise ValueError("Bucket does not exist")
+
+        # derive the key/folder arguments that upload() expects from the configured key
+        upload_conf = {
+            "bucket_name": bucket_upload,
+            "filename": file_upload,
+            "overwrite": load_options["overwrite"],
+        }
+        if load_options["key"].endswith("/"):
+            upload_conf.update({"folder": load_options["key"]})
+        elif load_options["key"]:
+            upload_conf.update({"key": load_options["key"]})
+        else:
+            upload_conf.update({"key": os.path.basename(file_upload)})
+
+        logger.info("uploading ...")
+        s3.upload(**upload_conf)
+        return fs.remove_files_safely([self.filename, file_upload])
+
+    @classmethod
+    def validate(cls, configuration):
+        config = super().validate(configuration)
+
+        if not config.get("bucket_name"):
+            s3_ds = cls.must_get_connection_by_name(configuration["data_source_name"])
+            if not s3_ds.extra.get("bucket"):
+                raise jsonschema.ValidationError(message="Unknown Bucket", path=("bucket_name",))
+        return config
+
+    @classmethod
+    def config_schema(cls):
+        # get_choices_by_type = cls.get_connection_names_by_type
+        # dss = get_choices_by_type(cls.ds_types)
+        schema = {
+            "type": "object",
+            "properties": {
+                "data_source_name": {
+                    "type": "string",
+                    "title": _l("S3 Connection"),
+                    "ui:field": "ProjectConnectionSelectorField",
+                    "ui:options": {
+                        "supportTypes": cls.ds_types,
+                    },
+                    # 'default': cls.first_or_default(dss, ''),
+                },
+                "bucket_name": {
+                    "type": "string",
+                    "title": _l("Bucket"),
+                    "description": _l("S3 bucket name, required if not set in data source"),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+                # "auto_create_bucket": {
+                #     "type": "boolean",
+                #     "title": "Auto Create Bucket",
+                #     "default": False,
+                #     "description": "Whether to create the bucket as named if it does not exist; see https://amzn.to/2HL8VDX for naming rules",
+                # },
+                "key": {
+                    "type": "string",
+                    "title": _l("Upload Path"),
+                    "description": _l(
+                        "Target path in the bucket. Can be an object key or folder path (ending with /). "
+                        "Supports Jinja templating."
+                    ),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+                "proxies": {
+                    "type": "string",
+                    "title": _l("Proxies"),
+                    "description": _l('HTTP/HTTPS proxy to use. Format: {"https": "http://example.com:3128"}'),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+                "compress_mode": {
+                    "type": "string",
+                    "title": _l("Compression Method"),
+                    "description": _l("Compress file before uploading using specified method"),
+                    "enum": ["None", "Gzip", "Zip"],
+                    "enumNames": ["None", "Gzip", "Zip"],
+                    "default": "None",
+                },
+                "overwrite": {
+                    "type": "boolean",
+                    "title": _l("Overwrite Existing"),
+                    "description": _l("Whether to overwrite if target object already exists"),
+                    "default": True,
+                },
+            },
+            "required": ["compress_mode", "data_source_name"],
+        }
+        return schema
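The branch that builds upload_conf decides whether upload() receives a folder or a key. A standalone sketch of the same rule, for illustration only:

    import os

    def resolve_upload_target(key: str, local_file: str) -> dict:
        # trailing "/" means upload into a folder; an empty key falls back to the local basename
        if key.endswith("/"):
            return {"folder": key}
        if key:
            return {"key": key}
        return {"key": os.path.basename(local_file)}

    assert resolve_upload_target("exports/", "/tmp/data.csv.gz") == {"folder": "exports/"}
    assert resolve_upload_target("exports/data.csv.gz", "/tmp/data.csv.gz") == {"key": "exports/data.csv.gz"}
    assert resolve_upload_target("", "/tmp/data.csv.gz") == {"key": "data.csv.gz"}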
recurvedata/operators/transfer_operator/load_task_sftp.py
@@ -0,0 +1,90 @@
+import logging
+import os
+
+from recurvedata.core.translation import _l
+from recurvedata.operators.transfer_operator.task import LoadTask
+from recurvedata.utils.files import remove_files_safely
+
+logger = logging.getLogger(__name__)
+
+
+class SFTPLoadTask(LoadTask):
+    ds_name_fields = ("data_source_name",)
+    ds_types = ("sftp",)
+    should_write_header = True
+    worker_install_require = ["pigeon[sftp]"]
+
+    def execute_impl(self, *args, **kwargs):
+        conf = self.rendered_config
+        ds = self.must_get_connection_by_name(conf["data_source_name"])
+
+        cm = conf["compress_method"]
+        file_to_upload, ext = self.compress_file(filename=self.filename, compress_mode=cm)
+        remote_filename = conf["filename"]
+        if cm != "None" and not remote_filename.endswith(ext):
+            remote_filename = f"{remote_filename}{ext}"
+
+        self.ensure_directory_exists(ds, conf)
+        remote_filename = os.path.join(conf["directory"], remote_filename)
+
+        logger.info("uploading %s to %s", file_to_upload, remote_filename)
+        ds.connector.upload_file(file_to_upload, remote_filename)
+        remove_files_safely([self.filename, file_to_upload])
+
+    @staticmethod
+    def ensure_directory_exists(ds, conf):
+        sftp = ds.connector
+        # make sure the directory exists
+        try:
+            sftp.sftp.listdir(conf["directory"])
+        except OSError:
+            logger.warning("failed to list directory %s; it may not exist, trying to create it", conf["directory"])
+            # this call may raise as well; manual intervention is required if it does
+            sftp.sftp.mkdir(conf["directory"])
+
+    @classmethod
+    def config_schema(cls):
+        # get_choices_by_type = cls.get_connection_names_by_type
+        # dss = get_choices_by_type(cls.ds_types)
+        schema = {
+            "type": "object",
+            "properties": {
+                "data_source_name": {
+                    "type": "string",
+                    "title": _l("SFTP Connection"),
+                    "ui:field": "ProjectConnectionSelectorField",
+                    "ui:options": {
+                        "supportTypes": cls.ds_types,
+                    },
+                    # 'default': cls.first_or_default(dss, ''),
+                },
+                "directory": {
+                    "type": "string",
+                    "title": _l("Upload Path"),
+                    "description": _l("The directory to upload the file to, Jinja template supported"),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+                "filename": {
+                    "type": "string",
+                    "title": _l("Filename"),
+                    "description": _l("The filename to save as, Jinja template supported"),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+                "compress_method": {
+                    "type": "string",
+                    "title": _l("Compression Method"),
+                    "description": _l("Compress file before uploading using specified method"),
+                    "enum": ["None", "Gzip", "Zip"],
+                    "enumNames": ["None", "Gzip", "Zip"],
+                    "default": "None",
+                },
+            },
+            "required": ["data_source_name", "filename", "compress_method"],
+        }
+        return schema
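The compression step only renames the remote file when the chosen extension is missing. A small sketch of that rule; the ".gz" extension here is an assumption about what compress_file returns for Gzip:

    def remote_name(filename: str, compress_method: str, ext: str) -> str:
        # append the compression extension unless compression is off or it is already present
        if compress_method != "None" and not filename.endswith(ext):
            return f"{filename}{ext}"
        return filename

    assert remote_name("report.csv", "Gzip", ".gz") == "report.csv.gz"
    assert remote_name("report.csv.gz", "Gzip", ".gz") == "report.csv.gz"
    assert remote_name("report.csv", "None", "") == "report.csv"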
recurvedata/operators/transfer_operator/load_task_starrocks.py
@@ -0,0 +1,169 @@
+try:
+    from recurvedata.pigeon.loader.csv_to_starrocks import CSVToStarRocksLoader
+except ImportError:
+    pass
+
+from recurvedata.core.translation import _l
+from recurvedata.operators.transfer_operator import const
+from recurvedata.operators.transfer_operator.task import LoadTask
+from recurvedata.operators.transfer_operator.utils import allowed_modes
+
+
+class StarRocksLoadTask(LoadTask):
+    ds_name_fields = ("data_source_name",)
+    ds_types = ("starrocks",)
+    default_dumper_handler_options = {
+        "null": r"\N",
+        "lineterminator": "\n",
+        "escapechar": "'",
+        "doublequote": False,
+    }
+    worker_install_require = [
+        "pigeon[starrocks]",
+    ]
+
+    def execute_impl(self, *args, **kwargs):
+        ds = self.must_get_connection_by_name(self.config["data_source_name"])
+        load_options = self.rendered_config.copy()
+        for k in ["data_source_name"]:
+            load_options.pop(k, None)
+        load_options.update(
+            {
+                "filename": self.filename,
+                "connector": ds.connector,
+                "delete_file": True,
+                "load_strict_mode": self.config.get("load_strict_mode", False),
+                "database": ds.database,
+            }
+        )
+        loader = CSVToStarRocksLoader(**load_options)
+        return loader.execute()
+
+    @classmethod
+    def config_schema(cls):
+        schema = {
+            "type": "object",
+            "properties": {
+                "data_source_name": {
+                    "type": "string",
+                    "title": _l("StarRocks Connection"),
+                    "ui:field": "ProjectConnectionSelectorField",
+                    "ui:options": {
+                        "supportTypes": cls.ds_types,
+                    },
+                },
+                # "database": {
+                #     "type": "string",
+                #     "title": "Database",
+                #     "ui:field": "CodeEditorWithReferencesField",
+                #     "ui:options": {
+                #         "type": "plain",
+                #     },
+                # },
+                "table": {
+                    "type": "string",
+                    "title": _l("Target Table"),
+                    "description": _l("Name of the table to load data into"),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+                "create_table_ddl": {
+                    "type": "string",
+                    "title": _l("Table Creation SQL"),
+                    "description": _l(
+                        "SQL statement to create the target table if it doesn't exist. See "
+                        "<a target='_blank' href='https://docs.starrocks.io/docs/sql-reference/sql-statements/table_bucket_part_index/CREATE_TABLE'>"
+                        "StarRocks Docs</a> for details"
+                    ),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "code",
+                        "lang": "sql",
+                        "sqlLang": "mysql",
+                    },
+                },
+                "mode": {
+                    "type": "string",
+                    "title": _l("Load Mode"),
+                    "description": _l("How to handle existing data in the target table"),
+                    "enum": list(allowed_modes),
+                    "default": const.LOAD_OVERWRITE,
+                },
+                "primary_keys": {
+                    "ui:hidden": '{{parentFormData.mode !== "MERGE"}}',
+                    "type": "string",
+                    "title": _l("Primary Keys"),
+                    "description": _l(
+                        "Comma-separated list of columns used for deduplication in MERGE mode. "
+                        "Should be primary or unique key columns."
+                    ),
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+                "using_insert": {
+                    "type": "boolean",
+                    "title": _l("Use INSERT Mode"),
+                    "description": _l("By default Stream Load is used. Enable to use INSERT statements instead."),
+                    "default": False,
+                    "ui:hidden": True,
+                },
+                "load_strict_mode": {
+                    "type": "boolean",
+                    "title": _l("Enable Strict Mode"),
+                    "default": False,
+                    "description": _l(
+                        "When enabled, validates that data matches target table schema before loading. "
+                        "Raises error if validation fails."
+                    ),
+                    "ui:hidden": "{{parentFormData.using_insert}}",
+                },
+                "insert_batch_size": {
+                    "ui:hidden": "{{!parentFormData.using_insert}}",
+                    "type": "number",
+                    "ui:options": {"controls": False},
+                    "title": _l("Batch Size"),
+                    "default": 500,
+                    "minimum": 1,
+                    "maximum": 2000,
+                    "description": _l("Number of rows to insert in each batch"),
+                },
+                "insert_concurrency": {
+                    "ui:hidden": "{{!parentFormData.using_insert}}",
+                    "type": "number",
+                    "ui:options": {"controls": False},
+                    "title": _l("Concurrent Inserts"),
+                    "default": 1,
+                    "minimum": 1,
+                    "maximum": 10,
+                    "description": _l("Number of parallel insert operations"),
+                },
+                # "pre_queries": {
+                #     "type": "string",
+                #     "title": "Queries Ran Before Loading",
+                #     "description": 'SQL to run before the new data is loaded; separate multiple statements with `;`. Variables are supported, see the <a target="_blank" href="http://bit.ly/2JMutjn">docs</a>',
+                #     "ui:field": "CodeEditorWithReferencesField",
+                #     "ui:options": {
+                #         "type": "code",
+                #         "lang": "sql",
+                #         "sqlLang": "mysql",
+                #     },
+                # },
+                # "post_queries": {
+                #     "type": "string",
+                #     "title": "Queries Ran After Loading",
+                #     "description": 'SQL to run after the new data is loaded; separate multiple statements with `;`. Variables are supported, see the <a target="_blank" href="http://bit.ly/2JMutjn">docs</a>',
+                #     "ui:field": "CodeEditorWithReferencesField",
+                #     "ui:options": {
+                #         "type": "code",
+                #         "lang": "sql",
+                #         "sqlLang": "mysql",
+                #     },
+                # },
+            },
+            "required": ["data_source_name", "table"],
+        }
+        return schema
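The ui:hidden expressions above toggle fields off each other: primary_keys only shows when mode is "MERGE", and the insert_* options only when using_insert is enabled. A hypothetical MERGE configuration (connection name, table, and key columns are placeholders; the exact mode literals come from allowed_modes):

    config = {
        "data_source_name": "starrocks_cluster",  # placeholder connection name
        "table": "dws_user_profile",
        "mode": "MERGE",               # literal taken from the ui:hidden expression above
        "primary_keys": "user_id,dt",  # comma-separated dedup columns
        "load_strict_mode": True,
    }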
recurvedata/operators/transfer_operator/load_task_yicrowds.py
@@ -0,0 +1,97 @@
+import logging
+import os
+
+import jsonschema
+import pandas as pd
+
+from recurvedata.pigeon.schema import Schema
+from recurvedata.pigeon.utils import fs
+
+try:
+    import pyhive.hive  # noqa see https://github.com/cloudera/impyla/issues/277
+    from yicrowds_client.core import YiCrowdsClient
+except ImportError:
+    pass
+from recurvedata.operators.transfer_operator.task import LoadTask
+
+logger = logging.getLogger(__name__)
+
+
+class YiCrowdsLoadTask(LoadTask):
+    def _infer_column(self):
+        schema_file = fs.schema_filename(self.filename)
+        logger.info("infer column by schema file %s", schema_file)
+        if not os.path.exists(schema_file):
+            logger.error("schema file does not exist, unable to infer columns")
+        schema = Schema.load(schema_file)
+        columns = []
+        for f in schema:
+            columns.append(f.name)
+        return columns
+
+    def add_column_to_file(self, columns):
+        df = pd.read_csv(self.filename, header=None, names=columns)
+        df.to_csv(self.filename, index=False)
+
+    def execute_impl(self, *args, **kwargs):
+        conf = self.rendered_config
+        self.add_column_to_file(self._infer_column())
+        environment = conf.get("environment", "prod")
+        project_id = conf.get("yc_project_id")
+        tag = conf.get("yc_tag")
+        no_error = YiCrowdsClient(env=environment).upload_data_local(project_id=project_id, tags=tag, fp=self.filename)
+        if not no_error:
+            raise Exception("upload_data_local upload failed")
+
+    @classmethod
+    def validate(cls, configuration):
+        conf = super().validate(configuration)
+        environment = conf.get("environment", "prod")
+        project_id = conf.get("yc_project_id")
+        tag = conf.get("yc_tag")
+
+        yca = YiCrowdsClient(env=environment)
+        if not yca.if_project_id_exists(project_id=project_id):
+            raise jsonschema.ValidationError(
+                message=f"YiCrowds Project ID: {project_id} does not exist", path=("yc_project_id",)
+            )
+
+        if yca.if_tag_exists(project_id=project_id, tags=tag, is_regex=conf.get("is_regex")):
+            raise jsonschema.ValidationError(
+                message=f"YiCrowds Tag: {tag} already exists", path=("yc_tag", "yc_project_id", "is_regex")
+            )
+
+    @classmethod
+    def config_schema(cls):
+        return {
+            "type": "object",
+            "properties": {
+                "environment": {
+                    "type": "string",
+                    "title": "Environment",
+                    "enum": ["prod", "dev"],
+                    "enumNames": ["prod", "dev"],
+                    "default": "prod",
+                    "description": "YiCrowds has a production and a test environment; defaults to production",
+                },
+                "yc_project_id": {
+                    "type": "string",
+                    "title": "YiCrowds Project ID",
+                    "description": "YiCrowds project ID; can be taken from the project URL",
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+                "yc_tag": {
+                    "type": "string",
+                    "title": "YiCrowds Tag",
+                    "description": "The tag to write data into",
+                    "ui:field": "CodeEditorWithReferencesField",
+                    "ui:options": {
+                        "type": "plain",
+                    },
+                },
+            },
+            "required": ["yc_project_id", "yc_tag"],
+        }
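add_column_to_file rewrites a headerless dump in place, prepending the column names recovered from the sidecar schema file. The same idea in isolation (the file path and column names are illustrative):

    import pandas as pd

    columns = ["user_id", "answer", "submitted_at"]  # placeholder column names
    df = pd.read_csv("/tmp/dump.csv", header=None, names=columns)
    df.to_csv("/tmp/dump.csv", index=False)  # same file, now with a header row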
recurvedata/operators/transfer_operator/mixin.py
@@ -0,0 +1,31 @@
+import csv
+import logging
+import os
+
+try:
+    from recurvedata.pigeon.handler.csv_handler import HiveCSVFileHandler
+except ImportError:
+    pass
+logger = logging.getLogger(__name__)
+
+
+class HiveTextfileConverterMixin(object):
+    def convert_csv_to_hive_text_if_needed(self):
+        """Convert the CSV file into Hive textfile format."""
+        if not self.handler_options.get("hive"):
+            return
+
+        if not os.path.exists(self.filename):
+            logger.warning("%s does not exist", self.filename)
+            return
+
+        new_name = f"{self.filename}.hive"
+        handler = HiveCSVFileHandler(filename=new_name)
+        with open(self.filename, newline="") as inf:
+            reader = csv.reader(inf)
+            for row in reader:
+                handler.handle(tuple(row))
+        handler.close()
+
+        if os.path.exists(new_name):
+            os.rename(new_name, self.filename)
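A hedged usage sketch of the mixin: any class that provides filename and handler_options attributes can opt into the conversion. The attribute names come from the mixin body; the rest is illustrative and assumes pigeon is installed so HiveCSVFileHandler imports successfully:

    class DummyTask(HiveTextfileConverterMixin):
        def __init__(self, filename):
            self.filename = filename
            self.handler_options = {"hive": True}  # enables the conversion

    DummyTask("/tmp/data.csv").convert_csv_to_hive_text_if_needed()  # rewrites the CSV in place as Hive text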