recurvedata-lib 0.1.487 (recurvedata_lib-0.1.487-py2.py3-none-any.whl)
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
recurvedata/pigeon/connector/__init__.py
@@ -0,0 +1,294 @@
+from functools import partial
+
+from recurvedata.pigeon.connector._registry import get_connector_class
+
+
+def new_azure_synapse_connector(connection=None, database=None):
+    """
+    only connection string accepted
+    database switching between azure data warehouses is not allowed.
+    """
+    from .azure_synapse import AzureSynapseConnector
+
+    conf = connection.copy()
+    if database:
+        conf["database"] = database
+    return AzureSynapseConnector(**conf)
+
+
+def new_azure_blob_connector(
+    conn_string: str = None,
+    account_url: str = None,
+    endpoint_suffix: str = "core.chinacloudapi.cn",
+    account_name: str = None,
+    sas_token: str = None,
+    **kwargs,
+):
+    """only connection string accepted"""
+    from .azure_blob import AzureBlobConnector
+
+    return AzureBlobConnector(
+        connection_string=conn_string,
+        account_url=account_url,
+        endpoint_suffix=endpoint_suffix,
+        account_name=account_name,
+        sas_token=sas_token,
+        **kwargs,
+    )
+
+
+def new_mysql_connector(connection=None, database=None, **kwargs):
+    """Factory function to create a new MySQLConnector.
+
+    :param connection: the connection properties
+    :type connection: dict
+    :param database: the optional database name
+    :type database: str
+    """
+    from .mysql import MySQLConnector
+
+    conf = connection.copy()
+    return MySQLConnector(database=database, **conf)
+
+
+def new_tidb_connector(connection=None, database=None):
+    """Factory function to create a new TiDBConnector (MySQLConnector).
+
+    Similar to new_mysql_connector, but with different default connection parameters.
+
+    :param connection: the connection properties
+    :type connection: dict
+    :param database: the optional database name
+    :type database: str
+    """
+    return new_mysql_connector(connection, database)
+
+
+def new_starrocks_connector(connection=None, database=None):
+    """Factory function to create a new StarRocksConnector.
+
+    :param connection: the connection properties
+    :type connection: dict
+    :param database: the optional database name
+    :type database: str
+    """
+    from .starrocks import StarRocksConnector
+
+    return StarRocksConnector(database=database, **connection)
+
+
+def new_hive_connector(connection=None, database=None, **kwargs):
+    """Factory function to create a new HiveConnector.
+
+    :param connection: the connection properties
+    :type connection: dict
+    :param database: the optional database name
+    :type database: str
+    """
+    from .hive_impala import HiveConnector
+
+    conf = connection.copy()
+    return HiveConnector(database=database, **conf)
+
+
+def new_impala_connector(connection=None, database=None, **kwargs):
+    """Factory function to create a new ImpalaConnector.
+
+    :param connection: the connection properties
+    :type connection: dict
+    :param database: the optional database name
+    :type database: str
+    """
+    from .hive_impala import ImpalaConnector
+
+    conf = connection.copy()
+    return ImpalaConnector(database=database, **conf)
+
+
+def new_webhdfs_connector(conf=None, **kwargs):
+    from .hdfs import HDFSConnector
+
+    conf = conf.copy()
+    return HDFSConnector(**conf)
+
+
+def new_redshift_connector(connection=None, database=None):
+    """Factory function to create a new RedshiftConnector.
+
+    :param connection: the connection properties
+    :type connection: dict
+    :param database: the optional database name
+    :type database: str
+    """
+    from .redshift import RedshiftConnector
+
+    conf = connection.copy()
+    return RedshiftConnector(database=database, **conf)
+
+
+def new_postgresql_connector(connection=None, database=None):
+    """Factory function to create a new PostgresConnector.
+
+    :param connection: the connection properties
+    :type connection: dict
+    :param database: the optional database name
+    :type database: str
+    """
+    from .postgresql import PostgresConnector
+
+    conf = connection.copy()
+    if database is not None:
+        conf["database"] = database
+    return PostgresConnector(**conf)
+
+
+def new_cassandra_connector(connection, database=None):
+    """Factory function to create a new CassandraConnector.
+
+    :param connection: the connection properties
+    :type connection: dict
+    :param database: the optional database name
+    :type database: str
+    """
+    from .cass import CassandraConnector
+
+    return CassandraConnector(database=database, **connection)
+
+
+def new_s3_connector(conf=None):
+    from .awss3 import S3Connector
+
+    conf = conf.copy()
+    return S3Connector(**conf)
+
+
+def new_elasticsearch_connector(conf=None):
+    from .es import ElasticSearchConnector
+
+    return ElasticSearchConnector(**conf)
+
+
+def new_ftp_connector(conf=None):
+    from .ftp import FtpConnector
+
+    conf = (conf or {}).copy()
+    return FtpConnector(**conf)
+
+
+def new_sftp_connector(conf):
+    from .sftp import SFtpConnector
+
+    return SFtpConnector(**conf)
+
+
+def new_mssql_connector(connection=None, database=None, is_azure=False):
+    from .mssql import AzureSQLServerConnector, MSSQLConnector
+
+    conf = connection.copy()
+    if database:
+        conf["database"] = database
+    if is_azure:
+        connector_cls = AzureSQLServerConnector
+    else:
+        connector_cls = MSSQLConnector
+    return connector_cls(**conf)
+
+
+def new_clickhouse_connector(connection=None, database=None, native=True):
+    conf = connection.copy()
+    if not native:
+        from .clickhouse import ClickHouseConnector
+    else:
+        from .clickhouse_native import ClickHouseConnector
+    return ClickHouseConnector(database=database, **conf)
+
+
+def new_phoenix_connector(connection=None, **kwargs):
+    from .hbase_phoenix import PhoenixConnector
+
+    conf = connection.copy()
+    return PhoenixConnector(**conf)
+
+
+def new_mongodb_connector(connection=None, **kwargs):
+    from .mongodb import MongoDBConnector
+
+    conf = connection.copy()
+    return MongoDBConnector(**conf)
+
+
+def new_google_bigquery_connector(*args, **kwargs):
+    from .google_bigquery import GoogleBigqueryConnector
+
+    return GoogleBigqueryConnector(*args, **kwargs)
+
+
+def new_feishu_connector(app_id=None, app_secret=None):
+    from .feishu import FeishuBot
+
+    conf = {}
+    if app_id:
+        conf["app_id"] = app_id
+        conf["app_secret"] = app_secret
+    return FeishuBot(**conf)
+
+
+def new_owncloud_connector(url: str = None, user: str = None, password: str = None, **kwargs):
+    from .owncloud import OwncloudConnector
+
+    conf = {}
+    if url and user and password:
+        conf["url"] = url
+        conf["user"] = user
+        conf["password"] = password
+        conf.update(kwargs)
+    else:
+        raise ValueError("You must provide owncloud URL, user and password.")
+    return OwncloudConnector(**conf)
+
+
+def new_sqlite_connector(in_memory: bool, max_memory_gb: int = 2, **kwargs):
+    if not in_memory:
+        raise ValueError("Currently only supports in-memory database.")
+    from .sqlite import SQLiteMemoryDbConnector
+
+    conf = {}
+    conf.update(kwargs)
+    return SQLiteMemoryDbConnector(max_memory_gb=max_memory_gb, **conf)
+
+
+def new_doris_connector(connection=None, database=None):
+    from .doris import DorisConnector
+
+    conf = connection.copy()
+    return DorisConnector(database=database, **conf)
+
+
+_factory_registry = {
+    "mysql": new_mysql_connector,
+    "tidb": new_tidb_connector,
+    "hive": new_hive_connector,
+    "impala": new_impala_connector,
+    "redshift": new_redshift_connector,
+    "cassandra": new_cassandra_connector,
+    "s3": new_s3_connector,
+    "elasticsearch": new_elasticsearch_connector,
+    "es": new_elasticsearch_connector,
+    "ftp": new_ftp_connector,
+    "azure_synapse": new_azure_synapse_connector,
+    "azure_blob": new_azure_blob_connector,
+    "mssql": new_mssql_connector,
+    "clickhouse": new_clickhouse_connector,
+    "clickhouse_native": partial(new_clickhouse_connector, native=True),
+    "phoenix": new_phoenix_connector,
+    "mongodb": new_mongodb_connector,
+    "gbq": new_google_bigquery_connector,
+    "google_bigquery": new_google_bigquery_connector,
+    "sqlite": new_sqlite_connector,
+    "postgres": new_postgresql_connector,
+    "doris": new_doris_connector,
+    "starrocks": new_starrocks_connector,
+}
+
+
+def get_connector(db_type, *args, **kwargs):
+    return _factory_registry[db_type](*args, **kwargs)
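
For orientation, here is a minimal usage sketch of the factory registry above (not part of the package diff). The connection values are hypothetical placeholders; each factory copies the dict and forwards it as keyword arguments to the connector class it lazily imports.

from recurvedata.pigeon.connector import get_connector

# Hypothetical connection properties for a MySQL source.
conn = {"host": "127.0.0.1", "port": 3306, "user": "demo", "password": "secret"}
mysql = get_connector("mysql", connection=conn, database="analytics")

# "clickhouse_native" is pre-bound via functools.partial(native=True), so it
# resolves to the native-protocol ClickHouseConnector; passing native=False to
# the plain "clickhouse" key selects the HTTP implementation instead.
ch = get_connector("clickhouse", connection={"host": "127.0.0.1"}, database="default", native=False)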
recurvedata/pigeon/connector/_registry.py
@@ -0,0 +1,17 @@
+from recurvedata.pigeon.utils import ensure_str_list
+
+_registry = {}
+
+
+class register_connector_class(object):
+    def __init__(self, ctype):
+        self.ctype = ensure_str_list(ctype)
+
+    def __call__(self, connector):
+        for t in self.ctype:
+            _registry[t] = connector
+        return connector
+
+
+def get_connector_class(ctype):
+    return _registry[ctype]
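
The registry is a plain dict keyed by type name. A quick sketch with a made-up DummyConnector (not in the package) shows how the decorator and lookup pair up; because ensure_str_list also accepts a single string, one class can be registered under several type names:

from recurvedata.pigeon.connector._registry import get_connector_class, register_connector_class

@register_connector_class(["dummy", "dummy_alias"])
class DummyConnector:
    pass

# Both aliases resolve to the same class object.
assert get_connector_class("dummy") is DummyConnector
assert get_connector_class("dummy_alias") is DummyConnector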
recurvedata/pigeon/connector/aliyun_oss.py
@@ -0,0 +1,80 @@
+import logging
+import os
+
+import oss2
+
+from recurvedata.pigeon.connector._registry import register_connector_class
+from recurvedata.pigeon.utils.timing import ProgressCallback
+
+
+@register_connector_class("oss")
+class OSSBucketConnector(object):
+    def __init__(self, access_key_id, access_key_secret, endpoint, bucket_name, **kwargs):
+        self.access_key_id = access_key_id
+        self.access_key_secret = access_key_secret
+        self.endpoint = endpoint
+        self.bucket_name = bucket_name
+
+        if not all((self.access_key_id, self.access_key_secret)):
+            logging.info("access_key_id or access_key_secret is missing, fallback to anonymous auth")
+            self._auth = oss2.AnonymousAuth()
+        else:
+            self._auth = oss2.make_auth(self.access_key_id, self.access_key_secret)
+
+        self.bucket = oss2.Bucket(self._auth, self.endpoint, self.bucket_name)
+        proxies = kwargs.get("proxies")
+        if proxies:
+            # pass proxies to the underlying requests.Session
+            logging.info("use %s as proxies", proxies)
+            self.bucket.session.session.proxies = proxies
+
+    def has_object(self, key):
+        return self.bucket.object_exists(key)
+
+    def delete_key(self, key):
+        self.bucket.delete_object(key)
+
+    def delete_keys_by_prefix(self, prefix):
+        keys = []
+        batch_size = 100
+        for obj in oss2.ObjectIteratorV2(bucket=self.bucket, prefix=prefix):
+            keys.append(obj.key)
+            if len(keys) >= batch_size:
+                self.bucket.batch_delete_objects(keys)
+                keys = []
+        if keys:
+            self.bucket.batch_delete_objects(keys)
+
+    def get_keys(self, prefix="", delimiter=""):
+        keys = [x.key for x in oss2.ObjectIteratorV2(bucket=self.bucket, prefix=prefix, delimiter=delimiter)]
+        if delimiter:
+            keys = [x for x in keys if not x.endswith(delimiter)]
+
+        return keys
+
+    def upload(self, filename, key=None, folder=None, overwrite=True, num_threads=4, **kwargs):
+        if not key:
+            key = os.path.basename(filename)
+        if folder:
+            key = os.path.join(folder, key)
+
+        if not overwrite:
+            if self.has_object(key=key):
+                return key
+
+        oss2.resumable_upload(self.bucket, key, filename, progress_callback=ProgressCallback(), num_threads=num_threads)
+        return key
+
+    def download(self, key, folder=None, filename=None, overwrite=True, num_threads=4, **kwargs):
+        if not filename:
+            filename = os.path.basename(key)
+        if folder:
+            filename = os.path.join(folder, filename)
+
+        if not overwrite and os.path.exists(filename):
+            return filename
+
+        oss2.resumable_download(
+            self.bucket, key, filename, progress_callback=ProgressCallback(), num_threads=num_threads
+        )
+        return filename
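
A hedged usage sketch of OSSBucketConnector follows (not part of the diff); the endpoint, bucket, and credential values are placeholders. Passing empty credentials falls back to oss2.AnonymousAuth, which only works for publicly readable buckets.

from recurvedata.pigeon.connector.aliyun_oss import OSSBucketConnector

oss = OSSBucketConnector(
    access_key_id="<access-key-id>",
    access_key_secret="<access-key-secret>",
    endpoint="https://oss-cn-hangzhou.aliyuncs.com",
    bucket_name="demo-bucket",
)

# Key defaults to the file basename; folder is joined in front of it.
key = oss.upload("/tmp/report.csv", folder="exports")  # -> "exports/report.csv"
print(oss.get_keys(prefix="exports/"))
oss.download(key, folder="/tmp")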
recurvedata/pigeon/connector/awss3.py
@@ -0,0 +1,123 @@
+import os
+import urllib.parse
+
+import boto3
+import botocore.exceptions
+from botocore.config import Config
+
+from recurvedata.pigeon.connector._registry import register_connector_class
+from recurvedata.pigeon.utils.timing import DisplayProgress
+
+
+@register_connector_class("s3")
+class S3Connector(object):
+    def __init__(self, aws_access_key_id, aws_secret_access_key, region="cn-north-1", proxies=None, **kwargs):
+        self.aws_access_key_id = aws_access_key_id
+        self.aws_secret_access_key = aws_secret_access_key
+        self.region = region
+
+        self.s3 = boto3.resource(
+            "s3",
+            region_name=self.region,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            config=Config(proxies=proxies),
+        )
+
+    def create_bucket(self, bucket_name):
+        return self.s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={"LocationConstraint": self.region})
+
+    def has_bucket(self, bucket_name):
+        exists = True
+        try:
+            self.s3.meta.client.head_bucket(Bucket=bucket_name)
+        except botocore.exceptions.ClientError as e:
+            error_code = int(e.response["Error"]["Code"])
+            if error_code == 404:
+                exists = False
+        return exists
+
+    def get_bucket(self, bucket_name):
+        if self.has_bucket(bucket_name):
+            return self.s3.Bucket(bucket_name)
+        return self.create_bucket(bucket_name)
+
+    def delete_bucket(self, bucket_name):
+        bucket = self.get_bucket(bucket_name)
+        for key in bucket.objects.all():
+            key.delete()
+        bucket.delete()
+
+    def has_object(self, bucket_name, key):
+        exists = True
+        try:
+            self.s3.meta.client.head_object(Bucket=bucket_name, Key=key)
+        except botocore.exceptions.ClientError as e:
+            error_code = int(e.response["Error"]["Code"])
+            if error_code == 404:
+                exists = False
+        return exists
+
+    @staticmethod
+    def parse_s3_url(s3url):
+        parsed_url = urllib.parse.urlparse(s3url)
+        if not parsed_url.netloc:
+            raise ValueError("Please provide a bucket_name")
+
+        bucket_name = parsed_url.netloc
+        key = parsed_url.path.strip("/")
+        return bucket_name, key
+
+    def delete_key(self, key, bucket_name=None):
+        if bucket_name is None:
+            bucket_name, key = self.parse_s3_url(key)
+        bucket = self.get_bucket(bucket_name)
+        bucket.Object(key).delete()
+
+    def delete_keys_by_prefix(self, bucket_name, prefix):
+        bucket = self.get_bucket(bucket_name)
+        for key in bucket.objects.filter(Prefix=prefix):
+            key.delete()
+
+    def get_keys(self, bucket_name, prefix=None):
+        bucket = self.get_bucket(bucket_name)
+        if prefix is not None:
+            all_keys = bucket.objects.filter(Prefix=prefix)
+        else:
+            all_keys = bucket.objects.all()
+
+        return [x.key for x in all_keys]
+
+    def upload(self, bucket_name, filename, key=None, folder=None, overwrite=True, **kwargs):
+        if not key:
+            key = os.path.basename(filename)
+        if folder:
+            key = os.path.join(folder, key)
+
+        if not overwrite:
+            if self.has_object(bucket_name=bucket_name, key=key):
+                return key
+
+        size = os.path.getsize(filename)
+        bucket = self.get_bucket(bucket_name)
+        with open(filename, "rb") as data:
+            bucket.upload_fileobj(data, key, Callback=DisplayProgress(size), **kwargs)
+        return key
+
+    def download(self, bucket_name, key, folder=None, filename=None, overwrite=True, **kwargs):
+        if not self.has_object(bucket_name, key):
+            raise ValueError(f"{key} not exists in {bucket_name}")
+
+        if not filename:
+            filename = os.path.basename(key)
+        if folder:
+            filename = os.path.join(folder, filename)
+
+        if not overwrite and os.path.exists(filename):
+            return filename
+
+        size = float(self.s3.meta.client.head_object(Bucket=bucket_name, Key=key)["ContentLength"])
+        bucket = self.get_bucket(bucket_name)
+        with open(filename, "wb") as data:
+            bucket.download_fileobj(key, data, Callback=DisplayProgress(size), **kwargs)
+        return filename
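
Likewise, a minimal sketch of S3Connector (bucket names and credentials are placeholders, not package values). Note that delete_key accepts either a bare key plus bucket_name or a full s3:// URL, which parse_s3_url splits into the bucket and key:

from recurvedata.pigeon.connector.awss3 import S3Connector

# region defaults to "cn-north-1"
s3 = S3Connector(aws_access_key_id="<key-id>", aws_secret_access_key="<secret>")

key = s3.upload("demo-bucket", "/tmp/report.csv", folder="exports")
bucket_name, parsed_key = S3Connector.parse_s3_url("s3://demo-bucket/exports/report.csv")
s3.delete_key("s3://demo-bucket/exports/report.csv")  # bucket parsed from the URL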
recurvedata/pigeon/connector/azure_blob.py
@@ -0,0 +1,176 @@
+import logging
+import os
+from typing import List, Optional
+
+from azure.core.exceptions import ResourceExistsError
+from azure.identity import AzureAuthorityHosts, ClientSecretCredential
+from azure.storage.blob import BlobServiceClient, StorageStreamDownloader
+
+from recurvedata.pigeon.connector._registry import register_connector_class
+from recurvedata.pigeon.utils.timing import DisplayProgress
+
+logger = logging.getLogger(__name__)
+
+
+@register_connector_class("azure_blob")
+class AzureBlobConnector:
+    """Connector for Azure Blob Storage.
+
+    Four ways to config:
+    - using connection_string
+    - using account_url + sas_token
+    - using endpoint_suffix + account_name + sas_token
+    - using endpoint_suffix + account_name + tenant_id + client_id + client_secret
+
+    kwargs:
+        spn_authority_host: authority host for spn, default is AzureAuthorityHosts.AZURE_CHINA
+    """
+
+    def __init__(
+        self,
+        connection_string: str = None,
+        account_url: str = None,
+        endpoint_suffix: str = "core.chinacloudapi.cn",
+        account_name: str = None,
+        sas_token: str = None,
+        tenant_id: str = None,
+        client_id: str = None,
+        client_secret: str = None,
+        **kwargs,
+    ):
+        self.conn_string = connection_string
+        self.account_url = account_url
+        self.endpoint_suffix = endpoint_suffix
+        self.account_name = account_name
+        self.sas_token = sas_token
+        self.kwargs = kwargs
+        self.spn_authority_host = self.kwargs.get("spn_authority_host") or AzureAuthorityHosts.AZURE_CHINA
+
+        authorize_by_conn_string = False
+        authorize_by_sas_token = False
+        authorize_by_spn_secret = False
+        if connection_string:
+            authorize_by_conn_string = True
+        if sas_token and (account_url or all((account_name, endpoint_suffix))):
+            authorize_by_sas_token = True
+        if tenant_id and client_id and client_secret and all((account_name, endpoint_suffix)):
+            authorize_by_spn_secret = True
+
+        if not any((authorize_by_conn_string, authorize_by_sas_token, authorize_by_spn_secret)):
+            raise ValueError(
+                """
+                invalid authorization info
+                Four ways to config:
+                - using connection_string
+                - using account_url + sas_token
+                - using endpoint_suffix + account_name + sas_token
+                - using endpoint_suffix + account_name + tenant_id + client_id + client_secret
+                """
+            )
+
+        if authorize_by_conn_string:
+            self.blob_service = BlobServiceClient.from_connection_string(connection_string, **kwargs)
+        elif authorize_by_sas_token:
+            if not account_url:
+                account_url = f"https://{account_name}.blob.{endpoint_suffix}"
+            self.blob_service = BlobServiceClient(account_url, credential=sas_token, **kwargs)
+        else:
+            credential = ClientSecretCredential(tenant_id, client_id, client_secret, authority=self.spn_authority_host)
+            account_url = f"https://{account_name}.blob.{endpoint_suffix}"
+            self.blob_service = BlobServiceClient(account_url=account_url, credential=credential)
+
+    @property
+    def account_key(self) -> Optional[str]:
+        if not self.conn_string:
+            return None
+        kvs = self.parse_conn_string(self.conn_string)
+        return kvs["accountkey"]
+
+    @staticmethod
+    def parse_conn_string(conn_string: str):
+        parts = conn_string.strip(";").split(";")
+        kvs = {}
+        for p in parts:
+            k, v = p.split("=", 1)
+            kvs[k.lower()] = v
+        return kvs
+
+    def get_url(self, container: str, blob: str) -> str:
+        return f"https://{self.blob_service.primary_hostname}/{container}/{blob}"
+
+    def create_container(self, container_name: str, exist_ok=True):
+        """create container"""
+        try:
+            return self.blob_service.create_container(container_name)
+        except ResourceExistsError as e:
+            if exist_ok:
+                logger.info(f"container {container_name} already exists, skip")
+            else:
+                raise e
+
+    def delete_container(self, container_name: str, **kwargs):
+        """if container not exists, error will be suppressed with the fail_not_exist parameter"""
+        self.blob_service.delete_container(container_name, **kwargs)
+
+    def exists(self, container_name: str, blob_name: str = None, **kwargs) -> bool:
+        """
+        if blob name is none, check whether container exists or not
+        if blob name specified, check blob exists or not in the container
+        """
+        if blob_name is None:
+            client = self.blob_service.get_container_client(container_name)
+        else:
+            client = self.blob_service.get_blob_client(container_name, blob_name)
+        return client.exists(**kwargs)
+
+    def delete_blob(self, container_name, blob_name, **kwargs):
+        container = self.blob_service.get_container_client(container_name)
+        container.delete_blob(blob_name, **kwargs)
+
+    def list_blobs(self, container_name, name_starts_with=None, include=None, **kwargs) -> List[str]:
+        container = self.blob_service.get_container_client(container_name)
+        generator = container.list_blobs(name_starts_with=name_starts_with, include=include, **kwargs)
+        return [blob.name for blob in generator]
+
+    def upload(self, container_name, local_file_path, blob_name=None, overwrite=True, is_progress_hook=True, **kwargs):
+        """
+        Upload local file to container with specified blob name.
+        The specified container will also be created if not exists.
+        """
+        if not blob_name:
+            blob_name = os.path.basename(local_file_path)
+
+        # container_blob = f'{container_name}/{blob_name}'
+        blob = self.blob_service.get_blob_client(container_name, blob_name)
+        if not overwrite and blob.exists():
+            logger.info("Blob exists, skip!")
+            return blob_name
+
+        size = os.path.getsize(local_file_path)
+        options = {"overwrite": True, "max_concurrency": 4}
+        if is_progress_hook:
+            options["progress_hook"] = DisplayProgress(size, stream=False)
+
+        options.update(kwargs)
+        with open(local_file_path, "rb") as data:
+            blob.upload_blob(data, **options)
+        return blob_name
+
+    def download(self, container_name, blob_name, local_file_path, **kwargs):
+        """download blob to local"""
+        blob = self.blob_service.get_blob_client(container_name, blob_name)
+        size = blob.get_blob_properties().size
+        if size == 0:
+            logging.warning("blob %s has no content, create an empty file and exit", blob_name)
+            with open(local_file_path, "w"):
+                return
+
+        options = {
+            "max_concurrency": 4,
+            "progress_hook": DisplayProgress(size, stream=False),
+        }
+        options.update(kwargs)
+        with open(local_file_path, "wb") as f:
+            data: StorageStreamDownloader = blob.download_blob(**options)
+            data.readinto(f)
+        return local_file_path
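
Finally, a sketch of the four configuration modes the AzureBlobConnector docstring lists; every account name and credential below is a placeholder, not a value from this package:

from recurvedata.pigeon.connector.azure_blob import AzureBlobConnector

# 1. connection string
c1 = AzureBlobConnector(connection_string="AccountName=demo;AccountKey=<key>;EndpointSuffix=core.chinacloudapi.cn")
# 2. account_url + sas_token
c2 = AzureBlobConnector(account_url="https://demo.blob.core.chinacloudapi.cn", sas_token="<sas>")
# 3. endpoint_suffix + account_name + sas_token (the account URL is derived)
c3 = AzureBlobConnector(account_name="demo", sas_token="<sas>")
# 4. service principal: account_name + tenant_id + client_id + client_secret
c4 = AzureBlobConnector(account_name="demo", tenant_id="<tenant>", client_id="<client>", client_secret="<secret>")

c3.create_container("backups")                       # exist_ok=True suppresses ResourceExistsError
c3.upload("backups", "/tmp/report.csv")              # blob name defaults to the file basename
c3.download("backups", "report.csv", "/tmp/copy.csv")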