recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from functools import cached_property, wraps
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from typing import Any, Optional
|
|
7
|
+
|
|
8
|
+
import sqlalchemy
|
|
9
|
+
import sqlalchemy.sql.schema
|
|
10
|
+
import sqlglot
|
|
11
|
+
from sqlalchemy import create_engine, insert, inspect
|
|
12
|
+
from sqlalchemy import text as sqlalchemy_text
|
|
13
|
+
from sqlalchemy.engine.reflection import Inspector
|
|
14
|
+
from sqlalchemy.engine.url import URL
|
|
15
|
+
from sqlalchemy.schema import CreateTable, MetaData
|
|
16
|
+
from sqlalchemy.sql.compiler import DDLCompiler
|
|
17
|
+
from sqlglot import exp
|
|
18
|
+
|
|
19
|
+
from recurvedata.connectors.base import RecurveConnectorBase
|
|
20
|
+
from recurvedata.connectors.const import ENV_VAR_DBT_PASSWORD, ENV_VAR_DBT_USER, set_env_dbt_password, set_env_dbt_user
|
|
21
|
+
from recurvedata.consts import ConnectionCategory
|
|
22
|
+
from recurvedata.utils.imports import MockModule
|
|
23
|
+
|
|
24
|
+
# Lazy imports for non-simple dependencies
|
|
25
|
+
try:
|
|
26
|
+
import pandas as pd
|
|
27
|
+
import sqlparse
|
|
28
|
+
import sshtunnel
|
|
29
|
+
from paramiko import RSAKey
|
|
30
|
+
except ImportError:
|
|
31
|
+
pd = MockModule("pandas")
|
|
32
|
+
sqlparse = MockModule("sqlparse")
|
|
33
|
+
sshtunnel = MockModule("sshtunnel")
|
|
34
|
+
RSAKey = MockModule("paramiko.RSAKey")
|
|
35
|
+
|
|
36
|
+
# Pick the statement wrapper matching the installed SQLAlchemy major version.
# The major version is compared numerically: a plain string comparison would
# misclassify a future "10.x" release as older than "2".
if int(sqlalchemy.__version__.split(".", 1)[0]) >= 2:
    text = sqlalchemy_text
else:

    def text(v):
        """Return the statement unchanged (used when SQLAlchemy is older than 2.x)."""
        return v
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class DBAPIABC(ABC):
    """Abstract interface listing the operations a DB-API style connector provides."""

    @property
    @abstractmethod
    def sqlalchemy_url(self) -> URL:
        """SQLAlchemy URL for this connection."""

    @property
    @abstractmethod
    def test_query(self) -> str:
        """Query string associated with connection testing."""

    @abstractmethod
    def test_connection(self):
        """Test the connection."""

    @property
    @abstractmethod
    def connect_args(self) -> Optional[dict]:
        """Optional dict of connect arguments."""

    @abstractmethod
    def connect(self):
        """Open a connection."""

    @abstractmethod
    def execute(self, query: str):
        """Execute ``query``."""

    @property  # todo: cache
    @abstractmethod
    def inspector(self) -> Inspector:
        """SQLAlchemy ``Inspector`` for this connection."""

    @abstractmethod
    def has_table(self, table, database=None):
        """Tell whether ``table`` exists, optionally within ``database``."""

    @abstractmethod
    def get_columns(self, table: str, database=None):
        """Return the columns of ``table``."""

    @abstractmethod
    def _reflect_table(self, table: str, database=None, engine=None) -> sqlalchemy.sql.schema.Table:
        """Reflect ``table`` into a SQLAlchemy ``Table`` object."""

    @abstractmethod
    def generate_ddl(self, table: str, database=None):
        """Generate the DDL of ``table``."""

    @abstractmethod
    def fetchall(self, query: str):
        """Run ``query`` and fetch all rows."""

    @abstractmethod
    def fetchmany(self, query: str, size=None):
        """Run ``query`` and fetch a batch of rows, optionally sized by ``size``."""

    @abstractmethod
    def fetchone(self, query: str):
        """Run ``query`` and fetch one row."""

    @abstractmethod
    def get_pandas_df(self, query: str, parameters=None, **kwargs):
        """Run ``query`` and return the result as a pandas DataFrame."""

    # def commit(self):
    #     raise NotImplementedError

    @abstractmethod
    def insert(self, table: str, data: list[dict], database: str = None):
        """Insert the rows in ``data`` into ``table``."""
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def with_ssh_tunnel(func):
    """
    Decorator that runs a method inside the instance's SSH tunnel.

    The wrapped method's instance must expose ``ssh_tunnel``. When that value
    is falsy the method is called directly; otherwise the call happens inside
    ``with tunnel:`` and ``tunnel.stop()`` is invoked afterwards in all cases.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        tunnel = self.ssh_tunnel
        if tunnel:
            try:
                with tunnel:
                    return func(self, *args, **kwargs)
            finally:
                # Explicit stop on top of the context manager's own exit.
                tunnel.stop()
        # No tunnel configured: plain call.
        return func(self, *args, **kwargs)

    return wrapper
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class DBAPIBase(RecurveConnectorBase):
|
|
137
|
+
SYSTEM_DATABASES = []
|
|
138
|
+
setup_extras_require = ["sqlalchemy", "sshtunnel", "paramiko"]
|
|
139
|
+
driver = ""
|
|
140
|
+
config_schema = {}
|
|
141
|
+
category = [
|
|
142
|
+
ConnectionCategory.DATABASE,
|
|
143
|
+
]
|
|
144
|
+
column_type_mapping = {}
|
|
145
|
+
# Common data types supported by all connectors
|
|
146
|
+
# each connector can add its own types
|
|
147
|
+
available_column_types = [
|
|
148
|
+
"smallint",
|
|
149
|
+
"int",
|
|
150
|
+
"bigint",
|
|
151
|
+
"float",
|
|
152
|
+
"double",
|
|
153
|
+
"decimal",
|
|
154
|
+
"date",
|
|
155
|
+
"timestamp",
|
|
156
|
+
"char",
|
|
157
|
+
"varchar",
|
|
158
|
+
"json",
|
|
159
|
+
]
|
|
160
|
+
|
|
161
|
+
def __init__(self, conf, *args, **kwargs):
    """Normalise an empty-string password in ``conf`` to ``None``, then defer to the parent initialiser."""
    password = conf.get("password")
    if password == "":
        # Note: this rewrites the caller's ``conf`` mapping in place.
        conf["password"] = None
    super().__init__(conf, *args, **kwargs)
|
|
165
|
+
|
|
166
|
+
@property
def sqlalchemy_url(self):
    """
    Build the SQLAlchemy ``URL`` for this connection.

    Host and port come from ``self.host`` / ``self.port`` unless an SSH tunnel
    is configured and active, in which case the tunnel's local bind host and
    port are used instead.
    """
    host, port = self.host, self.port
    tunnel = self.ssh_tunnel
    if tunnel and tunnel.is_active:
        host, port = tunnel.local_bind_host, tunnel.local_bind_port

    # Compare the major version as an integer; a string comparison would
    # treat a future "10.x" as smaller than "2".
    major_version = int(sqlalchemy.__version__.split(".", 1)[0])
    if major_version >= 2:
        # On 2.x the ``query`` field is passed explicitly.
        return URL(self.driver, self.user, self.password, host, port, self.database, query={})
    return URL(self.driver, self.user, self.password, host, port, self.database)
|
|
175
|
+
|
|
176
|
+
@property
|
|
177
|
+
def test_query(self):
|
|
178
|
+
return "select 1"
|
|
179
|
+
|
|
180
|
+
@cached_property
@with_ssh_tunnel
def type_code_mapping(self) -> dict:
    """
    Mapping from the type_code found in sqlalchemy's cursor.description to the
    database dialect's data type name.

    Not implemented here; this implementation always raises.
    """
    raise NotImplementedError
|
|
187
|
+
|
|
188
|
+
def sqlalchemy_column_type_code_to_name(self, type_code: Any, size: int | None = None) -> str:
|
|
189
|
+
"""
|
|
190
|
+
since cursor.description return type code only, we need to map it to dialect data type name
|
|
191
|
+
|
|
192
|
+
:param type_code: an object returned by cursor.description
|
|
193
|
+
:return: a string of column type name, in lower case
|
|
194
|
+
"""
|
|
195
|
+
raise NotImplementedError
|
|
196
|
+
|
|
197
|
+
def test_connection(self):
    """Run ``self.test_query`` through ``self.execute``; any error propagates to the caller."""
    probe = self.test_query
    self.execute(probe)
|
|
199
|
+
|
|
200
|
+
@property
|
|
201
|
+
def connect_args(self):
|
|
202
|
+
return {}
|
|
203
|
+
|
|
204
|
+
def connect(self):
    """Create and return a new SQLAlchemy engine for ``self.sqlalchemy_url``."""
    url = self.sqlalchemy_url
    engine_options = {
        "max_overflow": 0,  # todo: add to const
        "pool_recycle": 10 * 60,  # todo: add to const
        "connect_args": self.connect_args,
        "echo": False,
    }
    # todo: thread safe? use session to wrap?
    return create_engine(url, **engine_options)
|
|
213
|
+
|
|
214
|
+
@with_ssh_tunnel
def execute(self, query: str | list[str]):
    """
    Execute one or more SQL statements.

    :param query: a SQL string, or a list of SQL strings; each string is split
        into individual statements with ``sqlparse.split`` and the statements
        are executed in order on a single connection.
    """
    if isinstance(query, list):
        queries = list(itertools.chain.from_iterable(map(sqlparse.split, query)))
    else:
        queries = sqlparse.split(query)

    engine = self.connect()
    try:
        with engine.connect() as con:
            for q in queries:
                con.execute(text(q))
            # SQLAlchemy 2.x connections are "commit as you go": without an
            # explicit commit the work is rolled back when the connection is
            # closed. Connections that expose no ``commit`` are left as-is.
            commit = getattr(con, "commit", None)
            if callable(commit):
                commit()
    finally:
        # Release pooled connections, as get_pandas_df and insert do.
        engine.dispose()
|
|
225
|
+
|
|
226
|
+
@property
def inspector(self) -> Inspector:
    """Return a SQLAlchemy ``Inspector`` bound to a freshly created engine."""
    return inspect(self.connect())
|
|
231
|
+
|
|
232
|
+
@with_ssh_tunnel
def has_table(self, table, database=None):
    """Ask the inspector whether ``table`` exists in ``database`` (defaults to ``self.database``)."""
    schema = database or self.database
    return self.inspector.has_table(table, schema=schema)
|
|
236
|
+
|
|
237
|
+
def _extract_column_name(self, column_type):
|
|
238
|
+
return column_type.__visit_name__
|
|
239
|
+
|
|
240
|
+
@with_ssh_tunnel
def get_columns(self, table: str, database: str = None) -> list[dict]:
    """
    Return the inspector's column descriptions for ``table``.

    Each entry is the dict produced by ``inspector.get_columns`` with its
    ``"type"`` value replaced by the lower-cased name obtained from
    ``_extract_column_name``.

    :param table: table name
    :param database: schema to look in; defaults to ``self.database``
    :return: list of column dicts (not plain strings)
    """
    database = database or self.database
    column_dcts = self.inspector.get_columns(table, schema=database)
    for dct in column_dcts:
        dct["type"] = self._extract_column_name(dct["type"]).lower()
    return column_dcts
|
|
247
|
+
|
|
248
|
+
@staticmethod
|
|
249
|
+
def format_key(key):
|
|
250
|
+
key = key.strip("`")
|
|
251
|
+
return f"`{key}`"
|
|
252
|
+
|
|
253
|
+
def _reflect_table(self, table, database=None, engine=None) -> sqlalchemy.sql.schema.Table:
    """
    Reflect ``table`` from the database into a SQLAlchemy ``Table``.

    :param table: table name to reflect
    :param database: schema passed to ``MetaData.reflect``
    :param engine: engine to bind to; a new one is created via ``connect()``
        when omitted
    :return: the reflected ``Table`` for the requested name
    """
    if not engine:
        engine = self.connect()
    meta = MetaData()
    meta.reflect(
        bind=engine,
        schema=database,
        only=[
            table,
        ],
    )
    # Reflection may also load tables referenced through foreign keys, and
    # ``sorted_tables`` orders by dependency, so its first entry is not
    # necessarily the requested table. Look it up by key/name instead.
    key = f"{database}.{table}" if database else table
    reflected = meta.tables.get(key)
    if reflected is None:
        reflected = next((t for t in meta.sorted_tables if t.name == table), None)
    if reflected is None:
        # Fall back to the previous behaviour.
        reflected = meta.sorted_tables[0]
    return reflected
|
|
266
|
+
|
|
267
|
+
@with_ssh_tunnel
def generate_ddl(self, table, database=None):
    """Reflect ``table`` and return the string of its compiled ``CreateTable`` construct."""
    engine = self.connect()
    reflected = self._reflect_table(table, database=database, engine=engine)
    compiled: DDLCompiler = CreateTable(reflected).compile(engine)
    return compiled.string
|
|
273
|
+
|
|
274
|
+
@with_ssh_tunnel
def fetchall(self, query):
    """
    Execute ``query`` on a raw DB-API connection and return ``cursor.fetchall()``.

    The connection is closed and the engine disposed even when the query fails.
    """
    engine = self.connect()
    try:
        connection = engine.raw_connection()
        try:
            with connection.cursor() as cursor:
                cursor.execute(query)
                return cursor.fetchall()
        finally:
            # Previously skipped when execute/fetch raised.
            connection.close()
    finally:
        engine.dispose()
|
|
283
|
+
|
|
284
|
+
@with_ssh_tunnel
def fetchmany(self, query, size=None):
    """
    Execute ``query`` on a raw DB-API connection and return ``cursor.fetchmany(size=size)``.

    The connection is closed and the engine disposed even when the query fails.
    """
    engine = self.connect()
    try:
        connection = engine.raw_connection()
        try:
            with connection.cursor() as cursor:
                cursor.execute(query)
                return cursor.fetchmany(size=size)
        finally:
            # Previously skipped when execute/fetch raised.
            connection.close()
    finally:
        engine.dispose()
|
|
293
|
+
|
|
294
|
+
@with_ssh_tunnel
def fetchone(self, query):
    """
    Execute ``query`` on a raw DB-API connection and return ``cursor.fetchone()``.

    The connection is closed and the engine disposed even when the query fails.
    """
    engine = self.connect()
    try:
        connection = engine.raw_connection()
        try:
            with connection.cursor() as cursor:
                cursor.execute(query)
                return cursor.fetchone()
        finally:
            # Previously skipped when execute/fetch raised.
            connection.close()
    finally:
        engine.dispose()
|
|
303
|
+
|
|
304
|
+
@with_ssh_tunnel
def get_pandas_df(self, query, parameters=None, **kwargs):
    """
    Run ``query`` through ``pd.read_sql_query`` and return the resulting frame.

    ``parameters`` is forwarded as ``params``; extra keyword arguments are
    passed through unchanged. The engine is disposed whether or not the read
    succeeds.
    """
    engine = self.connect()
    try:
        return pd.read_sql_query(sql=query, con=engine, params=parameters, **kwargs)
    finally:
        engine.dispose()
|
|
312
|
+
|
|
313
|
+
# def commit(self):
|
|
314
|
+
# raise NotImplementedError
|
|
315
|
+
|
|
316
|
+
@with_ssh_tunnel
def insert(self, table: str, data: list[dict], database: str = None):
    """
    Insert rows into ``table``.

    :param table: target table name; it is reflected from the database first
    :param data: list of row dicts passed to the INSERT as parameters;
        nothing is executed when it is empty
    :param database: schema used when reflecting the table
    """
    if not data:
        # An empty parameter list is not "zero rows" to SQLAlchemy; skip it.
        return

    engine = self.connect()
    try:
        table = self._reflect_table(table, database=database, engine=engine)
        with engine.connect() as conn:
            conn.execute(insert(table), data)
            # SQLAlchemy 2.x rolls back uncommitted work on close; commit
            # explicitly when the connection offers it.
            commit = getattr(conn, "commit", None)
            if callable(commit):
                commit()
    finally:
        # Dispose even if reflection or the insert fails.
        engine.dispose()
|
|
323
|
+
|
|
324
|
+
@with_ssh_tunnel
def get_databases(self):
    """List the inspector's schema names, leaving out those whose lower-cased name is in ``SYSTEM_DATABASES``."""
    visible = []
    for name in self.inspector.get_schema_names():
        if name.lower() in self.SYSTEM_DATABASES:
            continue
        visible.append(name)
    return visible
|
|
327
|
+
|
|
328
|
+
@with_ssh_tunnel
def get_tables(self, database: str = None):
    """Return the inspector's table names for ``database`` (defaults to ``self.database``)."""
    schema = database or self.database
    return self.inspector.get_table_names(schema)
|
|
332
|
+
|
|
333
|
+
@with_ssh_tunnel
def get_views(self, database: str = None):
    """Return the inspector's view names for ``database`` (defaults to ``self.database``)."""
    schema = database or self.database
    return self.inspector.get_view_names(schema)
|
|
337
|
+
|
|
338
|
+
def _init_ssh_tunnel(self):
    """
    Build an ``SSHTunnelForwarder`` from ``self.ssh_tunnel_config``.

    Returns ``None`` when no tunnel configuration is present. The forwarder
    targets ``(self.host, self.port)`` as its remote bind address; it is only
    constructed here, not entered or started.
    """

    def _init_private_key(config: SSHTunnelConfig):
        # No key material configured -> no pkey (implicit None).
        if not config.private_key_str:
            return None
        # Literal backslash-n sequences in the configured key become newlines.
        pk_str = config.private_key_str.replace("\\n", "\n")
        return RSAKey.from_private_key(StringIO(pk_str), password=config.private_key_passphrase)

    tunnel_config = self.ssh_tunnel_config
    if not tunnel_config:
        return

    return sshtunnel.SSHTunnelForwarder(
        ssh_address_or_host=(tunnel_config.host, tunnel_config.port),
        ssh_username=tunnel_config.user,
        ssh_password=tunnel_config.password,
        ssh_pkey=_init_private_key(tunnel_config),
        remote_bind_address=(self.host, self.port),
    )
|
|
361
|
+
|
|
362
|
+
@property
|
|
363
|
+
def ssh_tunnel(self):
|
|
364
|
+
tunnel_config = self.ssh_tunnel_config
|
|
365
|
+
if not tunnel_config:
|
|
366
|
+
return
|
|
367
|
+
if not hasattr(self, "_ssh_tunnel"):
|
|
368
|
+
self._ssh_tunnel = self._init_ssh_tunnel()
|
|
369
|
+
return self._ssh_tunnel
|
|
370
|
+
|
|
371
|
+
@property
|
|
372
|
+
def ssh_tunnel_config(self) -> Optional["SSHTunnelConfig"]:
|
|
373
|
+
ssh_config = self.conf.get("ssh_tunnel", {})
|
|
374
|
+
if not (ssh_config and ssh_config.get("host")):
|
|
375
|
+
return
|
|
376
|
+
return SSHTunnelConfig(**ssh_config)
|
|
377
|
+
|
|
378
|
+
@classmethod
|
|
379
|
+
def get_sql_operator_types(cls):
|
|
380
|
+
return [
|
|
381
|
+
cls.connection_type,
|
|
382
|
+
]
|
|
383
|
+
|
|
384
|
+
def convert_config_to_dbt_profile(self, database: str, schema: str = None) -> dict:
    """
    Build a dbt profile dict for this connection.

    User and password are filled with the ``ENV_VAR_DBT_USER`` /
    ``ENV_VAR_DBT_PASSWORD`` constants rather than the actual credentials.
    The profile's ``schema`` is ``database`` (falling back to
    ``self.database``); the ``schema`` argument is not used here.
    """
    profile = {}
    profile["server"] = self.host
    profile["port"] = self.port
    profile["user"] = ENV_VAR_DBT_USER
    profile["password"] = ENV_VAR_DBT_PASSWORD
    profile["schema"] = database or self.database
    profile["type"] = self.connection_type
    profile["threads"] = 10
    return profile
|
|
394
|
+
|
|
395
|
+
def set_env_when_get_dbt_connection(self):
    """Pass this connection's user and password (empty string when falsy) to the dbt env setters."""
    user = self.user or ""
    set_env_dbt_user(user)
    password = self.password or ""
    set_env_dbt_password(password)
|
|
398
|
+
|
|
399
|
+
@classmethod
def order_sql(cls, sql: str, orders: list[dict[str, str]] = None, return_sql: bool = True):
    """
    Wrap ``sql`` in a subquery and apply an ORDER BY built from ``orders``.

    :param sql: the query to wrap; it is parsed with sqlglot and re-rendered
        without comments before wrapping
    :param orders: list of dicts read via their ``"field"`` and ``"order"``
        keys; when empty or ``None`` no ORDER BY is added
    :param return_sql: return the rendered SQL string when true, otherwise the
        sqlglot expression
    """
    # dialect impala -> hive, cuz there is no dialect 'impala' in sqlglot
    dialect = "hive" if cls.connection_type == "impala" else (cls.connection_type or None)

    # Re-render through the dialect so dialect-specific identifiers survive.
    clean_sql = sqlglot.parse_one(sql, read=dialect).sql(dialect=dialect, comments=False)

    # Outer ``SELECT *`` over the original query as an aliased subquery.
    alias = "_recurve_limit_subquery"
    outer_select = exp.select("*").from_(exp.Subquery(this=clean_sql, alias=alias))

    if orders:
        order_clauses = []
        for order in orders:
            if cls.connection_type in ["postgres", "redshift"]:
                # These two connection types get a hand-built, double-quoted column reference.
                field_expr = f'{alias}."{order["field"]}"'
            else:
                field_expr = exp.Column(this=order["field"], table=alias).sql(dialect=dialect)
            order_clauses.append(f'{field_expr} {order["order"]}')

        outer_select = outer_select.order_by(", ".join(order_clauses))

    if return_sql:
        return outer_select.sql(dialect=dialect)
    return outer_select
|
|
431
|
+
|
|
432
|
+
@classmethod
def limit_sql(cls, sql: str, limit: int = 100, orders: list[dict[str, str]] = None, offset: int = 0) -> str:
    """
    Build the preview query: ``order_sql`` output plus OFFSET and LIMIT.

    The SQL is parsed and wrapped but not validated; a DML statement will
    only fail once the returned query is executed.

    :param sql: query text to wrap
    :param limit: row limit, always applied
    :param orders: passed through to ``order_sql``
    :param offset: applied only when truthy
    :return: the rendered SQL string
    """
    # Same dialect rule as order_sql: impala is rendered as hive.
    dialect = cls.connection_type or None
    if cls.connection_type == "impala":
        dialect = "hive"

    query = cls.order_sql(sql, orders, return_sql=False)
    if offset:
        query = query.offset(offset)
    return query.limit(limit).sql(dialect=dialect)
|
|
451
|
+
|
|
452
|
+
@classmethod
def count_sql(cls, sql: str) -> str:
    """
    Wrap ``sql`` in ``SELECT COUNT(1) FROM (...) AS cnt_subquery`` for previews.

    Trailing statement terminators (``;``) are removed before wrapping,
    because a semicolon inside the parentheses makes the wrapped statement
    invalid. Trailing whitespace of the input is kept, so a final newline
    that closes a ``--`` line comment still closes it. Input without a
    trailing semicolon is embedded unchanged.

    The SQL is not parsed or validated; a DML statement will only fail once
    the returned query is executed.

    :param sql: query text to count rows of
    :return: the wrapping COUNT query
    """
    query = sql.rstrip()
    trailing_whitespace = sql[len(query):]
    # Drop every trailing ';' (and the whitespace between repeated ones).
    while query.endswith(";"):
        query = query[:-1].rstrip()
    return f"SELECT COUNT(1) FROM ({query}{trailing_whitespace}) AS cnt_subquery"
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
@dataclass
class SSHTunnelConfig:
    """SSH tunnel settings: endpoint, user, and optional password / private key."""

    host: str
    port: int
    user: str
    password: Optional[str] = None
    # Private key contents as a string, not a file name.
    private_key_str: Optional[str] = None
    # Passphrase for the private key.
    private_key_passphrase: Optional[str] = None
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
from recurvedata.connectors.base import RecurveConnectorBase
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class FileConnectorABC(ABC):
|
|
7
|
+
@abstractmethod
|
|
8
|
+
def exists(self, key) -> bool:
|
|
9
|
+
...
|
|
10
|
+
|
|
11
|
+
@abstractmethod
|
|
12
|
+
def stat(self, key):
|
|
13
|
+
...
|
|
14
|
+
|
|
15
|
+
@abstractmethod
|
|
16
|
+
def test_connection(self):
|
|
17
|
+
...
|
|
18
|
+
|
|
19
|
+
@abstractmethod
|
|
20
|
+
def get(self, key, local_file):
|
|
21
|
+
...
|
|
22
|
+
|
|
23
|
+
@abstractmethod
|
|
24
|
+
def put(self, local_file, object_store_key):
|
|
25
|
+
...
|
|
26
|
+
|
|
27
|
+
@abstractmethod
|
|
28
|
+
def delete(self, key):
|
|
29
|
+
...
|
|
30
|
+
|
|
31
|
+
@abstractmethod
|
|
32
|
+
def ls(self, key):
|
|
33
|
+
...
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class FileConnectorMixin(FileConnectorABC, RecurveConnectorBase):
    """File connector that forwards every operation to ``self.connector``.

    Subclasses provide the underlying connector object by overriding
    ``init_connection``.
    """

    def __init__(self, conf: dict, *args, **kwargs):
        """Keep ``conf`` and build the connector from it.

        Extra positional and keyword arguments are accepted but not used.
        """
        self.conf = conf
        self.connector = self.init_connection(conf)

    def init_connection(self, conf):
        """Create the underlying connector; must be overridden by subclasses."""
        raise NotImplementedError

    def exists(self, key) -> bool:
        """Forward to ``connector.exists``."""
        return self.connector.exists(key)

    def stat(self, key):
        """Forward to ``connector.stat``."""
        return self.connector.stat(key)

    def mkdir(self, key):
        """Forward to ``connector.mkdir``."""
        return self.connector.mkdir(key)

    def test_connection(self):
        """Exercise the connection by listing ``"/"``; the listing is discarded."""
        self.connector.ls("/")

    def get(self, key, local_file):
        """Forward to ``connector.get``."""
        return self.connector.get(key, local_file)

    def put(self, local_file, object_store_key):
        """Forward to ``connector.put``."""
        return self.connector.put(local_file, object_store_key)

    def delete(self, key):
        """Forward to ``connector.rm``."""
        return self.connector.rm(key)

    def ls(self, key):
        """Forward to ``connector.ls``."""
        return self.connector.ls(key)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from recurvedata.connectors.base import RecurveConnectorBase
|
|
2
|
+
from recurvedata.connectors.fs import FileConnectorABC
|
|
3
|
+
from recurvedata.consts import ConnectionCategory
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class FTPMixin(RecurveConnectorBase, FileConnectorABC):
    """Storage-category connector that forwards file operations to ``self.connector``.

    Subclasses provide the underlying connector object by overriding
    ``init_connection``.
    """

    category = [ConnectionCategory.STORAGE]

    def __init__(self, conf: dict, *args, **kwargs):
        """Keep ``conf`` and build the connector from it.

        Extra positional and keyword arguments are accepted but not used.
        """
        self.conf = conf
        self.connector = self.init_connection(conf)

    def init_connection(self, conf):
        """Create the underlying connector; must be overridden by subclasses."""
        raise NotImplementedError

    def exists(self, key) -> bool:
        """Forward to ``connector.exists``."""
        return self.connector.exists(key)

    def stat(self, key):
        """Forward to ``connector.stat``."""
        return self.connector.stat(key)

    def mkdir(self, key):
        """Forward to ``connector.mkdir``."""
        return self.connector.mkdir(key)

    def test_connection(self):
        """Exercise the connection by listing ``"/"``; the listing is discarded."""
        self.connector.ls("/")

    def get(self, key, local_file):
        """Forward to ``connector.get``."""
        return self.connector.get(key, local_file)

    def put(self, local_file, object_store_key):
        """Forward to ``connector.put``."""
        return self.connector.put(local_file, object_store_key)

    def delete(self, key):
        """Forward to ``connector.rm``."""
        return self.connector.rm(key)

    def ls(self, key):
        """Forward to ``connector.ls``."""
        return self.connector.ls(key)
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from recurvedata.connectors.base import RecurveConnectorBase
|
|
2
|
+
from recurvedata.connectors.fs import FileConnectorABC
|
|
3
|
+
from recurvedata.consts import ConnectionCategory
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ObjectStoreMixin(RecurveConnectorBase, FileConnectorABC):
    """Storage-category connector that prefixes keys with the configured bucket.

    Every operation maps its key through ``bucket_key`` and then forwards
    to ``self.connector``. Subclasses provide the connector object by
    overriding ``init_connection``.
    """

    category = [ConnectionCategory.STORAGE]

    def __init__(self, conf: dict, *args, **kwargs):
        """Run the parent initialiser, then build the connector from ``conf``."""
        super().__init__(conf, *args, **kwargs)
        self.connector = self.init_connection(conf)

    def init_connection(self, conf):
        """Create the underlying connector; must be overridden by subclasses."""
        raise NotImplementedError

    def exists(self, key) -> bool:
        """Forward the bucket-qualified key to ``connector.exists``."""
        return self.connector.exists(self.bucket_key(key))

    def stat(self, key):
        """Forward the bucket-qualified key to ``connector.stat``."""
        return self.connector.stat(self.bucket_key(key))

    def mkdir(self, key):
        """Forward the bucket-qualified key to ``connector.mkdir``."""
        return self.connector.mkdir(self.bucket_key(key))

    def test_connection(self):
        """Exercise the connection by listing the bucket-qualified ``"/"``."""
        root = self.bucket_key("/")
        self.connector.ls(root)

    def get(self, key, local_file):
        """Forward the bucket-qualified key and ``local_file`` to ``connector.get``."""
        return self.connector.get(self.bucket_key(key), local_file)

    def put(self, local_file, object_store_key):
        """Forward ``local_file`` and the bucket-qualified key to ``connector.put``."""
        return self.connector.put(local_file, self.bucket_key(object_store_key))

    def delete(self, key):
        """Forward the bucket-qualified key to ``connector.rm``."""
        return self.connector.rm(self.bucket_key(key))

    def ls(self, key):
        """Forward the bucket-qualified key to ``connector.ls``."""
        return self.connector.ls(self.bucket_key(key))

    @property
    def bucket(self):
        """The ``"bucket"`` entry of ``self.conf`` (``None`` when absent)."""
        return self.conf.get("bucket")

    def bucket_key(self, key):
        """Prefix ``key`` with the bucket, joined by exactly one ``/``.

        When no bucket is configured, ``key`` is returned unchanged.
        """
        bucket = self.bucket
        if not bucket:
            return key
        # A key that already starts with "/" brings its own separator.
        separator = "" if key.startswith("/") else "/"
        return f"{bucket}{separator}{key}"

    # TODO: delete by prefix
|