recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib has been flagged as potentially problematic; click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
from sqlalchemy.engine import URL

from recurvedata.connectors._register import register_connector_class
from recurvedata.connectors.dbapi import DBAPIBase
from recurvedata.consts import ConnectorGroup

CONNECTION_TYPE = "phoenix"
UI_CONNECTION_TYPE = "HBase Phoenix"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class PhoenixConnector(DBAPIBase):
    """Connector for Apache HBase accessed through the Phoenix query server."""

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    setup_extras_require = ["sqlalchemy-phoenix"]
    driver = "phoenix"

    # JSON-schema describing the UI connection form. The Phoenix query
    # server listens on 8765 by default; no credentials are collected here.
    config_schema = {
        "type": "object",
        "properties": {
            "host": {"type": "string", "title": "Host Address"},
            "port": {
                "type": "number",
                "title": "Port Number",
                "default": 8765,
            },
        },
        "order": ["host", "port"],
        "required": ["host", "port"],
        "secret": [],
    }

    @property
    def sqlalchemy_url(self):
        """Build the SQLAlchemy URL for the Phoenix driver.

        On SQLAlchemy >= 1.4 ``URL`` is an immutable named tuple whose
        constructor can no longer be called with a partial set of keyword
        arguments; ``URL.create`` is the supported factory there. Older
        SQLAlchemy versions have no ``create``, so fall back to calling
        ``URL`` directly — this keeps both version ranges working.
        """
        url_factory = getattr(URL, "create", URL)
        return url_factory(self.driver, host=self.host, port=self.port)
from functools import cached_property
from typing import Any

from recurvedata.connectors._register import register_connector_class
from recurvedata.connectors.const import ENV_VAR_DBT_PASSWORD, ENV_VAR_DBT_USER
from recurvedata.connectors.datasource import DataSourceWrapper
from recurvedata.connectors.dbapi import DBAPIBase, with_ssh_tunnel
from recurvedata.consts import ConnectorGroup
from recurvedata.core.translation import _l

CONNECTION_TYPE = "postgres"
UI_CONNECTION_TYPE = "PostgreSQL"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class PostgresConnector(DBAPIBase):
    """Connector for PostgreSQL databases."""

    # Schemas/catalogs that should be hidden from object browsing.
    SYSTEM_DATABASES = [
        "information_schema",
        "pg_catalog",
        "pg_global",
        "pg_statistic",
        "pg_toast",
        "pg_temp_1",
        "pg_temp_2",
        "pg_toast_temp_1",
        "pg_toast_temp_2",
        "pg_type",
    ]
    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    setup_extras_require = ["psycopg2-binary"]
    driver = "postgresql"

    # JSON-schema driving the connection form in the UI.
    config_schema = {
        "type": "object",
        "properties": {
            "host": {
                "type": "string",
                "title": _l("Host Address"),
                "default": "127.0.0.1",
            },
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 5432,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "database": {
                "type": "string",
                "title": _l("Database Name"),
                "description": _l("The name of the database to connect to"),
            },
        },
        "order": ["host", "port", "user", "password", "database"],
        "required": [
            "host",
            "user",
            "password",
        ],
        "secret": ["password"],
    }

    # Normalized category -> list of PostgreSQL type names folded into it.
    column_type_mapping = {
        "integer": ["int2", "int4", "int8", "serial", "bigserial", "smallserial"],
        "float": ["float4", "float8", "real", "numeric", "decimal"],
        "datetime": ["timestamptz"],
        "time": ["timetz"],
        "binary": ["bytea"],
        "string": ["uuid"],
        "json": ["jsonb"],
    }

    # Extend base types with PostgreSQL specific types
    available_column_types = DBAPIBase.available_column_types + [
        # Numeric types
        "int2",  # alias for smallint
        "integer",
        "int4",  # alias for integer
        "int8",  # alias for bigint
        "real",
        "float4",  # alias for real
        "double precision",
        "float8",  # alias for double precision
        "serial",
        "serial4",  # alias for serial
        "bigserial",
        "serial8",  # alias for bigserial
        "smallserial",
        "serial2",  # alias for smallserial
        "money",
        "numeric",
        # Character types
        "text",
        "bpchar",  # blank-padded char
        "character",
        "character varying",
        # Date/Time types
        "timestamp without time zone",
        "timestamp with time zone",
        "timestamptz",  # alias for timestamp with time zone
        "time",
        "time without time zone",
        "time with time zone",
        "timetz",  # alias for time with time zone
        "interval",
        # Boolean type
        "boolean",
        "bool",  # alias for boolean
        # Geometric types
        "point",
        "line",
        "lseg",
        "box",
        "path",
        "polygon",
        "circle",
        # Network address types
        "cidr",
        "inet",
        "macaddr",
        "macaddr8",
        # Binary data
        "bytea",
        # UUID type
        "uuid",
        # JSON types
        "jsonb",
        # XML type
        "xml",
        # Bit string
        "bit",
        "bit varying",
        "varbit",
        # Text search
        "tsvector",
        "tsquery",
        # Range types
        "int4range",
        "int8range",
        "numrange",
        "tsrange",
        "tstzrange",
        "daterange",
    ]

    def convert_config_to_dbt_profile(self, database: str, schema: str = None) -> dict:
        """Render this connection as a dbt ``profiles.yml`` target dict.

        Credentials are emitted as environment-variable placeholders
        (``ENV_VAR_DBT_USER`` / ``ENV_VAR_DBT_PASSWORD``) rather than raw
        secrets. When *schema* is not given, a ``dbt_<database>`` default
        is used.
        """
        return {
            "host": self.host,
            "port": self.port,
            "user": ENV_VAR_DBT_USER,
            "password": ENV_VAR_DBT_PASSWORD,
            "dbname": database or self.database,
            "schema": schema or f"dbt_{database or self.database}",
            "type": self.connection_type,
        }

    @with_ssh_tunnel
    def get_columns(self, table: str, database: str = None) -> list:
        """Return column metadata for *table*.

        NOTE(review): the *database* value is matched against
        ``pg_namespace.nspname`` below, i.e. it is treated as a schema
        name — confirm against callers.

        Returns a list of dicts with keys ``name``, ``type``, ``nullable``,
        ``default`` and ``comment``, in attribute order.
        """
        database = database or self.database
        # Escape single quotes so the interpolated names cannot break out
        # of the SQL string literals (defense against SQL injection and
        # against identifiers that legitimately contain a quote).
        safe_schema = database.replace("'", "''")
        safe_table = table.replace("'", "''")
        query = f"""
        WITH table_info AS (
            SELECT c.oid AS table_oid
            FROM pg_class c
            JOIN pg_namespace n ON c.relnamespace = n.oid
            WHERE n.nspname = '{safe_schema}'
            AND c.relname = '{safe_table}'
        )
        SELECT
            a.attname AS column_name,
            t.typname AS data_type,
            NOT a.attnotnull AS nullable,
            pg_get_expr(ad.adbin, ad.adrelid) AS "default",
            col_description(a.attrelid, a.attnum) AS comment
        FROM pg_attribute a
        JOIN table_info ti ON a.attrelid = ti.table_oid
        JOIN pg_type t ON a.atttypid = t.oid
        LEFT JOIN pg_attrdef ad ON a.attrelid = ad.adrelid AND a.attnum = ad.adnum
        WHERE a.attnum > 0
        AND NOT a.attisdropped
        ORDER BY a.attnum;
        """
        result = self.fetchall(query)
        column_metas = []
        for row in result:
            column_metas.append(
                {
                    "name": row[0],
                    "type": row[1].lower() if row[1] else "",
                    "nullable": row[2],
                    "default": row[3],
                    "comment": row[4],
                }
            )
        return column_metas

    @cached_property
    @with_ssh_tunnel
    def type_code_mapping(self) -> dict:
        """Map ``pg_type`` OIDs to type names, cached per instance.

        Best-effort: on any failure an empty dict is returned (falsy, like
        the previous implicit ``None``) and callers fall back to psycopg2's
        builtin mapping.
        """
        try:
            rv = self.fetchall("SELECT oid, typname FROM pg_type")
            return {row[0]: row[1] for row in rv if row[1]}
        except Exception:
            return {}

    def sqlalchemy_column_type_code_to_name(self, type_code: Any, size: int | None = None) -> str:
        """Translate a DBAPI cursor type code (an OID) into a type name.

        Resolution order: the server-side ``pg_type`` mapping, then
        psycopg2's builtin ``string_types`` registry (restricted to
        ``available_column_types``), then ``"varchar"`` as a last resort.
        """
        if self.type_code_mapping:
            type_name = self.type_code_mapping.get(type_code)
            if type_name:
                return type_name

        # Imported lazily so the module stays importable without psycopg2.
        import psycopg2.extensions

        pg_type = psycopg2.extensions.string_types.get(type_code)
        pg_type_name = pg_type.name.lower() if pg_type else None
        if pg_type_name in self.available_column_types:
            return pg_type_name
        return "varchar"

    def convert_config_to_cube_config(
        self, database: str, schema: str = None, datasource: DataSourceWrapper = None
    ) -> dict:
        """Render this connection as a Cube.js datasource config dict.

        Credentials are taken from *datasource* (assumed non-None here —
        the original code dereferences it unconditionally).
        """
        return {
            "type": "postgres",
            "host": self.host,
            "port": self.port,
            "user": datasource.user,
            "password": datasource.password,
            "database": database or self.database,
        }
from recurvedata.connectors._register import register_connector_class
from recurvedata.connectors.base import RecurveConnectorBase
from recurvedata.consts import ConnectorGroup
from recurvedata.core.translation import _l

CONNECTION_TYPE = "python"
UI_CONNECTION_TYPE = "Python"


@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class Python(RecurveConnectorBase):
    """Connector describing a Python runtime environment.

    Unlike database connectors there is no remote endpoint to probe, so
    ``test_required`` is False and :meth:`test_connection` is a no-op.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    test_required = False

    # JSON-schema driving the connection form in the UI: interpreter
    # version, virtualenv name, and an optional requirements.txt body.
    config_schema = {
        "type": "object",
        "properties": {
            "python_version": {
                "type": "string",
                "title": _l("Python Version"),
                "enum": ["3.11.9", "3.10.14"],
                "enumNames": ["3.11.9", "3.10.14"],
                "default": "3.11.9",
            },
            "pyenv": {
                "type": "string",
                "title": _l("Python Virtual Environment Name"),
                "default": "recurve_executor",
            },
            "requirements": {
                "type": "string",
                "title": _l("Python Package Requirements"),
                "description": _l(
                    "List of Python packages and versions to install, the same format as requirements.txt"
                ),
                "ui:options": {"type": "textarea", "rows": 10},
            },
        },
        "order": ["python_version", "pyenv", "requirements"],
        "required": ["python_version", "pyenv"],
        "secret": [],
    }

    def test_connection(self):
        """No-op: a Python environment has nothing to connect to."""
        return None
@@ -0,0 +1,187 @@
|
|
|
1
|
+
from functools import cached_property
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from recurvedata.connectors._register import register_connector_class
|
|
5
|
+
from recurvedata.connectors.const import ENV_VAR_DBT_PASSWORD, ENV_VAR_DBT_USER
|
|
6
|
+
from recurvedata.connectors.datasource import DataSourceWrapper
|
|
7
|
+
from recurvedata.connectors.dbapi import DBAPIBase, with_ssh_tunnel
|
|
8
|
+
from recurvedata.consts import ConnectorGroup
|
|
9
|
+
from recurvedata.core.translation import _l
|
|
10
|
+
|
|
11
|
+
CONNECTION_TYPE = "redshift"
|
|
12
|
+
UI_CONNECTION_TYPE = "Redshift"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
|
|
16
|
+
class RedshiftConnector(DBAPIBase):
|
|
17
|
+
SYSTEM_DATABASES = ["information_schema", "pg_catalog", "public", "temporary"]
|
|
18
|
+
connection_type = CONNECTION_TYPE
|
|
19
|
+
ui_connection_type = UI_CONNECTION_TYPE
|
|
20
|
+
group = [ConnectorGroup.DESTINATION]
|
|
21
|
+
driver = "redshift+psycopg2"
|
|
22
|
+
setup_extras_require = ["psycopg2-binary", "sqlalchemy_redshift==0.8.15+recurve"]
|
|
23
|
+
config_schema = {
|
|
24
|
+
"type": "object",
|
|
25
|
+
"properties": {
|
|
26
|
+
"host": {"type": "string", "title": _l("Host Address")},
|
|
27
|
+
"port": {
|
|
28
|
+
"type": "number",
|
|
29
|
+
"title": _l("Port Number"),
|
|
30
|
+
"default": 5439,
|
|
31
|
+
},
|
|
32
|
+
"user": {"type": "string", "title": _l("Username")},
|
|
33
|
+
"password": {"type": "string", "title": _l("Password")},
|
|
34
|
+
"database": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"title": _l("Database Name"),
|
|
37
|
+
"description": _l("The name of the database to connect to"),
|
|
38
|
+
},
|
|
39
|
+
"s3_options": {
|
|
40
|
+
"type": "object",
|
|
41
|
+
"title": _l("S3 Configuration"),
|
|
42
|
+
"description": _l("AWS S3 credentials for data loading and unloading"),
|
|
43
|
+
"properties": {
|
|
44
|
+
"access_key_id": {"type": "string", "title": _l("AWS Access Key ID")},
|
|
45
|
+
"secret_access_key": {"type": "string", "title": _l("AWS Secret Access Key")},
|
|
46
|
+
"region": {"type": "string", "title": _l("AWS Region")},
|
|
47
|
+
},
|
|
48
|
+
"order": ["access_key_id", "secret_access_key", "region"],
|
|
49
|
+
},
|
|
50
|
+
},
|
|
51
|
+
"order": ["host", "port", "user", "password", "database", "s3_options"],
|
|
52
|
+
"required": ["host", "port"],
|
|
53
|
+
"secret": ["password"],
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
# All supported Redshift data types based on official documentation
|
|
57
|
+
available_column_types = [
|
|
58
|
+
# Numeric types
|
|
59
|
+
"smallint",
|
|
60
|
+
"int2",
|
|
61
|
+
"integer",
|
|
62
|
+
"int",
|
|
63
|
+
"int4",
|
|
64
|
+
"bigint",
|
|
65
|
+
"int8",
|
|
66
|
+
"decimal",
|
|
67
|
+
"numeric",
|
|
68
|
+
"real",
|
|
69
|
+
"float",
|
|
70
|
+
"float4",
|
|
71
|
+
"double precision",
|
|
72
|
+
"float8",
|
|
73
|
+
# Character types
|
|
74
|
+
"char",
|
|
75
|
+
"character",
|
|
76
|
+
"nchar",
|
|
77
|
+
"bpchar",
|
|
78
|
+
"varchar",
|
|
79
|
+
"character varying",
|
|
80
|
+
"nvarchar",
|
|
81
|
+
"text",
|
|
82
|
+
# Datetime types
|
|
83
|
+
"date",
|
|
84
|
+
"timestamp",
|
|
85
|
+
"timestamptz",
|
|
86
|
+
"timestamp with time zone",
|
|
87
|
+
"timestamp without time zone",
|
|
88
|
+
"time",
|
|
89
|
+
"timetz",
|
|
90
|
+
"time with time zone",
|
|
91
|
+
"time without time zone",
|
|
92
|
+
# Boolean type
|
|
93
|
+
"boolean",
|
|
94
|
+
"bool",
|
|
95
|
+
# Special types
|
|
96
|
+
"super",
|
|
97
|
+
"hllsketch",
|
|
98
|
+
"geometry",
|
|
99
|
+
"geography",
|
|
100
|
+
"varbyte",
|
|
101
|
+
]
|
|
102
|
+
|
|
103
|
+
def convert_config_to_dbt_profile(self, database: str, schema: str = None) -> dict:
    """Build a dbt profile target dict for this Redshift connection.

    Credentials are referenced through the ENV_VAR_DBT_USER /
    ENV_VAR_DBT_PASSWORD placeholders rather than inlined, so the rendered
    profile never contains plain-text secrets.

    :param database: target database name (dbt ``dbname``)
    :param schema: optional target schema
    :return: dict suitable for a dbt ``profiles.yml`` target
    """
    profile = dict(
        host=self.host,
        port=self.port,
        user=ENV_VAR_DBT_USER,
        password=ENV_VAR_DBT_PASSWORD,
        dbname=database,
        schema=schema,
        type=self.connection_type,
        threads=10,
    )
    return profile
|
|
114
|
+
|
|
115
|
+
@with_ssh_tunnel
def get_columns(self, table: str, database=None):
    """Return column metadata for ``table``.

    Queries the pg_catalog tables directly rather than information_schema.
    Each returned dict has keys: ``name``, ``type`` (lower-cased),
    ``nullable``, ``default`` and ``comment``.

    :param table: relation name
    :param database: namespace (schema) to look in; defaults to ``self.database``
    :return: list of column-metadata dicts, in attribute order
    """
    database = database or self.database
    # The identifiers are interpolated into SQL string literals; escape
    # single quotes so a quoted name cannot break out of the literal.
    # NOTE(review): values normally come from trusted config/UI — this is
    # defensive hardening, not full injection protection.
    schema_lit = str(database).replace("'", "''")
    table_lit = str(table).replace("'", "''")
    query = f"""
    WITH table_info AS (
        SELECT c.oid AS table_oid
        FROM pg_class c
        JOIN pg_namespace n ON c.relnamespace = n.oid
        WHERE n.nspname = '{schema_lit}'
          AND c.relname = '{table_lit}'
    )
    SELECT
        a.attname AS column_name,
        t.typname AS data_type,
        NOT a.attnotnull AS nullable,
        pg_get_expr(ad.adbin, ad.adrelid) AS "default",
        col_description(a.attrelid, a.attnum) AS comment
    FROM pg_attribute a
    JOIN table_info ti ON a.attrelid = ti.table_oid
    JOIN pg_type t ON a.atttypid = t.oid
    LEFT JOIN pg_attrdef ad ON a.attrelid = ad.adrelid AND a.attnum = ad.adnum
    WHERE a.attnum > 0
      AND NOT a.attisdropped
    ORDER BY a.attnum;
    """
    rows = self.fetchall(query)
    return [
        {
            "name": row[0],
            # typname may be NULL-ish in odd catalogs; normalize to "".
            "type": row[1].lower() if row[1] else "",
            "nullable": row[2],
            "default": row[3],
            "comment": row[4],
        }
        for row in rows
    ]
|
|
153
|
+
|
|
154
|
+
@cached_property
@with_ssh_tunnel
def type_code_mapping(self) -> dict:
    """Mapping of pg_type oid -> type name, cached per instance.

    Fix: the original implicitly returned ``None`` on failure despite the
    ``-> dict`` annotation; return an empty dict instead. Both are falsy,
    so existing ``if self.type_code_mapping:`` callers are unaffected.
    """
    try:
        rows = self.fetchall("SELECT oid, typname FROM pg_catalog.pg_type")
    except Exception:
        # Best effort: when the catalog is unreadable, downstream code
        # falls back to psycopg2's static type table.
        return {}
    return {row[0]: row[1] for row in rows if row[1]}
|
|
162
|
+
|
|
163
|
+
def sqlalchemy_column_type_code_to_name(self, type_code: Any, size: int | None = None) -> str:
|
|
164
|
+
if self.type_code_mapping:
|
|
165
|
+
type_name = self.type_code_mapping.get(type_code)
|
|
166
|
+
if type_name:
|
|
167
|
+
return type_name
|
|
168
|
+
|
|
169
|
+
import psycopg2.extensions
|
|
170
|
+
|
|
171
|
+
pg_type = psycopg2.extensions.string_types.get(type_code)
|
|
172
|
+
pg_type_name = pg_type.name.lower() if pg_type else None
|
|
173
|
+
if pg_type_name in self.available_column_types:
|
|
174
|
+
return pg_type_name
|
|
175
|
+
return "varchar"
|
|
176
|
+
|
|
177
|
+
def convert_config_to_cube_config(
    self, database: str, schema: str = None, datasource: DataSourceWrapper = None
) -> dict:
    """Translate this connection into a Cube.js data-source config.

    Cube connects with the *datasource* user's own credentials, not the
    connection's, so per-user permissions apply.

    :param database: database to target; falls back to ``self.database``
    :param schema: unused; kept for signature compatibility
    :param datasource: wrapper supplying the per-user ``user``/``password``
    :return: Cube.js driver configuration dict
    """
    cube_config = dict(
        type="redshift",
        host=self.host,
        port=self.port,
        user=datasource.user,
        password=datasource.password,
        database=database or self.database,
    )
    return cube_config
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from recurvedata.connectors._register import register_connector_class
|
|
5
|
+
from recurvedata.connectors.object_store import ObjectStoreMixin
|
|
6
|
+
from recurvedata.connectors.proxy import HTTP_PROXY_CONFIG_SCHEMA, HttpProxyMixin
|
|
7
|
+
from recurvedata.connectors.utils import juice_sync_process_special_character_within_secret
|
|
8
|
+
from recurvedata.consts import ConnectorGroup
|
|
9
|
+
from recurvedata.core.translation import _l
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import boto3
|
|
13
|
+
from botocore.config import Config
|
|
14
|
+
from s3fs import S3FileSystem
|
|
15
|
+
except ImportError:
|
|
16
|
+
S3FileSystem = None
|
|
17
|
+
|
|
18
|
+
# Registry identifiers for this connector: machine key and UI display label.
CONNECTION_TYPE = "s3"
UI_CONNECTION_TYPE = "AWS S3"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class S3(HttpProxyMixin, ObjectStoreMixin):
    """AWS S3 connector.

    Filesystem-style access is provided through s3fs (fsspec); the
    connection test uses boto3 directly. HTTP(S) proxying comes from
    HttpProxyMixin.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    setup_extras_require = ["fsspec[s3]", "s3fs>=2021.08", "boto3"]
    # This connector can serve as a JuiceFS sync endpoint (see juice_sync_path).
    juice_sync_able = True

    config_schema = {
        "type": "object",
        "properties": {
            "access_key_id": {"type": "string", "title": _l("AWS Access Key ID")},
            "secret_access_key": {"type": "string", "title": _l("AWS Secret Access Key")},
            "region": {"type": "string", "title": _l("AWS Region")},
            "bucket": {"type": "string", "title": _l("Bucket Name")},
            "proxies": HTTP_PROXY_CONFIG_SCHEMA["proxies"],
        },
        "order": ["access_key_id", "secret_access_key", "region", "bucket", "proxies"],
        "required": ["access_key_id", "secret_access_key"],
        "secret": ["secret_access_key"],
    }

    def init_connection(self, conf) -> S3FileSystem:
        """Create an S3FileSystem from ``conf`` and cache it on ``self.connector``."""
        # todo: proxy
        with self._init_proxy_manager():
            client_kwargs = {}
            # Only forward the region when it is configured and non-empty.
            if conf.get("region"):
                client_kwargs["region_name"] = conf["region"]
            con = S3FileSystem(key=conf["access_key_id"], secret=conf["secret_access_key"], client_kwargs=client_kwargs)
            self.connector = con
            return con

    def test_connection(self):
        """Verify credentials by listing the configured bucket (or all buckets).

        :raises botocore.exceptions.ClientError: on auth/permission failure
        """
        with self._init_proxy_manager():
            logger.info(
                f'test s3 connection with bucket {self.bucket} and region {self.region}, proxy: {os.environ.get("http_proxy")}'
            )

            session = boto3.Session(
                aws_access_key_id=self.access_key_id,
                aws_secret_access_key=self.secret_access_key,
                region_name=self.region,
            )
            # Bounded timeouts so a bad network/proxy config fails fast
            # instead of hanging the health check.
            timeout = 30
            s3_client = session.client("s3", config=Config(connect_timeout=timeout, read_timeout=timeout))
            if self.bucket:
                # Cheapest call that proves access to the specific bucket.
                s3_client.list_objects_v2(Bucket=self.bucket, MaxKeys=1)
            else:
                s3_client.list_buckets()

    @property
    def endpoint(self):
        """Regional S3 endpoint host; requires ``region`` to be configured.

        :raises ValueError: when no region is set
        """
        if not self.region:
            raise ValueError("there is no region for endpoint")
        return f"s3.{self.region}.amazonaws.com"

    def juice_sync_path(self, path: str) -> tuple[str, str]:
        """Build s3:// URIs for JuiceFS sync.

        Fix: the original annotation said ``-> str`` although a 2-tuple is
        returned.

        :param path: the s3 path, note that path is not started with bucket
        :return: ``(path_with_secret, path_without_secret)`` — the second
            form omits the credentials and is safe for logging
        :raises ValueError: when no bucket is configured
        """
        if not self.bucket:
            raise ValueError("the connection bucket cannot be empty in juice sync")
        secret_part = f"{self.access_key_id}:{self.secret_access_key}"
        # Presumably escapes URI-breaking characters in the credentials —
        # see the helper's implementation for the exact transformation.
        secret_part = juice_sync_process_special_character_within_secret(secret_part)
        endpoint_path_part = f'{self.bucket}.{self.endpoint}/{path.lstrip("/")}'
        path_with_secret = f"s3://{secret_part}@{endpoint_path_part}"
        path_without_secret = f"s3://{endpoint_path_part}"
        return path_with_secret, path_without_secret
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from recurvedata.consts import ConnectorGroup
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from fsspec.implementations.sftp import SFTPFileSystem
|
|
7
|
+
except ImportError:
|
|
8
|
+
SFTPFileSystem = None
|
|
9
|
+
|
|
10
|
+
from recurvedata.connectors._register import register_connector_class
|
|
11
|
+
from recurvedata.connectors.ftp import FTPMixin
|
|
12
|
+
from recurvedata.core.translation import _l
|
|
13
|
+
|
|
14
|
+
# Registry identifiers for this connector: machine key and UI display label.
CONNECTION_TYPE = "sftp"
UI_CONNECTION_TYPE = "SFTP"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class SFTP(FTPMixin):
    """SFTP connector backed by fsspec's SFTPFileSystem (paramiko)."""

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    setup_extras_require = ["fsspec[sftp]", "paramiko"]
    # This connector can serve as a JuiceFS sync endpoint (see juice_sync_path).
    juice_sync_able = True

    config_schema = {
        "type": "object",
        "properties": {
            "host": {
                "type": "string",
                "title": _l("Host Address"),
                "default": "127.0.0.1",
            },
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 22,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "private_key_path": {"type": "string", "title": _l("Private Key File Path")},
        },
        "order": ["host", "port", "user", "password", "private_key_path"],
        "required": ["host", "port"],
        "secret": ["password"],
    }

    def _build_ssh_kwargs(self) -> dict:
        """Build the fsspec ``ssh_kwargs`` for SFTPFileSystem.

        :return: kwargs dict with username, password, port and optional pkey
        """
        import paramiko

        pkey = None
        pk_path = self.conf.get("private_key_path")
        if pk_path:
            pk_path = os.path.expanduser(pk_path)
            # NOTE(review): assumes an RSA key file, and reuses the configured
            # password as the key passphrase — confirm both are intended.
            pkey = paramiko.RSAKey.from_private_key_file(pk_path, password=self.conf.get("password"))
        return {
            "username": self.conf["user"],
            "password": self.conf.get("password"),
            "port": self.conf["port"],
            "pkey": pkey,
        }

    def init_connection(self, conf) -> SFTPFileSystem:
        """Create an SFTPFileSystem for the configured host and cache it."""
        con = SFTPFileSystem(host=conf["host"], **self._build_ssh_kwargs())
        self.connector = con
        return con

    def test_connection(self):
        """List the login directory to verify connectivity and authentication."""
        self.connector.ls(".")

    def juice_sync_path(self, path: str) -> tuple[str, str]:
        """Build user:password@host:port paths for JuiceFS sync.

        Fix: the original annotation said ``-> str`` although a 2-tuple is
        returned.

        :param path: remote path to sync
        :return: ``(secret_path, non_secret_path)`` — the second masks the
            password and is safe for logging
        """
        from urllib.parse import quote

        username = self.conf["user"]
        password = self.conf["password"]
        # Percent-encode the password so special characters survive URI parsing.
        password = quote(password)
        port = self.conf["port"]
        host = self.conf["host"]
        # tmp only allow password
        secret_path = f"{username}:{password}@{host}:{port}{path}"
        non_secret_path = f"{username}:********@{host}:{port}{path}"
        return secret_path, non_secret_path
|