recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from recurvedata.connectors._register import register_connector_class
|
|
4
|
+
from recurvedata.connectors.base import RecurveConnectorBase
|
|
5
|
+
from recurvedata.consts import ConnectorGroup
|
|
6
|
+
from recurvedata.core.translation import _l
|
|
7
|
+
|
|
8
|
+
CONNECTION_TYPE = "generic"
|
|
9
|
+
UI_CONNECTION_TYPE = "Generic"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class GenericConnector(RecurveConnectorBase):
    """Connector described only by host/port/credentials plus a free-form JSON blob.

    The class declares the configuration form (``config_schema``) and
    normalises the ``custom`` field; it performs no connectivity check.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    # No connection test is required for this connector type.
    test_required = False

    # JSON-schema style description of the configuration form.
    config_schema = {
        "type": "object",
        "properties": {
            "host": {"type": "string", "title": _l("Host")},
            "port": {"type": "integer", "title": _l("Port")},
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "timeout": {
                "type": "integer",
                "title": _l("Timeout (seconds)"),
                "default": 30,
            },
            # Entered as JSON text in a code editor; parsed by preprocess_conf().
            "custom": {
                "type": "string",
                "title": _l("Custom Configuration"),
                "description": _l("Custom configuration parameters in JSON format"),
                "ui:field": "CodeEditorWithReferencesField",
                "ui:options": {"type": "code", "lang": "json"},
            },
        },
        "order": ["host", "port", "user", "password", "timeout", "custom"],
        "required": ["host", "port", "user", "password"],
        "secret": ["password"],
    }

    def test_connection(self):
        """Do nothing: there is no check implemented for a generic connection."""

    @staticmethod
    def preprocess_conf(data):
        """Run the base preprocessing, then decode ``custom`` from JSON text.

        :param data: raw connection configuration mapping
        :return: the mapping returned by ``RecurveConnectorBase.preprocess_conf``,
            with ``custom`` replaced by its parsed JSON value when it was a
            non-empty string
        :raises json.JSONDecodeError: if ``custom`` is a non-empty string that
            is not valid JSON
        """
        conf = RecurveConnectorBase.preprocess_conf(data)
        raw_custom = conf.get("custom")
        # Leave missing/empty values and already-decoded objects untouched.
        if not (raw_custom and isinstance(raw_custom, str)):
            return conf
        conf["custom"] = json.loads(raw_custom)
        return conf
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from recurvedata.connectors._register import register_connector_class
|
|
2
|
+
from recurvedata.connectors.object_store import ObjectStoreMixin
|
|
3
|
+
from recurvedata.connectors.proxy import HTTP_PROXY_CONFIG_SCHEMA, HttpProxyMixin
|
|
4
|
+
from recurvedata.consts import ConnectorGroup
|
|
5
|
+
from recurvedata.core.translation import _l
|
|
6
|
+
|
|
7
|
+
CONNECTION_TYPE = "google_cloud_storage"
|
|
8
|
+
UI_CONNECTION_TYPE = "Google Cloud Storage"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class GoogleCloudStorage(HttpProxyMixin, ObjectStoreMixin):
    """Connector declaration for a Google Cloud Storage bucket.

    Only the configuration schema and the sync-path helper carry logic here;
    ``init_connection`` and ``test_connection`` are still placeholders.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    setup_extras_require = []
    group = [ConnectorGroup.DESTINATION]
    test_required = False
    juice_sync_able = True

    # JSON-schema style description of the configuration form.
    config_schema = {
        "type": "object",
        "properties": {
            # Nested object mirroring the fields of a service account key file.
            "key_dict": {
                "type": "object",
                "title": _l("Service Account Key"),
                "description": _l("Google Cloud service account key credentials"),
                "properties": {
                    "type": {
                        "type": "string",
                        "title": _l("Account Type"),
                        "default": "service_account",
                    },
                    "project_id": {"type": "string", "title": _l("Google Cloud Project ID")},
                    "private_key_id": {"type": "string", "title": _l("Google Auth Private Key ID")},
                    "private_key": {
                        "type": "string",
                        "title": _l("Google Auth Private Key"),
                        "ui:options": {"type": "textarea"},
                    },
                    "client_email": {"type": "string", "title": _l("Service Account Email")},
                    "client_id": {"type": "string", "title": _l("Google OAuth Client ID")},
                    "auth_uri": {
                        "type": "string",
                        "title": _l("Google OAuth Auth URI"),
                        "default": "https://accounts.google.com/o/oauth2/auth",
                    },
                    "token_uri": {
                        "type": "string",
                        "title": _l("Google OAuth Token URI"),
                        "default": "https://oauth2.googleapis.com/token",
                    },
                    "auth_provider_x509_cert_url": {
                        "type": "string",
                        "title": _l("Google OAuth Certificate URL (Auth Provider)"),
                        "default": "https://www.googleapis.com/oauth2/v1/certs",
                    },
                    # NOTE(review): the default below names one specific service
                    # account; confirm it is meant to be a shared default.
                    "client_x509_cert_url": {
                        "type": "string",
                        "title": _l("Google OAuth Certificate URL (Client)"),
                        "default": "https://www.googleapis.com/robot/v1/metadata/x509/recurvedata-gcs%40brand-portal-prod.iam.gserviceaccount.com",
                    },
                },
                "order": [
                    "type",
                    "project_id",
                    "private_key_id",
                    "private_key",
                    "client_email",
                    "client_id",
                    "auth_uri",
                    "token_uri",
                    "auth_provider_x509_cert_url",
                    "client_x509_cert_url",
                ],
                # NOTE(review): "client_id" is required here while "client_email"
                # is not -- confirm this is intentional.
                "required": [
                    "type",
                    "project_id",
                    "private_key_id",
                    "private_key",
                    "client_id",
                ],
                "secret": ["private_key"],
            },
            "bucket": {
                "type": "string",
                "title": _l("Bucket Name"),
                "description": _l("Name of the Google Cloud Storage bucket"),
            },
            "proxies": HTTP_PROXY_CONFIG_SCHEMA["proxies"],
        },
        "order": ["key_dict", "bucket", "proxies"],
        "required": ["key_dict"],
        "secret": ["key_dict.private_key"],
    }

    def init_connection(self, conf):
        """Placeholder: no client is built yet, ``self.connector`` is set to None.

        :param conf: connection configuration (currently unused)
        """
        self.connector = None  # todo

    def test_connection(self):
        """Placeholder: no connectivity check is implemented yet."""
        # todo

    def juice_sync_path(self, path: str) -> str:
        """Return ``path`` prefixed with the ``gcs://`` scheme.

        :param path: path to prefix
        :return: ``"gcs://"`` followed by ``path``
        """
        synced = f"gcs://{path}"  # todo
        return synced
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from recurvedata.consts import ConnectionCategory, ConnectorGroup
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from pandas import DataFrame
|
|
7
|
+
except ImportError:
|
|
8
|
+
pass
|
|
9
|
+
|
|
10
|
+
from recurvedata.connectors._register import register_connector_class
|
|
11
|
+
from recurvedata.connectors.base import RecurveConnectorBase
|
|
12
|
+
from recurvedata.connectors.const import LoadMode
|
|
13
|
+
from recurvedata.connectors.proxy import HTTP_PROXY_CONFIG_SCHEMA, HttpProxyMixin
|
|
14
|
+
from recurvedata.core.translation import _l
|
|
15
|
+
|
|
16
|
+
CONNECTION_TYPE = "google_service_account"
|
|
17
|
+
UI_CONNECTION_TYPE = "Google Service Account"
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
import gspread # noqa
|
|
21
|
+
import pandas as pd
|
|
22
|
+
from google.oauth2 import service_account
|
|
23
|
+
from gspread.worksheet import Worksheet # noqa
|
|
24
|
+
except ImportError:
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class GoogleServiceAccount(HttpProxyMixin, RecurveConnectorBase):
    """Google service account connector with Google Sheets read/write helpers.

    ``gspread``, ``pandas`` and ``google-auth`` are imported optionally at
    module level, so every annotation that mentions them is written as a
    string: the class must stay importable when they are not installed.
    """

    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    group = [ConnectorGroup.DESTINATION]
    category = [ConnectionCategory.SERVICE]
    setup_extras_require = [
        "google-auth",
    ]
    # gspread is not added to setup_extras_require for now; it is added to the
    # operator's setup instead.
    default_timeout = 120  # passed to the gspread client's set_timeout()

    # JSON-schema style description of the configuration form.
    config_schema = {
        "type": "object",
        "properties": {
            "project_id": {"type": "string", "title": _l("Google Cloud Project ID")},
            "private_key_id": {"type": "string", "title": _l("Google Auth Private Key ID")},
            "private_key": {
                "type": "string",
                "title": _l("Google Auth Private Key"),
                "ui:options": {"type": "textarea"},
            },
            "client_email": {"type": "string", "title": _l("Service Account Email")},
            "client_id": {"type": "string", "title": _l("Google OAuth Client ID")},
            "auth_uri": {
                "type": "string",
                "title": _l("Google OAuth Auth URI"),
                "default": "https://accounts.google.com/o/oauth2/auth",
            },
            "token_uri": {
                "type": "string",
                "title": _l("Google OAuth Token URI"),
                "default": "https://oauth2.googleapis.com/token",
            },
            "auth_provider_x509_cert_url": {
                "type": "string",
                "title": _l("Google OAuth Certificate URL (Auth Provider)"),
                "default": "https://www.googleapis.com/oauth2/v1/certs",
            },
            "client_x509_cert_url": {
                "type": "string",
                "title": _l("Google OAuth Certificate URL (Client)"),
                "default": "https://www.googleapis.com/robot/v1/metadata/x509/recurvedata-gcs%40brand-portal-prod.iam.gserviceaccount.com",
            },
            "universe_domain": {
                "type": "string",
                "title": _l("Universe Domain"),
                "default": "googleapis.com",
            },
            "proxies": HTTP_PROXY_CONFIG_SCHEMA["proxies"],
        },
        "order": [
            "project_id",
            "private_key_id",
            "private_key",
            "client_email",
            "client_id",
            "auth_uri",
            "token_uri",
            "auth_provider_x509_cert_url",
            "client_x509_cert_url",
            "universe_domain",
            "proxies",
        ],
        "required": [
            "project_id",
            "private_key_id",
            "private_key",
            "client_email",
        ],
        "secret": [
            "private_key",
        ],
    }

    def init_credential_key_dict(self):
        """Build the service-account key dictionary from this connector's fields.

        Side effect: ``self.private_key`` is overwritten with its
        newline-normalised form (see ``_convert_private_key``).

        :return: dict with the service account key fields; ``client_id`` is
            included only when it is set
        """
        self.private_key = self._convert_private_key(self.private_key)
        _key_dict = {
            "type": "service_account",
            "project_id": self.project_id,
            "private_key_id": self.private_key_id,
            "private_key": self.private_key,
            "client_email": self.client_email,
            "auth_uri": self.auth_uri,
            "token_uri": self.token_uri,
            "auth_provider_x509_cert_url": self.auth_provider_x509_cert_url,
            "client_x509_cert_url": self.client_x509_cert_url,
            "universe_domain": self.universe_domain,
        }
        if self.client_id:
            _key_dict["client_id"] = self.client_id
        return _key_dict

    def init_credential(self):
        """Create google-auth service account credentials from the key dictionary.

        :return: the object returned by
            ``service_account.Credentials.from_service_account_info``
        """
        credentials = service_account.Credentials.from_service_account_info(info=self.init_credential_key_dict())
        return credentials

    @staticmethod
    def _convert_private_key(private_key: str) -> str:
        """Turn escaped newlines in a pasted private key into real newlines.

        :param private_key: key text as entered in the configuration
        :return: the key with ``backslash + newline`` and the two-character
            sequence ``\\n`` both replaced by a single newline
        """
        # Depending on how the JSON was formatted, it may contain
        # escaped newlines. Convert those to actual newlines.
        private_key = private_key.replace("\\\n", "\n")
        return private_key.replace("\\n", "\n")

    def test_connection(self):
        """Build the credentials inside the proxy context; no request is made here."""
        # No real validation for now. Building the credentials appears to raise
        # if the private key is malformed.
        with self._init_proxy_manager():
            _ = self.init_credential()

    def get_sheet(self, url: str, sheet_gid: int = None):
        """Open a spreadsheet by URL and return one of its worksheets.

        :param url: spreadsheet URL
        :param sheet_gid: id of the wanted worksheet; when omitted the first
            worksheet is returned
        :return: the worksheet whose ``id`` equals ``sheet_gid``; otherwise the
            first worksheet; ``None`` when the spreadsheet has no worksheets
        """
        with self._init_proxy_manager():
            gc = gspread.service_account_from_dict(self.init_credential_key_dict())
            gc.set_timeout(self.default_timeout)
            spread_sheet = gc.open_by_url(url)
            sheets = spread_sheet.worksheets()
            if sheet_gid is not None:
                for sheet in sheets:
                    if sheet.id == sheet_gid:
                        return sheet
            # NOTE(review): an unknown sheet_gid also ends up here and silently
            # yields the first worksheet -- confirm callers rely on that.
            if sheets:
                return sheets[0]

    def read_sheet_to_df(
        self, sheet: "Worksheet", cell_range: str = None, columns: list[str] = None, dataframe_kwargs: dict = None
    ) -> "pd.DataFrame":
        """Read worksheet values into a DataFrame.

        :param sheet: worksheet to read
        :param cell_range: range such as ``'A1:B5'``; when omitted the whole
            sheet is read
        :param columns: column names such as ``['col1', 'col2', 'col3']``;
            when omitted the first row of the data is used as the header
        :param dataframe_kwargs: extra keyword arguments passed to
            ``pandas.DataFrame``
        :return: DataFrame with the sheet values; an empty DataFrame when the
            sheet/range holds no rows and no ``columns`` were given
        """
        with self._init_proxy_manager():  # todo: use wrapper
            if not cell_range:
                data = sheet.get_all_values()
            else:
                data = sheet.get(cell_range)
            dataframe_kwargs = dataframe_kwargs or {}
            if columns:
                return pd.DataFrame(data, columns=columns, **dataframe_kwargs)
            if not data:
                # No header row to promote: avoid IndexError on data[0].
                return pd.DataFrame([], columns=[], **dataframe_kwargs)
            return pd.DataFrame(data[1:], columns=data[0], **dataframe_kwargs)

    @staticmethod
    def parse_sheet_url(url: str) -> "tuple[str, int | None]":
        """Extract the spreadsheet id and the worksheet gid from a sheet URL.

        :param url: https://docs.google.com/spreadsheets/d/118WyiPGFQ3ni7Gp6oNhZtkc9wEmAPfqAWynvP2ufgPk/edit#gid=1996978628
        :return: ("118WyiPGFQ3ni7Gp6oNhZtkc9wEmAPfqAWynvP2ufgPk", 1996978628);
            the second item is ``None`` when the URL has no ``gid=<digits>`` part
        """
        from gspread.utils import extract_id_from_url

        spread_sheet_id = extract_id_from_url(url)
        gid_pat = re.compile(r"gid=(?P<gid>\d+)")
        gid_mobj = gid_pat.search(url)
        sheet_id = int(gid_mobj.group("gid")) if gid_mobj else None
        return spread_sheet_id, sheet_id

    def load_df_to_sheet(self, df: "DataFrame", sheet: "Worksheet", mode: str, **kwargs):
        """Write a DataFrame to a worksheet.

        Args:
            df (DataFrame): data to write; its column names are the header row
            sheet (Worksheet): target worksheet
            mode (str): OVERWRITE/APPEND
            **kwargs: forwarded to ``load_values_to_sheet``
        """
        # The annotation of ``df`` is a string on purpose: pandas is imported
        # optionally, and an unquoted name would raise NameError at import time.
        headers = df.columns.values.tolist()
        values = df.values.tolist()
        with self._init_proxy_manager():
            self.load_values_to_sheet(headers, values, sheet, mode, **kwargs)

    @staticmethod
    def load_values_to_sheet(headers: list[str], values: list[list], sheet: "Worksheet", mode: str, **kwargs):
        """Write rows to a worksheet according to ``mode``.

        :param headers: header row, written only in OVERWRITE mode
        :param values: data rows
        :param sheet: target worksheet
        :param mode: ``LoadMode.OVERWRITE`` clears the sheet and writes header
            plus rows; ``LoadMode.APPEND`` inserts the rows after the rows
            returned by ``get_all_values()``; any other value does nothing
        :param kwargs: forwarded to ``sheet.update`` / ``sheet.insert_rows``
        """
        if mode == LoadMode.OVERWRITE:
            sheet.clear()
            sheet.update(
                [
                    headers,
                ]
                + values,
                **kwargs,
            )
        elif mode == LoadMode.APPEND:
            # The whole sheet is fetched only to find the next row index.
            existing_rows = sheet.get_all_values()
            next_row = len(existing_rows) + 1
            sheet.insert_rows(values, row=next_row, **kwargs)
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Optional, Type
|
|
3
|
+
|
|
4
|
+
from sqlalchemy.engine.url import URL
|
|
5
|
+
|
|
6
|
+
from recurvedata.connectors import get_connection_class
|
|
7
|
+
from recurvedata.connectors._register import register_connector_class
|
|
8
|
+
from recurvedata.connectors.connectors.mysql import CONNECTION_TYPE as MYSQL_CONNECTION_TYPE
|
|
9
|
+
from recurvedata.connectors.connectors.postgres import CONNECTION_TYPE as POSTGRES_CONNECTION_TYPE
|
|
10
|
+
from recurvedata.connectors.dbapi import DBAPIBase
|
|
11
|
+
from recurvedata.consts import ConnectionCategory, ConnectorGroup
|
|
12
|
+
from recurvedata.core.translation import _l
|
|
13
|
+
|
|
14
|
+
# Delimiter characters for Hive data, spelled as control-character escapes
# (U+0001 .. U+0003).
HIVE_FIELD_DELIMITER = "\x01"
# Array elements and map entries use the same separator character.
HIVE_ARRAY_DELIMITER = "\x02"
HIVE_MAP_ITEM_DELIMITER = "\x02"
HIVE_MAP_KV_DELIMITER = "\x03"
# Two characters: a backslash followed by "N".
HIVE_NULL = "\\N"

# Connection type key, and the display name registered alongside it.
CONNECTION_TYPE = "hive"
UI_CONNECTION_TYPE = "Apache Hive"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@register_connector_class([CONNECTION_TYPE, UI_CONNECTION_TYPE])
class HiveConnector(DBAPIBase):
    """Connector for Apache Hive, registered under both the "hive" and "Apache Hive" names.

    Connection settings are described by ``config_schema``; the SQLAlchemy URL is
    built from them in ``sqlalchemy_url``.
    """

    SYSTEM_DATABASES = [
        "information_schema",
    ]
    connection_type = CONNECTION_TYPE
    ui_connection_type = UI_CONNECTION_TYPE
    setup_extras_require = ["PyHive", "thrift-sasl"]
    driver = "hive"
    # Connection types accepted for the Hive metastore database.
    valid_metastore_types = [
        MYSQL_CONNECTION_TYPE,
        POSTGRES_CONNECTION_TYPE,
    ]
    category = [ConnectionCategory.WAREHOUSE]
    group = [ConnectorGroup.DESTINATION]

    config_schema = {
        "type": "object",
        "properties": {
            "host": {
                "type": "string",
                "title": _l("Host Address"),
                "default": "127.0.0.1",
            },
            "port": {
                "type": "number",
                "title": _l("Port Number"),
                "default": 10000,
            },
            "user": {"type": "string", "title": _l("Username")},
            "password": {"type": "string", "title": _l("Password")},
            "database": {
                "type": "string",
                "title": _l("Database Name"),
                "description": _l("The name of the database to connect to"),
                "default": "default",
            },
            "hdfs_options": {
                "type": "object",
                "title": _l("HDFS Options"),
                "description": _l("Configuration options for HDFS connection"),
                "properties": {
                    "host": {
                        "type": "string",
                        "title": _l("Host Address"),
                        "description": _l("HDFS namenode hostname or IP address"),
                    },
                    "port": {
                        "type": "number",
                        "title": _l("Port Number"),
                        "description": _l("HDFS namenode port number"),
                        "default": 50070,
                    },
                    "user": {"type": "string", "title": _l("Username")},
                    "staging_folder": {
                        "type": "string",
                        "title": _l("Transfer Staging Folder"),
                        "description": _l("Temporary HDFS directory path for data transfer staging"),
                        "default": "/tmp/recurve",
                    },
                },
                "order": ["host", "port", "user", "staging_folder"],
            },
            "auth": {
                "type": "string",
                "title": _l("Authentication Type"),
                "default": "LDAP",
            },
            "hive_conf": {
                "type": "object",
                "title": _l("Hive Execute Configurations"),
                "description": _l("Additional Hive execution parameters"),
                "properties": {
                    "spark.yarn.queue": {
                        "type": "string",
                        "title": _l("Hive On Spark Queue"),
                        "description": _l("YARN queue name for Spark execution"),
                    },
                    "tez.queue.name": {
                        "type": "string",
                        "title": _l("Hive On Tez Queue"),
                        "description": _l("YARN queue name for Tez execution"),
                    },
                },
                "order": ["spark.yarn.queue", "tez.queue.name"],
            },
            # The metastore / ssh_tunnel sections are currently disabled in the schema.
            # The keys below mirror the fields of MetastoreConfig.
            # 'metastore': {
            #     'title': 'Hive Metastore Config',
            #     'type': 'object',
            #     'properties': {
            #         'type': {
            #             'type': 'string',
            #             'title': 'Metastore Type',
            #             'default': MYSQL_CONNECTION_TYPE,
            #         },
            #         'host': {
            #             'type': 'string',
            #             'title': 'Metastore Host Address',
            #         },
            #         'user': {
            #             'type': 'string',
            #             'title': 'Metastore User Name',
            #         },
            #         'password': {
            #             'type': 'string',
            #             'title': 'Metastore Password',
            #         },
            #         'database': {
            #             'type': 'string',
            #             'title': 'Metastore Database Name',
            #         },
            #         'port': {
            #             'type': 'number',
            #             'title': 'Metastore Port Number',
            #         },
            #     },
            #     "order": ['host', 'port', 'user', 'password', 'database'],
            #     'secret': ['password'],
            # },
            # 'ssh_tunnel': SSH_TUNNEL_CONFIG_SCHEMA,
        },
        "order": [
            "host",
            "port",
            "user",
            "password",
            "database",
            "hdfs_options",
            "auth",
            "hive_conf",
        ],
        "required": ["host", "port"],
        "secret": ["password"],
    }

    @property
    def connect_args(self):
        """Extra connection arguments: the authentication type.

        Uses the configured ``auth`` value so it agrees with the ``auth`` query
        parameter built in ``sqlalchemy_url``. Falls back to "LDAP", the schema
        default, when no value is configured.
        """
        return {"auth": self.auth or "LDAP"}

    # generate_ddl todo: stored as parquet

    def _extract_column_name(self, column_type):
        """Return the ``__visit_name__`` of ``column_type``.

        For a ``type_decorator`` the visit name of the wrapped ``impl`` type is
        returned instead.
        """
        visit_type = column_type.__visit_name__
        if visit_type == "type_decorator":
            return column_type.impl.__visit_name__
        return visit_type

    @property
    def metastore_connector(self) -> Optional[DBAPIBase]:
        """Connector for the Hive metastore database, or ``None`` when no metastore is configured.

        The metastore connector is given this connection's ``ssh_tunnel`` configuration.
        """
        if not self.metastore:
            return None
        metastore_config = MetastoreConfig(**self.metastore)
        return metastore_config.get_connector(self.conf.get("ssh_tunnel"))

    @property
    def sqlalchemy_url(self):
        """SQLAlchemy URL for this connection, with ``auth`` passed as a query parameter.

        When an SSH tunnel is active, the tunnel's local bind host and port
        replace the configured host and port.
        """
        host, port = self.host, self.port
        if self.ssh_tunnel and self.ssh_tunnel.is_active:
            host, port = self.ssh_tunnel.local_bind_host, self.ssh_tunnel.local_bind_port

        return URL(self.driver, self.user, self.password, host, port, self.database, query={"auth": self.auth})
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
@dataclass
class MetastoreConfig:
    """Connection settings for a Hive metastore database."""

    # Connection type name; resolved to a connector class by get_connection_class().
    type: str
    host: str
    user: str
    password: str
    database: str
    port: int

    def get_connector(self, ssh_tunnel_config: Optional[dict]) -> DBAPIBase:
        """Instantiate the connector class registered for ``self.type``.

        Args:
            ssh_tunnel_config: passed to the connector unchanged under the
                ``"ssh_tunnel"`` key of its ``conf``; may be ``None``.

        Returns:
            A connector built from this object's host, user, password,
            database and port.
        """
        connector_cls: Type[DBAPIBase] = get_connection_class(self.type)
        connector_conf = {
            "host": self.host,
            "user": self.user,
            "password": self.password,
            "database": self.database,
            "port": self.port,
            "ssh_tunnel": ssh_tunnel_config,
        }
        return connector_cls(conf=connector_conf)
|