recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic; see the registry's advisory page for details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,571 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
import itertools
|
|
3
|
+
import pickle
|
|
4
|
+
import re
|
|
5
|
+
import threading
|
|
6
|
+
from queue import Full, Queue
|
|
7
|
+
|
|
8
|
+
import sqlalchemy
|
|
9
|
+
import sqlalchemy.engine.url
|
|
10
|
+
import sqlparse
|
|
11
|
+
from sqlalchemy.pool import QueuePool
|
|
12
|
+
|
|
13
|
+
from recurvedata.pigeon.schema import Schema
|
|
14
|
+
from recurvedata.pigeon.utils import LoggingMixin, replace_null_values, trim_prefix, trim_suffix
|
|
15
|
+
from recurvedata.pigeon.utils.timing import TimeCounter
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class NullCursor(LoggingMixin):
    """A DBAPI-style cursor that logs statements instead of executing them.

    Used for dry runs: ``execute`` only logs the (interpolated) SQL, and all
    fetch methods behave as if the result set were empty.
    """

    def execute(self, operation, args=None, **kwargs):
        """Log the statement that would have been executed; return 0 rows."""
        # Plain %-interpolation mirrors how many drivers render parameters;
        # it is for logging only — nothing is sent anywhere.
        sql = operation if args is None else operation % args
        self.logger.info(sql)
        return 0

    def executemany(self, query, args):
        """Log one statement per parameter set; return the summed row counts."""
        if not args:
            # Fixed: previously returned None for an empty batch, while a
            # non-empty batch returned an int; 0 keeps the type consistent.
            return 0
        return sum(self.execute(query, arg) for arg in args)

    def fetchone(self):
        return None

    def fetchmany(self, size=None):
        # Fixed: PEP 249 specifies fetchmany returns a (possibly empty)
        # sequence; returning None broke callers that iterate the result.
        return []

    def fetchall(self):
        return []

    def __iter__(self):
        # iter(callable, sentinel): stops immediately since fetchone() is None.
        return iter(self.fetchone, None)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        self.logger.info("closing null cursor")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class _ShowTableLikeMixin(object):
|
|
59
|
+
def has_table(self, table, database=None, cursor=None, **kwargs):
|
|
60
|
+
close_cursor_at_exit = False
|
|
61
|
+
if cursor is None:
|
|
62
|
+
cursor = self.cursor()
|
|
63
|
+
close_cursor_at_exit = True
|
|
64
|
+
|
|
65
|
+
if database is not None and database != self.database:
|
|
66
|
+
cursor.execute("USE {}".format(database))
|
|
67
|
+
cursor.execute("SHOW TABLES LIKE '{}'".format(table))
|
|
68
|
+
rv = cursor.fetchall()
|
|
69
|
+
if close_cursor_at_exit:
|
|
70
|
+
cursor.close()
|
|
71
|
+
return bool(rv)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class _ConnectionPoolMixin(object):
|
|
75
|
+
def enable_connection_pooling(self, **pool_kwargs):
|
|
76
|
+
self.pooling_enabled = True
|
|
77
|
+
self._pool_kwargs = pool_kwargs
|
|
78
|
+
self._pools = {} # we do not use lock, threadsafe is not guaranteed
|
|
79
|
+
|
|
80
|
+
def get_connection_pooling_first(self, autocommit=False, *args, **kwargs):
|
|
81
|
+
if not getattr(self, "pooling_enabled", False):
|
|
82
|
+
return self.connect_impl(autocommit=autocommit, *args, **kwargs)
|
|
83
|
+
|
|
84
|
+
pool_id = hash(pickle.dumps((autocommit, args, kwargs)))
|
|
85
|
+
pool = self._pools.get(pool_id)
|
|
86
|
+
if not pool:
|
|
87
|
+
|
|
88
|
+
def creator():
|
|
89
|
+
return self.connect_impl(autocommit=autocommit, *args, **kwargs)
|
|
90
|
+
|
|
91
|
+
pool = QueuePool(creator=creator, **self._pool_kwargs)
|
|
92
|
+
self._pools[pool_id] = pool
|
|
93
|
+
conn = pool.connect()
|
|
94
|
+
return conn
|
|
95
|
+
|
|
96
|
+
def dispose(self):
|
|
97
|
+
try:
|
|
98
|
+
for _, p in self._pools.items():
|
|
99
|
+
if isinstance(p, QueuePool):
|
|
100
|
+
p.dispose()
|
|
101
|
+
except Exception as e:
|
|
102
|
+
self.logger.error(f"dispose error: {e}")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class ClosingCursor:
    """Cursor wrapper that closes its connection together with the cursor.

    As a context manager, the connection is committed on a clean exit
    (unless ``commit_on_close`` is False); the cursor and its connection
    are always closed on exit.
    """

    def __init__(self, connection, commit_on_close=True):
        self.connection = connection
        self._cursor = connection.cursor()
        self._commit_on_close = commit_on_close

    def __getattr__(self, name):
        # Delegate everything else (execute, fetchall, description, ...)
        # to the wrapped DBAPI cursor.
        return getattr(self._cursor, name)

    def __iter__(self):
        # Iteration looks up __iter__ on the type, bypassing __getattr__,
        # so it must be delegated explicitly.
        return iter(self._cursor)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, traceback):
        if not exc and self._commit_on_close:
            self.connection.commit()
        self.close()

    def close(self):
        """Close the cursor, then its owning connection."""
        self._cursor.close()
        self.connection.close()
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class DBAPIConnector(LoggingMixin, _ConnectionPoolMixin):
|
|
133
|
+
_log_query = True
|
|
134
|
+
_sqla_driver = None
|
|
135
|
+
_sqla_url_query = {}
|
|
136
|
+
_identifier_start_quote = "`"
|
|
137
|
+
_identifier_end_quote = "`"
|
|
138
|
+
_param_placeholder = "%s"
|
|
139
|
+
_default_port = None
|
|
140
|
+
_default_database = None
|
|
141
|
+
|
|
142
|
+
def __init__(self, host, port=None, database=None, user=None, password=None, schema=None, *args, **kwargs):
    """Capture connection settings; falsy port/database fall back to the
    class-level ``_default_port`` / ``_default_database``."""
    self.host = host
    self.port = port or self._default_port
    self.database = database or self._default_database
    self.user = user
    self.password = password
    self.schema = schema
    # Extra positional/keyword arguments are kept verbatim so clone()
    # can rebuild an equivalent connector.
    self.args = args
    self.kwargs = kwargs
|
|
151
|
+
|
|
152
|
+
def connect(self, autocommit=False, *args, **kwargs):
    """Returns a DBAPI connection, served from the pool when pooling is enabled."""
    connection = self.get_connection_pooling_first(autocommit, *args, **kwargs)
    return connection
|
|
155
|
+
|
|
156
|
+
def connect_impl(self, autocommit=False, *args, **kwargs):
    """Create a raw DBAPI connection; subclasses must override this."""
    raise NotImplementedError("connect must be implemented by subclasses")
|
|
158
|
+
|
|
159
|
+
def cursor(self, autocommit=False, dryrun=False, commit_on_close=True, **kwargs):
    """Returns a DBAPI cursor.

    With ``dryrun=True`` a NullCursor is returned, so statements are only
    logged; otherwise the cursor wraps a fresh connection and closes it
    together with the cursor.
    """
    if dryrun:
        return NullCursor()
    connection = self.connect(autocommit, **kwargs)
    return ClosingCursor(connection, commit_on_close=commit_on_close)


# Backwards-compatible alias: the returned cursor also closes its connection.
closing_cursor = cursor
|
|
167
|
+
|
|
168
|
+
def execute(self, query, parameters=None, **cursor_options):
    """Execute one or more sql queries in a same session."""
    # Accept a single string or a list of strings; split each into
    # individual statements with sqlparse.
    if isinstance(query, list):
        statements = [s for q in query for s in sqlparse.split(q)]
    else:
        statements = sqlparse.split(query)

    with self.cursor(**cursor_options) as cursor:
        for statement in statements:
            # remove the trailing `;`
            statement = statement.rstrip(";")
            if not statement:
                continue
            self._log(statement)
            if parameters is None:
                cursor.execute(statement)
            else:
                cursor.execute(statement, parameters)
|
|
186
|
+
|
|
187
|
+
def fetchone(self, query, parameters=None):
    """Run *query* and return a single row (or None)."""
    return self._fetch_query("one", query, parameters)


def fetchmany(self, query, parameters=None, size=None):
    """Run *query* and return up to *size* rows."""
    return self._fetch_query("many", query, parameters, size=size)


def fetchall(self, query, parameters=None):
    """Run *query* and return every row."""
    return self._fetch_query("all", query, parameters)
|
|
195
|
+
|
|
196
|
+
def _fetch_query(self, howmany, query, parameters=None, size=None):
    """Execute *query* and fetch "one", "many" (*size* rows) or "all"."""
    self._log(query)
    with self.cursor() as cursor:
        if parameters is None:
            cursor.execute(query)
        else:
            cursor.execute(query, parameters)

        if howmany == "many":
            return cursor.fetchmany(size=size)
        if howmany == "all":
            return cursor.fetchall()
        # Any other value falls back to a single-row fetch.
        return cursor.fetchone()
|
|
211
|
+
|
|
212
|
+
def _log(self, msg, *args, **kwargs):
    """Log *msg* at INFO level (newline-prefixed) unless query logging is off."""
    if self._log_query:
        self.logger.info("\n" + str(msg), *args, **kwargs)
|
|
216
|
+
|
|
217
|
+
def _get_sqlalchemy_uri(self):
    """Build the SQLAlchemy connection URI with the password included.

    Uses ``URL.create()`` / ``render_as_string()`` when available
    (SQLAlchemy >= 1.4, where the direct ``URL(...)`` constructor and
    ``__to_string__`` are deprecated/removed — the file elsewhere relies
    on 2.0 behavior), falling back to the legacy API otherwise.
    """
    url_cls = sqlalchemy.engine.url.URL
    make_url = getattr(url_cls, "create", url_cls)
    url = make_url(
        drivername=self._sqla_driver,
        host=self.host,
        port=self.port,
        username=self.user,
        password=self.password,
        database=self.database,
        query=self._sqla_url_query,
    )
    if hasattr(url, "render_as_string"):
        return url.render_as_string(hide_password=False)
    return url.__to_string__(hide_password=False)
|
|
228
|
+
|
|
229
|
+
def create_engine(self, engine_kwargs=None):
    """Returns a SQLAlchemy engine"""
    kwargs = dict(engine_kwargs) if engine_kwargs else {}
    return sqlalchemy.create_engine(self._get_sqlalchemy_uri(), **kwargs)
|
|
235
|
+
|
|
236
|
+
def get_pandas_df(self, query, parameters=None, **kwargs):
    """Run *query* and return the result set as a pandas DataFrame."""
    import pandas as pd

    # Wrap in text(): a literal '%' in the query errors out under
    # SQLAlchemy 2.0 when passed as a raw string.
    stmt = sqlalchemy.text(query)
    engine = self.create_engine()
    try:
        return pd.read_sql_query(sql=stmt, con=engine, params=parameters, **kwargs)
    finally:
        engine.dispose()
|
|
246
|
+
|
|
247
|
+
def has_table(self, table, database=None, **kwargs):
    """Return whether *table* exists; concrete connectors must override."""
    raise NotImplementedError
|
|
249
|
+
|
|
250
|
+
def clone(self):
    """Return a new connector configured identically to this one.

    Fixed: the previous implementation passed ``*self.args`` after keyword
    arguments (``cls(host=..., *self.args)``); the extra positionals were
    applied first and re-bound host/port/..., raising
    ``TypeError: got multiple values for 'host'`` whenever ``self.args``
    was non-empty. Passing everything positionally, in __init__'s declared
    order, avoids the collision.
    """
    return self.__class__(
        self.host,
        self.port,
        self.database,
        self.user,
        self.password,
        self.schema,
        *self.args,
        **self.kwargs,
    )
|
|
261
|
+
|
|
262
|
+
def get_columns(self, table, database=None, exclude=None):
    """Return the column names of *table*, optionally filtering out *exclude*.

    Raises:
        ValueError: if the table does not exist in the target database.
    """
    if database is None:
        database = self.database
    with self.cursor() as cursor:
        if not self.has_table(table, database, cursor=cursor):
            raise ValueError("Table {!r} not exists in {!r}".format(table, database))
        # LIMIT 0 fetches no data but still populates cursor.description.
        qualified = "{}.{}".format(self.quote_identifier(database), self.quote_identifier(table))
        cursor.execute("SELECT * FROM {} LIMIT 0".format(qualified))
        cursor.fetchall()
        columns = self.get_columns_from_cursor(cursor)
    if exclude:
        columns = [c for c in columns if c not in exclude]
    return columns
|
|
276
|
+
|
|
277
|
+
@staticmethod
|
|
278
|
+
def get_columns_from_cursor(cursor):
|
|
279
|
+
cols = []
|
|
280
|
+
for item in cursor.description:
|
|
281
|
+
name = item[0]
|
|
282
|
+
if "." in name:
|
|
283
|
+
cols.append(name.split(".")[1])
|
|
284
|
+
else:
|
|
285
|
+
cols.append(name)
|
|
286
|
+
return cols
|
|
287
|
+
|
|
288
|
+
def quote_identifier(self, v):
    """Quote a possibly dotted identifier with the dialect's quote chars,
    normalizing away any quotes already present on each dotted part."""
    start = self._identifier_start_quote
    end = self._identifier_end_quote
    quoted = []
    for part in v.split("."):
        bare = trim_suffix(trim_prefix(part, start), end)
        quoted.append(f"{start}{bare}{end}")
    return ".".join(quoted)
|
|
296
|
+
|
|
297
|
+
def cursor_to_schema(self, cursor):
    """Build a Schema from ``cursor.description`` via ``to_canonical_type``.

    PEP 249 description tuples are (name, type_code, display_size,
    internal_size, ...); index 3 is used as the field size. A driver-side
    "table.column" qualifier on the name is stripped.
    """
    schema = Schema()
    for item in cursor.description:
        name = item[0]
        if "." in name:
            name = name.split(".")[1]
        type_code, size = item[1], item[3]
        canonical = self.to_canonical_type(type_code, size)
        schema.add_field_by_attrs(name, canonical, size)
    return schema
|
|
310
|
+
|
|
311
|
+
@staticmethod
|
|
312
|
+
def to_canonical_type(type_code, size):
|
|
313
|
+
raise NotImplementedError()
|
|
314
|
+
|
|
315
|
+
@staticmethod
|
|
316
|
+
def from_canonical_type(canonical_type, size):
|
|
317
|
+
raise NotImplementedError()
|
|
318
|
+
|
|
319
|
+
def generate_create_table_ddl(self, name, schema, **kwargs):
    """Render a CREATE TABLE statement for *name* from *schema*'s fields."""
    column_defs = []
    for field in schema:
        rendered = f"{self.quote_identifier(field.name)} {self.from_canonical_type(field.type, field.size)}"
        if field.comment:
            rendered += f" COMMENT {field.comment!r}"
        column_defs.append(rendered)

    body = ",\n".join(column_defs)
    return f"CREATE TABLE {self.quote_identifier(name)} (\n{body}\n)"
|
|
332
|
+
|
|
333
|
+
def generate_ddl(self, table, database=None, if_exists=True):
    """Produce DDL recreating *table*; concrete connectors must override."""
    raise NotImplementedError
|
|
335
|
+
|
|
336
|
+
def load_csv_by_inserting(
    self,
    table,
    filename,
    columns=None,
    delimiter=",",
    quotechar='"',
    lineterminator="\r\n",
    escapechar=None,
    skiprows=0,
    null_values=("NULL", r"\N"),
    null_replacer=None,
    batch_size=1000,
    values_hook=None,
    concurrency=1,
    **kwargs,
):
    """Load a CSV file into *table* with batched INSERT statements.

    Extra ``**kwargs`` are passed through to ``csv.reader``. With
    ``concurrency`` > 1 batches are written by a pool of worker threads;
    otherwise rows are inserted from the calling thread.
    """
    csv_options = {
        "delimiter": delimiter,
        "quotechar": quotechar,
        "lineterminator": lineterminator,
        "escapechar": escapechar,
        **kwargs,
    }

    if values_hook is None:
        def values_hook(row):  # identity hook: rows pass through unchanged
            return row

    if concurrency > 1:
        self._insert_in_parallel(
            table,
            filename,
            columns,
            csv_options,
            skiprows,
            null_values,
            null_replacer,
            batch_size,
            values_hook,
            concurrency,
        )
    else:
        # Single writer: run on the calling thread and skip the queue overhead.
        self._insert_in_serial(
            table, filename, columns, csv_options, skiprows, null_values, null_replacer, batch_size, values_hook
        )
|
|
379
|
+
|
|
380
|
+
def _insert_in_serial(
    self,
    table,
    filename,
    columns,
    csv_options,
    skiprows=0,
    null_values=("NULL", r"\N"),
    null_replacer=None,
    batch_size=1000,
    values_hook=None,
):
    """Stream *filename* through csv.reader and insert rows into *table* in
    batches of *batch_size*, using a single cursor on the calling thread.

    NOTE(review): the cursor is closed without an explicit commit; this
    appears to rely on autocommit or on _bulk_insert committing — confirm
    against the concrete connector.
    """
    cursor = self.cursor()
    counter = TimeCounter(name="main", log_threshold=batch_size * 10, logger=self.logger)

    with open(filename, newline="") as fd:
        # Skip header/preamble lines before handing the stream to csv.reader.
        for _ in range(skiprows):
            fd.readline()

        batch = []
        for row in csv.reader(fd, **csv_options):
            row = values_hook(replace_null_values(row, null_values, null_replacer))
            batch.append(row)
            counter.incr(1)
            if len(batch) == batch_size:
                self._bulk_insert(cursor, table, columns, batch)
                batch = []

        # Flush the final (possibly empty) batch, matching the original
        # behaviour of always issuing this call.
        self._bulk_insert(cursor, table, columns, batch)

    counter.show_stat()
    cursor.close()
|
|
415
|
+
|
|
416
|
+
def _insert_in_parallel(
    self,
    table,
    filename,
    columns,
    csv_options,
    skiprows=0,
    null_values=("NULL", r"\N"),
    null_replacer=None,
    batch_size=1000,
    values_hook=None,
    concurrency=1,
):
    """Load a CSV file into *table* using *concurrency* writer threads.

    The main thread reads and preprocesses rows, batching them onto a
    bounded queue consumed by ``_write_worker`` threads. One ``None``
    sentinel per worker signals end of input. Worker failures are
    collected in an exception queue and surfaced here as ``RuntimeError``.

    Parameters mirror ``_insert_in_serial`` plus:

    :param concurrency: number of writer threads to start.
    :raises RuntimeError: if any worker thread failed to insert a batch.
    """
    data_queue = Queue(maxsize=2 * concurrency)
    exc_queue = Queue()

    # start workers
    workers = []
    for _ in range(concurrency):
        t = threading.Thread(target=self._write_worker, args=(table, columns, batch_size, data_queue, exc_queue))
        # daemon threads so a fatal error in the producer cannot hang the
        # process; Thread.setDaemon() is deprecated since Python 3.10.
        t.daemon = True
        t.start()
        workers.append(t)

    # send tasks to queue
    counter = TimeCounter(name="main", log_threshold=batch_size * 10, logger=self.logger)
    with open(filename, newline="") as fd:
        for _ in range(skiprows):
            fd.readline()

        reader = csv.reader(fd, **csv_options)

        rows = []
        for row in reader:
            row = replace_null_values(row, null_values, null_replacer)
            row = values_hook(row)
            counter.incr(1)
            rows.append(row)
            if len(rows) == batch_size:
                while True:
                    try:
                        # wait up to 2 minutes before checking state of workers;
                        # terminate immediately if any worker fails
                        data_queue.put(rows, block=True, timeout=120)
                    except Full:
                        if not exc_queue.empty():
                            raise RuntimeError(f"{exc_queue.qsize()} of {concurrency} workers failed")
                    else:
                        break
                rows = []

    if rows:
        # final partial batch; workers are still draining, so this put
        # should not block for long in normal operation
        data_queue.put(rows)

    self.logger.info("sending finish signal to all workers")
    for _ in workers:
        data_queue.put(None)

    self.logger.info("waiting for workers to exit")
    for t in workers:
        t.join()

    if not exc_queue.empty():
        raise RuntimeError(f"{exc_queue.qsize()} of {concurrency} workers failed")

    counter.show_stat()
|
|
483
|
+
|
|
484
|
+
def _write_worker(self, table, cols, batch_size, data_queue: Queue, exc_queue: Queue):
    """Writer-thread loop: drain row batches from *data_queue* into *table*.

    Runs until a ``None`` sentinel is received. On the first insert
    failure the exception is pushed onto *exc_queue* and the worker
    exits, letting the producer detect and report the failure.

    :param table: target table name.
    :param cols: optional column list forwarded to ``_bulk_insert``.
    :param batch_size: used only to scale the progress-log threshold.
    :param data_queue: queue of row batches; ``None`` means "no more data".
    :param exc_queue: sink for this worker's fatal exception, if any.
    """
    log_threshold = 5 * batch_size
    cursor = self.cursor()
    counter = TimeCounter(name="worker", log_threshold=log_threshold, logger=self.logger)
    while True:
        rows = data_queue.get()
        if rows is None:
            break

        counter.incr(len(rows))
        try:
            # NOTE: the dead `rows = []` reset that used to follow this call
            # was removed -- `rows` is unconditionally rebound by the next
            # data_queue.get(), so the reset had no effect.
            self._bulk_insert(cursor, table, cols, rows)
        except Exception as e:
            self.logger.exception("failed to insert %d rows, break", len(rows))
            # terminate this worker on any insert failure
            exc_queue.put(e)
            break

    counter.show_stat()
    cursor.close()
    self.logger.info("ready to exit.")
|
|
507
|
+
|
|
508
|
+
def _bulk_insert(self, cursor, table, cols, rows):
|
|
509
|
+
if not rows:
|
|
510
|
+
return
|
|
511
|
+
|
|
512
|
+
col_count = len(rows[0])
|
|
513
|
+
|
|
514
|
+
if cols:
|
|
515
|
+
field_names = "({})".format(", ".join([self.quote_identifier(x) for x in cols]))
|
|
516
|
+
else:
|
|
517
|
+
field_names = ""
|
|
518
|
+
|
|
519
|
+
placeholders = ", ".join([self._param_placeholder] * col_count)
|
|
520
|
+
sql = f"INSERT INTO {table} {field_names} VALUES ({placeholders})"
|
|
521
|
+
|
|
522
|
+
cursor.executemany(sql, rows)
|
|
523
|
+
cursor.connection.commit()
|
|
524
|
+
|
|
525
|
+
def add_leading_comment(self, query, comment):
    """Prefix every statement in *query* with *comment* as a SQL block
    comment, rejoining the statements with ``;\\n``."""
    annotated = [
        self._add_leading_comment_impl(stmt.strip().rstrip(";"), comment)
        for stmt in sqlparse.split(query.strip())
    ]
    return ";\n".join(annotated)
|
|
530
|
+
|
|
531
|
+
def _add_leading_comment_impl(self, query, comment):
|
|
532
|
+
comment = self._safe_comment(comment)
|
|
533
|
+
return "/* {} */\n{}".format(comment, query)
|
|
534
|
+
|
|
535
|
+
def _safe_comment(self, comment):
|
|
536
|
+
# 强行将 comment 中可能存在的 */ 或 /* 替换为 '', 以免 comment 失效报错
|
|
537
|
+
comment = re.sub(pattern=r"\*\/|\/\*", repl="", string=comment)
|
|
538
|
+
return ", ".join(comment.split("\n"))
|
|
539
|
+
|
|
540
|
+
def is_mysql(self):
    """Dialect flag: whether this connector targets MySQL (False here)."""
    return False
|
|
542
|
+
|
|
543
|
+
def is_impala(self):
    """Dialect flag: whether this connector targets Impala (False here)."""
    return False
|
|
545
|
+
|
|
546
|
+
def is_hive(self):
    """Dialect flag: whether this connector targets Hive (False here)."""
    return False
|
|
548
|
+
|
|
549
|
+
def is_postgres(self):
    """Dialect flag: whether this connector targets PostgreSQL (False here)."""
    return False
|
|
551
|
+
|
|
552
|
+
def is_redshift(self):
    """Dialect flag: whether this connector targets Redshift (False here)."""
    return False
|
|
554
|
+
|
|
555
|
+
def is_mssql(self):
    """Dialect flag: whether this connector targets SQL Server (False here)."""
    return False
|
|
557
|
+
|
|
558
|
+
def is_azure_synapse(self):
    """Dialect flag: whether this connector targets Azure Synapse (False here)."""
    return False
|
|
560
|
+
|
|
561
|
+
def is_clickhouse(self):
    """Dialect flag: whether this connector targets ClickHouse (False here)."""
    return False
|
|
563
|
+
|
|
564
|
+
def is_clickhouse_native(self):
    """Dialect flag: whether this connector uses the native ClickHouse
    protocol (False here)."""
    return False
|
|
566
|
+
|
|
567
|
+
def is_phoenix(self):
    """Dialect flag: whether this connector targets Apache Phoenix (False here)."""
    return False
|
|
569
|
+
|
|
570
|
+
def is_google_bigquery(self):
    """Dialect flag: whether this connector targets BigQuery (False here)."""
    return False
|