recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
from typing import Callable
|
|
2
|
+
|
|
3
|
+
import sqlparse
|
|
4
|
+
import sqlparse.keywords
|
|
5
|
+
import sqlparse.sql
|
|
6
|
+
import sqlparse.tokens
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def staging_table_of(table: str) -> str:
    """Return the name of the staging table that belongs to ``table``."""
    return "z_{}_staging".format(table)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def reconcile_table_of(table: str) -> str:
    """Return the name of the reconcile table that belongs to ``table``."""
    return "z_{}_reconcile".format(table)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def bak_table_of(table: str) -> str:
    """Return the name of the backup table that belongs to ``table``."""
    return "z_{}_bak".format(table)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def trim_prefix(s: str, sub: str, ignore_case: bool = True) -> str:
    """Strip a leading ``sub`` from ``s``.

    Args:
        s: The string to trim.
        sub: The prefix to remove.
        ignore_case: When true, the prefix is matched on lower-cased text.

    Returns:
        ``s`` without its first ``len(sub)`` characters when those characters
        match ``sub``; otherwise ``s`` unchanged. Nothing besides the prefix
        itself is removed (whitespace after the prefix is kept).
    """
    cut = len(sub)
    candidate, wanted = s[:cut], sub
    if ignore_case:
        candidate, wanted = candidate.lower(), wanted.lower()
    return s[cut:] if candidate == wanted else s
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def apply_where_naively(query: str, where: str) -> str:
    """Append ``where`` to ``query`` using plain string handling.

    The query is not parsed: if the lower-cased query text contains the
    substring "where" anywhere, the condition is appended with ``AND``,
    otherwise with ``WHERE``. A leading "where" on the condition itself is
    removed first (case-insensitively).

    Args:
        query: The SQL text to extend.
        where: The extra condition, with or without a leading ``WHERE``.

    Returns:
        The extended query, or ``query`` unchanged when ``where`` is empty.
    """
    if not where:
        return query

    condition = trim_prefix(where, "where")
    template = "{} AND {}" if "where" in query.lower() else "{} WHERE {}"
    return template.format(query, condition)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def apply_where_safely(query: str, where: str) -> str:
    """Add ``where`` as an extra condition to the first statement of ``query``.

    The query is parsed with sqlparse. When the statement already has a
    top-level WHERE clause, the new condition is appended to it with ``AND``;
    otherwise a new WHERE clause is inserted before the first top-level
    ORDER BY / GROUP BY / LIMIT / HAVING keyword, or at the end of the
    statement when none of those is present.

    Args:
        query: The SQL text to extend. Only the first parsed statement is
            processed and returned.
        where: The extra condition, with or without a leading ``WHERE``.

    Returns:
        The rewritten statement, or ``query`` unchanged when ``where`` is empty.
    """
    if not where:
        return query

    where = trim_prefix(where, "where")

    parsed = sqlparse.parse(query)[0]

    idx, old_where_token = parsed.token_next_by(i=sqlparse.sql.Where)
    # There is already a WHERE clause: replace it with an extended copy.
    if idx is not None:
        # Add the new condition on a new line, see https://gitlab.yimian.com.cn/etl/pigeon/issues/4
        new_where = "{}\nAND {}\n".format(old_where_token.value, where)
        new_where_token = sqlparse.sql.Where([sqlparse.sql.Token(None, new_where)])
        parsed.tokens[idx] = new_where_token
        return str(parsed)

    # There is no WHERE clause, so create one and insert it at the right place.
    clause_starters = ("ORDER", "GROUP", "LIMIT", "HAVING")
    next_idx = len(parsed.tokens)
    for i, token in enumerate(parsed.tokens):
        # Newer sqlparse releases emit "ORDER BY" / "GROUP BY" as a single
        # keyword token, older ones as separate "ORDER" and "BY" tokens.
        # Comparing only the first word recognises both forms.
        if token.is_keyword and token.value.upper().split()[0] in clause_starters:
            next_idx = i
            break

    # Add the WHERE clause on a new line, see https://gitlab.yimian.com.cn/etl/pigeon/issues/4
    new_where = "\nWHERE {}\n".format(where)
    # sqlparse.sql.Where.ttype is None
    new_where_token = sqlparse.sql.Where([sqlparse.sql.Token(None, new_where)])
    parsed.insert_before(next_idx, new_where_token)
    return str(parsed)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def extract_from_clause(query: str) -> str:
    """Extract the FROM clause from a SQL query.

    Only the first parsed statement and only its top-level tokens are
    inspected.

    Args:
        query (str): The SQL query

    Returns:
        str: The text following the first top-level FROM keyword, stopping at
            the WHERE clause or at the first GROUP BY, ORDER BY, HAVING or
            LIMIT keyword. Empty string when there is no FROM keyword.
    """
    parsed = sqlparse.parse(query)[0]

    # Index of the first token after the FROM keyword.
    start_idx = None
    for i, t in enumerate(parsed.tokens):
        if t.value.upper() == "FROM":
            start_idx = i + 1
            break

    if start_idx is None:
        return ""

    # The FROM clause ends where the next clause begins.
    clause_starters = ("GROUP", "ORDER", "LIMIT", "HAVING")
    end_idx = len(parsed.tokens)
    for i, t in enumerate(parsed.tokens[start_idx:], start=start_idx):
        if isinstance(t, sqlparse.sql.Where):
            end_idx = i
            break
        # Newer sqlparse releases emit "GROUP BY" / "ORDER BY" as a single
        # keyword token, so compare only the first word of the keyword.
        if t.is_keyword and t.value.upper().split()[0] in clause_starters:
            end_idx = i
            break

    tokens = parsed.tokens[start_idx:end_idx]
    tl = sqlparse.sql.TokenList(tokens)
    return str(tl).strip()
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def extract_where_clause(query: str) -> str:
    """Return the text of the top-level WHERE clause of the first statement.

    Args:
        query: The SQL text to inspect.

    Returns:
        The value of the first ``sqlparse.sql.Where`` token, or an empty
        string when the statement has none.
    """
    statement = sqlparse.parse(query)[0]
    position, clause = statement.token_next_by(i=sqlparse.sql.Where)
    return "" if position is None else clause.value
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def extract_limit_count(query: str) -> int | None:
    """Return the row count of the top-level LIMIT clause of the first statement.

    Args:
        query: The SQL text to inspect.

    Returns:
        The integer that follows LIMIT, or None when there is no top-level
        LIMIT keyword.

    Raises:
        ValueError: If LIMIT is not followed by a value, or the value is not
            a plain integer.
    """
    parsed = sqlparse.parse(query)[0]
    for i, t in enumerate(parsed.tokens):
        if t.value.upper() == "LIMIT":
            # Skip however many whitespace/newline/comment tokens separate
            # LIMIT from its value instead of assuming exactly one.
            _, value_token = parsed.token_next(i, skip_ws=True, skip_cm=True)
            break
    else:
        return None

    if value_token is None:
        raise ValueError("LIMIT clause has no value")
    return int(value_token.value)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def mssql_extract_limit_count(query: str) -> int | None:
    """Extract the TOP limit from a SQL Server query.

    As a side effect, "TOP" is registered in ``sqlparse.keywords.KEYWORDS``
    (process-wide) when it is not there yet.

    Args:
        query (str): The SQL query

    Returns:
        int | None: The TOP limit value, or None if no top-level TOP keyword
            is found in the first statement

    Raises:
        ValueError: If TOP is not followed by a value, or the value is not a
            plain integer (optionally wrapped in parentheses).
    """

    def _first_leaf(token):
        # The value after TOP may have been grouped together with the select
        # list; descend until the first non-identifier token is reached.
        while isinstance(token, (sqlparse.sql.IdentifierList, sqlparse.sql.Identifier)):
            token = token.token_first(skip_cm=True)
        return token

    if "TOP" not in sqlparse.keywords.KEYWORDS:
        sqlparse.keywords.KEYWORDS["TOP"] = sqlparse.tokens.Keyword

    parsed = sqlparse.parse(query)[0]
    for i, t in enumerate(parsed.tokens):
        if t.value.upper() == "TOP":
            # Skip however many whitespace/newline/comment tokens separate
            # TOP from its value instead of assuming exactly one.
            _, candidate = parsed.token_next(i, skip_ws=True, skip_cm=True)
            break
    else:
        return None

    if candidate is None:
        raise ValueError("TOP clause has no value")

    value_token = _first_leaf(candidate)
    # Remove parentheses if present, e.g. TOP (10)
    value = value_token.value.strip("()")
    return int(value)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def apply_limit(query: str, count: int) -> str:
    """Set the LIMIT of ``query`` to ``count``.

    Args:
        query: The SQL text to modify.
        count: The row limit to apply.

    Returns:
        ``query`` with `` LIMIT <count>`` appended when the first statement
        has no top-level LIMIT keyword; otherwise the first statement with
        the value after LIMIT replaced by ``count``.

    Raises:
        ValueError: If a LIMIT keyword is present but not followed by a value.
    """
    parsed = sqlparse.parse(query)[0]
    for i, t in enumerate(parsed.tokens):
        if t.value.upper() == "LIMIT":
            # Skip however many whitespace/newline/comment tokens separate
            # LIMIT from its value instead of assuming exactly one, so the
            # count is never written into a whitespace token.
            _, value_token = parsed.token_next(i, skip_ws=True, skip_cm=True)
            break
    else:
        return f"{query} LIMIT {count}"

    if value_token is None:
        raise ValueError("LIMIT clause has no value")
    # NOTE(review): this only rewrites a plain value token; a grouped value
    # such as "LIMIT 5, 10" presumably stays unchanged - confirm.
    value_token.value = str(count)
    return str(parsed)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def mssql_apply_limit(query: str, count: int) -> str:
    """Set the SQL Server ``TOP`` limit of ``query`` to ``count``.

    Only the first parsed statement and only its top-level tokens are
    inspected. When a token whose text is "TOP" is found, the token two
    positions after it is overwritten with ``count``; otherwise
    ``TOP <count> `` is inserted in front of the select list.

    Args:
        query: The SQL text to modify.
        count: The row limit to apply.

    Returns:
        The rewritten first statement.
    """
    parsed = sqlparse.parse(query)[0]

    # select_idx:    index of the first SELECT token
    # sel_start_idx: index of the first identifier / identifier list / wildcard
    #                seen at or after SELECT (the start of the select list)
    # top_idx:       index of the token two positions after a TOP token
    select_idx = top_idx = sel_start_idx = None
    for i, t in enumerate(parsed.tokens):
        if select_idx is None and t.value.upper() == "SELECT":
            select_idx = i
        if select_idx is not None and sel_start_idx is None:
            if isinstance(t, (sqlparse.sql.IdentifierList, sqlparse.sql.Identifier)):
                sel_start_idx = i
            if isinstance(t, sqlparse.sql.Token) and t.ttype == sqlparse.tokens.Wildcard:
                sel_start_idx = i
        if t.value.upper() == "TOP":
            # NOTE(review): assumes exactly one token (whitespace) sits between
            # TOP and its value - confirm for queries with extra whitespace.
            top_idx = i + 2
            break
    if not top_idx:
        # No TOP yet: build "TOP <count> " and splice it in before the select list.
        white_space = sqlparse.sql.Token(sqlparse.tokens.Whitespace, " ")
        add_tokens = [
            sqlparse.sql.Token(sqlparse.tokens.Keyword, "TOP"),
            white_space,
            sqlparse.sql.Token(sqlparse.tokens.Number, count),
            white_space,
        ]
        # NOTE(review): if no identifier or wildcard was found, sel_start_idx is
        # still None and both slices below are the full token list - confirm
        # whether such queries (e.g. "SELECT 1") can reach this function.
        parsed.tokens = parsed.tokens[:sel_start_idx] + add_tokens + parsed.tokens[sel_start_idx:]
        return str(parsed)
    # TOP already present: overwrite its value in place.
    parsed.tokens[top_idx].value = str(count)
    return str(parsed)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def apply_sql_no_cache(query: str) -> str:
    """Add SQL_NO_CACHE hint to a SELECT query.

    Args:
        query (str): The SQL query

    Returns:
        str: The first statement with ``/*!40001 SQL_NO_CACHE*/`` inserted
            right after its first top-level SELECT token. The query is
            returned unchanged when it already contains ``/*``.
    """
    statement = sqlparse.parse(query)[0]
    if "/*" in query:
        return query

    hint = sqlparse.sql.Comment([sqlparse.sql.Token(None, "/*!40001 SQL_NO_CACHE*/")])

    select_pos = next(
        (pos for pos, tok in enumerate(statement.tokens) if tok.value.upper() == "SELECT"),
        None,
    )
    if select_pos is not None:
        # Splice "<space><hint><space>" in directly behind the SELECT token.
        leading_space = sqlparse.sql.Token(sqlparse.tokens.Whitespace, " ")
        trailing_space = sqlparse.sql.Token(sqlparse.tokens.Whitespace, " ")
        insert_at = select_pos + 1
        statement.tokens[insert_at:insert_at] = [leading_space, hint, trailing_space]

    return str(statement)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def sqlformat(query: str, reindent: bool = False, **kwargs) -> str:
    """Format ``query`` with sqlparse, upper-casing keywords.

    sqlparse is always asked to reindent (``reindent=True`` and
    ``keyword_case="upper"`` override whatever is passed in ``kwargs``).
    The ``reindent`` argument only decides what is returned.

    Args:
        query: The SQL text to format; surrounding whitespace is stripped.
        reindent: When true, return the multi-line output as is. When false,
            strip every line and join the lines with single spaces.
        **kwargs: Extra options forwarded to ``sqlparse.format``.

    Returns:
        The formatted SQL text.
    """
    options = {**kwargs, "reindent": True, "keyword_case": "upper"}
    formatted = sqlparse.format(query.strip(), **options)
    if reindent:
        return formatted
    return " ".join(line.strip() for line in formatted.splitlines())
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def add_schema_to_create_table(
|
|
255
|
+
create_table_ddl: str, schema: str, quote_callback: Callable[[str], str] | None = None
|
|
256
|
+
) -> str:
|
|
257
|
+
"""Add schema to a CREATE TABLE statement if the table name doesn't already have a schema.
|
|
258
|
+
|
|
259
|
+
Args:
|
|
260
|
+
create_table_ddl (str): The CREATE TABLE DDL statement
|
|
261
|
+
schema (str): The schema name to add
|
|
262
|
+
quote_callback (Optional[QuoteCallback]): Optional callback function to quote table names.
|
|
263
|
+
The callback should accept a string (either 'table' or 'schema.table')
|
|
264
|
+
and return the properly quoted string.
|
|
265
|
+
|
|
266
|
+
Returns:
|
|
267
|
+
str: Modified CREATE TABLE statement with schema added, or original if no modification needed
|
|
268
|
+
"""
|
|
269
|
+
if not schema or not create_table_ddl or "CREATE TABLE" not in create_table_ddl.upper():
|
|
270
|
+
return create_table_ddl
|
|
271
|
+
|
|
272
|
+
parsed = sqlparse.parse(create_table_ddl)[0]
|
|
273
|
+
|
|
274
|
+
# Find the CREATE TABLE tokens
|
|
275
|
+
for token in parsed.tokens:
|
|
276
|
+
if isinstance(token, sqlparse.sql.Identifier):
|
|
277
|
+
token_str = str(token)
|
|
278
|
+
|
|
279
|
+
# Check if it already has a schema
|
|
280
|
+
if "." in token_str:
|
|
281
|
+
# Already has schema, if we have a quote callback, apply it
|
|
282
|
+
if quote_callback:
|
|
283
|
+
parts = token_str.split(".")
|
|
284
|
+
schema_part = parts[0].strip()
|
|
285
|
+
table_part = ".".join(parts[1:]).strip()
|
|
286
|
+
|
|
287
|
+
# Extract actual names without quotes
|
|
288
|
+
quote_chars = ["`", '"', "[", "]"]
|
|
289
|
+
clean_schema = schema_part
|
|
290
|
+
clean_table = table_part
|
|
291
|
+
|
|
292
|
+
for char in quote_chars:
|
|
293
|
+
clean_schema = clean_schema.replace(char, "")
|
|
294
|
+
clean_table = clean_table.replace(char, "")
|
|
295
|
+
|
|
296
|
+
# Apply quote callback to the extracted schema.table
|
|
297
|
+
qualified_name = f"{clean_schema}.{clean_table}"
|
|
298
|
+
new_name = quote_callback(qualified_name)
|
|
299
|
+
|
|
300
|
+
# Replace the token
|
|
301
|
+
token.tokens = [sqlparse.sql.Token(sqlparse.tokens.Name, new_name)]
|
|
302
|
+
return str(parsed)
|
|
303
|
+
|
|
304
|
+
# No schema, need to add one
|
|
305
|
+
table_name = token.get_real_name()
|
|
306
|
+
original = token.value
|
|
307
|
+
|
|
308
|
+
# Special case for SQL Server bracket style
|
|
309
|
+
if original.startswith("[") and original.endswith("]"):
|
|
310
|
+
new_name = f"{schema}.{table_name}"
|
|
311
|
+
# Handle other quoted identifiers
|
|
312
|
+
elif "`" in original:
|
|
313
|
+
new_name = f"{schema}.`{table_name}`"
|
|
314
|
+
elif '"' in original:
|
|
315
|
+
new_name = f'{schema}."{table_name}"'
|
|
316
|
+
else:
|
|
317
|
+
new_name = f"{schema}.{table_name}"
|
|
318
|
+
|
|
319
|
+
if quote_callback:
|
|
320
|
+
# Apply quote callback to new schema.table
|
|
321
|
+
qualified_name = f"{schema}.{table_name}"
|
|
322
|
+
new_name = quote_callback(qualified_name)
|
|
323
|
+
|
|
324
|
+
# Replace the token
|
|
325
|
+
token.tokens = [sqlparse.sql.Token(sqlparse.tokens.Name, new_name)]
|
|
326
|
+
break
|
|
327
|
+
|
|
328
|
+
return str(parsed)
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import functools
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
from recurvedata.utils.imports import MockModule
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
import humanize
|
|
10
|
+
except ImportError:
|
|
11
|
+
humanize = MockModule("humanize")
|
|
12
|
+
|
|
13
|
+
_logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def time_since(dt):
    """Return the ``timedelta`` between ``dt`` and the current local time."""
    current = datetime.datetime.now()
    return current - dt
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Timer(object):
    """Logs messages suffixed with the time elapsed since the last reset.

    Args:
        delay: When true, the timer is not started on construction and
            ``reset()`` has to be called before logging.
        logger: Logger to write to; the module logger is used when omitted.
    """

    def __init__(self, delay=False, logger=None):
        self.logger = logger if logger else _logger
        self.start_dttm = None
        if delay:
            return
        self.reset()

    def reset(self):
        """Restart the timer at the current time."""
        self.start_dttm = datetime.datetime.now()

    def debug(self, message, *args):
        """Log ``message`` plus the elapsed time at DEBUG level."""
        self._log(self.logger.debug, message, *args)

    def info(self, message, *args):
        """Log ``message`` plus the elapsed time at INFO level."""
        self._log(self.logger.info, message, *args)

    def warning(self, message, *args):
        """Log ``message`` plus the elapsed time at WARNING level."""
        self._log(self.logger.warning, message, *args)

    def error(self, message, *args):
        """Log ``message`` plus the elapsed time at ERROR level."""
        self._log(self.logger.error, message, *args)

    def _log(self, func, message, *args):
        # The elapsed time is passed as one more %-style argument so the
        # logger formats it lazily.
        template = message.rstrip() + " took %s"
        # TODO: humanize timedelta
        func(template, *args, time_since(self.start_dttm))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class timing(object):
    """Decorator and context manager that logs how long an operation took.

    The elapsed time is logged at INFO level through a ``Timer``.

    Args:
        operation: Text to log. As a decorator, an empty value is replaced by
            ``"calling <func>"``; as a context manager, by
            ``"operation finished"``.
        logger: Logger handed to the underlying ``Timer``.
    """

    def __init__(self, operation="", logger=None):
        self._timer = Timer(delay=True, logger=logger)
        self.operation = operation

    def __call__(self, func):
        self.operation = self.operation or "calling {}".format(func)

        @functools.wraps(func)
        def inner(*args, **kwargs):
            # Nothing is logged when func raises.
            self._timer.reset()
            result = func(*args, **kwargs)
            self._timer.info(self.operation)
            return result

        return inner

    def __enter__(self):
        self._timer.reset()

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Logs whether or not the block raised; returns None, so an
        # exception from the block propagates.
        label = self.operation or "operation finished"
        self._timer.info(label)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class TimeCounter(object):
    """Counts processed items and periodically logs the throughput.

    Args:
        name: Label shown in the log lines.
        log_threshold: A statistics line is logged every time the count
            passes a multiple of this value; a falsy value disables the
            automatic logging.
        total: Expected final count; when set, the progress is logged too.
        logger: Object with an ``info`` method; the ``logging`` module is
            used when omitted.
    """

    def __init__(self, name="", log_threshold=5000, total=None, logger=None):
        self.name = name
        self.log_threshold = log_threshold
        self.total = total

        self._logger = logger or logging
        self._count = 0
        self._start_time = datetime.datetime.now()

    @property
    def count(self):
        """Number of items counted so far."""
        return self._count

    def incr(self, by=1):
        """Increase the count by ``by`` and log when a threshold multiple is passed."""
        previous = self._count
        self._count += by
        if not self.log_threshold:
            return
        # Compare the number of completed threshold intervals before and
        # after, so that increments larger than 1 that jump over an exact
        # multiple still trigger a log line.
        if self._count // self.log_threshold > previous // self.log_threshold:
            self.show_stat()

    def show_stat(self):
        """Log the count, the elapsed time, the speed and (if known) the progress."""
        d = datetime.datetime.now() - self._start_time
        seconds = d.total_seconds()
        # Two clock reads can return the same value; report 0 instead of
        # dividing by zero.
        speed = self._count / seconds if seconds else 0.0
        if not self.total:
            self._logger.info("<%s> finished %d in %s, speed: %.2f/s", self.name, self._count, d, speed)
        else:
            progress = 100.0 * self._count / self.total
            self._logger.info(
                "<%s> finished %d in %s, speed: %.2f/s, progress: %.2f", self.name, self._count, d, speed, progress
            )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class DisplayProgress:
    """Transfer-progress callback that logs size, speed and progress.

    Args:
        total_amount: Total number of bytes expected, if known.
        display_interval: A line is logged only when the running byte count
            is an exact multiple of this value, or once the total is reached.
        stream: When true, each call reports the size of one more chunk and
            the amounts are summed; when false, each call reports the
            cumulative amount transferred so far.
    """

    def __init__(self, total_amount: int = None, display_interval: float = 1024 * 1024, stream: bool = True):
        self._stream = stream
        self._seen_so_far = 0
        self._interval = display_interval
        self._start_time = datetime.datetime.now()
        self._size = total_amount

    def __call__(self, bytes_amount: int, total_amount: int = None):
        """Record ``bytes_amount`` and log the progress when a line is due.

        Args:
            bytes_amount: Chunk size (stream mode) or cumulative byte count.
            total_amount: Total size; falls back to the constructor value.
        """
        if self._stream:
            self._seen_so_far += bytes_amount
        else:
            self._seen_so_far = bytes_amount

        total_amount = total_amount or self._size
        # The total may be unknown (None) or zero; report 0% then instead of
        # failing on the division.
        progress = (self._seen_so_far / total_amount) * 100 if total_amount else 0

        # With an unknown total the transfer is never considered finished, so
        # only the interval check decides whether a line is logged.
        unfinished = total_amount is None or self._seen_so_far < total_amount
        if not self._seen_so_far or (unfinished and self._seen_so_far % self._interval != 0):
            return None

        duration = datetime.datetime.now() - self._start_time
        seconds = duration.total_seconds()
        # Two clock reads can return the same value; report 0 instead of
        # dividing by zero.
        speed = self._seen_so_far / seconds if seconds else 0.0
        _logger.info(
            "transferred %s in %s, average speed: %s/s, progress: %.2f%%",
            humanize.naturalsize(self._seen_so_far, gnu=True),
            duration,
            humanize.naturalsize(speed, gnu=True),
            progress,
        )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class ProgressCallback:
    """Transfer-progress callback that logs the cumulative progress on every call."""

    def __init__(self):
        self._start_time = time.time()

    def __call__(self, consumed_bytes, total_bytes):
        """Log how much of ``total_bytes`` has been transferred.

        Args:
            consumed_bytes: Number of bytes transferred so far.
            total_bytes: Total number of bytes; nothing is logged when falsy.
        """
        if not total_bytes:
            return
        duration = time.time() - self._start_time
        # The first callback can arrive within the clock resolution; report 0
        # instead of dividing by zero.
        speed = consumed_bytes / duration if duration else 0.0
        progress = 100 * (float(consumed_bytes) / float(total_bytes))
        logging.info(
            "transferred %s of %s, avg speed: %s/s, progress: %.2f%%",
            humanize.naturalsize(consumed_bytes, gnu=True),
            humanize.naturalsize(total_bytes, gnu=True),
            humanize.naturalsize(speed, gnu=True),
            progress,
        )
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# schedulers package
|