deepfos 1.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfos/__init__.py +6 -0
- deepfos/_version.py +21 -0
- deepfos/algo/__init__.py +0 -0
- deepfos/algo/graph.py +171 -0
- deepfos/algo/segtree.py +31 -0
- deepfos/api/V1_1/__init__.py +0 -0
- deepfos/api/V1_1/business_model.py +119 -0
- deepfos/api/V1_1/dimension.py +599 -0
- deepfos/api/V1_1/models/__init__.py +0 -0
- deepfos/api/V1_1/models/business_model.py +1033 -0
- deepfos/api/V1_1/models/dimension.py +2768 -0
- deepfos/api/V1_2/__init__.py +0 -0
- deepfos/api/V1_2/dimension.py +285 -0
- deepfos/api/V1_2/models/__init__.py +0 -0
- deepfos/api/V1_2/models/dimension.py +2923 -0
- deepfos/api/__init__.py +0 -0
- deepfos/api/account.py +167 -0
- deepfos/api/accounting_engines.py +147 -0
- deepfos/api/app.py +626 -0
- deepfos/api/approval_process.py +198 -0
- deepfos/api/base.py +983 -0
- deepfos/api/business_model.py +160 -0
- deepfos/api/consolidation.py +129 -0
- deepfos/api/consolidation_process.py +106 -0
- deepfos/api/datatable.py +341 -0
- deepfos/api/deep_pipeline.py +61 -0
- deepfos/api/deepconnector.py +36 -0
- deepfos/api/deepfos_task.py +92 -0
- deepfos/api/deepmodel.py +188 -0
- deepfos/api/dimension.py +486 -0
- deepfos/api/financial_model.py +319 -0
- deepfos/api/journal_model.py +119 -0
- deepfos/api/journal_template.py +132 -0
- deepfos/api/memory_financial_model.py +98 -0
- deepfos/api/models/__init__.py +3 -0
- deepfos/api/models/account.py +483 -0
- deepfos/api/models/accounting_engines.py +756 -0
- deepfos/api/models/app.py +1338 -0
- deepfos/api/models/approval_process.py +1043 -0
- deepfos/api/models/base.py +234 -0
- deepfos/api/models/business_model.py +805 -0
- deepfos/api/models/consolidation.py +711 -0
- deepfos/api/models/consolidation_process.py +248 -0
- deepfos/api/models/datatable_mysql.py +427 -0
- deepfos/api/models/deep_pipeline.py +55 -0
- deepfos/api/models/deepconnector.py +28 -0
- deepfos/api/models/deepfos_task.py +386 -0
- deepfos/api/models/deepmodel.py +308 -0
- deepfos/api/models/dimension.py +1576 -0
- deepfos/api/models/financial_model.py +1796 -0
- deepfos/api/models/journal_model.py +341 -0
- deepfos/api/models/journal_template.py +854 -0
- deepfos/api/models/memory_financial_model.py +478 -0
- deepfos/api/models/platform.py +178 -0
- deepfos/api/models/python.py +221 -0
- deepfos/api/models/reconciliation_engine.py +411 -0
- deepfos/api/models/reconciliation_report.py +161 -0
- deepfos/api/models/role_strategy.py +884 -0
- deepfos/api/models/smartlist.py +237 -0
- deepfos/api/models/space.py +1137 -0
- deepfos/api/models/system.py +1065 -0
- deepfos/api/models/variable.py +463 -0
- deepfos/api/models/workflow.py +946 -0
- deepfos/api/platform.py +199 -0
- deepfos/api/python.py +90 -0
- deepfos/api/reconciliation_engine.py +181 -0
- deepfos/api/reconciliation_report.py +64 -0
- deepfos/api/role_strategy.py +234 -0
- deepfos/api/smartlist.py +69 -0
- deepfos/api/space.py +582 -0
- deepfos/api/system.py +372 -0
- deepfos/api/variable.py +154 -0
- deepfos/api/workflow.py +264 -0
- deepfos/boost/__init__.py +6 -0
- deepfos/boost/py_jstream.py +89 -0
- deepfos/boost/py_pandas.py +20 -0
- deepfos/cache.py +121 -0
- deepfos/config.py +6 -0
- deepfos/core/__init__.py +27 -0
- deepfos/core/cube/__init__.py +10 -0
- deepfos/core/cube/_base.py +462 -0
- deepfos/core/cube/constants.py +21 -0
- deepfos/core/cube/cube.py +408 -0
- deepfos/core/cube/formula.py +707 -0
- deepfos/core/cube/syscube.py +532 -0
- deepfos/core/cube/typing.py +7 -0
- deepfos/core/cube/utils.py +238 -0
- deepfos/core/dimension/__init__.py +11 -0
- deepfos/core/dimension/_base.py +506 -0
- deepfos/core/dimension/dimcreator.py +184 -0
- deepfos/core/dimension/dimension.py +472 -0
- deepfos/core/dimension/dimexpr.py +271 -0
- deepfos/core/dimension/dimmember.py +155 -0
- deepfos/core/dimension/eledimension.py +22 -0
- deepfos/core/dimension/filters.py +99 -0
- deepfos/core/dimension/sysdimension.py +168 -0
- deepfos/core/logictable/__init__.py +5 -0
- deepfos/core/logictable/_cache.py +141 -0
- deepfos/core/logictable/_operator.py +663 -0
- deepfos/core/logictable/nodemixin.py +673 -0
- deepfos/core/logictable/sqlcondition.py +609 -0
- deepfos/core/logictable/tablemodel.py +497 -0
- deepfos/db/__init__.py +36 -0
- deepfos/db/cipher.py +660 -0
- deepfos/db/clickhouse.py +191 -0
- deepfos/db/connector.py +195 -0
- deepfos/db/daclickhouse.py +171 -0
- deepfos/db/dameng.py +101 -0
- deepfos/db/damysql.py +189 -0
- deepfos/db/dbkits.py +358 -0
- deepfos/db/deepengine.py +99 -0
- deepfos/db/deepmodel.py +82 -0
- deepfos/db/deepmodel_kingbase.py +83 -0
- deepfos/db/edb.py +214 -0
- deepfos/db/gauss.py +83 -0
- deepfos/db/kingbase.py +83 -0
- deepfos/db/mysql.py +184 -0
- deepfos/db/oracle.py +131 -0
- deepfos/db/postgresql.py +192 -0
- deepfos/db/sqlserver.py +99 -0
- deepfos/db/utils.py +135 -0
- deepfos/element/__init__.py +89 -0
- deepfos/element/accounting.py +348 -0
- deepfos/element/apvlprocess.py +215 -0
- deepfos/element/base.py +398 -0
- deepfos/element/bizmodel.py +1269 -0
- deepfos/element/datatable.py +2467 -0
- deepfos/element/deep_pipeline.py +186 -0
- deepfos/element/deepconnector.py +59 -0
- deepfos/element/deepmodel.py +1806 -0
- deepfos/element/dimension.py +1254 -0
- deepfos/element/fact_table.py +427 -0
- deepfos/element/finmodel.py +1485 -0
- deepfos/element/journal.py +840 -0
- deepfos/element/journal_template.py +943 -0
- deepfos/element/pyscript.py +412 -0
- deepfos/element/reconciliation.py +553 -0
- deepfos/element/rolestrategy.py +243 -0
- deepfos/element/smartlist.py +457 -0
- deepfos/element/variable.py +756 -0
- deepfos/element/workflow.py +560 -0
- deepfos/exceptions/__init__.py +239 -0
- deepfos/exceptions/hook.py +86 -0
- deepfos/lazy.py +104 -0
- deepfos/lazy_import.py +84 -0
- deepfos/lib/__init__.py +0 -0
- deepfos/lib/_javaobj.py +366 -0
- deepfos/lib/asynchronous.py +879 -0
- deepfos/lib/concurrency.py +107 -0
- deepfos/lib/constant.py +39 -0
- deepfos/lib/decorator.py +310 -0
- deepfos/lib/deepchart.py +778 -0
- deepfos/lib/deepux.py +477 -0
- deepfos/lib/discovery.py +273 -0
- deepfos/lib/edb_lexer.py +789 -0
- deepfos/lib/eureka.py +156 -0
- deepfos/lib/filterparser.py +751 -0
- deepfos/lib/httpcli.py +106 -0
- deepfos/lib/jsonstreamer.py +80 -0
- deepfos/lib/msg.py +394 -0
- deepfos/lib/nacos.py +225 -0
- deepfos/lib/patch.py +92 -0
- deepfos/lib/redis.py +241 -0
- deepfos/lib/serutils.py +181 -0
- deepfos/lib/stopwatch.py +99 -0
- deepfos/lib/subtask.py +572 -0
- deepfos/lib/sysutils.py +703 -0
- deepfos/lib/utils.py +1003 -0
- deepfos/local.py +160 -0
- deepfos/options.py +670 -0
- deepfos/translation.py +237 -0
- deepfos-1.1.60.dist-info/METADATA +33 -0
- deepfos-1.1.60.dist-info/RECORD +175 -0
- deepfos-1.1.60.dist-info/WHEEL +5 -0
- deepfos-1.1.60.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2467 @@
import re
import warnings
from contextvars import ContextVar
import pprint
from contextlib import contextmanager, asynccontextmanager
from typing import (
    Iterable, List, Dict, Union, Type,
    Tuple, TypeVar, Any, Sequence, Optional,
    TYPE_CHECKING,
)

import pandas as pd
import numpy as np
from loguru import logger
from pypika import Field, Query, Table, ClickHouseQuery, Order, OracleQuery, MSSQLQuery, PostgreSQLQuery
from pypika.terms import Term, EmptyCriterion
from pypika.utils import format_quotes, format_alias_sql
from requests.utils import CaseInsensitiveDict

from .base import ElementBase, SyncMeta
from deepfos.api.datatable import (
    MySQLAPI, ClickHouseAPI, SQLServerAPI,
    OracleAPI, KingBaseAPI, GaussAPI, DaMengAPI,
    PostgreSQLAPI, DeepEngineAPI, DeepModelAPI,
    DeepModelKingBaseAPI
)
from deepfos.api.models.datatable_mysql import (
    CustomSqlRespDTO, MiscModel, DatatableDataDeleteDTO
)
from deepfos.lib.utils import FrozenClass, split_dataframe
from deepfos.lib.asynchronous import future_property, evloop
from deepfos.lib.decorator import flagmethod, cached_property
from deepfos.lib.constant import UNSET
from deepfos.db.dbkits import null, Skip, DataframeSQLConvertor, escape_string, escape_mysql_string, escape_pg_string
from deepfos.db.oracle import OracleDFSQLConvertor  # noqa
from deepfos.db.clickhouse import ClickHouseConvertor
from deepfos.db.postgresql import PostgreSQLConvertor
from deepfos.db.sqlserver import SQLServerDFSQLConvertor
from deepfos.db.deepengine import DeepEngineDFSQLConvertor
from deepfos.options import OPTION

__all__ = [
    'Datatable',
    'AsyncDataTableMySQL',
    'DataTableMySQL',
    'AsyncDataTableClickHouse',
    'DataTableClickHouse',
    'AsyncDataTableOracle',
    'DataTableOracle',
    'AsyncDataTableSQLServer',
    'DataTableSQLServer',
    'AsyncDataTableKingBase',
    'DataTableKingBase',
    'AsyncDataTableGauss',
    'DataTableGauss',
    'AsyncDataTableDaMeng',
    'DataTableDaMeng',
    'AsyncDataTablePostgreSQL',
    'DataTablePostgreSQL',
    'AsyncDataTableDeepEngine',
    'DataTableDeepEngine',
    'AsyncDataTableDeepModel',
    'DataTableDeepModel',
    'AsyncDataTableDeepModelKingBase',
    'DataTableDeepModelKingBase',
    'null',
    'Skip',
    'Field',
    'get_table_class',
    'T_DatatableClass',
    'T_DatatableInstance',
    'T_AsyncDatatableClass',
    'T_AsyncDatatableInstance'
]

SQL_LOG_MAX_LEN = 1024
# -----------------------------------------------------------------------------
# typing
KT = TypeVar('KT', Field, str)
VT = TypeVar('VT', str, int)
T_DictLike = Union[Dict[KT, VT], Iterable[Tuple[KT, VT]]]


# -----------------------------------------------------------------------------
# Columns
class BaseColumn:
    null_val = UNSET
    dtype = UNSET

    def __init__(self, column: MiscModel):
        self.column = column
        self.col_name = column.name
        self.col_type = column.type
        self.nullable = column.whetherEmpty

    def fit(self, df: pd.DataFrame, column: str):
        """
        Makes the corresponding column of a :class:`Dataframe` conform to the
        column's constraints. Typically used before writing the dataframe's
        data into the DB. It does two things:

        1. Fills empty values, provided the subclass's :attr:`nullable` is
           ``False`` and the subclass defines the class attribute
           :attr:`null_val` as the fill value.
        2. Performs other conversions, defined by subclasses via
           :meth:`extra_fit`.

        Args:
            df: the :class:`Dataframe` to convert
            column: name of the column to convert
        """
        if not self.nullable and self.null_val is not UNSET:
            df[column] = df[column].fillna(self.null_val)
        self.extra_fit(df, column)

    def extra_fit(self, df: pd.DataFrame, column: str):
        # df[self.col_name] = df[self.col_name].astype(self.dtype, errors='ignore')
        pass

    def cast(self, df: pd.DataFrame, column: str):
        """
        Casts the corresponding column of a :class:`Dataframe` to its type.
        Typically used when fetching a :class:`Dataframe`.
        """
        pass

    def __repr__(self):  # pragma: no cover
        return self.__class__.__name__


class ColumnFloat(BaseColumn):
    dtype = 'float'


class ColumnDateTime(BaseColumn):
    dtype = 'datetime64[ns]'

    def cast(self, df, column: str):
        df[column] = pd.to_datetime(df[column])

    @staticmethod
    def format_datetime(dt):
        if not isnull(dt):
            return "'" + dt.strftime("%Y-%m-%d %H:%M:%S") + "'"
        return pd.NaT

    def extra_fit(self, df: pd.DataFrame, column: str):
        df[column] = df[column].apply(self.format_datetime)


class ColumnOracleDateTime(ColumnDateTime):
    dtype = 'datetime64[ns]'

    def cast(self, df, column: str):
        df[column] = pd.to_datetime(df[column])

    @staticmethod
    def format_datetime(dt):
        if not isnull(dt):
            return f"TO_DATE('{dt.strftime('%Y-%m-%d %H:%M:%S')}', 'YYYY-MM-DD HH24:MI:SS')"
        return pd.NaT

    def extra_fit(self, df: pd.DataFrame, column: str):
        df[column] = df[column].apply(self.format_datetime)


class ColumnInt(BaseColumn):
    dtype = 'int'


class ColumnString(BaseColumn):
    null_val = 'null'
    dtype = 'object'

    @staticmethod
    def escape_string(string):
        if string is null:
            return null
        if string:
            return f"'{escape_string(string)}'"
        return "''"

    def extra_fit(self, df: pd.DataFrame, column: str):
        if self.nullable:
            df[column] = df[column].fillna(null)
        df[column] = df[column].apply(self.escape_string)


class MySQLColumnString(ColumnString):
    null_val = 'null'
    dtype = 'object'

    @staticmethod
    def escape_string(string):
        if string is null:
            return null
        if string:
            return f"'{escape_mysql_string(string)}'"
        return "''"


class PGColumnString(ColumnString):
    null_val = 'null'
    dtype = 'object'

    @staticmethod
    def escape_string(string):
        if string is null:
            return null
        if string:
            return f"'{escape_pg_string(string)}'"
        return "''"


class ColumnDecimal(BaseColumn):
    dtype = 'float'

    def extra_fit(self, df, column: str):
        digits = self.column.length.rsplit(',')[1]
        df[column] = np.where(
            df[column].isna(),
            df[column], df[column].fillna(0).round(int(digits)))


class ColumnFactory:
    col_map = {
        "datetime": ColumnDateTime,
        "oracle_datetime": ColumnOracleDateTime,
        "date": ColumnDateTime,
        "int": ColumnInt,
        "smallint": ColumnInt,
        "tinyint": ColumnInt,
        "bigint": ColumnInt,
        "integer": ColumnInt,
        "varchar": ColumnString,
        "pg_varchar": PGColumnString,
        "mysql_varchar": MySQLColumnString,
        "pg_text": PGColumnString,
        "mysql_text": MySQLColumnString,
        "text": ColumnString,
        "float": ColumnFloat,
        "double": ColumnFloat,
        "decimal": ColumnDecimal,
    }

    def __new__(cls, column: MiscModel):
        col_class = cls.col_map.get(cls.get_col_key(column.type), BaseColumn)
        return col_class(column)

    @staticmethod
    def get_col_key(col_type):
        return col_type.lower()


class MySQLColumnFactory(ColumnFactory):
    @staticmethod
    def get_col_key(col_type):
        if col_type.lower() == 'varchar':
            return "mysql_varchar"
        if col_type.lower() == 'text':
            return "mysql_text"
        return col_type.lower()


class ClickHouseColumnFactory(ColumnFactory):
    @staticmethod
    def get_col_key(col_type):
        if col_type.lower() == 'varchar':
            return "mysql_varchar"
        if col_type.lower() == 'text':
            return "mysql_text"
        if col_type == 'LowCardinality(String)':
            return "mysql_varchar"
        return col_type.lower()


class OracleColumnFactory(ColumnFactory):
    @staticmethod
    def get_col_key(col_type):
        if col_type.lower() == 'datetime':
            return "oracle_datetime"
        return col_type.lower()


class PGColumnFactory(ColumnFactory):
    @staticmethod
    def get_col_key(col_type):
        if col_type.lower() == 'varchar':
            return "pg_varchar"
        if col_type.lower() == 'text':
            return "pg_text"
        return col_type.lower()

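
# --- Editor's note: usage sketch, not part of the package source ----------
# ColumnFactory.__new__ dispatches on the declared column type: the col_map
# lookup returns a BaseColumn subclass, so "constructing" a factory actually
# yields a column handler, and each DB-specific factory only remaps type
# keys. A minimal sketch, assuming a hypothetical MiscModel-like descriptor:

def _demo_column_dispatch():
    class TagCol:  # hypothetical stand-in for a MiscModel column descriptor
        name, type, whetherEmpty = 'tag', 'VARCHAR', True

    assert isinstance(ColumnFactory(TagCol), ColumnString)
    assert isinstance(MySQLColumnFactory(TagCol), MySQLColumnString)
    assert isinstance(PGColumnFactory(TagCol), PGColumnString)
    # Unknown types fall back to the no-op BaseColumn.
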
class TableStructure:
    """
    Table structure.

    Args:
        meta_info: the table's metadata, containing each column's name and data type

    """
    ColumnFactory = ColumnFactory

    def __init__(self, meta_info: List[MiscModel]):
        self.columns = CaseInsensitiveDict({
            col.name: self.ColumnFactory(col)
            for col in meta_info
        })

    def fit(self, df: pd.DataFrame, columns: Iterable[str] = None):
        """
        Runs the fit operation on the given columns of the passed-in DataFrame.
        Modifies the DataFrame in place.

        Args:
            df: data source
            columns: data columns

        See Also:
            :meth:`BaseColumn.fit`

        """
        if columns is None:
            columns = self.columns

        valid_cols = []
        for col in columns:
            if col in self.columns:
                valid_cols.append(col)
                self.columns[col].fit(df, col)
        return df[valid_cols]

    def fit_single(self, df: pd.DataFrame, column: str):  # pragma: no cover
        """
        Runs the fit operation on one given column of the passed-in DataFrame.
        Modifies the DataFrame in place.

        Args:
            df: data source
            column: name of the data column

        See Also:
            :meth:`fit` , :meth:`BaseColumn.fit`

        """
        if column not in self.columns:
            raise KeyError(f"Given column: {column} doesn't exist.")
        self.columns[column].fit(df, column)

    def cast(self, df: pd.DataFrame):
        """
        Runs the cast operation on all columns of the passed-in DataFrame.
        Modifies the DataFrame in place.

        Args:
            df: data source

        See Also:
            :meth:`BaseColumn.cast`

        """
        for col in df.columns:
            if col in self.columns:
                self.columns[col].cast(df, col)

    def __repr__(self):  # pragma: no cover
        return pprint.pformat(self.columns)


class MySQLTableStructure(TableStructure):
    ColumnFactory = MySQLColumnFactory


class OracleTableStructure(TableStructure):
    ColumnFactory = OracleColumnFactory


class PGTableStructure(TableStructure):
    ColumnFactory = PGColumnFactory


class ClickHouseTableStructure(TableStructure):
    ColumnFactory = ClickHouseColumnFactory

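
# --- Editor's note: usage sketch, not part of the package source ----------
# TableStructure.fit prepares a DataFrame for an INSERT: string values come
# back quoted and escaped, non-nullable columns get their fill value, and
# columns the table doesn't have are dropped. A minimal sketch with a
# hypothetical MiscModel-like descriptor:

def _demo_table_structure():
    class NameCol:  # hypothetical stand-in for a MiscModel column descriptor
        name, type, whetherEmpty = 'name', 'varchar', False

    struct = TableStructure([NameCol])
    df = pd.DataFrame({'name': ['Alice', None], 'extra': [1, 2]})
    fitted = struct.fit(df, df.columns)
    print(fitted['name'].tolist())   # ["'Alice'", "'null'"] -- quoted, filled
    print(fitted.columns.tolist())   # ['name'] -- 'extra' was dropped
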
# -----------------------------------------------------------------------------
# utils
class _DataTableDFConvertor(DataframeSQLConvertor):
    def convert(
        self,
        dataframe: pd.DataFrame,
        tablename: str,
        updatecol: Iterable[str] = None,
        **opts
    ) -> str:
        """
        Converts a DataFrame object into an insert SQL statement.
        If updatecol is not given, plain INSERT INTO syntax is used;
        if updatecol is given, INSERT INTO ... ON DUPLICATE syntax is used:
        rows without a duplicate primary key are inserted, rows with the same
        primary key update the given columns.

        Args:
            dataframe: the data to insert
            tablename: database table name
            updatecol: columns to update

        Returns:
            the SQL statement
        """
        if dataframe.empty:
            return ''

        data_df = dataframe.fillna(null).astype(str, errors='ignore')
        data_series = "(" + pd.Series(data_df.values.tolist()).str.join(',') + ")"
        columns = self.build_column_string(dataframe.columns)

        return self.build_sql(columns, data_series, tablename, updatecol)


class _OracleDFConvertor(_DataTableDFConvertor):
    def build_sql(
        self,
        columns: str,
        values_in_line: Iterable[str],
        tablename: str,
        updatecol: Iterable[str] = None,
        **opts
    ):
        return OracleDFSQLConvertor(self.quote_char).build_sql(columns, values_in_line, tablename, updatecol, **opts)

    def build_column_string(self, columns):
        return ','.join(columns.map(
            lambda x: f'"{x.upper()}"'
        ))


class _ClickHouseDFConvertor(_DataTableDFConvertor):
    def build_sql(
        self,
        columns: str,
        values_in_line: Iterable[str],
        tablename: str,
        updatecol: Iterable[str] = None,
        **opts
    ):
        return ClickHouseConvertor(self.quote_char).build_sql(columns, values_in_line, tablename, updatecol, **opts)


class _SQLServerDFConvertor(_DataTableDFConvertor):
    def build_sql(
        self,
        columns: str,
        values_in_line: Iterable[str],
        tablename: str,
        updatecol: Iterable[str] = None,
        **opts
    ):
        return SQLServerDFSQLConvertor(self.quote_char).build_sql(columns, values_in_line, tablename, updatecol, **opts)


class _DeepEngineDFConvertor(_DataTableDFConvertor):
    def build_sql(
        self,
        columns: str,
        values_in_line: Iterable[str],
        tablename: str,
        updatecol: Iterable[str] = None,
        **opts
    ):
        return DeepEngineDFSQLConvertor(self.quote_char).build_sql(columns, values_in_line, tablename, updatecol, **opts)


class _PostgreSQLDFConvertor(PostgreSQLConvertor):
    def convert(
        self,
        dataframe: pd.DataFrame,
        tablename: str,
        updatecol: Iterable[str] = None,
        conflict_target: Iterable[str] = None,
        **opts
    ) -> str:
        """
        Converts a DataFrame object into an insert SQL statement.
        If updatecol is not given, plain INSERT INTO syntax is used;
        if updatecol is given, INSERT INTO ... ON CONFLICT syntax is used:
        rows without a duplicate primary key are inserted, rows with the same
        primary key update the given columns.

        Args:
            dataframe: the data to insert
            tablename: database table name
            updatecol: columns to update
            conflict_target: conflict target columns for the
                INSERT INTO ... ON CONFLICT syntax

        Returns:
            the SQL statement
        """
        if dataframe.empty:
            return ''

        data_df = dataframe.fillna(null).astype(str, errors='ignore')
        data_series = "(" + pd.Series(data_df.values.tolist()).str.join(',') + ")"
        columns = self.build_column_string(dataframe.columns)

        return self.build_sql(columns, data_series, tablename, updatecol, conflict_target=conflict_target, **opts)

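
# --- Editor's note: illustration sketch, not part of the package source ---
# The convert() methods above share one row-serialization trick: every
# DataFrame row is joined into a "(v1,v2,...)" tuple string, which build_sql
# then splices into the INSERT statement. The pandas part in isolation:

def _demo_values_serialization():
    df = pd.DataFrame({'a': [1, 2], 'b': ["'x'", "'y'"]})
    data_df = df.astype(str)
    values = "(" + pd.Series(data_df.values.tolist()).str.join(',') + ")"
    print(values.tolist())  # ["(1,'x')", "(2,'y')"]
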
def isnull(obj: Any) -> bool:
    return (obj is null) or pd.isna(obj)


def ensure_pikafield(table: Table, fields: Iterable[Union[str, int, Field, Term]]):
    for fld in fields:
        if isinstance(fld, str):
            yield table.__getattr__(fld)
        elif isinstance(fld, int):
            yield table.__getattr__(str(fld))
        else:
            yield fld


txn_support = flagmethod('_txn_support_')

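
# --- Editor's note: usage sketch, not part of the package source ----------
# ensure_pikafield normalizes a mixed list of column specifiers into pypika
# Field objects bound to a table, passing real Term objects through as-is:

def _demo_ensure_pikafield():
    t = Table('example')
    fields = list(ensure_pikafield(t, ['col_a', 7, t.col_b]))
    # 'col_a' and 7 become t.col_a / t."7"; t.col_b passes through unchanged
    print([f.get_sql(quote_char='`') for f in fields])
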
class _TxnConfig:
    __slots__ = ('async_api', 'sql', 'in_txn', 'txn_support', 'flatten')

    def __init__(self):
        self.async_api = None
        self.sql = [[]]
        self.in_txn = [False]
        self.txn_support = False
        self.flatten = False


DOC_TEMPLATE = """{DB} datatable

Provides insert, delete, update and select operations on a single table.

Args:
    table_name: the real table name of the datatable; if already known,
        passing it avoids a redundant internal lookup and improves performance.
"""

DOC_START_TX_TEMPLATE = """Starts a transaction

A context manager; open the context with the with syntax, and the SQL inside
the context is executed as one transaction.
The transaction executes as soon as the with block is exited; errors raised
during execution propagate directly, and the result can be inspected via
:attr:`transaction_result`.

.. admonition:: Example

    .. code-block:: python

        tbl = %s('table_example')
        t = tbl.table
        with tbl.start_transaction():
            tbl.insert({'key': 101, 'value': 'txn'})
            tbl.update({'value': 'new_txn'}, where=t.key == 101)
            tbl.delete(where=t.key >= 99)
        result = tbl.transaction_result

Args:
    flatten: whether to flatten nested transactions; if enabled, nested
        transactions are executed as a single transaction

Important:
    Only the ``insert/delete/update`` **family** of methods (including
    :meth:`insert_df`, :meth:`copy_rows` and so on) can run inside a
    transaction. Methods that support transactions can be identified in the
    source code: any method carrying the ``@txn_support`` decorator supports
    transactions.

    If a select is executed inside a transaction, its result is still
    returned immediately.

"""

# -----------------------------------------------------------------------------
# core


class AsyncDataTableMySQL(ElementBase):
    __doc__ = DOC_TEMPLATE.format(DB='MySQL')
    api_class = MySQLAPI
    api: MySQLAPI
    query = Query
    quote_char = '`'
    convertor = _DataTableDFConvertor(quote_char=quote_char)

    _txn_ = ContextVar('TXN')
    #: transaction execution result
    transaction_result = None

    def __init__(
        self,
        element_name: str,
        folder_id: str = None,
        path: str = None,
        table_name: str = None,
        server_name: str = None,
    ):
        self.__tbl_name = table_name
        super().__init__(element_name, folder_id, path, server_name)

    def _safe_get_txn_conf(self) -> _TxnConfig:
        try:
            config = self._txn_.get()
        except LookupError:
            config = _TxnConfig()
            self._txn_.set(config)
        return config

    @property
    def _txn_support_(self):
        return self._safe_get_txn_conf().txn_support

    @_txn_support_.setter
    def _txn_support_(self, val):
        self._safe_get_txn_conf().txn_support = val

    @future_property
    async def meta(self):
        """The datatable's meta configuration"""
        api = await self.wait_for('async_api')
        element_info = await self.wait_for('element_info')
        r = await api.dml.table_info_field([element_info])
        return r[0]

    @cached_property
    def table_name(self) -> str:
        """The real table name of the datatable"""
        if self.__tbl_name is None:
            self.__tbl_name = self.meta.datatableInfo.actualTableName
        return self.__tbl_name

    @cached_property
    def table(self) -> Table:
        """The pypika Table object

        Mainly used to build query conditions.

        .. admonition:: Example

            .. code-block:: python

                tbl = DataTableMySQL("test")
                t = tbl.table
                where = (
                    ((t.f1 > 1) | (t.f2 == '23'))
                    &
                    (t.f3.isin([1, 2, 3]) | t.f4.like('f%'))
                )
                tbl.select(where=where)

            executes the SQL:

            .. code-block:: sql

                SELECT
                    *
                FROM
                    test
                WHERE
                    (`f1`>1 OR `f2`='23')
                    AND
                    (`f3` IN (1,2,3) OR `f4` LIKE 'f%')

        See Also:
            For more ways to use table, see
            `pypika on GitHub <https://github.com/kayak/pypika#tables-columns-schemas-and-databases>`_

        """
        return Table(self.table_name)

    @cached_property
    def _quoted_table_name(self):
        return self.table.get_sql(quote_char=self.quote_char)

    @cached_property
    def structure(self) -> MySQLTableStructure:
        """The datatable's table structure

        Mainly holds the names and types of all columns; used to pre-process
        type conversion of the data during queries and saves.
        """
        return MySQLTableStructure(self.meta.datatableColumn)

    @cached_property
    def _field_map_templates(self) -> Tuple[Dict[str, None], Dict[str, Type[null]]]:
        base_tmpl = {}
        incr_cols = {}

        for col in self.meta.datatableColumn:
            if col.whetherIncrement:
                incr_cols[col.name] = null
            else:
                base_tmpl[col.name] = None

        return base_tmpl, incr_cols

    async def select(
        self,
        columns: Iterable[Union[str, Term]] = None,
        where: Union[str, Term, EmptyCriterion] = None,
        distinct: bool = False,
        groupby: Iterable[Union[str, int, Term]] = None,
        having: Iterable[Union[Term, EmptyCriterion]] = None,
        orderby: Iterable[Union[str, Field]] = None,
        order: Union[Order, str] = Order.asc,
        limit: int = None,
        offset: int = None,
    ) -> pd.DataFrame:
        """Fetches data from the datatable as a ``DataFrame``

        Returns a two-dimensional ``DataFrame`` according to the query
        conditions, converting data automatically based on column types;
        for example, ``DATETIME`` fields are converted to a date type.

        Important:
            Used exactly like :meth:`select_raw`; see the documentation of
            :meth:`select_raw` for usage examples.

        Args:
            columns: fields to query
            where: query condition (aggregate conditions also work)
            distinct: whether to use select distinct syntax
            groupby: columns for groupby
            having: conditions for the having clause
            orderby: columns for orderby
            order: orderby direction, ASC/DESC
            limit: limit on the number of returned rows
            offset: offset

        Returns:
            the queried two-dimensional data table

        See Also:
            To get the raw data, use :meth:`select_raw`

        """
        raw_data = await self.select_raw(
            columns,
            where=where,
            distinct=distinct,
            groupby=groupby,
            having=having,
            orderby=orderby,
            order=order,
            limit=limit,
            offset=offset
        )
        data = pd.DataFrame.from_records(raw_data)
        self.structure.cast(data)
        if data.empty:
            if columns:
                add_cols = self._get_valid_columns(columns)
            else:
                add_cols = self.structure.columns.keys()
            return pd.DataFrame(columns=add_cols)
        return data

    async def select_raw(
        self,
        columns: Iterable[Union[str, Term]] = None,
        where: Union[str, Term, EmptyCriterion] = None,
        distinct: bool = False,
        groupby: Iterable[Union[str, int, Term]] = None,
        having: Iterable[Union[Term, EmptyCriterion]] = None,
        orderby: Iterable[Union[str, Field]] = None,
        order: Union[Order, str] = Order.asc,
        limit: int = None,
        offset: int = None,
    ) -> List[dict]:
        """Fetches data from the datatable by query conditions

        Queries the datatable interface with the given conditions and returns
        the result. The data will only contain **basic JSON data types**.

        Args:
            columns: fields to query
            where: query condition (aggregate conditions also work)
            distinct: whether to use select distinct syntax
            groupby: columns for groupby
            having: conditions for the having clause
            orderby: columns for orderby
            order: orderby direction, ASC/DESC
            limit: limit on the number of returned rows
            offset: offset


        .. admonition:: Example

            .. code-block:: python

                import pypika.functions as pf
                tbl = DataTableMySQL("example")
                t = tbl.table

        #. Query the full table

           .. code-block:: python

               tbl.select()

        #. Query specific columns, applying functions to data columns

           .. code-block:: python

               columns = [
                   'col_a',
                   pf.Max('col_b'),
                   t.col_c,
                   t.col_d + 25,
                   pf.Avg(t.col_e)
               ]
               tbl.select(columns)

           executes the SQL:

           .. code-block:: sql

               SELECT
                   `col_a`,
                   MAX('col_b'),
                   `col_c`,
                   `col_d`+ 25,
                   AVG(`col_e`)
               FROM
                   `example`

        #. Specify query conditions

           .. code-block:: python

               where = (
                   ((t.col_a > 1) | (t.col_b == '23'))
                   &
                   (t.col_c.isin([1, 2, 3]) | t.col_d.like('f%'))
               )
               tbl.select(where=where)

           executes the SQL:

           .. code-block:: sql

               SELECT
                   *
               FROM
                   `example`
               WHERE
                   (`col_a`>1
                   OR `col_b`= '23')
                   AND (`col_c` IN (1, 2, 3)
                   OR `col_d` LIKE 'f%')

        #. Aggregate conditions and more

           .. code-block:: python

               tbl.select(
                   [pf.Max('col_a')],
                   groupby=[t.col_c],
                   limit=10,
                   offset=5,
               )

           executes the SQL:

           .. code-block:: sql

               SELECT
                   MAX('col_a')
               FROM
                   `example`
               GROUP BY
                   `col_c`
               LIMIT 10 OFFSET 5

        Warnings:
            Although the where parameter currently accepts the str type, this
            support will be removed in the future, because it forces the SQL
            to be executed as
            ``"SELECT {distinct} {fields} FROM {table_name} WHERE {where}"``,
            which means parameters such as groupby and limit are ignored.
            They can be written into the where condition instead, but that
            hurts the readability of your code and is not recommended.
            Please use this method as shown in the examples; otherwise you
            may see warnings in your code, and eventually the str form will
            stop working.

        See Also:
            To get the data as a ``DataFrame``, use :meth:`select`

        Returns:
            data of the form [{column -> value}, ... , {column -> value}].

            For example, if the raw data is

            +------+------+
            | col1 | col2 |
            +======+======+
            | 1    | 2    |
            +------+------+
            | 3    | 4    |
            +------+------+

            the return value is ``[{'col1': 1, 'col2': 2}, {'col1': 3, 'col2': 4}]``.

        """
        sql = self._build_select_sql(
            columns,
            where=where,
            distinct=distinct,
            groupby=groupby,
            having=having,
            orderby=orderby,
            order=order,
            limit=limit,
            offset=offset
        )
        r = await self._run_sql(sql)
        return r.selectResult

    def _build_select_sql(
        self,
        columns: Iterable[Union[str, Term]] = None,
        where: Union[str, Term, EmptyCriterion] = None,
        distinct: bool = False,
        groupby: Iterable[Union[str, int, Term]] = None,
        having: Iterable[Union[Term, EmptyCriterion]] = None,
        orderby: Iterable[Union[str, Field]] = None,
        order: Union[Order, str] = Order.asc,
        limit: int = None,
        offset: int = None,
    ) -> str:
        q = self.query.from_(self.table)
        if distinct:
            q = q.distinct()

        if columns is None:
            q = q.select('*')
        else:
            q = q.select(*ensure_pikafield(self.table, columns))

        if isinstance(where, str):
            warnings.warn(
                "Passing the where condition as a string is deprecated. "
                "See this method's documentation for the recommended usage.",
                DeprecationWarning)
            sql = f"{q.get_sql(quote_char=self.quote_char)} WHERE {where}"
        else:
            if where is not None:
                q = q.where(where)
            if groupby is not None:
                q = q.groupby(*ensure_pikafield(self.table, groupby))
            if having is not None:
                q = q.having(*having)
            if orderby is not None:
                if isinstance(order, str):
                    order = Order[order.lower()]
                q = q.orderby(*ensure_pikafield(self.table, orderby), order=order)
            if limit is not None:
                q = q.limit(limit)
            if offset is not None:
                q = q.offset(offset)
            sql = q.get_sql(quote_char=self.quote_char)
        return sql

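    # --- Editor's note: illustration, not part of the package source -------
    # _build_select_sql is a thin layer over pypika's query builder; the same
    # chain can be reproduced standalone to preview the generated SQL, e.g.:
    #
    #     t = Table('example')
    #     q = (Query.from_(t)
    #          .select(t.col_a, t.col_b)
    #          .where(t.col_a > 1)
    #          .groupby(t.col_b)
    #          .limit(10))
    #     q.get_sql(quote_char='`')
    #     # -> SELECT `col_a`,`col_b` FROM `example`
    #     #    WHERE `col_a`>1 GROUP BY `col_b` LIMIT 10
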
    @staticmethod
    def _get_valid_columns(columns: Iterable[Union[str, Term]]):
        res = []
        for c in columns:
            if isinstance(c, str):
                res.append(c)
            elif isinstance(c, Term):
                if c.alias is not None:
                    res.append(c.alias)
                else:
                    res.append(c.get_sql(quote_char=''))
            else:
                res.append(str(c))
        return res

    @txn_support
    async def insert_df(
        self,
        dataframe: pd.DataFrame,
        updatecol: Iterable = None,
        chunksize: int = 5000,
        auto_fit: bool = True,
    ) -> Union[CustomSqlRespDTO, Dict, None]:
        """Inserts the data of a ``DataFrame`` into the current datatable

        Before writing, the DataFrame's data is processed as follows:

        #. (mandatory) all empty values are changed to null, so the data can
           be written correctly
        #. (optional) string columns that are **not nullable** are filled
           with the ``'null'`` string (this may change in the future; do not
           rely on this behaviour)
        #. (optional) decimal columns are automatically ``round``-ed to the
           declared number of digits

        The **(optional)** steps above can be disabled by passing
        ``auto_fit=False``.

        Args:
            dataframe: the data to insert
            updatecol: columns to update (used for INSERT INTO ON DUPLICATE)
            chunksize: number of rows written per statement
            auto_fit: whether to adjust the data automatically

        Hint:
            If a single write carries so much data that it exceeds the
            database's limit for one SQL statement, lower chunksize; this
            method then splits one large statement into several smaller ones.

        Returns:
            the record of the performed operations

        """
        if dataframe.empty:
            return

        if auto_fit:
            dataframe = dataframe.copy()
            dataframe = self.structure.fit(dataframe, dataframe.columns)
        else:
            dataframe = dataframe[dataframe.columns.intersection(self.structure.columns)]

        sqls = self.convertor.iter_sql(dataframe, self.table.get_table_name(), updatecol, chunksize)
        return await self._maybe_submit_in_txn(sqls)

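    # --- Editor's note: illustration, not part of the package source -------
    # insert_df delegates chunking to convertor.iter_sql: every `chunksize`
    # rows become one INSERT statement. Conceptually the split is plain
    # slicing (a sketch, not the convertor's actual code):
    #
    #     for start in range(0, len(df), chunksize):
    #         chunk = df.iloc[start:start + chunksize]   # -> one INSERT
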
    async def _maybe_submit_in_txn(self, sqls: Iterable[str]):
        if self._txn_.get().in_txn[-1]:
            for sql in sqls:
                await self.run_sql(sql)
        else:
            return await self._trxn_execute(self, list(sqls))

    def _parse_where(self, where: Union[None, Term, EmptyCriterion]) -> str:
        if isinstance(where, (Term, EmptyCriterion)):
            return where.get_sql(quote_char=self.quote_char)
        if isinstance(where, str):
            return where
        raise TypeError(f"Unsupported type: {type(where)} for where.")

    @txn_support
    async def delete(
        self,
        where: Union[str, Term, EmptyCriterion],
    ) -> CustomSqlRespDTO:
        """Deletes data from the datatable

        Args:
            where: deletion condition

        .. admonition:: Example

            .. code-block:: python

                tbl = DataTableMySQL("example")
                t = tbl.table
                where = (
                    ((t.col_a > 1) | (t.col_b == '23'))
                    &
                    (t.col_c.isin([1, 2, 3]) | t.col_d.like('f%'))
                )
                tbl.delete(where)

            executes the SQL:

            .. code-block:: sql

                DELETE
                FROM
                    `example`
                WHERE
                    (`col_a`>1
                    OR `col_b`= '23')
                    AND (`col_c` IN (1, 2, 3)
                    OR `col_d` LIKE 'f%')

        Warnings:
            Support for passing where as a ``str`` will be removed in the
            future; please call this method as shown in the example.

        """
        sql = f"DELETE FROM {self._quoted_table_name} WHERE {self._parse_where(where)}"
        return await self.run_sql(sql)

    @txn_support
    async def update(
        self,
        assignment_list: T_DictLike,
        where: Union[None, Term, EmptyCriterion]
    ):
        """
        Updates data in the datatable

        Args:
            assignment_list: fields to update and their new values
            where: condition the updated rows must satisfy

        .. admonition:: Example

            .. code-block:: python

                tbl = DataTableMySQL("example")
                t = tbl.table
                tbl.update({'col1': 'val1', 'col2': t.col2 + 1}, where=t.key == 101)
                tbl.update([('col1', 'val1'), ('col2', t.col2 + 1)], where=t.key == 101)

            The two ``update`` calls are equivalent and execute the SQL:

            .. code-block:: sql

                UPDATE
                    `example`
                SET
                    `col1`= 'val1',
                    `col2`=`col2`+ 1
                WHERE
                    `KEY`= 101

        Important:
            To keep callers from accidentally running a full-table update by
            forgetting the where condition, where is a required parameter of
            this method. If you really do want a full-table update, pass
            ``where = None`` explicitly.

        """

        q = self.query.update(self.table)

        if isinstance(assignment_list, Dict):
            iter_items = assignment_list.items()
        else:
            iter_items = assignment_list

        for field, value in iter_items:
            if isinstance(field, str):
                field = self.table.__getattr__(field)
            q = q.set(field, value)

        if where is not None:
            q = q.where(where)
        return await self.run_sql(q.get_sql(quote_char=self.quote_char))

    @txn_support
    async def update_from_dataframe(
        self,
        source: pd.DataFrame,
        chucksize: Optional[int] = None
    ):
        """Updates the datatable from a :class:`DataFrame`

        Args:
            source: the update data source
            chucksize: maximum number of DataFrame rows used per update batch

        Important:
            The :class:`DataFrame` ``source`` must contain a ``where`` column,
            whose values may be strings or pypika condition expressions. The
            column states the update condition for each row. To prevent a
            missing condition from triggering a full-table update, no value
            in this column may be null.

            If some rows should not update every field, fill the
            corresponding cells with the Skip value.

        .. admonition:: Example

            .. code-block:: python

                from deepfos.element.datatable import Skip

                df = pd.DataFrame(data=[
                    [1, 'Foo', 'Foo@x.com'],
                    [2, 'Bar', 'bar@x.com'],
                    [3, 'Jack', Skip]
                ], columns=['id', 'name', 'email'])

                df['where'] = pd.Series(f"id='{i + 1}'" for i in range(3))

                tbl = DataTableMySQL("example")
                tbl.update_from_dataframe(df)

            executes the following SQL:

            .. code-block:: SQL

                UPDATE `example`
                SET `id`=1,`name`='Foo',`email`='Foo@x.com'
                WHERE
                    id = 1;
                UPDATE `example`
                SET `id`=2,`name`='Bar'  -- email is Skip, so it is not updated
                WHERE
                    id = 2;
                UPDATE `example`
                SET `id`=3,`name`='Jack'
                WHERE
                    id = 3;

        """
        key_where = 'where'
        if key_where not in source.columns:
            raise ValueError(f"Column <{key_where}> is missing in source dataframe.")

        valid_columns = source.columns.intersection(self.structure.columns.keys())
        table = self.table

        def yield_sql(df):
            where_col = df[key_where]
            for idx, upd_data in enumerate(df[valid_columns].to_dict(orient='records')):
                q = self.query.update(table)

                any_updates = False
                for field, value in upd_data.items():
                    if value is Skip:
                        continue

                    any_updates = True
                    if isinstance(field, str):
                        field = table.__getattr__(field)
                    q = q.set(field, value)

                if not any_updates:
                    continue

                if isnull(where := where_col.iloc[idx]):
                    raise ValueError(
                        f"The where condition in [row: {idx}] is null, "
                        f"which is strictly prohibited.")

                if isinstance(where, str):
                    yield f"{q.get_sql(quote_char=self.quote_char)} WHERE {where}"
                elif isinstance(where, (Term, EmptyCriterion)):
                    q = q.where(where)
                    yield q.get_sql(quote_char=self.quote_char)

        ret = []
        for dataframe in split_dataframe(source, chucksize):
            r = await self._maybe_submit_in_txn(yield_sql(dataframe))
            ret.append(r)
        return ret

    async def count(
        self,
        where: Union[str, Term, EmptyCriterion],
    ) -> int:
        """
        Counts data records

        Counts the number of records that satisfy the given query condition.

        Args:
            where: query condition

        """
        sql = f"SELECT COUNT(1) FROM {self._quoted_table_name} WHERE {self._parse_where(where)};"
        resp = await self._run_sql(sql)
        return list(resp.selectResult[0].values())[0]

    def _format_field(
        self,
        field_map: Dict[str, Union[str, int, FrozenClass, Term]]
    ) -> Tuple[str, str]:
        base, incr = self._field_map_templates
        fmap = {**base, **field_map, **incr}

        field_strings = []

        for field, value in fmap.items():
            if value is None:
                field_strings.append(f"{self.quote_char}{field}{self.quote_char}")
            elif isinstance(value, Term):
                value = value.get_sql(quote_char=self.quote_char)
                field_strings.append(f"{value} as {self.quote_char}{field}{self.quote_char}")
            else:
                field_strings.append(f"{value!r} as {self.quote_char}{field}{self.quote_char}")

        return ','.join(f"{self.quote_char}{k}{self.quote_char}" for k in fmap), ','.join(field_strings)

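    # --- Editor's note: illustration, not part of the package source -------
    # _format_field feeds copy_rows below: a None value keeps the column
    # as-is (`col`), a Term becomes "expr as `col`", any other value becomes
    # a literal "value as `col`", and auto-increment columns are forced back
    # to null so the database reassigns them. For example, with
    # field_map={'f2': 3} on a table (f1, f2), this yields the pair
    # ("`f1`,`f2`", "`f1`,3 as `f2`").
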
    @txn_support
    async def copy_rows(
        self,
        where: Union[str, Term, EmptyCriterion],
        field_map: Dict[str, Union[str, int, FrozenClass, Term]] = None,
        distinct: bool = False,
    ) -> CustomSqlRespDTO:
        """Copies data rows of the current table

        Copies the rows matching the given where condition back into this
        table; field_map can override or set the values of some fields
        (commonly used for version copies).

        Args:
            where: filter condition selecting the rows to copy
            field_map: key: field to copy; value: value to copy
            distinct: whether to add distinct to the select

        .. admonition:: Example

            .. code-block:: python

                import pypika.functions as pf

                tbl = DataTableMySQL("test")
                t = tbl.table
                tbl.copy_rows(
                    where=(t.f1 >= 1) & (t.f2 == 2) | (t.f3 > 1),
                    field_map={
                        "f1": t.f1 + 1,
                        "f2": 3,
                        "f4": t.f5,
                        "f6": pf.Max(t.f6)
                    }
                )

            executes the SQL:

            .. code-block:: sql

                INSERT INTO
                    test
                SELECT
                    `f1` + 1 as f1,
                    3 as f2,
                    `f3`,
                    `f5` as f4,
                    `f5`,
                    Max(`f6`) as `f6`
                FROM
                    test
                WHERE
                    `f1`>=1 AND `f2`=2 OR `f3`>1

        """
        field_map = field_map or {}
        fields, field_str = self._format_field(field_map)
        sql = "INSERT INTO {table} ({fields}) SELECT {distinct} {field_str} FROM {table} WHERE {where}".format(
            table=self._quoted_table_name,
            fields=fields,
            field_str=field_str,
            where=self._parse_where(where),
            distinct='DISTINCT' if distinct else ''
        )
        return await self.run_sql(sql)

    async def _run_sql(self, sql: str) -> Optional[CustomSqlRespDTO]:
        txn_conf = self._safe_get_txn_conf()

        if txn_conf.in_txn[-1] and self._txn_support_:
            txn_conf.sql[-1].append(sql)
            if txn_conf.async_api is None:
                txn_conf.async_api = self.async_api
            return

        def trim_sql():  # pragma: no cover
            if len(sql) > SQL_LOG_MAX_LEN:
                return sql[:SQL_LOG_MAX_LEN-4] + "..."
            else:
                return sql

        logger.opt(lazy=True).debug("Execute SQL: [{sql}].", sql=trim_sql)
        return await self.async_api.dml.run_sql(sql)

    @txn_support
    async def run_sql(self, sql: str) -> Optional[CustomSqlRespDTO]:
        """Executes SQL

        Executes the SQL directly; any table name appearing in the SQL must
        be the real table name.

        Hint:
            The real table name is available via :attr:`table_name`.

        Args:
            sql: the SQL statement to execute

        Returns:
            the execution result

        """
        return await self._run_sql(sql)

    @txn_support
    async def insert(
        self,
        value_map: Dict[str, Any] = None,
        value_list: Iterable[Sequence[Any]] = None,
        columns: Iterable[Union[str, Term]] = None,
    ):
        """
        Inserts data; recommended when the amount of data is very small

        Args:
            value_map: data to write, given as key-value pairs (column name -> value)
            value_list: data to write (without column information)
            columns: columns matching the data to write; defaults to all columns

        .. admonition:: Example

            .. code-block:: python

                tbl = DataTableMySQL("test")
                tbl.insert(value_map={'a': 1, 'b': 2})
                tbl.insert(value_list=[[1, 2]], columns=['a', 'b'])

            The two ``insert`` calls are equivalent and execute the SQL:

            .. code-block:: sql

                INSERT INTO `test`
                    (`a`,`b`)
                VALUES
                    (1,2)

        """

        q = self.query.into(self.table)

        if value_map is not None:
            q = q.columns(*value_map.keys()).insert(*value_map.values())
        elif value_list is None:
            raise ValueError('None of argument [value_map, value_list] is set.')
        else:
            if columns:
                column_num = len(list(columns))
                q = q.columns(*columns)
            else:
                column_num = len(self.structure.columns.keys())

            for value in value_list:
                if len(value) != column_num:
                    raise ValueError(
                        'Value number mismatch with column number. '
                        f'values: {value}, number: {len(value)}, '
                        f'columns number: {column_num}.')
                q = q.insert(*value)

        return await self.run_sql(q.get_sql(quote_char=self.quote_char))

    @classmethod
    @asynccontextmanager
    async def start_transaction(cls, flatten: bool = False):
        """
        Starts a transaction

        A context manager; open the context with the with syntax, and the
        SQL inside the context is executed as one transaction.
        The transaction executes as soon as the with block is exited; errors
        raised during execution propagate directly, and the result can be
        inspected via :attr:`transaction_result`.

        .. admonition:: Example

            .. code-block:: python

                tbl = DataTableMySQL('table_example')
                t = tbl.table
                async with tbl.start_transaction():
                    await tbl.insert({'key': 101, 'value': 'txn'})
                    await tbl.update({'value': 'new_txn'}, where=t.key == 101)
                    await tbl.delete(where=t.key >= 99)
                result = tbl.transaction_result

        Args:
            flatten: whether to flatten nested transactions; if enabled,
                nested transactions are executed as a single transaction

        Important:
            Only the ``insert/delete/update`` **family** of methods
            (including :meth:`insert_df`, :meth:`copy_rows` and so on) can
            run inside a transaction. Methods that support transactions can
            be identified in the source code: any method carrying the
            ``@txn_support`` decorator supports transactions.

            If a select is executed inside a transaction, its result is
            still returned immediately.

        """
        try:
            cls._txn_.get()
        except LookupError:
            cls._txn_.set(_TxnConfig())
        bak_flatten = cls._txn_.get().flatten
        cls._txn_.get().in_txn.append(True)

        if flatten and not cls._txn_.get().flatten:
            force_submit = True
        else:
            force_submit = False

        cls._txn_.get().flatten = bak_flatten or flatten

        if not cls._txn_.get().flatten:
            cls._txn_.get().sql.append([])

        try:
            yield
            if force_submit or not cls._txn_.get().flatten:
                await cls.__submit_txn()
        finally:
            cls._txn_.get().in_txn.pop()
            cls._txn_.get().flatten = bak_flatten

    @classmethod
    async def __submit_txn(cls):
        if sql := cls._txn_.get().sql.pop():
            resp = await cls._trxn_execute(cls._txn_.get(), sql)
            cls.transaction_result = resp

    @staticmethod
    async def _trxn_execute(self, sqls: List[str]):
        # Declared as a staticmethod but called with an explicit first
        # argument: the caller passes either the element itself or the
        # _TxnConfig, both of which expose `async_api`.
        return await self.async_api.dml.execute_batch_sql(sqls)

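
# --- Editor's note: usage sketch, not part of the package source ----------
# The transaction machinery batches SQL per ContextVar: each
# start_transaction pushes onto in_txn, txn_support-decorated methods append
# their SQL instead of running it, and exiting the outermost (or each
# non-flattened) block ships the batch via execute_batch_sql. A nested,
# flattened use might look like this (element name hypothetical):

async def _demo_flatten_txn():
    tbl = AsyncDataTableMySQL('table_example')
    t = tbl.table
    async with tbl.start_transaction(flatten=True):
        await tbl.insert(value_map={'key': 1, 'value': 'a'})
        async with tbl.start_transaction():  # flattened into the outer txn
            await tbl.update({'value': 'b'}, where=t.key == 1)
    # both statements were submitted as one batch
    print(tbl.transaction_result)
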
class DataTableSyncMixin:
    synchronize = (
        'count',
        'select',
        'select_raw',
        'insert',
        'insert_df',
        'delete',
        'update',
        'update_from_dataframe',
        'copy_rows',
        'run_sql',
    )
    if TYPE_CHECKING:  # pragma: no cover
        def count(
            self,
            where: Union[str, Term, EmptyCriterion],
        ) -> int:
            ...

        def select(
            self,
            columns: Iterable[Union[str, Term]] = None,
            where: Union[str, Term, EmptyCriterion] = None,
            distinct: bool = False,
            groupby: Iterable[Union[str, int, Term]] = None,
            having: Iterable[Union[Term, EmptyCriterion]] = None,
            orderby: Iterable[Union[str, Field]] = None,
            order: Union[Order, str] = Order.asc,
            limit: int = None,
            offset: int = None,
        ) -> pd.DataFrame:
            ...

        def select_raw(
            self,
            columns: Iterable[Union[str, Term]] = None,
            where: Union[str, Term, EmptyCriterion] = None,
            distinct: bool = False,
            groupby: Iterable[Union[str, int, Term]] = None,
            having: Iterable[Union[Term, EmptyCriterion]] = None,
            orderby: Iterable[Union[str, Field]] = None,
            order: Union[Order, str] = Order.asc,
            limit: int = None,
            offset: int = None,
        ) -> List[dict]:
            ...

        def insert(
            self,
            value_map: Dict[str, Any] = None,
            value_list: Iterable[Sequence[Any]] = None,
            columns: Iterable[Union[str, Term]] = None,
        ):
            ...

        def insert_df(
            self,
            dataframe: pd.DataFrame,
            updatecol: Iterable = None,
            chunksize: int = 5000,
            auto_fit: bool = True,
        ) -> Union[CustomSqlRespDTO, Dict, None]:
            ...

        def delete(
            self,
            where: Union[str, Term, EmptyCriterion],
        ) -> CustomSqlRespDTO:
            ...

        def update(
            self,
            assignment_list: T_DictLike,
            where: Union[None, Term, EmptyCriterion]
        ):
            ...

        def copy_rows(
            self,
            where: Union[str, Term, EmptyCriterion],
            field_map: Dict[str, Union[str, int, FrozenClass, Term]] = None,
            distinct: bool = False,
        ) -> CustomSqlRespDTO:
            ...

        def run_sql(self, sql: str) -> Optional[CustomSqlRespDTO]:
            ...

        def update_from_dataframe(
            self,
            source: pd.DataFrame,
            chucksize: Optional[int] = None
        ):
            ...


class DataTableSyncMeta(SyncMeta):
    def __new__(mcs, name, bases, namespace, **kwargs):
        cls = super().__new__(mcs, name, bases, namespace, **kwargs)

        @contextmanager
        def start_transaction(cls, flatten: bool = False):
            try:
                cls._txn_.get()
            except LookupError:
                cls._txn_.set(_TxnConfig())

            bak_flatten = cls._txn_.get().flatten
            cls._txn_.get().in_txn.append(True)

            if flatten and not cls._txn_.get().flatten:
                force_submit = True
            else:
                force_submit = False

            cls._txn_.get().flatten = bak_flatten or flatten

            if not cls._txn_.get().flatten:
                cls._txn_.get().sql.append([])

            try:
                yield
                if force_submit or not cls._txn_.get().flatten:
                    cls.__submit_txn()
            finally:
                cls._txn_.get().in_txn.pop()
                cls._txn_.get().flatten = bak_flatten

        start_transaction.__doc__ = DOC_START_TX_TEMPLATE % name

        def __submit_txn(cls):
            if sql := cls._txn_.get().sql.pop():
                resp = evloop.run(cls._trxn_execute(cls._txn_.get(), sql))
                cls.transaction_result = resp

        cls.start_transaction = classmethod(start_transaction)
|
+
cls.__submit_txn = classmethod(__submit_txn)
|
|
1631
|
+
|
|
1632
|
+
return cls


class DataTableMySQL(
    AsyncDataTableMySQL,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass
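
# A hedged sketch of the sync surface generated above: DataTableSyncMeta
# rewires start_transaction into a synchronous context manager that submits
# via ``evloop.run``, and SyncMeta wraps the coroutine methods listed in
# DataTableSyncMixin.synchronize, so the same API works without an event
# loop (table name is hypothetical):
#
#     tbl = DataTableMySQL('table_example')
#     with DataTableMySQL.start_transaction():
#         tbl.insert({'key': 1, 'value': 'a'})
#     print(tbl.transaction_result)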


class AsyncDirectAccessDataTableMySQL(AsyncDataTableMySQL):
    async def select(
        self,
        columns: Iterable[Union[str, Term]] = None,
        where: Union[str, Term, EmptyCriterion] = None,
        distinct: bool = False,
        groupby: Iterable[Union[str, int, Term]] = None,
        having: Iterable[Union[Term, EmptyCriterion]] = None,
        orderby: Iterable[Union[str, Field]] = None,
        order: Union[Order, str] = Order.asc,
        limit: int = None,
        offset: int = None,
    ) -> pd.DataFrame:
        from deepfos.db import damysql
        sql = self._build_select_sql(
            columns,
            where=where,
            distinct=distinct,
            groupby=groupby,
            having=having,
            orderby=orderby,
            order=order,
            limit=limit,
            offset=offset
        )
        return await damysql.query_dataframe(sql)

    async def select_raw(
        self,
        columns: Iterable[Union[str, Term]] = None,
        where: Union[str, Term, EmptyCriterion] = None,
        distinct: bool = False,
        groupby: Iterable[Union[str, int, Term]] = None,
        having: Iterable[Union[Term, EmptyCriterion]] = None,
        orderby: Iterable[Union[str, Field]] = None,
        order: Union[Order, str] = Order.asc,
        limit: int = None,
        offset: int = None,
    ) -> List[dict]:
        raw_data = await self.select(
            columns,
            where=where,
            distinct=distinct,
            groupby=groupby,
            having=having,
            orderby=orderby,
            order=order,
            limit=limit,
            offset=offset
        )
        return raw_data.to_dict(orient='records')

    @txn_support
    async def run_sql(self, sql: str):
        from deepfos.db import damysql
        ctx = self._txn_
        if ctx.get().in_txn[-1] and self._txn_support_:
            ctx.get().sql[-1].append(sql)
            return
        if len(sql) > SQL_LOG_MAX_LEN:  # pragma: no cover
            sql_log = sql[:SQL_LOG_MAX_LEN - 4] + "..."
        else:
            sql_log = sql
        logger.debug(f"Execute SQL: [{sql_log}].")  # pragma: no cover
        return await damysql.execute(sql)

    @staticmethod
    async def _trxn_execute(self, sqls: List[str]):
        from deepfos.db import damysql
        return await damysql.trxn_execute(sqls)

    async def count(
        self,
        where: Union[str, Term, EmptyCriterion],
    ) -> int:
        from deepfos.db import damysql
        sql = f"SELECT COUNT(1) FROM {self._quoted_table_name} WHERE {self._parse_where(where)};"
        res = await damysql.select(sql)
        return res[0][0]


class DirectAccessDataTableMySQL(
    AsyncDirectAccessDataTableMySQL,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass


class AsyncDataTableClickHouse(AsyncDataTableMySQL):
    __doc__ = DOC_TEMPLATE.format(DB='ClickHouse')
    api_class = ClickHouseAPI
    api: ClickHouseAPI
    query = ClickHouseQuery
    convertor = _ClickHouseDFConvertor(quote_char=AsyncDataTableMySQL.quote_char)

    @cached_property
    def structure(self) -> ClickHouseTableStructure:
        """Structure of the datatable

        Mainly holds the name and type information of every column,
        used to pre-process (type-convert) data on query and save.
        """
        columns = self.meta.datatableColumn
        columns.append(MiscModel(name='createtime', type='int', whetherEmpty=False))
        columns.append(MiscModel(name='createdate', type='datetime', whetherEmpty=False))
        return ClickHouseTableStructure(columns)

    async def delete(self, where: Dict[str, Union[VT, List[VT]]]):
        """
        Delete data from the datatable

        Args:
            where: delete condition, mapping column name -> values to delete

        .. admonition:: Example

            .. code-block:: python

                tbl = DataTableClickHouse("example")
                tbl.delete({
                    "col_a": 1,
                    "col_b": ["x", "y"]
                })

            executes the sql:

            .. code-block:: sql

                ALTER TABLE example
                DELETE
                WHERE
                    `col_a` IN (1)
                    AND `col_b` IN ('x', 'y')

        Warnings:
            Due to the nature of ClickHouse, ``delete`` may not take effect
            immediately, so do not rely on this method for data consistency.
            Frequent use is also discouraged.

        """
        del_cols = {}
        for k, v in where.items():
            if isinstance(v, str):
                del_cols[k] = [v]
            else:
                del_cols[k] = v

        return await self.async_api.dml.delete_data(
            DatatableDataDeleteDTO.construct_from(
                self.element_info,
                columnList=del_cols
            ))

    def _format_field(
        self,
        field_map: Dict[str, Union[str, int, FrozenClass, Term]]
    ) -> Tuple[str, str]:
        base, incr = self._field_map_templates
        fmap = {**base, **field_map, **incr}

        field_strings = []

        for field, value in fmap.items():
            if value is None:
                field_strings.append(f"`{field}`")
            elif isinstance(value, Term):
                value = value.get_sql(quote_char=self.quote_char)
                field_strings.append(value)
            else:
                field_strings.append(repr(value))

        return ','.join(f"`{k}`" for k in fmap), ','.join(field_strings)
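
    # A hedged sketch of ``_format_field`` for a table with columns
    # (id, name): ``None`` in the merged template means "copy the column
    # as-is"; any other value becomes the select expression for that column.
    #
    #     cols, vals = tbl._format_field({'name': 'copy'})
    #     # cols -> "`id`,`name`"
    #     # vals -> "`id`,'copy'"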

    @classmethod
    @asynccontextmanager
    async def start_transaction(cls, flatten: bool = False):
        """Unavailable

        ClickHouse does not support transactions
        """
        try:
            yield
        finally:
            raise NotImplementedError('ClickHouse does not support transaction.')


class DataTableClickHouse(
    AsyncDataTableClickHouse,
    DataTableSyncMixin,
    metaclass=SyncMeta
):
    @classmethod
    def start_transaction(cls, flatten: bool = False):
        """Unavailable

        ClickHouse does not support transactions
        """
        raise NotImplementedError('ClickHouse does not support transaction.')


class AsyncDirectAccessDataTableClickHouse(AsyncDirectAccessDataTableMySQL):
    __doc__ = DOC_TEMPLATE.format(DB='ClickHouse')
    api_class = ClickHouseAPI
    api: ClickHouseAPI
    query = ClickHouseQuery

    @classmethod
    @asynccontextmanager
    async def start_transaction(cls, flatten: bool = False):
        """Unavailable

        ClickHouse does not support transactions
        """
        try:
            yield
        finally:
            raise NotImplementedError('ClickHouse does not support transaction.')

    async def run_sql(self, sql: str):  # pragma: no cover
        from deepfos.db import daclickhouse
        if len(sql) > SQL_LOG_MAX_LEN:
            sql_log = sql[:SQL_LOG_MAX_LEN - 4] + "..."
        else:
            sql_log = sql
        logger.debug(f"Execute SQL: [{sql_log}].")
        return await daclickhouse.execute(sql)

    async def insert_df(
        self,
        dataframe: pd.DataFrame,
        updatecol: Iterable = None,
        chunksize: int = 5000,
        auto_fit: bool = True,
    ) -> List:
        from deepfos.db import daclickhouse
        if updatecol is not None:
            warnings.warn(
                "ClickHouse tables do not support the INSERT INTO ... "
                "ON DUPLICATE syntax; this argument is ignored when "
                "building the sql.",
                Warning
            )
        r = []
        for df in split_dataframe(dataframe, chunksize):
            res = await daclickhouse.insert_dataframe(self.table_name, df)
            r.append(res)
        return r
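
    # A minimal sketch of the chunking above: a 12000-row DataFrame with
    # chunksize=5000 is written in three batches (5000/5000/2000 rows),
    # one insert and one collected result per batch.
    #
    #     df = pd.DataFrame({'k': range(12000)})
    #     results = await tbl.insert_df(df, chunksize=5000)
    #     assert len(results) == 3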

    async def select(
        self,
        columns: Iterable[Union[str, Term]] = None,
        where: Union[str, Term, EmptyCriterion] = None,
        distinct: bool = False,
        groupby: Iterable[Union[str, int, Term]] = None,
        having: Iterable[Union[Term, EmptyCriterion]] = None,
        orderby: Iterable[Union[str, Field]] = None,
        order: Union[Order, str] = Order.asc,
        limit: int = None,
        offset: int = None,
    ) -> pd.DataFrame:
        from deepfos.db import daclickhouse
        sql = self._build_select_sql(
            columns,
            where=where,
            distinct=distinct,
            groupby=groupby,
            having=having,
            orderby=orderby,
            order=order,
            limit=limit,
            offset=offset
        )
        return await daclickhouse.query_dataframe(sql)

    async def count(
        self,
        where: Union[str, Term, EmptyCriterion],
    ) -> int:
        from deepfos.db import daclickhouse
        sql = f"SELECT COUNT(1) FROM {self._quoted_table_name} WHERE {self._parse_where(where)};"
        res = await daclickhouse.select(sql)
        return res[0][0]

    async def delete(self, where: Dict[str, Union[VT, List[VT]]]):
        """
        Delete data from the datatable

        Args:
            where: delete condition, mapping column name -> values to delete

        .. admonition:: Example

            .. code-block:: python

                tbl = DataTableClickHouse("example")
                tbl.delete({
                    "col_a": 1,
                    "col_b": ["x", "y"]
                })

            executes the sql:

            .. code-block:: sql

                ALTER TABLE example
                DELETE
                WHERE
                    `col_a` IN (1)
                    AND `col_b` IN ('x', 'y')

        Warnings:
            Due to the nature of ClickHouse, ``delete`` may not take effect
            immediately, so do not rely on this method for data consistency.
            Frequent use is also discouraged.

        """
        t = self.table

        q = self.query.from_(t).delete()

        for k, v in where.items():
            if isinstance(v, List):
                q = q.where(getattr(t, k).isin(v))
            else:
                q = q.where(getattr(t, k) == v)

        sql = q.get_sql(quote_char=self.quote_char)
        return await self.run_sql(sql)


class DirectAccessDataTableClickHouse(
    AsyncDirectAccessDataTableClickHouse,
    DataTableSyncMixin,
    metaclass=SyncMeta
):
    @classmethod
    def start_transaction(cls, flatten: bool = False):
        """Unavailable

        ClickHouse does not support transactions
        """
        raise NotImplementedError('ClickHouse does not support transaction.')


if OPTION.general.db_direct_access:
    AsyncDataTableMySQL = AsyncDirectAccessDataTableMySQL
    AsyncDataTableClickHouse = AsyncDirectAccessDataTableClickHouse
    DataTableMySQL = DirectAccessDataTableMySQL
    DataTableClickHouse = DirectAccessDataTableClickHouse

Datatable = DataTableMySQL
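
# A hedged note on the rebinding above: with the direct-access option
# enabled at import time, code that constructs the public classes
# transparently gets the direct-access variants, which talk to the
# database driver instead of the datatable service API.
#
#     # only when OPTION.general.db_direct_access is enabled:
#     assert DataTableMySQL is DirectAccessDataTableMySQL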


class _OracleField(Field):
    def get_sql(self, **kwargs: Any) -> str:
        with_alias = kwargs.pop("with_alias", False)
        with_namespace = kwargs.pop("with_namespace", False)
        quote_char = kwargs.pop("quote_char", '"')

        field_sql = format_quotes(self.name, quote_char)
        field_sql = field_sql.upper()
        # Need to add namespace if the table has an alias
        if self.table and (with_namespace or self.table.alias):
            table_name = self.table.get_table_name()
            field_sql = "{namespace}.{name}".format(
                namespace=format_quotes(table_name, quote_char),
                name=field_sql,
            )

        field_alias = getattr(self, "alias", None)
        if with_alias:
            return format_alias_sql(field_sql, field_alias, quote_char=quote_char, **kwargs)
        return field_sql


class OracleTable(Table):
    def field(self, name: str) -> Field:
        return _OracleField(name, table=self, alias=name)
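
# A sketch of the uppercasing behaviour above: Oracle folds unquoted
# identifiers to upper case, so fields render as quoted upper-case names
# no matter how the column was spelled.
#
#     t = OracleTable('EXAMPLE')
#     t.field('col_a').get_sql()  # -> '"COL_A"'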


class AsyncDataTableOracle(AsyncDataTableMySQL):
    __doc__ = DOC_TEMPLATE.format(DB='Oracle')
    api: OracleAPI
    api_class = OracleAPI
    quote_char = '"'
    convertor = _OracleDFConvertor(quote_char='"')
    query = OracleQuery

    @cached_property
    def table(self) -> Table:
        return OracleTable(self.table_name.upper())

    @cached_property
    def structure(self) -> OracleTableStructure:
        return OracleTableStructure(self.meta.datatableColumn)

    @cached_property
    def _field_map_templates(self) -> Tuple[Dict[str, None], Dict[str, Type[null]]]:
        base_tmpl = {}
        incr_cols = {}

        for col in self.meta.datatableColumn:
            if col.whetherIncrement:
                continue
            base_tmpl[col.name.upper()] = None

        return base_tmpl, incr_cols

    @txn_support
    async def copy_rows(
        self,
        where: Union[str, Term, EmptyCriterion],
        field_map: Dict[str, Union[str, int, FrozenClass, Term]] = None,
        distinct: bool = False,
    ):
        new_field_map = None
        if field_map is not None:
            new_field_map = {k.upper(): v for k, v in field_map.items()}
        return await super().copy_rows(where, new_field_map, distinct)

    @txn_support
    async def insert(
        self,
        value_map: Dict[str, Any] = None,
        value_list: Iterable[Sequence[Any]] = None,
        columns: Iterable[Union[str, Term]] = None,
    ):
        insert_line = f"INTO {self._quoted_table_name} ({{cols}}) VALUES ({{vals}})"

        def quote_string(s):
            return f'"{s.upper()}"'

        if value_map is not None:
            insert = insert_line.format(
                cols=','.join(map(quote_string, value_map.keys())),
                vals=','.join(map(repr, value_map.values()))
            )
        elif value_list is None:
            raise ValueError('None of argument [value_map, value_list] is set.')
        else:
            columns = columns or list(self.structure.columns.keys())
            column_num = len(list(columns))
            cols = ','.join(map(quote_string, columns))

            insert_list = []
            for value in value_list:
                if len(value) != column_num:
                    raise ValueError(
                        'Value number mismatch with column number. '
                        f'values: {value}, number: {len(value)}, '
                        f'columns number: {column_num}.')
                insert_list.append(insert_line.format(
                    cols=cols,
                    vals=','.join(map(repr, value))
                ))
            insert = '\n'.join(insert_list)
        return await self.run_sql(f"INSERT ALL {insert} SELECT 1 FROM DUAL")
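
    # A hedged sketch of the multi-row form above: with columns (ID, NAME)
    # and value_list [(1, 'a'), (2, 'b')], the generated statement is
    # shaped like
    #
    #     INSERT ALL
    #     INTO "EXAMPLE" ("ID","NAME") VALUES (1,'a')
    #     INTO "EXAMPLE" ("ID","NAME") VALUES (2,'b')
    #     SELECT 1 FROM DUAL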


class DataTableOracle(
    AsyncDataTableOracle,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass


class AsyncDataTableSQLServer(AsyncDataTableMySQL):
    __doc__ = DOC_TEMPLATE.format(DB='SQLServer')
    api: SQLServerAPI
    api_class = SQLServerAPI
    quote_char = ''
    convertor = _SQLServerDFConvertor(quote_char=quote_char)
    query = MSSQLQuery

    @cached_property
    def structure(self) -> MySQLTableStructure:
        return MySQLTableStructure(self.meta.datatableColumn)

    async def select_raw(
        self,
        columns: Iterable[Union[str, Term]] = None,
        where: Union[str, Term, EmptyCriterion] = None,
        distinct: bool = False,
        groupby: Iterable[Union[str, int, Term]] = None,
        having: Iterable[Union[Term, EmptyCriterion]] = None,
        orderby: Iterable[Union[str, Field]] = None,
        order: Union[Order, str] = Order.asc,
        limit: int = None,
        offset: int = None,
    ):
        if limit is not None or offset is not None:
            if not orderby:
                raise ValueError("orderby must not be empty when "
                                 "limit or offset is provided.")
        return await super().select_raw(
            columns,
            where=where,
            distinct=distinct,
            groupby=groupby,
            having=having,
            orderby=orderby,
            order=order,
            limit=limit,
            offset=offset
        )
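
    # A note on the guard above: T-SQL pagination is expressed as
    # ORDER BY ... OFFSET n ROWS FETCH NEXT m ROWS ONLY, which is invalid
    # without an ORDER BY clause, hence the explicit check. Sketch:
    #
    #     await tbl.select_raw(limit=10, offset=20, orderby=['id'])  # ok
    #     await tbl.select_raw(limit=10)  # raises ValueError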


class DataTableSQLServer(
    AsyncDataTableSQLServer,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass


class AsyncDataTablePostgreSQL(AsyncDataTableMySQL):
    __doc__ = DOC_TEMPLATE.format(DB='PostgreSQL')
    api: PostgreSQLAPI
    api_class = PostgreSQLAPI
    quote_char = '"'
    convertor = _PostgreSQLDFConvertor(quote_char=quote_char)
    query = PostgreSQLQuery

    @cached_property
    def structure(self) -> PGTableStructure:
        return PGTableStructure(self.meta.datatableColumn)

    @txn_support
    async def insert_df(
        self,
        dataframe: pd.DataFrame,
        updatecol: Iterable = None,
        chunksize: int = 5000,
        auto_fit: bool = True,
        conflict_target: Iterable[str] = None,
    ) -> Union[CustomSqlRespDTO, Dict, None]:
        """Insert the data of a ``DataFrame`` into the current datatable

        Before writing, the DataFrame is pre-processed as follows:

        #. (always) all missing values are converted to null so they can
           be written
        #. (optional) **non-nullable** string columns are filled with the
           string ``'null'`` (this may change in the future; do not rely
           on this behaviour)
        #. (optional) decimal columns are automatically ``round``-ed to the
           declared number of decimal places

        The **(optional)** steps above can be turned off with
        ``auto_fit=False``.

        Args:
            dataframe: data to insert
            updatecol: columns to update (for INSERT INTO ON CONFLICT)
            chunksize: number of rows written per statement
            auto_fit: whether to adjust the data automatically
            conflict_target: columns used as the conflict target in the
                INSERT INTO ON CONFLICT syntax; if not provided, the
                primary key columns are tried

        Hint:
            If a single write carries too much data and exceeds the
            database's limit on one sql statement, lower ``chunksize``;
            this method will then split one large sql into several
            statements.

        Returns:
            record of the executed operations

        """
        if dataframe.empty:
            return

        if auto_fit:
            dataframe = dataframe.copy()
            dataframe = self.structure.fit(dataframe, dataframe.columns)
        else:
            dataframe = dataframe[dataframe.columns.intersection(self.structure.columns)]

        if conflict_target is None:
            conflict_target = [col.name for col in self.meta.datatableColumn if col.whetherPrimary] or None

        sqls = self.convertor.iter_sql(dataframe, self.table_name, updatecol, chunksize, conflict_target=conflict_target)
        return await self._maybe_submit_in_txn(sqls)
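
    # A hedged sketch (exact rendering depends on the convertor) of the
    # upsert path: with primary-key column 'id' and updatecol=['value'],
    # the emitted statements are shaped like
    #
    #     INSERT INTO "tbl" ("id","value") VALUES (1,'a')
    #     ON CONFLICT ("id") DO UPDATE SET "value" = EXCLUDED."value"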

    @cached_property
    def _field_map_templates(self) -> Tuple[Dict[str, None], Dict[str, Type[null]]]:
        base_tmpl = {}
        incr_cols = {}

        for col in self.meta.datatableColumn:
            if col.whetherIncrement:
                continue
            base_tmpl[col.name] = None

        return base_tmpl, incr_cols


class DataTablePostgreSQL(
    AsyncDataTablePostgreSQL,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass


class AsyncDataTableKingBase(AsyncDataTablePostgreSQL):
    __doc__ = DOC_TEMPLATE.format(DB='KingBase')
    api: KingBaseAPI
    api_class = KingBaseAPI


class DataTableKingBase(
    AsyncDataTableKingBase,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass


class AsyncDataTableGauss(AsyncDataTablePostgreSQL):
    __doc__ = DOC_TEMPLATE.format(DB='Gauss')
    api: GaussAPI
    api_class = GaussAPI


class DataTableGauss(
    AsyncDataTableGauss,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass


class AsyncDataTableDaMeng(AsyncDataTableOracle):
    __doc__ = DOC_TEMPLATE.format(DB='DaMeng')
    api: DaMengAPI
    api_class = DaMengAPI


class DataTableDaMeng(
    AsyncDataTableDaMeng,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass


class AsyncDataTableDeepEngine(AsyncDataTableClickHouse):
    __doc__ = DOC_TEMPLATE.format(DB='DeepEngine')
    api: DeepEngineAPI
    api_class = DeepEngineAPI
    convertor = _DeepEngineDFConvertor(quote_char=AsyncDataTableClickHouse.quote_char)

    @classmethod
    @asynccontextmanager
    async def start_transaction(cls, flatten: bool = False):
        """Unavailable

        DeepEngine does not support transactions
        """
        try:
            yield
        finally:
            raise NotImplementedError('DeepEngine does not support transaction.')


class DataTableDeepEngine(
    AsyncDataTableDeepEngine,
    DataTableSyncMixin,
    metaclass=SyncMeta
):
    @classmethod
    def start_transaction(cls, flatten: bool = False):
        """Unavailable

        DeepEngine does not support transactions
        """
        raise NotImplementedError('DeepEngine does not support transaction.')


class AsyncDataTableDeepModel(AsyncDataTablePostgreSQL):
    __doc__ = DOC_TEMPLATE.format(DB='DeepModel')
    api: DeepModelAPI
    api_class = DeepModelAPI


class DataTableDeepModel(
    AsyncDataTableDeepModel,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass


class AsyncDataTableDeepModelKingBase(AsyncDataTableKingBase):
    __doc__ = DOC_TEMPLATE.format(DB='DeepModelKingBase')
    api: DeepModelKingBaseAPI
    api_class = DeepModelKingBaseAPI


class DataTableDeepModelKingBase(
    AsyncDataTableDeepModelKingBase,
    DataTableSyncMixin,
    metaclass=DataTableSyncMeta
):
    pass


_RE_PARSE_SERVER = re.compile(r"data[-]?table-(.*?)-server[\d]-[\d]")
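
# A sketch of the server-name pattern above: it extracts the db flavour
# from names such as "datatable-mysql-server1-0" or
# "data-table-clickhouse-server2-1".
#
#     _RE_PARSE_SERVER.match("datatable-mysql-server1-0").group(1)  # 'mysql'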


TO_MODULE_TYPE = CaseInsensitiveDict(
    {
        'mysql': MySQLAPI.module_type,
        'clickhouse': ClickHouseAPI.module_type,
        'sqlserver': SQLServerAPI.module_type,
        'oracle': OracleAPI.module_type,
        'kingbase': KingBaseAPI.module_type,
        'gauss': GaussAPI.module_type,
        'dameng': DaMengAPI.module_type,
        'postgresql': PostgreSQLAPI.module_type,
        'deepengine': DeepEngineAPI.module_type,
        'deepmodel': DeepModelAPI.module_type,
        'deepmodelkingbase': DeepModelKingBaseAPI.module_type,
    }
)

TABLE = CaseInsensitiveDict(
    {
        MySQLAPI.module_type: (DataTableMySQL, AsyncDataTableMySQL),
        ClickHouseAPI.module_type: (DataTableClickHouse, AsyncDataTableClickHouse),
        SQLServerAPI.module_type: (DataTableSQLServer, AsyncDataTableSQLServer),
        OracleAPI.module_type: (DataTableOracle, AsyncDataTableOracle),
        KingBaseAPI.module_type: (DataTableKingBase, AsyncDataTableKingBase),
        GaussAPI.module_type: (DataTableGauss, AsyncDataTableGauss),
        DaMengAPI.module_type: (DataTableDaMeng, AsyncDataTableDaMeng),
        PostgreSQLAPI.module_type: (DataTablePostgreSQL, AsyncDataTablePostgreSQL),
        DeepEngineAPI.module_type: (DataTableDeepEngine, AsyncDataTableDeepEngine),
        DeepModelAPI.module_type: (DataTableDeepModel, AsyncDataTableDeepModel),
        DeepModelKingBaseAPI.module_type: (DataTableDeepModelKingBase, AsyncDataTableDeepModelKingBase),
    }
)

T_DatatableClass = Union[
    Type[DataTableMySQL],
    Type[DataTableClickHouse],
    Type[DataTableOracle],
    Type[DataTableSQLServer],
    Type[DataTableKingBase],
    Type[DataTableGauss],
    Type[DataTableDaMeng],
    Type[DataTablePostgreSQL],
    Type[DataTableDeepEngine],
    Type[DataTableDeepModel],
    Type[DataTableDeepModelKingBase],
]

T_AsyncDatatableClass = Union[
    Type[AsyncDataTableMySQL],
    Type[AsyncDataTableClickHouse],
    Type[AsyncDataTableOracle],
    Type[AsyncDataTableSQLServer],
    Type[AsyncDataTableKingBase],
    Type[AsyncDataTableGauss],
    Type[AsyncDataTableDaMeng],
    Type[AsyncDataTablePostgreSQL],
    Type[AsyncDataTableDeepEngine],
    Type[AsyncDataTableDeepModel],
    Type[AsyncDataTableDeepModelKingBase],
]

T_DatatableInstance = Union[
    DataTableMySQL,
    DataTableClickHouse,
    DataTableOracle,
    DataTableSQLServer,
    DataTableKingBase,
    DataTableGauss,
    DataTableDaMeng,
    DataTablePostgreSQL,
    DataTableDeepEngine,
    DataTableDeepModel,
    DataTableDeepModelKingBase,
]

T_AsyncDatatableInstance = Union[
    AsyncDataTableMySQL,
    AsyncDataTableClickHouse,
    AsyncDataTableOracle,
    AsyncDataTableSQLServer,
    AsyncDataTableKingBase,
    AsyncDataTableGauss,
    AsyncDataTableDaMeng,
    AsyncDataTablePostgreSQL,
    AsyncDataTableDeepEngine,
    AsyncDataTableDeepModel,
    AsyncDataTableDeepModelKingBase,
]


def get_table_class(
    element_type: str,
    sync: bool = True
) -> Union[
    T_DatatableClass,
    T_AsyncDatatableClass
]:
    """
    Get the datatable element class for the given element type

    Args:
        element_type: module type or server name
        sync: return the sync or the async element class; sync by default

    """
    if sync:
        index = 0
    else:
        index = 1

    if element_type is None:
        raise ValueError("`element_type` should be a string value.")

    module_type = element_type

    if match := _RE_PARSE_SERVER.match(element_type):
        server_name = match.group(1)
        module_type = TO_MODULE_TYPE.get(server_name)

        if module_type is None:
            raise ValueError(f"{element_type} is not a known datatable server.")

    table = TABLE.get(module_type)

    if table is None:
        raise TypeError(f"Unknown datatable type: {element_type}")

    return table[index]
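

# A hedged usage sketch of the lookup: both a bare module type and a full
# server name resolve to the same (sync, async) pair; index 0 is the sync
# class, index 1 the async one.
#
#     get_table_class(MySQLAPI.module_type)  # -> DataTableMySQL
#     get_table_class('datatable-mysql-server1-0', sync=False)
#     # -> AsyncDataTableMySQL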