deepfos-1.1.60-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. deepfos/__init__.py +6 -0
  2. deepfos/_version.py +21 -0
  3. deepfos/algo/__init__.py +0 -0
  4. deepfos/algo/graph.py +171 -0
  5. deepfos/algo/segtree.py +31 -0
  6. deepfos/api/V1_1/__init__.py +0 -0
  7. deepfos/api/V1_1/business_model.py +119 -0
  8. deepfos/api/V1_1/dimension.py +599 -0
  9. deepfos/api/V1_1/models/__init__.py +0 -0
  10. deepfos/api/V1_1/models/business_model.py +1033 -0
  11. deepfos/api/V1_1/models/dimension.py +2768 -0
  12. deepfos/api/V1_2/__init__.py +0 -0
  13. deepfos/api/V1_2/dimension.py +285 -0
  14. deepfos/api/V1_2/models/__init__.py +0 -0
  15. deepfos/api/V1_2/models/dimension.py +2923 -0
  16. deepfos/api/__init__.py +0 -0
  17. deepfos/api/account.py +167 -0
  18. deepfos/api/accounting_engines.py +147 -0
  19. deepfos/api/app.py +626 -0
  20. deepfos/api/approval_process.py +198 -0
  21. deepfos/api/base.py +983 -0
  22. deepfos/api/business_model.py +160 -0
  23. deepfos/api/consolidation.py +129 -0
  24. deepfos/api/consolidation_process.py +106 -0
  25. deepfos/api/datatable.py +341 -0
  26. deepfos/api/deep_pipeline.py +61 -0
  27. deepfos/api/deepconnector.py +36 -0
  28. deepfos/api/deepfos_task.py +92 -0
  29. deepfos/api/deepmodel.py +188 -0
  30. deepfos/api/dimension.py +486 -0
  31. deepfos/api/financial_model.py +319 -0
  32. deepfos/api/journal_model.py +119 -0
  33. deepfos/api/journal_template.py +132 -0
  34. deepfos/api/memory_financial_model.py +98 -0
  35. deepfos/api/models/__init__.py +3 -0
  36. deepfos/api/models/account.py +483 -0
  37. deepfos/api/models/accounting_engines.py +756 -0
  38. deepfos/api/models/app.py +1338 -0
  39. deepfos/api/models/approval_process.py +1043 -0
  40. deepfos/api/models/base.py +234 -0
  41. deepfos/api/models/business_model.py +805 -0
  42. deepfos/api/models/consolidation.py +711 -0
  43. deepfos/api/models/consolidation_process.py +248 -0
  44. deepfos/api/models/datatable_mysql.py +427 -0
  45. deepfos/api/models/deep_pipeline.py +55 -0
  46. deepfos/api/models/deepconnector.py +28 -0
  47. deepfos/api/models/deepfos_task.py +386 -0
  48. deepfos/api/models/deepmodel.py +308 -0
  49. deepfos/api/models/dimension.py +1576 -0
  50. deepfos/api/models/financial_model.py +1796 -0
  51. deepfos/api/models/journal_model.py +341 -0
  52. deepfos/api/models/journal_template.py +854 -0
  53. deepfos/api/models/memory_financial_model.py +478 -0
  54. deepfos/api/models/platform.py +178 -0
  55. deepfos/api/models/python.py +221 -0
  56. deepfos/api/models/reconciliation_engine.py +411 -0
  57. deepfos/api/models/reconciliation_report.py +161 -0
  58. deepfos/api/models/role_strategy.py +884 -0
  59. deepfos/api/models/smartlist.py +237 -0
  60. deepfos/api/models/space.py +1137 -0
  61. deepfos/api/models/system.py +1065 -0
  62. deepfos/api/models/variable.py +463 -0
  63. deepfos/api/models/workflow.py +946 -0
  64. deepfos/api/platform.py +199 -0
  65. deepfos/api/python.py +90 -0
  66. deepfos/api/reconciliation_engine.py +181 -0
  67. deepfos/api/reconciliation_report.py +64 -0
  68. deepfos/api/role_strategy.py +234 -0
  69. deepfos/api/smartlist.py +69 -0
  70. deepfos/api/space.py +582 -0
  71. deepfos/api/system.py +372 -0
  72. deepfos/api/variable.py +154 -0
  73. deepfos/api/workflow.py +264 -0
  74. deepfos/boost/__init__.py +6 -0
  75. deepfos/boost/py_jstream.py +89 -0
  76. deepfos/boost/py_pandas.py +20 -0
  77. deepfos/cache.py +121 -0
  78. deepfos/config.py +6 -0
  79. deepfos/core/__init__.py +27 -0
  80. deepfos/core/cube/__init__.py +10 -0
  81. deepfos/core/cube/_base.py +462 -0
  82. deepfos/core/cube/constants.py +21 -0
  83. deepfos/core/cube/cube.py +408 -0
  84. deepfos/core/cube/formula.py +707 -0
  85. deepfos/core/cube/syscube.py +532 -0
  86. deepfos/core/cube/typing.py +7 -0
  87. deepfos/core/cube/utils.py +238 -0
  88. deepfos/core/dimension/__init__.py +11 -0
  89. deepfos/core/dimension/_base.py +506 -0
  90. deepfos/core/dimension/dimcreator.py +184 -0
  91. deepfos/core/dimension/dimension.py +472 -0
  92. deepfos/core/dimension/dimexpr.py +271 -0
  93. deepfos/core/dimension/dimmember.py +155 -0
  94. deepfos/core/dimension/eledimension.py +22 -0
  95. deepfos/core/dimension/filters.py +99 -0
  96. deepfos/core/dimension/sysdimension.py +168 -0
  97. deepfos/core/logictable/__init__.py +5 -0
  98. deepfos/core/logictable/_cache.py +141 -0
  99. deepfos/core/logictable/_operator.py +663 -0
  100. deepfos/core/logictable/nodemixin.py +673 -0
  101. deepfos/core/logictable/sqlcondition.py +609 -0
  102. deepfos/core/logictable/tablemodel.py +497 -0
  103. deepfos/db/__init__.py +36 -0
  104. deepfos/db/cipher.py +660 -0
  105. deepfos/db/clickhouse.py +191 -0
  106. deepfos/db/connector.py +195 -0
  107. deepfos/db/daclickhouse.py +171 -0
  108. deepfos/db/dameng.py +101 -0
  109. deepfos/db/damysql.py +189 -0
  110. deepfos/db/dbkits.py +358 -0
  111. deepfos/db/deepengine.py +99 -0
  112. deepfos/db/deepmodel.py +82 -0
  113. deepfos/db/deepmodel_kingbase.py +83 -0
  114. deepfos/db/edb.py +214 -0
  115. deepfos/db/gauss.py +83 -0
  116. deepfos/db/kingbase.py +83 -0
  117. deepfos/db/mysql.py +184 -0
  118. deepfos/db/oracle.py +131 -0
  119. deepfos/db/postgresql.py +192 -0
  120. deepfos/db/sqlserver.py +99 -0
  121. deepfos/db/utils.py +135 -0
  122. deepfos/element/__init__.py +89 -0
  123. deepfos/element/accounting.py +348 -0
  124. deepfos/element/apvlprocess.py +215 -0
  125. deepfos/element/base.py +398 -0
  126. deepfos/element/bizmodel.py +1269 -0
  127. deepfos/element/datatable.py +2467 -0
  128. deepfos/element/deep_pipeline.py +186 -0
  129. deepfos/element/deepconnector.py +59 -0
  130. deepfos/element/deepmodel.py +1806 -0
  131. deepfos/element/dimension.py +1254 -0
  132. deepfos/element/fact_table.py +427 -0
  133. deepfos/element/finmodel.py +1485 -0
  134. deepfos/element/journal.py +840 -0
  135. deepfos/element/journal_template.py +943 -0
  136. deepfos/element/pyscript.py +412 -0
  137. deepfos/element/reconciliation.py +553 -0
  138. deepfos/element/rolestrategy.py +243 -0
  139. deepfos/element/smartlist.py +457 -0
  140. deepfos/element/variable.py +756 -0
  141. deepfos/element/workflow.py +560 -0
  142. deepfos/exceptions/__init__.py +239 -0
  143. deepfos/exceptions/hook.py +86 -0
  144. deepfos/lazy.py +104 -0
  145. deepfos/lazy_import.py +84 -0
  146. deepfos/lib/__init__.py +0 -0
  147. deepfos/lib/_javaobj.py +366 -0
  148. deepfos/lib/asynchronous.py +879 -0
  149. deepfos/lib/concurrency.py +107 -0
  150. deepfos/lib/constant.py +39 -0
  151. deepfos/lib/decorator.py +310 -0
  152. deepfos/lib/deepchart.py +778 -0
  153. deepfos/lib/deepux.py +477 -0
  154. deepfos/lib/discovery.py +273 -0
  155. deepfos/lib/edb_lexer.py +789 -0
  156. deepfos/lib/eureka.py +156 -0
  157. deepfos/lib/filterparser.py +751 -0
  158. deepfos/lib/httpcli.py +106 -0
  159. deepfos/lib/jsonstreamer.py +80 -0
  160. deepfos/lib/msg.py +394 -0
  161. deepfos/lib/nacos.py +225 -0
  162. deepfos/lib/patch.py +92 -0
  163. deepfos/lib/redis.py +241 -0
  164. deepfos/lib/serutils.py +181 -0
  165. deepfos/lib/stopwatch.py +99 -0
  166. deepfos/lib/subtask.py +572 -0
  167. deepfos/lib/sysutils.py +703 -0
  168. deepfos/lib/utils.py +1003 -0
  169. deepfos/local.py +160 -0
  170. deepfos/options.py +670 -0
  171. deepfos/translation.py +237 -0
  172. deepfos-1.1.60.dist-info/METADATA +33 -0
  173. deepfos-1.1.60.dist-info/RECORD +175 -0
  174. deepfos-1.1.60.dist-info/WHEEL +5 -0
  175. deepfos-1.1.60.dist-info/top_level.txt +1 -0
deepfos/element/datatable.py
@@ -0,0 +1,2467 @@
+ import re
+ import warnings
+ from contextvars import ContextVar
+ import pprint
+ from contextlib import contextmanager, asynccontextmanager
+ from typing import (
+     Iterable, List, Dict, Union, Type,
+     Tuple, TypeVar, Any, Sequence, Optional,
+     TYPE_CHECKING,
+ )
+
+ import pandas as pd
+ import numpy as np
+ from loguru import logger
+ from pypika import Field, Query, Table, ClickHouseQuery, Order, OracleQuery, MSSQLQuery, PostgreSQLQuery
+ from pypika.terms import Term, EmptyCriterion
+ from pypika.utils import format_quotes, format_alias_sql
+ from requests.utils import CaseInsensitiveDict
+
+ from .base import ElementBase, SyncMeta
+ from deepfos.api.datatable import (
+     MySQLAPI, ClickHouseAPI, SQLServerAPI,
+     OracleAPI, KingBaseAPI, GaussAPI, DaMengAPI,
+     PostgreSQLAPI, DeepEngineAPI, DeepModelAPI,
+     DeepModelKingBaseAPI
+ )
+ from deepfos.api.models.datatable_mysql import (
+     CustomSqlRespDTO, MiscModel, DatatableDataDeleteDTO
+ )
+ from deepfos.lib.utils import FrozenClass, split_dataframe
+ from deepfos.lib.asynchronous import future_property, evloop
+ from deepfos.lib.decorator import flagmethod, cached_property
+ from deepfos.lib.constant import UNSET
+ from deepfos.db.dbkits import null, Skip, DataframeSQLConvertor, escape_string, escape_mysql_string, escape_pg_string
+ from deepfos.db.oracle import OracleDFSQLConvertor  # noqa
+ from deepfos.db.clickhouse import ClickHouseConvertor
+ from deepfos.db.postgresql import PostgreSQLConvertor
+ from deepfos.db.sqlserver import SQLServerDFSQLConvertor
+ from deepfos.db.deepengine import DeepEngineDFSQLConvertor
+ from deepfos.options import OPTION
+
+ __all__ = [
+     'Datatable',
+     'AsyncDataTableMySQL',
+     'DataTableMySQL',
+     'AsyncDataTableClickHouse',
+     'DataTableClickHouse',
+     'AsyncDataTableOracle',
+     'DataTableOracle',
+     'AsyncDataTableSQLServer',
+     'DataTableSQLServer',
+     'AsyncDataTableKingBase',
+     'DataTableKingBase',
+     'AsyncDataTableGauss',
+     'DataTableGauss',
+     'AsyncDataTableDaMeng',
+     'DataTableDaMeng',
+     'AsyncDataTablePostgreSQL',
+     'DataTablePostgreSQL',
+     'AsyncDataTableDeepEngine',
+     'DataTableDeepEngine',
+     'AsyncDataTableDeepModel',
+     'DataTableDeepModel',
+     'AsyncDataTableDeepModelKingBase',
+     'DataTableDeepModelKingBase',
+     'null',
+     'Skip',
+     'Field',
+     'get_table_class',
+     'T_DatatableClass',
+     'T_DatatableInstance',
+     'T_AsyncDatatableClass',
+     'T_AsyncDatatableInstance'
+ ]
+
+ SQL_LOG_MAX_LEN = 1024
+ # -----------------------------------------------------------------------------
+ # typing
+ KT = TypeVar('KT', Field, str)
+ VT = TypeVar('VT', str, int)
+ T_DictLike = Union[Dict[KT, VT], Iterable[Tuple[KT, VT]]]
+
+
+ # -----------------------------------------------------------------------------
+ # Columns
+ class BaseColumn:
+     null_val = UNSET
+     dtype = UNSET
+
+     def __init__(self, column: MiscModel):
+         self.column = column
+         self.col_name = column.name
+         self.col_type = column.type
+         self.nullable = column.whetherEmpty
+
+     def fit(self, df: pd.DataFrame, column: str):
+         """
+         Make the corresponding column of a :class:`DataFrame` satisfy the
+         column's constraints. Typically used before writing the df's data
+         to the DB. Does two things:
+
+         1. Fill null values, provided the subclass has :attr:`nullable` set to \
+            ``False`` and defines the class attribute :attr:`null_val` as the fill value.
+         2. Any extra conversion, defined by subclasses via :meth:`extra_fit`.
+
+         Args:
+             df: the :class:`DataFrame` to convert
+             column: name of the column to convert
+         """
+         if not self.nullable and self.null_val is not UNSET:
+             df[column] = df[column].fillna(self.null_val)
+         self.extra_fit(df, column)
+
+     def extra_fit(self, df: pd.DataFrame, column: str):
+         # df[self.col_name] = df[self.col_name].astype(self.dtype, errors='ignore')
+         pass
+
+     def cast(self, df: pd.DataFrame, column: str):
+         """
+         Cast the corresponding column of a :class:`DataFrame` to its proper type.
+         Typically used when a :class:`DataFrame` is fetched.
+         """
+         pass
+
+     def __repr__(self):  # pragma: no cover
+         return self.__class__.__name__
+
+
+ class ColumnFloat(BaseColumn):
+     dtype = 'float'
+
+
+ class ColumnDateTime(BaseColumn):
+     dtype = 'datetime64[ns]'
+
+     def cast(self, df, column: str):
+         df[column] = pd.to_datetime(df[column])
+
+     @staticmethod
+     def format_datetime(dt):
+         if not isnull(dt):
+             return "'" + dt.strftime("%Y-%m-%d %H:%M:%S") + "'"
+         return pd.NaT
+
+     def extra_fit(self, df: pd.DataFrame, column: str):
+         df[column] = df[column].apply(self.format_datetime)
+
+
+ class ColumnOracleDateTime(ColumnDateTime):
+     dtype = 'datetime64[ns]'
+
+     def cast(self, df, column: str):
+         df[column] = pd.to_datetime(df[column])
+
+     @staticmethod
+     def format_datetime(dt):
+         if not isnull(dt):
+             return f"TO_DATE('{dt.strftime('%Y-%m-%d %H:%M:%S')}', 'YYYY-MM-DD HH24:MI:SS')"
+         return pd.NaT
+
+     def extra_fit(self, df: pd.DataFrame, column: str):
+         df[column] = df[column].apply(self.format_datetime)
+
+
+ class ColumnInt(BaseColumn):
+     dtype = 'int'
+
+
+ class ColumnString(BaseColumn):
+     null_val = 'null'
+     dtype = 'object'
+
+     @staticmethod
+     def escape_string(string):
+         if string is null:
+             return null
+         if string:
+             return f"'{escape_string(string)}'"
+         return "''"
+
+     def extra_fit(self, df: pd.DataFrame, column: str):
+         if self.nullable:
+             df[column] = df[column].fillna(null)
+         df[column] = df[column].apply(self.escape_string)
+
+
+ class MySQLColumnString(ColumnString):
+     null_val = 'null'
+     dtype = 'object'
+
+     @staticmethod
+     def escape_string(string):
+         if string is null:
+             return null
+         if string:
+             return f"'{escape_mysql_string(string)}'"
+         return "''"
+
+
+ class PGColumnString(ColumnString):
+     null_val = 'null'
+     dtype = 'object'
+
+     @staticmethod
+     def escape_string(string):
+         if string is null:
+             return null
+         if string:
+             return f"'{escape_pg_string(string)}'"
+         return "''"
+
+
+ class ColumnDecimal(BaseColumn):
+     dtype = 'float'
+
+     def extra_fit(self, df, column: str):
+         digits = self.column.length.rsplit(',')[1]
+         df[column] = np.where(
+             df[column].isna(),
+             df[column], df[column].fillna(0).round(int(digits)))
+
+
+ class ColumnFactory:
+     col_map = {
+         "datetime": ColumnDateTime,
+         "oracle_datetime": ColumnOracleDateTime,
+         "date": ColumnDateTime,
+         "int": ColumnInt,
+         "smallint": ColumnInt,
+         "tinyint": ColumnInt,
+         "bigint": ColumnInt,
+         "integer": ColumnInt,
+         "varchar": ColumnString,
+         "pg_varchar": PGColumnString,
+         "mysql_varchar": MySQLColumnString,
+         "pg_text": PGColumnString,
+         "mysql_text": MySQLColumnString,
+         "text": ColumnString,
+         "float": ColumnFloat,
+         "double": ColumnFloat,
+         "decimal": ColumnDecimal,
+     }
+
+     def __new__(cls, column: MiscModel):
+         col_class = cls.col_map.get(cls.get_col_key(column.type), BaseColumn)
+         return col_class(column)
+
+     @staticmethod
+     def get_col_key(col_type):
+         return col_type.lower()
+
+
+ class MySQLColumnFactory(ColumnFactory):
+     @staticmethod
+     def get_col_key(col_type):
+         if col_type.lower() == 'varchar':
+             return "mysql_varchar"
+         if col_type.lower() == 'text':
+             return "mysql_text"
+         return col_type.lower()
+
+
+ class ClickHouseColumnFactory(ColumnFactory):
+     @staticmethod
+     def get_col_key(col_type):
+         if col_type.lower() == 'varchar':
+             return "mysql_varchar"
+         if col_type.lower() == 'text':
+             return "mysql_text"
+         if col_type == 'LowCardinality(String)':
+             return "mysql_varchar"
+         return col_type.lower()
+
+
+ class OracleColumnFactory(ColumnFactory):
+     @staticmethod
+     def get_col_key(col_type):
+         if col_type.lower() == 'datetime':
+             return "oracle_datetime"
+         return col_type.lower()
+
+
+ class PGColumnFactory(ColumnFactory):
+     @staticmethod
+     def get_col_key(col_type):
+         if col_type.lower() == 'varchar':
+             return "pg_varchar"
+         if col_type.lower() == 'text':
+             return "pg_text"
+         return col_type.lower()
+
+
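# Illustrative sketch (annotation, not part of the packaged file): the factory
# classes above dispatch on a column's lowercased type string, falling back to
# BaseColumn for unknown types. The MiscModel keyword arguments shown here are
# assumptions inferred from the attributes the column classes read
# (name / type / length / whetherEmpty).
#
#     ColumnFactory(MiscModel(name='amount', type='decimal',
#                             length='18,2', whetherEmpty=True))   # -> ColumnDecimal
#     PGColumnFactory(MiscModel(name='memo', type='text',
#                               whetherEmpty=True))                # -> PGColumnString via 'pg_text'
#     ColumnFactory(MiscModel(name='blob', type='binary',
#                             whetherEmpty=True))                  # -> BaseColumn fallback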
+ class TableStructure:
+     """
+     Table structure
+
+     Args:
+         meta_info: table metadata containing each column's name and data type
+
+     """
+     ColumnFactory = ColumnFactory
+
+     def __init__(self, meta_info: List[MiscModel]):
+         self.columns = CaseInsensitiveDict({
+             col.name: self.ColumnFactory(col)
+             for col in meta_info
+         })
+
+     def fit(self, df: pd.DataFrame, columns: Iterable[str] = None):
+         """
+         Run fit on the given columns of the passed-in DataFrame.
+         Mutates the DataFrame in place.
+
+         Args:
+             df: data source
+             columns: columns to fit
+
+         See Also:
+             :meth:`BaseColumn.fit`
+
+         """
+         if columns is None:
+             columns = self.columns
+
+         valid_cols = []
+         for col in columns:
+             if col in self.columns:
+                 valid_cols.append(col)
+                 self.columns[col].fit(df, col)
+         return df[valid_cols]
+
+     def fit_single(self, df: pd.DataFrame, column: str):  # pragma: no cover
+         """
+         Run fit on a single column of the passed-in DataFrame.
+         Mutates the DataFrame in place.
+
+         Args:
+             df: data source
+             column: column name
+
+         See Also:
+             :meth:`fit` , :meth:`BaseColumn.fit`
+
+         """
+         if column not in self.columns:
+             raise KeyError(f"Given column: {column} doesn't exist.")
+         self.columns[column].fit(df, column)
+
+     def cast(self, df: pd.DataFrame):
+         """
+         Run cast on every column of the passed-in DataFrame.
+         Mutates the DataFrame in place.
+
+         Args:
+             df: data source
+
+         See Also:
+             :meth:`BaseColumn.cast`
+
+         """
+         for col in df.columns:
+             if col in self.columns:
+                 self.columns[col].cast(df, col)
+
+     def __repr__(self):  # pragma: no cover
+         return pprint.pformat(self.columns)
+
+
+ class MySQLTableStructure(TableStructure):
+     ColumnFactory = MySQLColumnFactory
+
+
+ class OracleTableStructure(TableStructure):
+     ColumnFactory = OracleColumnFactory
+
+
+ class PGTableStructure(TableStructure):
+     ColumnFactory = PGColumnFactory
+
+
+ class ClickHouseTableStructure(TableStructure):
+     ColumnFactory = ClickHouseColumnFactory
+
+
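# Illustrative sketch (annotation, not part of the packaged file): a typical
# fit/cast round trip with the classes above. MiscModel constructor kwargs are
# assumed as in the previous sketch; `cast` enriches fetched data, `fit`
# formats values into SQL literals ready for insertion.
#
#     structure = TableStructure([
#         MiscModel(name='id', type='int', whetherEmpty=False),
#         MiscModel(name='created', type='datetime', whetherEmpty=True),
#     ])
#     df = pd.DataFrame({'id': [1, 2], 'created': ['2023-01-01', None]})
#     structure.cast(df)                      # 'created' -> datetime64[ns]
#     ready = structure.fit(df, df.columns)   # 'created' -> "'2023-01-01 00:00:00'" / NaT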
+ # -----------------------------------------------------------------------------
+ # utils
+ class _DataTableDFConvertor(DataframeSQLConvertor):
+     def convert(
+         self,
+         dataframe: pd.DataFrame,
+         tablename: str,
+         updatecol: Iterable[str] = None,
+         **opts
+     ) -> str:
+         """
+         Convert a DataFrame into an INSERT SQL statement.
+
+         Without updatecol, builds plain INSERT INTO syntax; with updatecol,
+         builds INSERT INTO ... ON DUPLICATE syntax: rows with no duplicate
+         key are inserted, while rows whose key already exists update the
+         given columns.
+
+         Args:
+             dataframe: data to insert
+             tablename: database table name
+             updatecol: columns to update
+
+         Returns:
+             the SQL statement
+         """
+         if dataframe.empty:
+             return ''
+
+         data_df = dataframe.fillna(null).astype(str, errors='ignore')
+         data_series = "(" + pd.Series(data_df.values.tolist()).str.join(',') + ")"
+         columns = self.build_column_string(dataframe.columns)
+
+         return self.build_sql(columns, data_series, tablename, updatecol)
+
+
+ class _OracleDFConvertor(_DataTableDFConvertor):
+     def build_sql(
+         self,
+         columns: str,
+         values_in_line: Iterable[str],
+         tablename: str,
+         updatecol: Iterable[str] = None,
+         **opts
+     ):
+         return OracleDFSQLConvertor(self.quote_char).build_sql(columns, values_in_line, tablename, updatecol, **opts)
+
+     def build_column_string(self, columns):
+         return ','.join(columns.map(
+             lambda x: f'"{x.upper()}"'
+         ))
+
+
+ class _ClickHouseDFConvertor(_DataTableDFConvertor):
+     def build_sql(
+         self,
+         columns: str,
+         values_in_line: Iterable[str],
+         tablename: str,
+         updatecol: Iterable[str] = None,
+         **opts
+     ):
+         return ClickHouseConvertor(self.quote_char).build_sql(columns, values_in_line, tablename, updatecol, **opts)
+
+
+ class _SQLServerDFConvertor(_DataTableDFConvertor):
+     def build_sql(
+         self,
+         columns: str,
+         values_in_line: Iterable[str],
+         tablename: str,
+         updatecol: Iterable[str] = None,
+         **opts
+     ):
+         return SQLServerDFSQLConvertor(self.quote_char).build_sql(columns, values_in_line, tablename, updatecol, **opts)
+
+
+ class _DeepEngineDFConvertor(_DataTableDFConvertor):
+     def build_sql(
+         self,
+         columns: str,
+         values_in_line: Iterable[str],
+         tablename: str,
+         updatecol: Iterable[str] = None,
+         **opts
+     ):
+         return DeepEngineDFSQLConvertor(self.quote_char).build_sql(columns, values_in_line, tablename, updatecol, **opts)
+
+
+ class _PostgreSQLDFConvertor(PostgreSQLConvertor):
+     def convert(
+         self,
+         dataframe: pd.DataFrame,
+         tablename: str,
+         updatecol: Iterable[str] = None,
+         conflict_target: Iterable[str] = None,
+         **opts
+     ) -> str:
+         """
+         Convert a DataFrame into an INSERT SQL statement.
+
+         Without updatecol, builds plain INSERT INTO syntax; with updatecol,
+         builds INSERT INTO ... ON CONFLICT syntax: rows with no conflicting
+         key are inserted, while conflicting rows update the given columns.
+
+         Args:
+             dataframe: data to insert
+             tablename: database table name
+             updatecol: columns to update
+             conflict_target: columns used as the conflict target in the
+                 INSERT INTO ... ON CONFLICT syntax
+
+         Returns:
+             the SQL statement
+         """
+         if dataframe.empty:
+             return ''
+
+         data_df = dataframe.fillna(null).astype(str, errors='ignore')
+         data_series = "(" + pd.Series(data_df.values.tolist()).str.join(',') + ")"
+         columns = self.build_column_string(dataframe.columns)
+
+         return self.build_sql(columns, data_series, tablename, updatecol, conflict_target=conflict_target, **opts)
+
+
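# Illustrative sketch (annotation, not part of the packaged file): the
# convertors above turn a pre-escaped DataFrame into a single INSERT statement.
# Values must already be SQL literals (TableStructure.fit produces them), since
# convert() only stringifies and joins rows; the exact statement shape is
# delegated to the dialect-specific build_sql, so the output below is approximate.
#
#     convertor = _DataTableDFConvertor(quote_char='`')
#     df = pd.DataFrame({'id': [1, 2], 'name': ["'Foo'", "'Bar'"]})
#     convertor.convert(df, 'example')
#     # -> roughly: INSERT INTO `example` (`id`,`name`) VALUES (1,'Foo'),(2,'Bar')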
+ def isnull(obj: Any) -> bool:
+     return (obj is null) or pd.isna(obj)
+
+
+ def ensure_pikafield(table: Table, fields: Iterable[Union[str, int, Field, Term]]):
+     for fld in fields:
+         if isinstance(fld, str):
+             yield table.__getattr__(fld)
+         elif isinstance(fld, int):
+             yield table.__getattr__(str(fld))
+         else:
+             yield fld
+
+
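# Illustrative sketch (annotation, not part of the packaged file):
# ensure_pikafield lets the query builders accept plain strings, ints and
# pypika terms interchangeably when naming columns.
#
#     t = Table('example')
#     list(ensure_pikafield(t, ['col_a', 42, t.col_b + 1]))
#     # -> [t.col_a, t.__getattr__('42'), t.col_b + 1]
#     # strings and ints become pypika Fields on `t`; terms pass through as-is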
+ txn_support = flagmethod('_txn_support_')
+
+
+ class _TxnConfig:
+     __slots__ = ('async_api', 'sql', 'in_txn', 'txn_support', 'flatten')
+
+     def __init__(self):
+         self.async_api = None
+         self.sql = [[]]
+         self.in_txn = [False]
+         self.txn_support = False
+         self.flatten = False
+
+
+ DOC_TEMPLATE = """{DB} datatable
+
+ Provides insert/delete/update/select operations on a single table.
+
+ Args:
+     table_name: the table's actual name in the database; when known, passing it avoids an extra internal lookup and improves performance.
+ """
+
+ DOC_START_TX_TEMPLATE = """Start a transaction
+
+ Context manager. Open a context with the with statement; SQL issued inside the
+ context is executed as one transaction. The transaction runs as soon as the
+ with-block exits; errors raised during execution propagate directly, and the
+ result is available via :attr:`transaction_result`.
+
+ .. admonition:: Example
+
+     .. code-block:: python
+
+         tbl = %s('table_example')
+         t = tbl.table
+         with tbl.start_transaction():
+             tbl.insert({'key': 101, 'value': 'txn'})
+             tbl.update({'value': 'new_txn'}, where=t.key == 101)
+             tbl.delete(where=t.key >= 99)
+         result = tbl.transaction_result
+
+ Args:
+     flatten: whether to flatten nested transactions; if enabled, nested transactions execute as a single transaction
+
+ Important:
+     Only the ``insert/delete/update`` **family** of methods (including
+     :meth:`insert_df`, :meth:`copy_rows`, etc.) can run inside a transaction.
+     Methods that support transactions can be identified in the source: any
+     method decorated with ``@txn_support`` qualifies.
+
+     If a select is executed inside a transaction, its result is still
+     returned immediately.
+
+ """
+
+ # -----------------------------------------------------------------------------
+ # core
+
+
+ class AsyncDataTableMySQL(ElementBase):
+     __doc__ = DOC_TEMPLATE.format(DB='MySQL')
+     api_class = MySQLAPI
+     api: MySQLAPI
+     query = Query
+     quote_char = '`'
+     convertor = _DataTableDFConvertor(quote_char=quote_char)
+
+     _txn_ = ContextVar('TXN')
+     #: Transaction execution result
+     transaction_result = None
+
+     def __init__(
+         self,
+         element_name: str,
+         folder_id: str = None,
+         path: str = None,
+         table_name: str = None,
+         server_name: str = None,
+     ):
+         self.__tbl_name = table_name
+         super().__init__(element_name, folder_id, path, server_name)
+
+     def _safe_get_txn_conf(self) -> _TxnConfig:
+         try:
+             config = self._txn_.get()
+         except LookupError:
+             config = _TxnConfig()
+             self._txn_.set(config)
+         return config
+
+     @property
+     def _txn_support_(self):
+         return self._safe_get_txn_conf().txn_support
+
+     @_txn_support_.setter
+     def _txn_support_(self, val):
+         self._safe_get_txn_conf().txn_support = val
+
+     @future_property
+     async def meta(self):
+         """Meta configuration of the datatable"""
+         api = await self.wait_for('async_api')
+         element_info = await self.wait_for('element_info')
+         r = await api.dml.table_info_field([element_info])
+         return r[0]
+
+     @cached_property
+     def table_name(self) -> str:
+         """Actual table name in the database"""
+         if self.__tbl_name is None:
+             self.__tbl_name = self.meta.datatableInfo.actualTableName
+         return self.__tbl_name
+
+     @cached_property
+     def table(self) -> Table:
+         """pypika Table object
+
+         Mainly used to build query conditions.
+
+         .. admonition:: Example
+
+             .. code-block:: python
+
+                 tbl = DataTableMySQL("test")
+                 t = tbl.table
+                 where = (
+                     ((t.f1 > 1) | (t.f2 == '23'))
+                     &
+                     (t.f3.isin([1, 2, 3]) | t.f4.like('f%'))
+                 )
+                 tbl.select(where=where)
+
+             executes the SQL:
+
+             .. code-block:: sql
+
+                 SELECT
+                     *
+                 FROM
+                     test
+                 WHERE
+                     (`f1`>1 OR `f2`='23')
+                     AND
+                     (`f3` IN (1,2,3) OR `f4` LIKE 'f%')
+
+         See Also:
+             For more ways to use table, see the
+             `pypika docs on GitHub <https://github.com/kayak/pypika#tables-columns-schemas-and-databases>`_
+
+         """
+         return Table(self.table_name)
+
+     @cached_property
+     def _quoted_table_name(self):
+         return self.table.get_sql(quote_char=self.quote_char)
+
+     @cached_property
+     def structure(self) -> MySQLTableStructure:
+         """Structure of the datatable
+
+         Holds the name and type information of every column; used to
+         pre-process data types when selecting and saving.
+         """
+         return MySQLTableStructure(self.meta.datatableColumn)
+
+     @cached_property
+     def _field_map_templates(self) -> Tuple[Dict[str, None], Dict[str, Type[null]]]:
+         base_tmpl = {}
+         incr_cols = {}
+
+         for col in self.meta.datatableColumn:
+             if col.whetherIncrement:
+                 incr_cols[col.name] = null
+             else:
+                 base_tmpl[col.name] = None
+
+         return base_tmpl, incr_cols
+
+     async def select(
+         self,
+         columns: Iterable[Union[str, Term]] = None,
+         where: Union[str, Term, EmptyCriterion] = None,
+         distinct: bool = False,
+         groupby: Iterable[Union[str, int, Term]] = None,
+         having: Iterable[Union[Term, EmptyCriterion]] = None,
+         orderby: Iterable[Union[str, Field]] = None,
+         order: Union[Order, str] = Order.asc,
+         limit: int = None,
+         offset: int = None,
+     ) -> pd.DataFrame:
+         """Fetch data from the table as a ``DataFrame``
+
+         Returns a two-dimensional ``DataFrame`` built from the query
+         conditions, converting values automatically according to the column
+         types; e.g. ``DATETIME`` fields are converted to datetimes.
+
+         Important:
+             Usage is identical to :meth:`select_raw`; see that method's
+             documentation for examples.
+
+         Args:
+             columns: columns to select
+             where: query condition (aggregate conditions also work)
+             distinct: whether to use SELECT DISTINCT
+             groupby: columns for GROUP BY
+             having: conditions for the HAVING clause
+             orderby: columns for ORDER BY
+             order: ORDER BY direction, ASC/DESC
+             limit: LIMIT on the number of rows returned
+             offset: OFFSET value
+
+         Returns:
+             the queried two-dimensional table
+
+         See Also:
+             To get the raw data instead, use :meth:`select_raw`
+
+         """
+         raw_data = await self.select_raw(
+             columns,
+             where=where,
+             distinct=distinct,
+             groupby=groupby,
+             having=having,
+             orderby=orderby,
+             order=order,
+             limit=limit,
+             offset=offset
+         )
+         data = pd.DataFrame.from_records(raw_data)
+         self.structure.cast(data)
+         if data.empty:
+             if columns:
+                 add_cols = self._get_valid_columns(columns)
+             else:
+                 add_cols = self.structure.columns.keys()
+             return pd.DataFrame(columns=add_cols)
+         return data
+
+     async def select_raw(
+         self,
+         columns: Iterable[Union[str, Term]] = None,
+         where: Union[str, Term, EmptyCriterion] = None,
+         distinct: bool = False,
+         groupby: Iterable[Union[str, int, Term]] = None,
+         having: Iterable[Union[Term, EmptyCriterion]] = None,
+         orderby: Iterable[Union[str, Field]] = None,
+         order: Union[Order, str] = Order.asc,
+         limit: int = None,
+         offset: int = None,
+     ) -> List[dict]:
+         """Fetch data from the table according to the query conditions
+
+         Queries the datatable endpoint with the given conditions and returns
+         the result. Values contain only **basic JSON data types**.
+
+         Args:
+             columns: columns to select
+             where: query condition (aggregate conditions also work)
+             distinct: whether to use SELECT DISTINCT
+             groupby: columns for GROUP BY
+             having: conditions for the HAVING clause
+             orderby: columns for ORDER BY
+             order: ORDER BY direction, ASC/DESC
+             limit: LIMIT on the number of rows returned
+             offset: OFFSET value
+
+
+         .. admonition:: Example
+
+             .. code-block:: python
+
+                 import pypika.functions as pf
+                 tbl = DataTableMySQL("example")
+                 t = tbl.table
+
+             #. Query the whole table
+
+                 .. code-block:: python
+
+                     tbl.select()
+
+             #. Select specific columns, applying functions to some of them
+
+                 .. code-block:: python
+
+                     columns = [
+                         'col_a',
+                         pf.Max('col_b'),
+                         t.col_c,
+                         t.col_d + 25,
+                         pf.Avg(t.col_e)
+                     ]
+                     tbl.select(columns)
+
+                 executes the SQL:
+
+                 .. code-block:: sql
+
+                     SELECT
+                         `col_a`,
+                         MAX('col_b'),
+                         `col_c`,
+                         `col_d`+ 25,
+                         AVG(`col_e`)
+                     FROM
+                         `example`
+
+             #. Specify query conditions
+
+                 .. code-block:: python
+
+                     where = (
+                         ((t.col_a > 1) | (t.col_b == '23'))
+                         &
+                         (t.col_c.isin([1, 2, 3]) | t.col_d.like('f%'))
+                     )
+                     tbl.select(where=where)
+
+                 executes the SQL:
+
+                 .. code-block:: sql
+
+                     SELECT
+                         *
+                     FROM
+                         `example`
+                     WHERE
+                         (`col_a`>1
+                         OR `col_b`= '23')
+                         AND (`col_c` IN (1, 2, 3)
+                         OR `col_d` LIKE 'f%')
+
+             #. Aggregation and more
+
+                 .. code-block:: python
+
+                     tbl.select(
+                         [pf.Max('col_a')],
+                         groupby=[t.col_c],
+                         limit=10,
+                         offset=5,
+                     )
+
+                 executes the SQL:
+
+                 .. code-block:: sql
+
+                     SELECT
+                         MAX('col_a')
+                     FROM
+                         `example`
+                     GROUP BY
+                         `col_c`
+                     LIMIT 10 OFFSET 5
+
+         Warnings:
+             Although the where parameter currently accepts a str, that
+             support will be removed in the future, because it turns the
+             final SQL into
+             ``"SELECT {distinct} {fields} FROM {table_name} WHERE {where}"``,
+             i.e. groupby, limit and the other parameters are ignored. They
+             can be folded into the where condition instead, but doing so
+             hurts the readability of your code and is not recommended.
+             Please call this method as shown in the examples; otherwise you
+             may see warnings, and eventually such calls may stop working.
+
+         See Also:
+             To get the data as a ``DataFrame``, use :meth:`select`
+
+         Returns:
+             Data shaped like [{column -> value}, ... , {column -> value}].
+
+             For example, for the raw data
+
+             +------+------+
+             | col1 | col2 |
+             +======+======+
+             | 1    | 2    |
+             +------+------+
+             | 3    | 4    |
+             +------+------+
+
+             the return value is ``[{'col1': 1, 'col2': 2}, {'col1': 3, 'col2': 4}]``.
+
+         """
+         sql = self._build_select_sql(
+             columns,
+             where=where,
+             distinct=distinct,
+             groupby=groupby,
+             having=having,
+             orderby=orderby,
+             order=order,
+             limit=limit,
+             offset=offset
+         )
+         r = await self._run_sql(sql)
+         return r.selectResult
+
+     def _build_select_sql(
+         self,
+         columns: Iterable[Union[str, Term]] = None,
+         where: Union[str, Term, EmptyCriterion] = None,
+         distinct: bool = False,
+         groupby: Iterable[Union[str, int, Term]] = None,
+         having: Iterable[Union[Term, EmptyCriterion]] = None,
+         orderby: Iterable[Union[str, Field]] = None,
+         order: Union[Order, str] = Order.asc,
+         limit: int = None,
+         offset: int = None,
+     ) -> str:
+         q = self.query.from_(self.table)
+         if distinct:
+             q = q.distinct()
+
+         if columns is None:
+             q = q.select('*')
+         else:
+             q = q.select(*ensure_pikafield(self.table, columns))
+
+         if isinstance(where, str):
+             warnings.warn(
+                 "Passing a string as the where condition is deprecated; "
+                 "see this method's documentation for the recommended usage.",
+                 DeprecationWarning)
+             sql = f"{q.get_sql(quote_char=self.quote_char)} WHERE {where}"
+         else:
+             if where is not None:
+                 q = q.where(where)
+             if groupby is not None:
+                 q = q.groupby(*ensure_pikafield(self.table, groupby))
+             if having is not None:
+                 q = q.having(*having)
+             if orderby is not None:
+                 if isinstance(order, str):
+                     order = Order[order.lower()]
+                 q = q.orderby(*ensure_pikafield(self.table, orderby), order=order)
+             if limit is not None:
+                 q = q.limit(limit)
+             if offset is not None:
+                 q = q.offset(offset)
+             sql = q.get_sql(quote_char=self.quote_char)
+         return sql
+
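# Illustrative sketch (annotation, not part of the packaged file): the SQL
# _build_select_sql produces for a simple query against a MySQL table
# (quote_char '`'). The element name 'example' is hypothetical.
#
#     tbl = AsyncDataTableMySQL('example')
#     t = tbl.table
#     tbl._build_select_sql(['col_a'], where=t.col_b == 1,
#                           orderby=['col_a'], order=Order.desc, limit=10)
#     # -> SELECT `col_a` FROM `example` WHERE `col_b`=1 ORDER BY `col_a` DESC LIMIT 10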
+     @staticmethod
+     def _get_valid_columns(columns: Iterable[Union[str, Term]]):
+         res = []
+         for c in columns:
+             if isinstance(c, str):
+                 res.append(c)
+             elif isinstance(c, Term):
+                 if c.alias is not None:
+                     res.append(c.alias)
+                 else:
+                     res.append(c.get_sql(quote_char=''))
+             else:
+                 res.append(str(c))
+         return res
+
+     @txn_support
+     async def insert_df(
+         self,
+         dataframe: pd.DataFrame,
+         updatecol: Iterable = None,
+         chunksize: int = 5000,
+         auto_fit: bool = True,
+     ) -> Union[CustomSqlRespDTO, Dict, None]:
+         """Insert a ``DataFrame``'s data into the current table
+
+         Before writing, the DataFrame is processed as follows:
+
+         #. (mandatory) all missing values become null, so the write succeeds
+         #. (optional) string columns that are **NOT NULL** are filled with the ``'null'`` string (this may change in the future; do not rely on it)
+         #. (optional) decimal columns are automatically ``round``-ed to the defined number of decimal places
+
+         The **(optional)** steps above can be disabled with ``auto_fit=False``.
+
+         Args:
+             dataframe: data to insert
+             updatecol: columns to update (for INSERT INTO ON DUPLICATE)
+             chunksize: number of rows per INSERT statement
+             auto_fit: whether to adjust the data automatically
+
+         Hint:
+             If a single write carries too much data and exceeds the
+             database's per-statement limit, lower the chunksize; this method
+             then splits one large statement into several.
+
+         Returns:
+             record of the executed operations
+
+         """
+         if dataframe.empty:
+             return
+
+         if auto_fit:
+             dataframe = dataframe.copy()
+             dataframe = self.structure.fit(dataframe, dataframe.columns)
+         else:
+             dataframe = dataframe[dataframe.columns.intersection(self.structure.columns)]
+
+         sqls = self.convertor.iter_sql(dataframe, self.table.get_table_name(), updatecol, chunksize)
+         return await self._maybe_submit_in_txn(sqls)
+
+     async def _maybe_submit_in_txn(self, sqls: Iterable[str]):
+         if self._txn_.get().in_txn[-1]:
+             for sql in sqls:
+                 await self.run_sql(sql)
+         else:
+             return await self._trxn_execute(self, list(sqls))
+
+     def _parse_where(self, where: Union[None, Term, EmptyCriterion]) -> str:
+         if isinstance(where, (Term, EmptyCriterion)):
+             return where.get_sql(quote_char=self.quote_char)
+         if isinstance(where, str):
+             return where
+         raise TypeError(f"Unsupported type: {type(where)} for where.")
+
+     @txn_support
+     async def delete(
+         self,
+         where: Union[str, Term, EmptyCriterion],
+     ) -> CustomSqlRespDTO:
+         """Delete rows from the table
+
+         Args:
+             where: delete condition
+
+         .. admonition:: Example
+
+             .. code-block:: python
+
+                 tbl = DataTableMySQL("example")
+                 t = tbl.table
+                 where = (
+                     ((t.col_a > 1) | (t.col_b == '23'))
+                     &
+                     (t.col_c.isin([1, 2, 3]) | t.col_d.like('f%'))
+                 )
+                 tbl.delete(where)
+
+             executes the SQL:
+
+             .. code-block:: sql
+
+                 DELETE
+                 FROM
+                     `example`
+                 WHERE
+                     (`col_a`>1
+                     OR `col_b`= '23')
+                     AND (`col_c` IN (1, 2, 3)
+                     OR `col_d` LIKE 'f%')
+
+         Warnings:
+             Support for ``str``-typed where arguments will be removed in the
+             future; please call this method as shown in the example.
+
+         """
+         sql = f"DELETE FROM {self._quoted_table_name} WHERE {self._parse_where(where)}"
+         return await self.run_sql(sql)
+
+     @txn_support
+     async def update(
+         self,
+         assignment_list: T_DictLike,
+         where: Union[None, Term, EmptyCriterion]
+     ):
+         """
+         Update rows of the table
+
+         Args:
+             assignment_list: columns to update with their new values
+             where: condition the updated rows must satisfy
+
+         .. admonition:: Example
+
+             .. code-block:: python
+
+                 tbl = DataTableMySQL("example")
+                 t = tbl.table
+                 tbl.update({'col1': 'val1', 'col2': t.col2 + 1}, where=t.key == 101)
+                 tbl.update([('col1', 'val1'), ('col2', t.col2 + 1)], where=t.key == 101)
+
+             The two ``update`` calls are equivalent and execute the SQL:
+
+             .. code-block:: sql
+
+                 UPDATE
+                     `example`
+                 SET
+                     `col1`= 'val1',
+                     `col2`=`col2`+ 1
+                 WHERE
+                     `KEY`= 101
+
+         Important:
+             To keep callers from running a full-table update simply because
+             they forgot the where condition, where is a required parameter
+             of this method. If a full-table update really is intended, pass
+             ``where = None`` explicitly.
+
+         """
+
+         q = self.query.update(self.table)
+
+         if isinstance(assignment_list, Dict):
+             iter_items = assignment_list.items()
+         else:
+             iter_items = assignment_list
+
+         for field, value in iter_items:
+             if isinstance(field, str):
+                 field = self.table.__getattr__(field)
+             q = q.set(field, value)
+
+         if where is not None:
+             q = q.where(where)
+         return await self.run_sql(q.get_sql(quote_char=self.quote_char))
+
+     @txn_support
+     async def update_from_dataframe(
+         self,
+         source: pd.DataFrame,
+         chucksize: Optional[int] = None
+     ):
+         """Update the table from a :class:`DataFrame`
+
+         Args:
+             source: source data for the update
+             chucksize: maximum number of DataFrame rows per update batch
+
+         Important:
+             The :class:`DataFrame` ``source`` must contain a ``where`` column
+             whose values may be strings or pypika condition expressions; it
+             states each row's update condition. To prevent a missing
+             condition from turning into a full-table update, no value in
+             this column may be null.
+
+             If some rows should not update every column, fill the
+             corresponding cells with the Skip value.
+
+         .. admonition:: Example
+
+             .. code-block:: python
+
+                 from deepfos.element.datatable import Skip
+
+                 df = pd.DataFrame(data=[
+                     [1, 'Foo', 'Foo@x.com'],
+                     [2, 'Bar', 'bar@x.com'],
+                     [3, 'Jack', Skip]
+                 ], columns=['id', 'name', 'email'])
+
+                 df['where'] = pd.Series(f"id='{i + 1}'" for i in range(3))
+
+                 tbl = DataTableMySQL("example")
+                 tbl.update_from_dataframe(df)
+
+             executes the following SQL:
+
+             .. code-block:: SQL
+
+                 UPDATE `example`
+                 SET `id`=1,`name`='Foo',`email`='Foo@x.com'
+                 WHERE
+                     id = 1;
+                 UPDATE `example`
+                 SET `id`=2,`name`='Bar',`email`='bar@x.com'
+                 WHERE
+                     id = 2;
+                 UPDATE `example`
+                 SET `id`=3,`name`='Jack' -- email is Skip, so it is not updated
+                 WHERE
+                     id = 3;
+
+         """
+         key_where = 'where'
+         if key_where not in source.columns:
+             raise ValueError(f"Column <{key_where}> is missing in source dataframe.")
+
+         valid_columns = source.columns.intersection(self.structure.columns.keys())
+         table = self.table
+
+         def yield_sql(df):
+             where_col = df[key_where]
+             for idx, upd_data in enumerate(df[valid_columns].to_dict(orient='records')):
+                 q = self.query.update(table)
+
+                 any_updates = False
+                 for field, value in upd_data.items():
+                     if value is Skip:
+                         continue
+
+                     any_updates = True
+                     if isinstance(field, str):
+                         field = table.__getattr__(field)
+                     q = q.set(field, value)
+
+                 if not any_updates:
+                     continue
+
+                 if isnull(where := where_col.iloc[idx]):
+                     raise ValueError(
+                         f"The where condition in [row: {idx}] is null, "
+                         f"which is strictly prohibited.")
+
+                 if isinstance(where, str):
+                     yield f"{q.get_sql(quote_char=self.quote_char)} WHERE {where}"
+                 elif isinstance(where, (Term, EmptyCriterion)):
+                     q = q.where(where)
+                     yield q.get_sql(quote_char=self.quote_char)
+
+         ret = []
+         for dataframe in split_dataframe(source, chucksize):
+             r = await self._maybe_submit_in_txn(yield_sql(dataframe))
+             ret.append(r)
+         return ret
+
+     async def count(
+         self,
+         where: Union[str, Term, EmptyCriterion],
+     ) -> int:
+         """
+         Count rows
+
+         Counts the rows matching the given query condition.
+
+         Args:
+             where: query condition
+
+         """
+         sql = f"SELECT COUNT(1) FROM {self._quoted_table_name} WHERE {self._parse_where(where)};"
+         resp = await self._run_sql(sql)
+         return list(resp.selectResult[0].values())[0]
+
+     def _format_field(
+         self,
+         field_map: Dict[str, Union[str, int, FrozenClass, Term]]
+     ) -> Tuple[str, str]:
+         base, incr = self._field_map_templates
+         fmap = {**base, **field_map, **incr}
+
+         field_strings = []
+
+         for field, value in fmap.items():
+             if value is None:
+                 field_strings.append(f"{self.quote_char}{field}{self.quote_char}")
+             elif isinstance(value, Term):
+                 value = value.get_sql(quote_char=self.quote_char)
+                 field_strings.append(f"{value} as {self.quote_char}{field}{self.quote_char}")
+             else:
+                 field_strings.append(f"{value!r} as {self.quote_char}{field}{self.quote_char}")
+
+         return ','.join(f"{self.quote_char}{k}{self.quote_char}" for k in fmap), ','.join(field_strings)
+
+     @txn_support
+     async def copy_rows(
+         self,
+         where: Union[str, Term, EmptyCriterion],
+         field_map: Dict[str, Union[str, int, FrozenClass, Term]] = None,
+         distinct: bool = False,
+     ) -> CustomSqlRespDTO:
+         """Copy rows of the current table
+
+         Copies the rows matching the where condition back into this table;
+         field_map can override or set the values of individual columns
+         (commonly used for version copies).
+
+         Args:
+             where: filter for the rows to copy
+             field_map: key: column to copy, value: value to copy
+             distinct: whether to add DISTINCT to the select
+
+         .. admonition:: Example
+
+             .. code-block:: python
+
+                 import pypika.functions as pf
+
+                 tbl = DataTableMySQL("test")
+                 t = tbl.table
+                 tbl.copy_rows(
+                     where=(t.f1 >= 1) & (t.f2 == 2) | (t.f3 > 1),
+                     field_map={
+                         "f1": t.f1 + 1,
+                         "f2": 3,
+                         "f4": t.f5,
+                         "f6": pf.Max(t.f6)
+                     }
+                 )
+
+             executes the SQL:
+
+             .. code-block:: sql
+
+                 INSERT INTO
+                     test
+                 SELECT
+                     `f1` + 1 as f1,
+                     3 as f2,
+                     `f3`,
+                     `f5` as f4,
+                     `f5`,
+                     Max(`f6`) as `f6`
+                 FROM
+                     test
+                 WHERE
+                     `f1`>=1 AND `f2`==2 OR `f3`>1
+
+         """
+         field_map = field_map or {}
+         fields, field_str = self._format_field(field_map)
+         sql = "INSERT INTO {table} ({fields}) SELECT {distinct} {field_str} FROM {table} WHERE {where}".format(
+             table=self._quoted_table_name,
+             fields=fields,
+             field_str=field_str,
+             where=self._parse_where(where),
+             distinct='DISTINCT' if distinct else ''
+         )
+         return await self.run_sql(sql)
+
+     async def _run_sql(self, sql: str) -> Optional[CustomSqlRespDTO]:
+         txn_conf = self._safe_get_txn_conf()
+
+         if txn_conf.in_txn[-1] and self._txn_support_:
+             txn_conf.sql[-1].append(sql)
+             if txn_conf.async_api is None:
+                 txn_conf.async_api = self.async_api
+             return
+
+         def trim_sql():  # pragma: no cover
+             if len(sql) > SQL_LOG_MAX_LEN:
+                 return sql[:SQL_LOG_MAX_LEN-4] + "..."
+             else:
+                 return sql
+
+         logger.opt(lazy=True).debug("Execute SQL: [{sql}].", sql=trim_sql)
+         return await self.async_api.dml.run_sql(sql)
+
+     @txn_support
+     async def run_sql(self, sql: str) -> Optional[CustomSqlRespDTO]:
+         """Execute SQL
+
+         Executes the SQL directly; table names appearing in the SQL must be
+         actual table names.
+
+         Hint:
+             The actual table name is available via :attr:`table_name`.
+
+         Args:
+             sql: the SQL statement to execute
+
+         Returns:
+             execution result
+
+         """
+         return await self._run_sql(sql)
+
+     @txn_support
+     async def insert(
+         self,
+         value_map: Dict[str, Any] = None,
+         value_list: Iterable[Sequence[Any]] = None,
+         columns: Iterable[Union[str, Term]] = None,
+     ):
+         """
+         Insert data; recommended when the amount of data is very small
+
+         Args:
+             value_map: data to insert, as key-value pairs (column name -> value)
+             value_list: data to insert (without column information)
+             columns: columns matching the data; defaults to all columns
+
+         .. admonition:: Example
+
+             .. code-block:: python
+
+                 tbl = DataTableMySQL("test")
+                 tbl.insert(value_map={'a': 1, 'b': 2})
+                 tbl.insert(value_list=[[1, 2]], columns=['a', 'b'])
+
+             The two ``insert`` calls are equivalent and execute the SQL:
+
+             .. code-block:: sql
+
+                 INSERT INTO `test`
+                     (`a`,`b`)
+                 VALUES
+                     (1,2)
+
+         """
+
+         q = self.query.into(self.table)
+
+         if value_map is not None:
+             q = q.columns(*value_map.keys()).insert(*value_map.values())
+         elif value_list is None:
+             raise ValueError('None of argument [value_map, value_list] is set.')
+         else:
+             if columns:
+                 column_num = len(list(columns))
+                 q = q.columns(*columns)
+             else:
+                 column_num = len(self.structure.columns.keys())
+
+             for value in value_list:
+                 if len(value) != column_num:
+                     raise ValueError(
+                         'Value number mismatch with column number. '
+                         f'values: {value}, number: {len(value)}, '
+                         f'columns number: {column_num}.')
+                 q = q.insert(*value)
+
+         return await self.run_sql(q.get_sql(quote_char=self.quote_char))
+
+     @classmethod
+     @asynccontextmanager
+     async def start_transaction(cls, flatten: bool = False):
+         """
+         Start a transaction
+
+         Context manager. Open a context with the with statement; SQL issued
+         inside the context is executed as one transaction. The transaction
+         runs as soon as the with-block exits; errors raised during execution
+         propagate directly, and the result is available via
+         :attr:`transaction_result`.
+
+         .. admonition:: Example
+
+             .. code-block:: python
+
+                 tbl = DataTableMySQL('table_example')
+                 t = tbl.table
+                 async with tbl.start_transaction():
+                     await tbl.insert({'key': 101, 'value': 'txn'})
+                     await tbl.update({'value': 'new_txn'}, where=t.key == 101)
+                     await tbl.delete(where=t.key >= 99)
+                 result = tbl.transaction_result
+
+         Args:
+             flatten: whether to flatten nested transactions; if enabled, nested transactions execute as a single transaction
+
+         Important:
+             Only the ``insert/delete/update`` **family** of methods
+             (including :meth:`insert_df`, :meth:`copy_rows`, etc.) can run
+             inside a transaction. Methods that support transactions can be
+             identified in the source: any method decorated with
+             ``@txn_support`` qualifies.
+
+             If a select is executed inside a transaction, its result is
+             still returned immediately.
+
+         """
+         try:
+             cls._txn_.get()
+         except LookupError:
+             cls._txn_.set(_TxnConfig())
+         bak_flatten = cls._txn_.get().flatten
+         cls._txn_.get().in_txn.append(True)
+
+         if flatten and not cls._txn_.get().flatten:
+             force_submit = True
+         else:
+             force_submit = False
+
+         cls._txn_.get().flatten = bak_flatten or flatten
+
+         if not cls._txn_.get().flatten:
+             cls._txn_.get().sql.append([])
+
+         try:
+             yield
+             if force_submit or not cls._txn_.get().flatten:
+                 await cls.__submit_txn()
+         finally:
+             cls._txn_.get().in_txn.pop()
+             cls._txn_.get().flatten = bak_flatten
+
+     @classmethod
+     async def __submit_txn(cls):
+         if sql := cls._txn_.get().sql.pop():
+             resp = await cls._trxn_execute(cls._txn_.get(), sql)
+             cls.transaction_result = resp
+
+     @staticmethod
+     async def _trxn_execute(self, sqls: List[str]):
+         return await self.async_api.dml.execute_batch_sql(sqls)
+
+
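# Illustrative sketch (annotation, not part of the packaged file): how nested
# transactions behave. With flatten=True on the outer block, the inner block's
# statements are merged into the outer batch and submitted once on outer exit;
# without flatten, each with-block submits its own batch. The table objects
# tbl_a/tbl_b are hypothetical.
#
#     async with AsyncDataTableMySQL.start_transaction(flatten=True):
#         await tbl_a.insert({'key': 1})
#         async with AsyncDataTableMySQL.start_transaction():
#             await tbl_b.delete(where=tbl_b.table.key == 1)
#     # both statements run as a single batch via execute_batch_sql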
+ class DataTableSyncMixin:
+     synchronize = (
+         'count',
+         'select',
+         'select_raw',
+         'insert',
+         'insert_df',
+         'delete',
+         'update',
+         'update_from_dataframe',
+         'copy_rows',
+         'run_sql',
+     )
+     if TYPE_CHECKING:  # pragma: no cover
+         def count(
+             self,
+             where: Union[str, Term, EmptyCriterion],
+         ) -> int:
+             ...
+
+         def select(
+             self,
+             columns: Iterable[Union[str, Term]] = None,
+             where: Union[str, Term, EmptyCriterion] = None,
+             distinct: bool = False,
+             groupby: Iterable[Union[str, int, Term]] = None,
+             having: Iterable[Union[Term, EmptyCriterion]] = None,
+             orderby: Iterable[Union[str, Field]] = None,
+             order: Union[Order, str] = Order.asc,
+             limit: int = None,
+             offset: int = None,
+         ) -> pd.DataFrame:
+             ...
+
+         def select_raw(
+             self,
+             columns: Iterable[Union[str, Term]] = None,
+             where: Union[str, Term, EmptyCriterion] = None,
+             distinct: bool = False,
+             groupby: Iterable[Union[str, int, Term]] = None,
+             having: Iterable[Union[Term, EmptyCriterion]] = None,
+             orderby: Iterable[Union[str, Field]] = None,
+             order: Union[Order, str] = Order.asc,
+             limit: int = None,
+             offset: int = None,
+         ) -> List[dict]:
+             ...
+
+         def insert(
+             self,
+             value_map: Dict[str, Any] = None,
+             value_list: Iterable[Sequence[Any]] = None,
+             columns: Iterable[Union[str, Term]] = None,
+         ):
+             ...
+
+         def insert_df(
+             self,
+             dataframe: pd.DataFrame,
+             updatecol: Iterable = None,
+             chunksize: int = 5000,
+             auto_fit: bool = True,
+         ) -> Union[CustomSqlRespDTO, Dict, None]:
+             ...
+
+         def delete(
+             self,
+             where: Union[str, Term, EmptyCriterion],
+         ) -> CustomSqlRespDTO:
+             ...
+
+         def update(
+             self,
+             assignment_list: T_DictLike,
+             where: Union[None, Term, EmptyCriterion]
+         ):
+             ...
+
+         def copy_rows(
+             self,
+             where: Union[str, Term, EmptyCriterion],
+             field_map: Dict[str, Union[str, int, FrozenClass, Term]] = None,
+             distinct: bool = False,
+         ) -> CustomSqlRespDTO:
+             ...
+
+         def run_sql(self, sql: str) -> Optional[CustomSqlRespDTO]:
+             ...
+
+         def update_from_dataframe(
+             self,
+             source: pd.DataFrame,
+             chucksize: Optional[int] = None
+         ):
+             ...
+
+
+ class DataTableSyncMeta(SyncMeta):
+     def __new__(mcs, name, bases, namespace, **kwargs):
+         cls = super().__new__(mcs, name, bases, namespace, **kwargs)
+
+         @contextmanager
+         def start_transaction(cls, flatten: bool = False):
+             try:
+                 cls._txn_.get()
+             except LookupError:
+                 cls._txn_.set(_TxnConfig())
+
+             bak_flatten = cls._txn_.get().flatten
+             cls._txn_.get().in_txn.append(True)
+
+             if flatten and not cls._txn_.get().flatten:
+                 force_submit = True
+             else:
+                 force_submit = False
+
+             cls._txn_.get().flatten = bak_flatten or flatten
+
+             if not cls._txn_.get().flatten:
+                 cls._txn_.get().sql.append([])
+
+             try:
+                 yield
+                 if force_submit or not cls._txn_.get().flatten:
+                     cls.__submit_txn()
+             finally:
+                 cls._txn_.get().in_txn.pop()
+                 cls._txn_.get().flatten = bak_flatten
+
+         start_transaction.__doc__ = DOC_START_TX_TEMPLATE % name
+
+         def __submit_txn(cls):
+             if sql := cls._txn_.get().sql.pop():
+                 resp = evloop.run(cls._trxn_execute(cls._txn_.get(), sql))
+                 cls.transaction_result = resp
+
+         cls.start_transaction = classmethod(start_transaction)
+         cls.__submit_txn = classmethod(__submit_txn)
+
+         return cls
+
+
+ class DataTableMySQL(
+     AsyncDataTableMySQL,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
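# Illustrative sketch (annotation, not part of the packaged file): through
# DataTableSyncMeta / SyncMeta, DataTableMySQL exposes the async methods above
# as plain blocking calls, so no event loop handling is needed. The element
# name 'example' is hypothetical.
#
#     tbl = DataTableMySQL('example')
#     t = tbl.table
#     rows = tbl.select(['col_a', 'col_b'], where=t.col_b > 0)   # pd.DataFrame
#     tbl.insert_df(rows.assign(col_b=0))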
+ class AsyncDirectAccessDataTableMySQL(AsyncDataTableMySQL):
+     async def select(
+         self,
+         columns: Iterable[Union[str, Term]] = None,
+         where: Union[str, Term, EmptyCriterion] = None,
+         distinct: bool = False,
+         groupby: Iterable[Union[str, int, Term]] = None,
+         having: Iterable[Union[Term, EmptyCriterion]] = None,
+         orderby: Iterable[Union[str, Field]] = None,
+         order: Union[Order, str] = Order.asc,
+         limit: int = None,
+         offset: int = None,
+     ) -> pd.DataFrame:
+         from deepfos.db import damysql
+         sql = self._build_select_sql(
+             columns,
+             where=where,
+             distinct=distinct,
+             groupby=groupby,
+             having=having,
+             orderby=orderby,
+             order=order,
+             limit=limit,
+             offset=offset
+         )
+         return await damysql.query_dataframe(sql)
+
+     async def select_raw(
+         self,
+         columns: Iterable[Union[str, Term]] = None,
+         where: Union[str, Term, EmptyCriterion] = None,
+         distinct: bool = False,
+         groupby: Iterable[Union[str, int, Term]] = None,
+         having: Iterable[Union[Term, EmptyCriterion]] = None,
+         orderby: Iterable[Union[str, Field]] = None,
+         order: Union[Order, str] = Order.asc,
+         limit: int = None,
+         offset: int = None,
+     ) -> List[dict]:
+         raw_data = await self.select(
+             columns,
+             where=where,
+             distinct=distinct,
+             groupby=groupby,
+             having=having,
+             orderby=orderby,
+             order=order,
+             limit=limit,
+             offset=offset
+         )
+         return raw_data.to_dict(orient='records')
+
+     @txn_support
+     async def run_sql(self, sql: str):
+         from deepfos.db import damysql
+         ctx = self._txn_
+         if ctx.get().in_txn[-1] and self._txn_support_:
+             ctx.get().sql[-1].append(sql)
+             return
+         if len(sql) > SQL_LOG_MAX_LEN:  # pragma: no cover
+             sql_log = sql[:SQL_LOG_MAX_LEN - 4] + "..."
+         else:
+             sql_log = sql
+         logger.debug(f"Execute SQL: [{sql_log}].")  # pragma: no cover
+         return await damysql.execute(sql)
+
+     @staticmethod
+     async def _trxn_execute(self, sqls: List[str]):
+         from deepfos.db import damysql
+         return await damysql.trxn_execute(sqls)
+
+     async def count(
+         self,
+         where: Union[str, Term, EmptyCriterion],
+     ) -> int:
+         from deepfos.db import damysql
+         sql = f"SELECT COUNT(1) FROM {self._quoted_table_name} WHERE {self._parse_where(where)};"
+         res = await damysql.select(sql)
+         return res[0][0]
+
+
+ class DirectAccessDataTableMySQL(
+     AsyncDirectAccessDataTableMySQL,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
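# Illustrative sketch (annotation, not part of the packaged file): the
# DirectAccess variants reuse the query building above but bypass the HTTP API,
# executing SQL through deepfos.db.damysql against the database directly. The
# call surface stays the same, so they are drop-in replacements:
#
#     tbl = DirectAccessDataTableMySQL('example')   # hypothetical element name
#     n = tbl.count(where=tbl.table.col_a >= 1)     # runs via damysql.select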
1732
+ class AsyncDataTableClickHouse(AsyncDataTableMySQL):
1733
+ __doc__ = DOC_TEMPLATE.format(DB='ClickHouse')
1734
+ api_class = ClickHouseAPI
1735
+ api: ClickHouseAPI
1736
+ query = ClickHouseQuery
1737
+ convertor = _ClickHouseDFConvertor(quote_char=AsyncDataTableMySQL.quote_char)
1738
+
1739
+ @cached_property
1740
+ def structure(self) -> ClickHouseTableStructure:
1741
+ """数据表的表结构
1742
+
1743
+ 主要包含了所有列的列名和类型信息,用于
1744
+ 在查询和保存时对数据做类型转化的预处理。
1745
+ """
1746
+ columns = self.meta.datatableColumn
1747
+ columns.append(MiscModel(name='createtime', type='int', whetherEmpty=False))
1748
+ columns.append(MiscModel(name='createdate', type='datetime', whetherEmpty=False))
1749
+ return ClickHouseTableStructure(columns)
1750
+
1751
+ async def delete(self, where: Dict[str, Union[VT, List[VT]]]):
1752
+ """
1753
+ 删除数据表的数据
1754
+
1755
+ Args:
1756
+ where: 删除条件。列名-> 要删除的值
1757
+
1758
+ .. admonition:: 示例
1759
+
1760
+ .. code-block:: python
1761
+
1762
+ tbl = DataTableClickHouse("example")
1763
+ tbl.delete({
1764
+ "col_a": 1,
1765
+ "col_b": ["x", "y"]
1766
+ })
1767
+
1768
+ 将执行sql:
1769
+
1770
+ .. code-block:: sql
1771
+
1772
+ ALTER TABLE example
1773
+ DELETE
1774
+ WHERE
1775
+ `col_a` IN (1)
1776
+ AND `col_b` IN ('x', 'y')
1777
+
1778
+ Warnings:
1779
+ 由于ClickHouse数据库的特性, ``delete`` 可能不会立即生效,
1780
+ 所以不要依赖此方法保证数据一致性。并且不推荐频繁使用。
1781
+
1782
+ """
1783
+ del_cols = {}
1784
+ for k, v in where.items():
1785
+ if isinstance(v, str):
1786
+ del_cols[k] = [v]
1787
+ else:
1788
+ del_cols[k] = v
1789
+
1790
+ return await self.async_api.dml.delete_data(
1791
+ DatatableDataDeleteDTO.construct_from(
1792
+ self.element_info,
1793
+ columnList=del_cols
1794
+ ))
1795
+
1796
+ def _format_field(
1797
+ self,
1798
+ field_map: Dict[str, Union[str, int, FrozenClass, Term]]
1799
+ ) -> Tuple[str, str]:
1800
+ base, incr = self._field_map_templates
1801
+ fmap = {**base, **field_map, **incr}
1802
+
1803
+ field_strings = []
1804
+
1805
+ for field, value in fmap.items():
1806
+ if value is None:
1807
+ field_strings.append(f"`{field}`")
1808
+ elif isinstance(value, Term):
1809
+ value = value.get_sql(quote_char=self.quote_char)
1810
+ field_strings.append(value)
1811
+ else:
1812
+ field_strings.append(repr(value))
1813
+
1814
+ return ','.join(f"`{k}`" for k in fmap), ','.join(field_strings)
1815
+
1816
+     @classmethod
+     @asynccontextmanager
+     async def start_transaction(cls, flatten: bool = False):
+         """Unavailable
+
+         ClickHouse does not support transactions
+         """
+         try:
+             yield
+         finally:
+             raise NotImplementedError('ClickHouse does not support transaction.')
+
+
+ class DataTableClickHouse(
+     AsyncDataTableClickHouse,
+     DataTableSyncMixin,
+     metaclass=SyncMeta
+ ):
+     @classmethod
+     def start_transaction(cls, flatten: bool = False):
+         """Unavailable
+
+         ClickHouse does not support transactions
+         """
+         raise NotImplementedError('ClickHouse does not support transaction.')
+
+
+ class AsyncDirectAccessDataTableClickHouse(AsyncDirectAccessDataTableMySQL):
+     __doc__ = DOC_TEMPLATE.format(DB='ClickHouse')
+     api_class = ClickHouseAPI
+     api: ClickHouseAPI
+     query = ClickHouseQuery
+
+     @classmethod
+     @asynccontextmanager
+     async def start_transaction(cls, flatten: bool = False):
+         """Unavailable
+
+         ClickHouse does not support transactions
+         """
+         try:
+             yield
+         finally:
+             raise NotImplementedError('ClickHouse does not support transaction.')
+
+     async def run_sql(self, sql: str):  # pragma: no cover
+         from deepfos.db import daclickhouse
+         if len(sql) > SQL_LOG_MAX_LEN:
+             sql_log = sql[:SQL_LOG_MAX_LEN - 4] + "..."
+         else:
+             sql_log = sql
+         logger.debug(f"Execute SQL: [{sql_log}].")
+         return await daclickhouse.execute(sql)
+
+     async def insert_df(
+         self,
+         dataframe: pd.DataFrame,
+         updatecol: Iterable = None,
+         chunksize: int = 5000,
+         auto_fit: bool = True,
+     ) -> List:
+         from deepfos.db import daclickhouse
+         if updatecol is not None:
+             warnings.warn(
+                 "ClickHouse datatables do not support the INSERT INTO ... "
+                 "ON DUPLICATE syntax; this argument is ignored when building the SQL.",
+                 Warning)
+         r = []
+         for df in split_dataframe(dataframe, chunksize):
+             res = await daclickhouse.insert_dataframe(self.table_name, df)
+             r.append(res)
+         return r
+
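A hedged usage sketch of the direct-access insert above, through the sync wrapper defined further below: the DataFrame is split into ``chunksize``-row batches, each written via ``daclickhouse.insert_dataframe``; passing ``updatecol`` only triggers the warning, since there is no upsert syntax on this path. Names are illustrative.

.. code-block:: python

    # Illustrative only; "example" and its columns are assumed.
    import pandas as pd

    df = pd.DataFrame({"col_a": [1, 2, 3], "col_b": ["x", "y", "z"]})
    tbl = DirectAccessDataTableClickHouse("example")
    results = tbl.insert_df(df, chunksize=2)  # two batches: 2 rows + 1 row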
+     async def select(
+         self,
+         columns: Iterable[Union[str, Term]] = None,
+         where: Union[str, Term, EmptyCriterion] = None,
+         distinct: bool = False,
+         groupby: Iterable[Union[str, int, Term]] = None,
+         having: Iterable[Union[Term, EmptyCriterion]] = None,
+         orderby: Iterable[Union[str, Field]] = None,
+         order: Union[Order, str] = Order.asc,
+         limit: int = None,
+         offset: int = None,
+     ) -> pd.DataFrame:
+         from deepfos.db import daclickhouse
+         sql = self._build_select_sql(
+             columns,
+             where=where,
+             distinct=distinct,
+             groupby=groupby,
+             having=having,
+             orderby=orderby,
+             order=order,
+             limit=limit,
+             offset=offset
+         )
+         return await daclickhouse.query_dataframe(sql)
+
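And a hedged sketch of the corresponding query side: filters can be raw SQL strings or pypika terms, and the result comes back as a ``pandas.DataFrame``.

.. code-block:: python

    # Illustrative only.
    from pypika import Field

    tbl = DirectAccessDataTableClickHouse("example")
    df = tbl.select(
        columns=["col_a", "col_b"],
        where=Field("col_a") > 0,
        orderby=["col_a"],
        limit=100,
    )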
+     async def count(
+         self,
+         where: Union[str, Term, EmptyCriterion],
+     ) -> int:
+         from deepfos.db import daclickhouse
+         sql = f"SELECT COUNT(1) FROM {self._quoted_table_name} WHERE {self._parse_where(where)};"
+         res = await daclickhouse.select(sql)
+         return res[0][0]
+
+     async def delete(self, where: Dict[str, Union[VT, List[VT]]]):
+         """
+         Delete data from the datatable
+
+         Args:
+             where: Deletion condition. Column name -> value(s) to delete
+
+         .. admonition:: Example
+
+             .. code-block:: python
+
+                 tbl = DataTableClickHouse("example")
+                 tbl.delete({
+                     "col_a": 1,
+                     "col_b": ["x", "y"]
+                 })
+
+             will execute the SQL:
+
+             .. code-block:: sql
+
+                 ALTER TABLE example
+                 DELETE
+                 WHERE
+                     `col_a` IN (1)
+                     AND `col_b` IN ('x', 'y')
+
+         Warnings:
+             Due to the nature of ClickHouse, ``delete`` may not take effect
+             immediately, so do not rely on this method to guarantee data
+             consistency. Frequent use is also not recommended.
+
+         """
+         t = self.table
+
+         q = self.query.from_(t).delete()
+
+         for k, v in where.items():
+             if isinstance(v, List):
+                 q = q.where(getattr(t, k).isin(v))
+             else:
+                 q = q.where(getattr(t, k) == v)
+
+         sql = q.get_sql(quote_char=self.quote_char)
+         return await self.run_sql(sql)
+
+
+ class DirectAccessDataTableClickHouse(
+     AsyncDirectAccessDataTableClickHouse,
+     DataTableSyncMixin,
+     metaclass=SyncMeta
+ ):
+     @classmethod
+     def start_transaction(cls, flatten: bool = False):
+         """Unavailable
+
+         ClickHouse does not support transactions
+         """
+         raise NotImplementedError('ClickHouse does not support transaction.')
+
+
+ if OPTION.general.db_direct_access:
+     AsyncDataTableMySQL = AsyncDirectAccessDataTableMySQL
+     AsyncDataTableClickHouse = AsyncDirectAccessDataTableClickHouse
+     DataTableMySQL = DirectAccessDataTableMySQL
+     DataTableClickHouse = DirectAccessDataTableClickHouse
+
+ Datatable = DataTableMySQL
+
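Because the rebinding above runs when this module is imported, the direct-access flag only has an effect if it is set before the module's first import. A hedged sketch; the option attribute matches the code above, while the import paths are assumptions.

.. code-block:: python

    # Set the flag before the datatable module is imported; afterwards the
    # public names are already bound to the API-backed classes.
    from deepfos.options import OPTION  # import path assumed

    OPTION.general.db_direct_access = True
    from deepfos.element.datatable import DataTableMySQL  # direct-access variant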
+
+ class _OracleField(Field):
+     def get_sql(self, **kwargs: Any) -> str:
+         with_alias = kwargs.pop("with_alias", False)
+         with_namespace = kwargs.pop("with_namespace", False)
+         quote_char = kwargs.pop("quote_char", '"')
+
+         field_sql = format_quotes(self.name, quote_char)
+         field_sql = field_sql.upper()
+         # Need to add namespace if the table has an alias
+         if self.table and (with_namespace or self.table.alias):
+             table_name = self.table.get_table_name()
+             field_sql = "{namespace}.{name}".format(
+                 namespace=format_quotes(table_name, quote_char),
+                 name=field_sql,
+             )
+
+         field_alias = getattr(self, "alias", None)
+         if with_alias:
+             return format_alias_sql(field_sql, field_alias, quote_char=quote_char, **kwargs)
+         return field_sql
+
+
+ class OracleTable(Table):
+     def field(self, name: str) -> Field:
+         return _OracleField(name, table=self, alias=name)
+
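A hedged illustration of the Oracle quoting above: the field name is upper-cased inside its quotes, while the alias attached by ``OracleTable.field`` keeps the caller's casing. Expected renderings are shown in comments, not verified against a database.

.. code-block:: python

    t = OracleTable("sales")
    f = t.field("region")

    f.get_sql()                 # -> "REGION"
    f.get_sql(with_alias=True)  # -> "REGION" "region"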
+
+ class AsyncDataTableOracle(AsyncDataTableMySQL):
+     __doc__ = DOC_TEMPLATE.format(DB='Oracle')
+     api: OracleAPI
+     api_class = OracleAPI
+     quote_char = '"'
+     convertor = _OracleDFConvertor(quote_char='"')
+     query = OracleQuery
+
+     @cached_property
+     def table(self) -> Table:
+         return OracleTable(self.table_name.upper())
+
+     @cached_property
+     def structure(self) -> OracleTableStructure:
+         return OracleTableStructure(self.meta.datatableColumn)
+
+     @cached_property
+     def _field_map_templates(self) -> Tuple[Dict[str, None], Dict[str, Type[null]]]:
+         base_tmpl = {}
+         incr_cols = {}
+
+         for col in self.meta.datatableColumn:
+             if col.whetherIncrement:
+                 continue
+             base_tmpl[col.name.upper()] = None
+
+         return base_tmpl, incr_cols
+
+     @txn_support
+     async def copy_rows(
+         self,
+         where: Union[str, Term, EmptyCriterion],
+         field_map: Dict[str, Union[str, int, FrozenClass, Term]] = None,
+         distinct: bool = False,
+     ):
+         new_field_map = None
+         if field_map is not None:
+             new_field_map = {k.upper(): v for k, v in field_map.items()}
+         return await super().copy_rows(where, new_field_map, distinct)
+
+     @txn_support
+     async def insert(
+         self,
+         value_map: Dict[str, Any] = None,
+         value_list: Iterable[Sequence[Any]] = None,
+         columns: Iterable[Union[str, Term]] = None,
+     ):
+         insert_line = f"INTO {self._quoted_table_name} ({{cols}}) VALUES ({{vals}})"
+
+         def quote_string(s):
+             return f'"{s.upper()}"'
+
+         if value_map is not None:
+             insert = insert_line.format(
+                 cols=','.join(map(quote_string, value_map.keys())),
+                 vals=','.join(map(repr, value_map.values()))
+             )
+         elif value_list is None:
+             raise ValueError('Neither of the arguments [value_map, value_list] is set.')
+         else:
+             # Materialize once so generators are not exhausted by len()
+             columns = list(columns or self.structure.columns.keys())
+             column_num = len(columns)
+             cols = ','.join(map(quote_string, columns))
+
+             insert_list = []
+             for value in value_list:
+                 if len(value) != column_num:
+                     raise ValueError(
+                         'Value number mismatch with column number. '
+                         f'values: {value}, number: {len(value)}, '
+                         f'columns number: {column_num}.')
+                 insert_list.append(insert_line.format(
+                     cols=cols,
+                     vals=','.join(map(repr, value))
+                 ))
+             insert = '\n'.join(insert_list)
+         return await self.run_sql(f"INSERT ALL {insert} SELECT 1 FROM DUAL")
+
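To make the multi-row Oracle path concrete, a hedged sketch of the statement ``insert`` builds; table and column names are illustrative.

.. code-block:: python

    # Hypothetical call; the generated statement is sketched in the comment.
    tbl = DataTableOracle("example")
    tbl.insert(
        value_list=[(1, 'a'), (2, 'b')],
        columns=['id', 'name'],
    )
    # Roughly:
    #   INSERT ALL INTO "EXAMPLE" ("ID","NAME") VALUES (1,'a')
    #   INTO "EXAMPLE" ("ID","NAME") VALUES (2,'b')
    #   SELECT 1 FROM DUAL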
+
+ class DataTableOracle(
+     AsyncDataTableOracle,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
+ class AsyncDataTableSQLServer(AsyncDataTableMySQL):
+     __doc__ = DOC_TEMPLATE.format(DB='SQLServer')
+     api: SQLServerAPI
+     api_class = SQLServerAPI
+     quote_char = ''
+     convertor = _SQLServerDFConvertor(quote_char=quote_char)
+     query = MSSQLQuery
+
+     @cached_property
+     def structure(self) -> MySQLTableStructure:
+         return MySQLTableStructure(self.meta.datatableColumn)
+
+     async def select_raw(
+         self,
+         columns: Iterable[Union[str, Term]] = None,
+         where: Union[str, Term, EmptyCriterion] = None,
+         distinct: bool = False,
+         groupby: Iterable[Union[str, int, Term]] = None,
+         having: Iterable[Union[Term, EmptyCriterion]] = None,
+         orderby: Iterable[Union[str, Field]] = None,
+         order: Union[Order, str] = Order.asc,
+         limit: int = None,
+         offset: int = None,
+     ):
+         if limit is not None or offset is not None:
+             if not orderby:
+                 raise ValueError("orderby must not be empty when "
+                                  "limit or offset is provided.")
+         return await super().select_raw(
+             columns,
+             where=where,
+             distinct=distinct,
+             groupby=groupby,
+             having=having,
+             orderby=orderby,
+             order=order,
+             limit=limit,
+             offset=offset
+         )
+
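The override above reflects a T-SQL rule: ``OFFSET ... FETCH`` pagination is only valid together with ``ORDER BY``, so the check fails fast instead of letting the server reject the query. A hedged sketch:

.. code-block:: python

    # Illustrative only.
    tbl = DataTableSQLServer("example")

    tbl.select_raw(limit=10)                     # ValueError: orderby is empty
    rows = tbl.select_raw(orderby=["id"], limit=10, offset=20)  # OK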
+
+ class DataTableSQLServer(
+     AsyncDataTableSQLServer,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
+ class AsyncDataTablePostgreSQL(AsyncDataTableMySQL):
+     __doc__ = DOC_TEMPLATE.format(DB='PostgreSQL')
+     api: PostgreSQLAPI
+     api_class = PostgreSQLAPI
+     quote_char = '"'
+     convertor = _PostgreSQLDFConvertor(quote_char=quote_char)
+     query = PostgreSQLQuery
+
+     @cached_property
+     def structure(self) -> PGTableStructure:
+         return PGTableStructure(self.meta.datatableColumn)
+
+     @txn_support
+     async def insert_df(
+         self,
+         dataframe: pd.DataFrame,
+         updatecol: Iterable = None,
+         chunksize: int = 5000,
+         auto_fit: bool = True,
+         conflict_target: Iterable[str] = None,
+     ) -> Union[CustomSqlRespDTO, Dict, None]:
+         """Insert the data of a ``DataFrame`` into the current datatable
+
+         Before insertion, the DataFrame is processed as follows:
+
+         #. (mandatory) All empty values are converted to null so they can be stored properly
+         #. (optional) String columns that are **NOT NULL** are filled with the string ``'null'`` (this may change in the future; do not rely on it)
+         #. (optional) Decimal columns are automatically ``round``-ed to the defined number of decimal places
+
+         The **(optional)** steps above can be disabled with ``auto_fit=False``.
+
+         Args:
+             dataframe: Data to insert
+             updatecol: Columns to update (used for INSERT INTO ON CONFLICT)
+             chunksize: Number of rows per insert statement
+             auto_fit: Whether to adjust the data automatically
+             conflict_target: Conflict columns for the INSERT INTO ON CONFLICT syntax; if not provided, the primary key columns are used
+
+         Hint:
+             If a single insert carries too much data and exceeds the database's
+             limit for one SQL statement, lower the chunksize; this method will
+             split one large statement into several smaller ones.
+
+         Returns:
+             Record of the executed operations
+
+         """
+         if dataframe.empty:
+             return
+
+         if auto_fit:
+             dataframe = dataframe.copy()
+             dataframe = self.structure.fit(dataframe, dataframe.columns)
+         else:
+             dataframe = dataframe[dataframe.columns.intersection(self.structure.columns)]
+
+         if conflict_target is None:
+             conflict_target = [col.name for col in self.meta.datatableColumn if col.whetherPrimary] or None
+
+         sqls = self.convertor.iter_sql(dataframe, self.table_name, updatecol, chunksize, conflict_target=conflict_target)
+         return await self._maybe_submit_in_txn(sqls)
+
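A hedged upsert sketch for the PostgreSQL path above: with ``updatecol`` given, the convertor emits ``INSERT ... ON CONFLICT ... DO UPDATE``, and the conflict target defaults to the primary-key columns when not supplied. Table and column names are illustrative.

.. code-block:: python

    # Illustrative only; "example" with primary key "id" is assumed.
    import pandas as pd

    df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
    tbl = DataTablePostgreSQL("example")
    tbl.insert_df(df, updatecol=["name"])
    # Roughly: INSERT INTO "example" (...) VALUES (...)
    #          ON CONFLICT ("id") DO UPDATE SET "name" = EXCLUDED."name"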
+     @cached_property
+     def _field_map_templates(self) -> Tuple[Dict[str, None], Dict[str, Type[null]]]:
+         base_tmpl = {}
+         incr_cols = {}
+
+         for col in self.meta.datatableColumn:
+             if col.whetherIncrement:
+                 continue
+             base_tmpl[col.name] = None
+
+         return base_tmpl, incr_cols
+
+
+ class DataTablePostgreSQL(
+     AsyncDataTablePostgreSQL,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
+ class AsyncDataTableKingBase(AsyncDataTablePostgreSQL):
+     __doc__ = DOC_TEMPLATE.format(DB='KingBase')
+     api: KingBaseAPI
+     api_class = KingBaseAPI
+
+
+ class DataTableKingBase(
+     AsyncDataTableKingBase,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
+ class AsyncDataTableGauss(AsyncDataTablePostgreSQL):
+     __doc__ = DOC_TEMPLATE.format(DB='Gauss')
+     api: GaussAPI
+     api_class = GaussAPI
+
+
+ class DataTableGauss(
+     AsyncDataTableGauss,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
+ class AsyncDataTableDaMeng(AsyncDataTableOracle):
+     __doc__ = DOC_TEMPLATE.format(DB='DaMeng')
+     api: DaMengAPI
+     api_class = DaMengAPI
+
+
+ class DataTableDaMeng(
+     AsyncDataTableDaMeng,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
+ class AsyncDataTableDeepEngine(AsyncDataTableClickHouse):
+     __doc__ = DOC_TEMPLATE.format(DB='DeepEngine')
+     api: DeepEngineAPI
+     api_class = DeepEngineAPI
+     convertor = _DeepEngineDFConvertor(quote_char=AsyncDataTableClickHouse.quote_char)
+
+     @classmethod
+     @asynccontextmanager
+     async def start_transaction(cls, flatten: bool = False):
+         """Unavailable
+
+         DeepEngine does not support transactions
+         """
+         try:
+             yield
+         finally:
+             raise NotImplementedError('DeepEngine does not support transaction.')
+
+
+ class DataTableDeepEngine(
+     AsyncDataTableDeepEngine,
+     DataTableSyncMixin,
+     metaclass=SyncMeta
+ ):
+     @classmethod
+     def start_transaction(cls, flatten: bool = False):
+         """Unavailable
+
+         DeepEngine does not support transactions
+         """
+         raise NotImplementedError('DeepEngine does not support transaction.')
+
+
+ class AsyncDataTableDeepModel(AsyncDataTablePostgreSQL):
+     __doc__ = DOC_TEMPLATE.format(DB='DeepModel')
+     api: DeepModelAPI
+     api_class = DeepModelAPI
+
+
+ class DataTableDeepModel(
+     AsyncDataTableDeepModel,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
+ class AsyncDataTableDeepModelKingBase(AsyncDataTableKingBase):
+     __doc__ = DOC_TEMPLATE.format(DB='DeepModelKingBase')
+     api: DeepModelKingBaseAPI
+     api_class = DeepModelKingBaseAPI
+
+
+ class DataTableDeepModelKingBase(
+     AsyncDataTableDeepModelKingBase,
+     DataTableSyncMixin,
+     metaclass=DataTableSyncMeta
+ ):
+     pass
+
+
+ _RE_PARSE_SERVER = re.compile(r"data[-]?table-(.*?)-server[\d]-[\d]")
+
+
+ TO_MODULE_TYPE = CaseInsensitiveDict(
+     {
+         'mysql': MySQLAPI.module_type,
+         'clickhouse': ClickHouseAPI.module_type,
+         'sqlserver': SQLServerAPI.module_type,
+         'oracle': OracleAPI.module_type,
+         'kingbase': KingBaseAPI.module_type,
+         'gauss': GaussAPI.module_type,
+         'dameng': DaMengAPI.module_type,
+         'postgresql': PostgreSQLAPI.module_type,
+         'deepengine': DeepEngineAPI.module_type,
+         'deepmodel': DeepModelAPI.module_type,
+         'deepmodelkingbase': DeepModelKingBaseAPI.module_type,
+     }
+ )
+
+ TABLE = CaseInsensitiveDict(
+     {
+         MySQLAPI.module_type: (DataTableMySQL, AsyncDataTableMySQL),
+         ClickHouseAPI.module_type: (DataTableClickHouse, AsyncDataTableClickHouse),
+         SQLServerAPI.module_type: (DataTableSQLServer, AsyncDataTableSQLServer),
+         OracleAPI.module_type: (DataTableOracle, AsyncDataTableOracle),
+         KingBaseAPI.module_type: (DataTableKingBase, AsyncDataTableKingBase),
+         GaussAPI.module_type: (DataTableGauss, AsyncDataTableGauss),
+         DaMengAPI.module_type: (DataTableDaMeng, AsyncDataTableDaMeng),
+         PostgreSQLAPI.module_type: (DataTablePostgreSQL, AsyncDataTablePostgreSQL),
+         DeepEngineAPI.module_type: (DataTableDeepEngine, AsyncDataTableDeepEngine),
+         DeepModelAPI.module_type: (DataTableDeepModel, AsyncDataTableDeepModel),
+         DeepModelKingBaseAPI.module_type: (DataTableDeepModelKingBase, AsyncDataTableDeepModelKingBase),
+     }
+ )
+
+ T_DatatableClass = Union[
+     Type[DataTableMySQL],
+     Type[DataTableClickHouse],
+     Type[DataTableOracle],
+     Type[DataTableSQLServer],
+     Type[DataTableKingBase],
+     Type[DataTableGauss],
+     Type[DataTableDaMeng],
+     Type[DataTablePostgreSQL],
+     Type[DataTableDeepEngine],
+     Type[DataTableDeepModel],
+     Type[DataTableDeepModelKingBase],
+ ]
+
+ T_AsyncDatatableClass = Union[
+     Type[AsyncDataTableMySQL],
+     Type[AsyncDataTableClickHouse],
+     Type[AsyncDataTableOracle],
+     Type[AsyncDataTableSQLServer],
+     Type[AsyncDataTableKingBase],
+     Type[AsyncDataTableGauss],
+     Type[AsyncDataTableDaMeng],
+     Type[AsyncDataTablePostgreSQL],
+     Type[AsyncDataTableDeepEngine],
+     Type[AsyncDataTableDeepModel],
+     Type[AsyncDataTableDeepModelKingBase],
+ ]
+
+ T_DatatableInstance = Union[
+     DataTableMySQL,
+     DataTableClickHouse,
+     DataTableOracle,
+     DataTableSQLServer,
+     DataTableKingBase,
+     DataTableGauss,
+     DataTableDaMeng,
+     DataTablePostgreSQL,
+     DataTableDeepEngine,
+     DataTableDeepModel,
+     DataTableDeepModelKingBase,
+ ]
+
+ T_AsyncDatatableInstance = Union[
+     AsyncDataTableMySQL,
+     AsyncDataTableClickHouse,
+     AsyncDataTableOracle,
+     AsyncDataTableSQLServer,
+     AsyncDataTableKingBase,
+     AsyncDataTableGauss,
+     AsyncDataTableDaMeng,
+     AsyncDataTablePostgreSQL,
+     AsyncDataTableDeepEngine,
+     AsyncDataTableDeepModel,
+     AsyncDataTableDeepModelKingBase,
+ ]
+
+
+ def get_table_class(
+     element_type: str,
+     sync: bool = True
+ ) -> Union[
+     T_DatatableClass,
+     T_AsyncDatatableClass
+ ]:
+     """
+     Get the datatable element class for the given element type
+
+     Args:
+         element_type: module type or server name
+         sync: return the sync or async element class; defaults to sync
+
+     """
+     if sync:
+         index = 0
+     else:
+         index = 1
+
+     if element_type is None:
+         raise ValueError("`element_type` should be a string value.")
+
+     module_type = element_type
+
+     if match := _RE_PARSE_SERVER.match(element_type):
+         server_name = match.group(1)
+         module_type = TO_MODULE_TYPE.get(server_name)
+
+         if module_type is None:
+             raise ValueError(f"{element_type} is not a known datatable server.")
+
+     table = TABLE.get(module_type)
+
+     if table is None:
+         raise TypeError(f"Unknown datatable type: {element_type}")
+
+     return table[index]
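A hedged usage sketch of the resolution above: a module type is looked up in ``TABLE`` directly, while a server name such as ``datatable-mysql-server1-0`` (format assumed from ``_RE_PARSE_SERVER``, not a documented contract) is first reduced to a module type via ``TO_MODULE_TYPE``.

.. code-block:: python

    # Illustrative only.
    cls_sync = get_table_class(MySQLAPI.module_type)                # DataTableMySQL
    cls_async = get_table_class(MySQLAPI.module_type, sync=False)   # AsyncDataTableMySQL
    cls_from_server = get_table_class('datatable-mysql-server1-0')  # parsed to 'mysql'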