deepfos 1.1.60__py3-none-any.whl → 1.1.78__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Files changed (68)
  1. deepfos/_version.py +3 -3
  2. deepfos/api/V1_1/models/business_model.py +322 -322
  3. deepfos/api/V1_1/models/dimension.py +1075 -1075
  4. deepfos/api/V1_2/models/dimension.py +1119 -1116
  5. deepfos/api/account.py +1 -0
  6. deepfos/api/app.py +1 -0
  7. deepfos/api/base.py +70 -71
  8. deepfos/api/deep_pipeline.py +1 -1
  9. deepfos/api/deepconnector.py +3 -3
  10. deepfos/api/financial_model.py +12 -0
  11. deepfos/api/models/account.py +130 -130
  12. deepfos/api/models/accounting_engines.py +250 -250
  13. deepfos/api/models/app.py +355 -355
  14. deepfos/api/models/approval_process.py +231 -231
  15. deepfos/api/models/base.py +49 -209
  16. deepfos/api/models/business_model.py +239 -239
  17. deepfos/api/models/consolidation.py +196 -196
  18. deepfos/api/models/consolidation_process.py +31 -31
  19. deepfos/api/models/datatable_mysql.py +78 -78
  20. deepfos/api/models/deep_pipeline.py +20 -9
  21. deepfos/api/models/deepconnector.py +9 -8
  22. deepfos/api/models/deepfos_task.py +118 -118
  23. deepfos/api/models/deepmodel.py +120 -120
  24. deepfos/api/models/dimension.py +613 -610
  25. deepfos/api/models/financial_model.py +691 -663
  26. deepfos/api/models/journal_model.py +120 -120
  27. deepfos/api/models/journal_template.py +185 -185
  28. deepfos/api/models/memory_financial_model.py +131 -131
  29. deepfos/api/models/platform.py +16 -16
  30. deepfos/api/models/python.py +32 -32
  31. deepfos/api/models/reconciliation_engine.py +104 -104
  32. deepfos/api/models/reconciliation_report.py +29 -29
  33. deepfos/api/models/role_strategy.py +213 -213
  34. deepfos/api/models/smartlist.py +86 -86
  35. deepfos/api/models/space.py +312 -312
  36. deepfos/api/models/system.py +299 -297
  37. deepfos/api/models/variable.py +131 -131
  38. deepfos/api/models/workflow.py +290 -270
  39. deepfos/api/platform.py +3 -1
  40. deepfos/api/space.py +1 -0
  41. deepfos/api/system.py +1 -0
  42. deepfos/api/workflow.py +8 -0
  43. deepfos/cache.py +50 -4
  44. deepfos/element/bizmodel.py +2 -2
  45. deepfos/element/deep_pipeline.py +29 -16
  46. deepfos/element/deepconnector.py +36 -1
  47. deepfos/element/deepmodel.py +591 -332
  48. deepfos/element/dimension.py +30 -17
  49. deepfos/element/finmodel.py +542 -101
  50. deepfos/element/journal.py +20 -10
  51. deepfos/element/rolestrategy.py +4 -4
  52. deepfos/element/variable.py +23 -17
  53. deepfos/element/workflow.py +60 -3
  54. deepfos/exceptions/__init__.py +1 -1
  55. deepfos/lib/deepchart.py +14 -13
  56. deepfos/lib/deepux.py +11 -11
  57. deepfos/lib/discovery.py +3 -0
  58. deepfos/lib/filterparser.py +2 -2
  59. deepfos/lib/k8s.py +101 -0
  60. deepfos/lib/msg.py +34 -8
  61. deepfos/lib/serutils.py +34 -9
  62. deepfos/lib/sysutils.py +37 -18
  63. deepfos/lib/utils.py +62 -2
  64. deepfos/options.py +39 -8
  65. {deepfos-1.1.60.dist-info → deepfos-1.1.78.dist-info}/METADATA +7 -7
  66. {deepfos-1.1.60.dist-info → deepfos-1.1.78.dist-info}/RECORD +68 -67
  67. {deepfos-1.1.60.dist-info → deepfos-1.1.78.dist-info}/WHEEL +0 -0
  68. {deepfos-1.1.60.dist-info → deepfos-1.1.78.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,6 @@
  import re
+ import textwrap
+ import threading

  import numpy as np
  from asyncpg.connection import connect as pg_conn
@@ -6,7 +8,7 @@ import json
  import uuid
  from contextlib import asynccontextmanager, contextmanager
  from contextvars import ContextVar
- from itertools import count
+ from itertools import count, chain
  from typing import (
      List, TYPE_CHECKING, Any, Dict, Union, NamedTuple,
      Iterable, Optional, Literal
@@ -15,9 +17,10 @@ from typing import (
  import edgedb
  import pandas as pd
  from loguru import logger
- from pydantic import BaseModel, parse_obj_as, ValidationError, Field
+ from pydantic import BaseModel, ValidationError, Field

  from deepfos import OPTION
+ from deepfos.api.models import compat_parse_obj_as as parse_obj_as
  from deepfos.api.deepmodel import DeepModelAPI
  from deepfos.api.models.deepmodel import (
      ObjectBasicDTO, ObjectParam,
@@ -34,7 +37,9 @@ from deepfos.exceptions import (
  from deepfos.lib import serutils
  from deepfos.lib.asynchronous import future_property, evloop
  from deepfos.lib.decorator import flagmethod, cached_property, lru_cache
- from deepfos.lib.utils import AliasGenerator, to_version_tuple
+ from deepfos.lib.utils import (
+     AliasGenerator, to_version_tuple,
+ )

  __all__ = ['AsyncDeepModel', 'DeepModel', 'to_fields', 'QueryWithArgs']

@@ -117,6 +122,7 @@ dm_type_to_edb_scalar = {
      'uuid': 'std::str',
      'json': 'std::json',
  }
+ TAB = ' ' * 4


  class ObjectElement(ObjectParam):
@@ -128,6 +134,7 @@ class ObjectElement(ObjectParam):
  class QueryWithArgs(BaseModel):
      commands: str
      kwargs: Dict[str, Any] = Field(default_factory=dict)
+     globals: Dict[str, Any] = Field(default_factory=dict)


  class MainField(NamedTuple):
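`QueryWithArgs` gains a `globals` field, so each statement in a batch can carry its own EdgeDB global state alongside its query kwargs. A minimal usage sketch; the object, parameter, and global names below are invented:

```python
from deepfos.element.deepmodel import QueryWithArgs

# Hypothetical values throughout; only the three field names come from the diff.
ql = QueryWithArgs(
    commands="update Order filter .code = <std::str>$code set { qty := 1 }",
    kwargs={"code": "SO-001"},
    globals={"space42::current_user_id": "u-007"},
)
```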
@@ -146,25 +153,25 @@ class MainPgCol(NamedTuple):
  class ConstraintField(BaseModel):
      name: str
      expr: str
-     finalexpr: str = None
-     subjectexpr: str = None
+     finalexpr: Optional[str] = None
+     subjectexpr: Optional[str] = None


  class NamedField(BaseModel):
-     name: str = None
+     name: Optional[str] = None


  class NameIdField(BaseModel):
-     id: uuid.UUID = None
-     name: str = None
+     id: Optional[uuid.UUID] = None
+     name: Optional[str] = None


  class TargetField(BaseModel):
-     id: uuid.UUID = None
+     id: Optional[uuid.UUID] = None
      name: str
      external: bool = False
      annotations: List[Dict[str, str]] = Field(default_factory=list)
-     properties: List[NameIdField] = None
+     properties: Optional[List[NameIdField]] = None

      @property
      def is_scalar(self) -> bool:
@@ -184,7 +191,7 @@ class TargetField(BaseModel):


  class LinkPropField(BaseModel):
-     id: uuid.UUID = None
+     id: Optional[uuid.UUID] = None
      name: str
      target: NamedField

@@ -194,15 +201,15 @@ class LinkPropField(BaseModel):


  class PtrInfo(BaseModel):
-     id: Union[uuid.UUID, Literal['id']] = None
+     id: Optional[Union[uuid.UUID, Literal['id']]] = None
      name: str
      target: TargetField
      properties: List[LinkPropField] = Field(default_factory=list)
-     expr: str = None
-     source_property: NamedField = None
-     target_property: NamedField = None
-     required: bool = False
-     cardinality: str = None
+     expr: Optional[str] = None
+     source_property: Optional[NamedField] = None
+     target_property: Optional[NamedField] = None
+     required: Optional[bool] = False
+     cardinality: Optional[str] = None
      constraints: List[ConstraintField] = Field(default_factory=list)
      annotations: List[Dict[str, str]] = Field(default_factory=list)

@@ -252,7 +259,7 @@ _RE_CONSTRAINT_FIELDS = re.compile(r'\((((\.\w+)(,\s+)?)+)\)')


  class ObjectTypeFrame(BaseModel):
-     id: uuid.UUID = None
+     id: Optional[uuid.UUID] = None
      name: str
      links: List[PtrInfo] = Field(default_factory=list)
      properties: List[PtrInfo] = Field(default_factory=list)
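These mechanical `Optional[...]` rewrites track a pydantic behavior change: v1 implicitly treated a field annotated `str` with default `None` as optional, while v2 dropped that rule, so parsing `None` into such a field raises a `ValidationError`. A small illustration:

```python
from typing import Optional
from pydantic import BaseModel

class NamedField(BaseModel):
    # `name: str = None` relied on pydantic v1's implicit-Optional rule;
    # spelling out Optional keeps the model valid on both v1 and v2.
    name: Optional[str] = None

print(NamedField().name)  # None
```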
@@ -297,20 +304,43 @@ class ObjectTypeFrame(BaseModel):
          return exclusive


- def _format_link(df: pd.DataFrame, link_name: str):
-     if all(pd.isnull(df['target'])):
-         return {'target': pd.NA}
-
-     record = df.drop(columns=['source']).set_index('target')
+ def _format_link(link_df_fit: pd.DataFrame, link_name: str):
+     if link_df_fit.empty:
+         return pd.Series(dtype=object), False

-     if not record.index.is_unique:
+     if link_df_fit.duplicated(subset=['source', 'target']).any():
          raise MultiLinkTargetNotUnique(
              f'Multi Link: [{link_name}] source-target pairs in the '
              f'relation DataFrame are not unique'
          )

-     record = record.to_dict(orient='index')
-     return {'prop': record, 'target': list(record.keys())}
+     prop_cols = [col for col in link_df_fit.columns if col not in ['source', 'target']]
+     has_props = bool(prop_cols)
+
+     if has_props:
+         sources = link_df_fit['source'].values
+         targets = link_df_fit['target'].values
+         unique_sources, source_indices = np.unique(sources, return_inverse=True)
+
+         prop_arrays = {col: link_df_fit[col].values for col in prop_cols}
+         result = {}
+
+         for i in range(len(unique_sources)):
+             idx = source_indices == i
+             source = unique_sources[i]
+             source_targets = targets[idx]
+
+             indices = np.where(idx)[0]
+             prop_dict = {
+                 source_targets[j]: {col: prop_arrays[col][indices[j]] for col in prop_cols}
+                 for j in range(len(source_targets))
+             }
+             result[source] = {'target': source_targets.tolist(), 'prop': prop_dict}
+         link = pd.Series(result, dtype=object)
+     else:
+         link = link_df_fit.groupby('source')['target'].agg(list)
+
+     return link, has_props


  class BaseField(PtrInfo):
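The rewritten `_format_link` now returns a `(Series, has_props)` pair instead of a per-group dict: the Series is indexed by `source` and holds either plain target lists or `{'target': [...], 'prop': {...}}` records when link-property columns are present. A sketch with invented values:

```python
import pandas as pd
from deepfos.element.deepmodel import _format_link  # internal helper

# Hypothetical relation DataFrame for a multi link; any column other than
# 'source' and 'target' (here 'weight') is treated as a link property.
link_df = pd.DataFrame({
    "source": ["A", "A", "B"],
    "target": ["x", "y", "x"],
    "weight": [1, 2, 3],
})

link, has_props = _format_link(link_df, "members")
# has_props -> True; link["A"] is roughly
# {'target': ['x', 'y'], 'prop': {'x': {'weight': 1}, 'y': {'weight': 2}}}
# Without the 'weight' column, link maps each source to a plain list of
# targets and has_props is False. Duplicate (source, target) pairs raise
# MultiLinkTargetNotUnique.
```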
@@ -370,16 +400,12 @@ class FieldJson(BaseField):
          df[field_name] = df[field_name].apply(self.format_json)

      def cast(self, df: pd.DataFrame, field_name: str, direct_access: bool = True):
-         # std::json needs to be cast only when the data comes over http,
-         # since a json value is converted to a json string (type: str)
-         # by the edgedb python protocol
-         if not direct_access:
-             df[field_name] = df[field_name].apply(self.format_json)
+         pass


  class FieldInt(BaseField):
      def fit(self, df: pd.DataFrame, field_name: str, raw_pg: bool = False):
-         df[field_name] = pd.to_numeric(df[field_name])
+         df[field_name] = df[field_name].astype(pd.Int64Dtype(), errors='ignore')

      def cast(self, df: pd.DataFrame, field_name: str, direct_access: bool = True):
          df[field_name] = df[field_name].astype(pd.Int64Dtype(), errors='ignore')
@@ -483,71 +509,91 @@ def _iter_link_prop_assign(link, business_key, prop_name, prop_type, is_multi):
  def _iter_single_assign(
      field: PtrInfo,
      cast_type: str,
-     target_main_field: Dict[str, MainField]
- ):
-     assign_string = f"{field.name} := "
+     target_main_field: Dict[str, MainField],
+     error_on_empty_link: bool = False
+ ) -> str:
+     """
+     Build the assignment clause for a single field.
+
+     Args:
+         field: field info
+         cast_type: field type
+         target_main_field: main-field info of each link target
+         error_on_empty_link: whether to raise when a link target does not exist
+
+     Returns:
+         The assignment clause.
+     """
+     assign = f"\n{field.name} := "
      # assign a scalar value
      if field.name not in target_main_field:
-         assign_string += f"<{cast_type}>"
+         if field.is_multi:
+             return assign + f"json_array_unpack(item['{field.name}'])"
+
+         assign += f"<{cast_type}>"

          if cast_type in NEED_CAST_STR:
-             assign_string += '<std::str>'
+             assign += '<std::str>'

-         return assign_string + f"item['{field.name}']"
+         return assign + f"item['{field.name}']"

      # assign the link target value
      link = field.name
      main_field = target_main_field[link]

      if main_field.props:
-         target = (
-             cast_type + "{" +
-             ",".join(
-                 _iter_link_prop_assign(link, main_field.business_key, name,
-                                        field.prop_type[name], main_field.is_multi)
-                 for name in main_field.props
-             ) + "}"
+         prop_assigns = ','.join(
+             _iter_link_prop_assign(link, main_field.business_key, name,
+                                    field.prop_type[name], main_field.is_multi)
+             for name in main_field.props
          )
+         prop_block = f" {{{prop_assigns}}}"
      else:
-         target = cast_type
+         prop_block = ""

      if main_field.is_multi:
-         assign_string += f"""(
-             select detached {target}
-             filter contains(
-                 <array<{main_field.type}>>(json_get(item, '{link}', 'target')),
-                 .{main_field.business_key}
-             )
-         )"""
+         link_value = f"each_{link}"
+     else:
+         link_value = f"(json_get(item, '{link}'))"
+
+     if error_on_empty_link:
+         link_expr = f"(<{cast_type}><std::str>{link_value}){prop_block}"
+     else:
+         link_expr = f"(select detached {cast_type}{prop_block}\nfilter .{main_field.business_key} = <std::str>{link_value})"
+
+     if main_field.is_multi:
+         if main_field.props:
+             target_source = f"json_get(item, '{link}', 'target')"
+         else:
+             target_source = f"item['{link}']"
+
+         assign += 'distinct (\n' + textwrap.indent(textwrap.dedent(f"""\
+             for each_{link} in json_array_unpack({target_source})
+             union (
+                 {link_expr}
+             )"""), TAB) + '\n)'
      else:
-         assign_string += f"""assert_single((
-             select detached {target}
-             filter .{main_field.business_key} = <{main_field.type}>(json_get(item, '{link}'))
-         ))"""
+         assign += link_expr

-     return assign_string
+     return assign


  def bulk_insert_by_fields(
      object_name: str,
      field_type: List[PtrInfo],
      target_main_field: Dict[str, MainField],
+     error_on_empty_link: bool = False,
  ):
-     insert_assign_body = ','.join(
-         [
-             _iter_single_assign(field, field.type, target_main_field)
-             for field in field_type
-         ]
-     )
-
-     return f"""
-     with raw_data := <json>to_json(<std::str>${BATCH_INSERT_KW}),
-     for item in json_array_unpack(raw_data) union (
-         insert {object_name} {{
-             {insert_assign_body}
-         }}
-     )
-     """
+     insert_assign_body = ','.join([
+         _iter_single_assign(field, field.type, target_main_field, error_on_empty_link)
+         for field in field_type
+     ])
+     return textwrap.dedent(f"""
+     with raw_data := <json>to_json(<std::str>${BATCH_INSERT_KW}),
+     for item in json_array_unpack(raw_data) union (
+         insert {object_name} {{{textwrap.indent(insert_assign_body, TAB * 4)}
+         }}
+     )""")


  def bulk_upsert_by_fields(
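For reference, with a single scalar property and no links, `bulk_insert_by_fields` renders a statement of roughly the shape sketched below. `Order` and `code` are invented names, and the real placeholder is whatever the `BATCH_INSERT_KW` constant holds:

```python
from deepfos.element.deepmodel import (  # internal helpers
    PtrInfo, TargetField, bulk_insert_by_fields,
)

ql = bulk_insert_by_fields(
    object_name="Order",
    field_type=[PtrInfo(name="code", target=TargetField(name="std::str"))],
    target_main_field={},
)
# ql comes out roughly as:
#   with raw_data := <json>to_json(<std::str>$<BATCH_INSERT_KW>),
#   for item in json_array_unpack(raw_data) union (
#       insert Order {
#           code := <std::str>item['code'] }
#   )
```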
@@ -555,63 +601,62 @@ def bulk_upsert_by_fields(
      field_type: List[PtrInfo],
      target_main_field: Dict[str, MainField],
      exclusive_fields: Iterable[str],
-     update_fields: Iterable[str]
+     update_fields: Iterable[str],
+     error_on_empty_link: bool = False,
  ):
      conflict_on_fields = map(lambda n: f'.{n}', exclusive_fields)
-
-     insert_assign_body = ','.join(
-         [
-             _iter_single_assign(field, field.type, target_main_field)
-             for field in field_type
-         ]
-     )
+     insert_assign_body = ','.join([
+         _iter_single_assign(field, field.type, target_main_field, error_on_empty_link)
+         for field in field_type
+     ])
      update_assign_body = ','.join(
          [
-             _iter_single_assign(field, field.type, target_main_field)
+             _iter_single_assign(field, field.type, target_main_field, error_on_empty_link)
              for field in field_type if field.name in update_fields
          ]
      )
-
-     return f"""
+     return textwrap.dedent(f"""
      with raw_data := <json>to_json(<std::str>${BATCH_INSERT_KW}),
      for item in json_array_unpack(raw_data) union (
-         insert {object_name} {{
-             {insert_assign_body}
+         insert {object_name} {{{textwrap.indent(insert_assign_body, TAB * 4)}
          }}
          unless conflict on ({','.join(conflict_on_fields)})
          else (
-             update {object_name} set {{
-                 {update_assign_body}
+             update {object_name} set {{{textwrap.indent(update_assign_body, TAB * 5)}
              }}
          )
-     )
-     """
+     )""")


  def bulk_update_by_fields(
      object_name: str,
-     business_key: str,
      field_type: List[PtrInfo],
      target_main_field: Dict[str, MainField],
-     update_fields: Iterable[str] = None,
+     match_fields: Iterable[str],
+     update_fields: Iterable[str],
+     error_on_empty_link: bool = False,
  ):
-     update_assign_body = ','.join(
+     update_assign_body = ','.join([
+         _iter_single_assign(field, field.type, target_main_field, error_on_empty_link)
+         for field in field_type if field.name in update_fields
+     ])
+
+     field_type_map = {field.name: field.type for field in field_type}
+     match_str = " and ".join(
          [
-             _iter_single_assign(field, field.type, target_main_field)
-             for field in field_type if field.name in update_fields
+             f".{name} = <{field_type_map.get(name, 'std::str')}>item['{name}']"
+             for name in match_fields
          ]
      )
-
-     return f"""
-     with raw_data := <json>to_json(<std::str>${BATCH_INSERT_KW}),
-     for item in json_array_unpack(raw_data) union (
-         update {object_name}
-         filter .{business_key} = <std::str>item['{business_key}']
-         set {{
-             {update_assign_body}
-         }}
-     )
-     """
+     return textwrap.dedent(f"""
+     with raw_data := <json>to_json(<std::str>${BATCH_INSERT_KW}),
+     for item in json_array_unpack(raw_data) union (
+         update {object_name}
+         filter {match_str}
+         set {{{textwrap.indent(update_assign_body, TAB * 3)}
+         }}
+     )
+     """)


  def format_obj(obj: edgedb.Object) -> ObjectTypeFrame:
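`bulk_update_by_fields` now filters on arbitrary `match_fields` (joined with `and`) instead of a single business key; a match field missing from `field_type_map` falls back to a `<std::str>` cast. Sketch with invented names:

```python
from deepfos.element.deepmodel import (  # internal helpers
    PtrInfo, TargetField, bulk_update_by_fields,
)

ql = bulk_update_by_fields(
    object_name="Order",
    field_type=[
        PtrInfo(name="code", target=TargetField(name="std::str")),
        PtrInfo(name="status", target=TargetField(name="std::str")),
    ],
    target_main_field={},
    match_fields=["code"],
    update_fields=["status"],
)
# The generated filter clause is roughly: filter .code = <std::str>item['code']
```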
@@ -636,25 +681,27 @@ def collect_query_result_structure(
      fields = [
          PtrInfo(
              name=f.name,
-             target=NamedField(name=dm_type_to_edb_scalar.get(f.type, NOT_SCALAR))
+             target=TargetField(name=dm_type_to_edb_scalar.get(f.type, NOT_SCALAR))
          )
          for f in object_info.fields
      ]
-     return ObjectStructure(name='', structure=fields)
+     include_id = len(fields) == 1 and fields[0].name == 'id'
+     return ObjectStructure(name='', structure=fields, include_id=include_id)


  def collect_frame_desc_structure(desc: Dict[str, str]):
      fields = [
          PtrInfo(
              name=name if isinstance(name, str) else name[0],
-             target=NamedField(
+             target=TargetField(
                  name=tname
                  if isinstance(tname, str) else NOT_SCALAR
              )
          )
          for name, tname in desc.items()
      ]
-     return ObjectStructure(name='', structure=fields)
+     include_id = len(fields) == 1 and fields[0].name == 'id'
+     return ObjectStructure(name='', structure=fields, include_id=include_id)


  # Copied from edb/pgsql/types.py base_type_name_map
@@ -765,48 +812,74 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
      """DeepModel element

      Args:
-         direct_access: whether to use direct-access mode
+         direct_access: whether to use direct-access mode, defaults to True.
+             Also depends on whether OPTION.edgedb.dsn has a value: if not,
+             the element falls back to non-direct mode. In direct mode the
+             edgedb-python client connects to the edgedb server directly;
+             otherwise operations go through the DeepModel component API
          pg_dsn: PG connection info
+
      """
      __mangle_docs__ = False

-     def __init__(self, direct_access: bool = True, pg_dsn: str = None):  # noqa
+     def __init__(
+         self,
+         direct_access: bool = True,
+         pg_dsn: str = None,
+         **kwargs
+     ):
          self._txn_ = ContextVar('QLTXN')
          self.appmodule = f"app{OPTION.api.header['app']}"
          self.spacemodule = f"space{OPTION.api.header['space']}"
-         self.direct_access = direct_access
+         self.direct_access = direct_access and bool(OPTION.edgedb.dsn)
+         if not self.direct_access:
+             logger.debug('DeepModel is running in non-direct mode')
          self.alias = AliasGenerator()
          self.pg_dsn = pg_dsn
+         self._globals = None
+         self._clients = threading.local()

      @future_property
-     async def client(self):
-         if self.direct_access:
-             api = await self.wait_for('async_api')
-             ver = await api.extra.version()
-             if to_version_tuple(ver, -1) >= (3, 0, 18, 8, 0):
-                 db_info = await api.sharding.database()
-                 space = OPTION.api.header['space']
-                 if db_info.space != space:
-                     raise ValueError(
-                         f'Space id in sharding database info invalid. '
-                         f'Expected space id: {space}, actual: {db_info.space}'
-                     )
-                 dbname = db_info.edgedbName
-             else:
-                 dbname = None
-             client = create_async_client(
-                 default_module=self.appmodule,
-                 dbname=dbname
+     async def _internal_dbname(self):
+         if not self.direct_access:
+             # N.B: only retrieved when direct access is enabled
+             return
+         api = await self.wait_for('async_api')
+         ver = await api.extra.version()
+         if to_version_tuple(ver, 4) < (3, 0, 18, 8, 0):
+             return
+         db_info = await api.sharding.database()
+         space = OPTION.api.header['space']
+         if db_info.space != space:
+             raise ValueError(
+                 f'Space id in sharding database info invalid. '
+                 f'Expected space id: {space}, actual: {db_info.space}'
              )
-             if user_id := OPTION.api.header.get('user'):
-                 client = client.with_globals(
-                     **{
-                         f'{self.spacemodule}::current_user_id':
-                             user_id
-                     }
-                 )
+         return db_info.edgedbName
+
+     @property
+     def client(self):
+         if not self.direct_access:
+             self._globals = {}
+             return
+
+         if (client := getattr(self._clients, 'value', None)) is not None:
              return client

+         client = create_async_client(
+             default_module=self.appmodule,
+             dbname=self._internal_dbname
+         )
+         if user_id := OPTION.api.header.get('user'):
+             default_globals = {
+                 f'{self.spacemodule}::current_user_id':
+                     user_id
+             }
+             client = client.with_globals(**default_globals)
+         if self._globals is None:
+             self._globals = client._options.state._globals
+         self._clients.value = client
+         return client
+
      @future_property
      async def element_info(self):
          """Element info"""
@@ -899,12 +972,20 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
          """

          if self.direct_access:
-             logger.opt(lazy=True).debug(f"Query: [{ql}], \nkwargs: [{kwargs}].")
-             _, result = await self.client.query(ql, **kwargs)
+             logger.opt(lazy=True).debug(f"Query: [{ql}],\n"
+                                         f"kwargs: [{kwargs}],\n"
+                                         f"globals: [{self._globals}].")
+             client = self.client
+             client._options.state._globals = self._globals
+             _, result = await client.query(ql, **kwargs)
              return result

-         result = await AsyncDeepModel.query(self, ql, **kwargs)
-         return serutils.deserialize(result)
+         result = await self._http_query(ql, **kwargs)
+         field_info = {
+             fi.name: fi.type if fi.fields is None else fi.fields
+             for fi in result.objectInfos[0].fields
+         } if result.objectInfos else {}
+         return serutils.deserialize(result.json_, field_info)

      async def query(self, ql: str, **kwargs) -> List[Any]:
          """Execute a QL query and return the serialized result
@@ -934,8 +1015,12 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):

          """
          if self.direct_access:
-             logger.opt(lazy=True).debug(f"Query: [{ql}], \nkwargs: [{kwargs}].")
-             frame_desc, result = await self.client.query(ql, **kwargs)
+             logger.opt(lazy=True).debug(f"Query: [{ql}],\n"
+                                         f"kwargs: [{kwargs}],\n"
+                                         f"globals: [{self._globals}].")
+             client = self.client
+             client._options.state._globals = self._globals
+             frame_desc, result = await client.query(ql, **kwargs)
              return serutils.serialize(
                  result, ctx=serutils.Context(frame_desc=frame_desc)
              )
@@ -982,7 +1067,17 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):

          """
          if self.direct_access:
-             frame_desc, data = await self.client.query(ql, **kwargs)
+             client = self.client
+             client._options.state._globals = self._globals
+             frame_desc, data = await client.query(ql, **kwargs)
+             # set of unnamed tuples
+             if isinstance(frame_desc, list):
+                 return pd.DataFrame(
+                     data=serutils.serialize(
+                         data, ctx=serutils.Context(frame_desc=frame_desc, query_df=True)
+                     ),
+                     columns=[str(i) for i in range(len(frame_desc))]
+                 )

              data = pd.DataFrame(data=serutils.serialize(
                  data, ctx=serutils.Context(frame_desc=frame_desc, query_df=True)
@@ -995,7 +1090,7 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
          else:
              result = await self._http_query(ql, **kwargs)
              # No object structure info
-             if result.objectInfos is None:
+             if not result.objectInfos:
                  return pd.DataFrame(data=result.json_)

              data = pd.DataFrame(
@@ -1015,6 +1110,9 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
      query_object.__doc__ = query_object.__doc__ + DOC_ARGS_KWARGS
      query_df.__doc__ = query_df.__doc__ + DOC_ARGS_KWARGS

+     def _ensure_client(self):
+         self.client  # noqa
+
      @txn_support
      async def execute(
          self,
@@ -1030,8 +1128,15 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
              automatically used as the arguments of every string-form QL

          """
+         qls_with_args = self._collect_execute_qls(qls, kwargs)
+         return await self._maybe_exec_qls(qls_with_args)
+
+     def _collect_execute_qls(self, qls, kwargs):
+         self._ensure_client()
          if isinstance(qls, str):
-             qls_with_args = [QueryWithArgs(commands=qls, kwargs=kwargs)]
+             qls_with_args = [QueryWithArgs(
+                 commands=qls, kwargs=kwargs, globals=self._globals
+             )]
          else:
              qls_with_args = []
              seen_kwargs_key = set()
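`execute` now funnels statement collection through `_collect_execute_qls`, which stamps the element's current globals onto every plain-string QL, while `QueryWithArgs` entries keep whatever they were built with. A hedged usage sketch; object and parameter names are invented:

```python
from deepfos.element.deepmodel import DeepModel, QueryWithArgs

dm = DeepModel()
dm.execute(
    [
        # Plain strings share the trailing **kwargs and pick up dm's globals.
        "insert Order { code := <std::str>$code }",
        # QueryWithArgs entries carry their own kwargs (and globals).
        QueryWithArgs(
            commands="update Order filter .code = <std::str>$c set { qty := 1 }",
            kwargs={"c": "SO-001"},
        ),
    ],
    code="SO-001",
)
```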
@@ -1049,11 +1154,12 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
                      seen_kwargs_key = seen_kwargs_key.union(ql.kwargs.keys())

                  elif isinstance(ql, str):
-                     qls_with_args.append(QueryWithArgs(commands=ql, kwargs=kwargs))
+                     qls_with_args.append(QueryWithArgs(
+                         commands=ql, kwargs=kwargs, globals=self._globals
+                     ))
                  else:
                      raise TypeError(f'Invalid member type in qls: {type(ql)}')
-
-         return await self._maybe_exec_qls(qls_with_args)
+         return qls_with_args

      execute.__doc__ = execute.__doc__ + DOC_ARGS_KWARGS

@@ -1085,16 +1191,31 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
              return affected

          result = []
-         async for tx in self.client.transaction():
+         client = self.client
+         client._options.state._globals = self._globals
+         async for tx in client.transaction():
              async with tx:
                  for ql in qls_with_args:
                      logger.opt(lazy=True).debug(
-                         f"Execute QL: [{ql.commands}], \nkwargs: [{ql.kwargs}]."
+                         f"Execute QL: [{ql.commands}],"
+                         f"\nkwargs: [{ql.kwargs}],"
+                         f"\nglobals: [{ql.globals}]."
                      )
-                     desc, affected = await tx.execute(ql.commands, **ql.kwargs)
-                     result.append(serutils.serialize(
-                         affected, ctx=serutils.Context(frame_desc=desc)
-                     ))
+                     if ql.globals:
+                         bak_cli = tx._client
+                         tx._client = tx._client.with_globals(**ql.globals)
+                         try:
+                             desc, affected = await tx.execute(ql.commands, **ql.kwargs)
+                             result.append(serutils.serialize(
+                                 affected, ctx=serutils.Context(frame_desc=desc)
+                             ))
+                         finally:
+                             tx._client = bak_cli
+                     else:
+                         desc, affected = await tx.execute(ql.commands, **ql.kwargs)
+                         result.append(serutils.serialize(
+                             affected, ctx=serutils.Context(frame_desc=desc)
+                         ))
          if len(result) == 1:
              return result[0]
          return result
@@ -1122,13 +1243,14 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
          return await self._execute(qls_with_args)

      @staticmethod
-     def _valid_data(data, object_name, relation, structure):
-         required_fields = set(map(
-             lambda f: f.name,
-             filter(lambda f: f.required, structure.fields.values())
-         ))
-         if missing_fields := (required_fields - set(data.columns)):
-             raise RequiredFieldUnfilled(f'Missing required fields: {missing_fields}')
+     def _valid_data(data, object_name, relation, structure, check_required=True):
+         if check_required:
+             required_fields = set(map(
+                 lambda f: f.name,
+                 filter(lambda f: f.required, structure.fields.values())
+             ))
+             if missing_fields := (required_fields - set(data.columns).union(relation)):
+                 raise RequiredFieldUnfilled(f'Missing required fields: {missing_fields}')

          if not relation:
              return
@@ -1176,6 +1298,218 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
              if exclusive not in valid_exclusive:
                  raise ValueError(f"exclusive fields: {exclusive_fields} have no matching unique constraint")

+     async def _get_bkey(
+         self,
+         obj: Union[ObjectTypeFrame, TargetField],
+         source: str = None,
+         name: str = None
+     ) -> str:
+         # Prefer the business key from the object structure's annotations
+         # when available; otherwise fall back to the API
+         if obj.info and BUSINESS_KEY in obj.info:
+             return obj.info[BUSINESS_KEY]
+         elif (code := obj.normalized_name) in self.model_objects:
+             return self.model_objects[code].businessKey
+
+         assert isinstance(obj, TargetField)
+         # Links to an object outside this app need a separate lookup
+         tgt = ObjectElement.construct_from(self.model_objects[source]).links[name]
+         tgt_model_info = await self.async_api.object.info(
+             app=tgt.targetApp, object_code=tgt.targetObjectCode
+         )
+         return tgt_model_info.businessKey
+
+     async def _collect_bulk_field_info(self, object_name, structure, data, relation):
+         field_info = []
+         tgt_main_field = {}
+         for field in structure.fields.values():
+             if field.name not in data.columns:
+                 continue
+
+             field_info.append(field)
+
+             if not field.is_link:
+                 continue
+
+             is_multi = field.is_multi_link
+             name = field.name
+             # Links to another object: record the target object's info
+             if is_multi:
+                 if name not in relation:
+                     raise ValueError(
+                         f'Multi link [{name}] of object [{object_name}] is not defined in relation'
+                     )
+                 link_props = set(relation[name].columns).intersection(field.props)
+             else:
+                 link_props = set(
+                     c[len(f'{name}@')::]
+                     for c in data.columns if c.startswith(f'{name}@')
+                 ).intersection(field.props)
+             tgt_bkey = await self._get_bkey(field.target, object_name, name)
+             tgt_main_field[name] = MainField(tgt_bkey, is_multi, link_props)
+         return field_info, tgt_main_field
+
+     def _ql_payload(self, data: pd.DataFrame, ql: str):
+         self._ensure_client()
+         kw_name = self.alias.get(BATCH_INSERT_KW)
+         return QueryWithArgs(
+             commands=ql.replace(f'${BATCH_INSERT_KW}', f'${kw_name}'),
+             kwargs={kw_name: data.to_json(
+                 orient='records', double_precision=15,
+                 force_ascii=False, default_handler=str
+             )},
+             globals=self._globals,
+         )
+
+     @staticmethod
+     def _split_self_link(data, relation, structure, bkey):
+         self_link_dfs = {}
+         for name in structure.self_link_fields:
+             field = structure.fields[name]
+             if (link_df := relation.get(name)) is not None:
+                 link_props = set(link_df.columns).intersection(field.props)
+                 self_link_dfs[name] = (
+                     structure.fit(data[[bkey, name]]),
+                     MainField(bkey, field.is_multi_link, link_props)
+                 )
+                 data = data.drop(columns=[name])
+             elif name in data.columns:
+                 link_prop_cols = []
+                 link_props = []
+
+                 for col in data.columns:
+                     if (
+                         col.startswith(f'{name}@')
+                         and ((prop_name := col[len(f'{name}@')::]) in field.props)
+                     ):
+                         link_prop_cols.append(col)
+                         link_props.append(prop_name)
+
+                 self_link_dfs[name] = (
+                     structure.fit(data[[bkey, name, *link_prop_cols]]),
+                     MainField(bkey, field.is_multi_link, link_props)
+                 )
+                 data = data.drop(columns=[name, *link_prop_cols])
+         return data, self_link_dfs
+
+     @staticmethod
+     def _merge_relation(data, relation, structure, bkey):
+         for name, link_df in relation.items():
+             link_df = link_df.dropna(how='any', subset=['target', 'source'])
+             if name not in structure.fields:
+                 continue
+             field = structure.fields[name]
+             valid_cols = list({'source', 'target', *field.props} & set(link_df.columns))
+             link_df = link_df[valid_cols]
+             # for fit only
+             temp_structure = ObjectStructure(
+                 field.type,
+                 [
+                     PtrInfo(name='source', target=TargetField(name='std::str')),
+                     PtrInfo(name='target', target=TargetField(name='std::str')),
+                     *[PtrInfo(**prop.dict()) for prop in field.properties]
+                 ]
+             )
+             link_df = temp_structure.fit(link_df)
+             link, has_props = _format_link(link_df, name)
+
+             if not has_props:
+                 data = data.drop(columns=[name], errors='ignore')
+                 data = data.join(link.to_frame(name), on=bkey, how='left')
+                 mask = data[name].isna()
+                 if mask.any():
+                     empty_series = pd.Series([[]] * mask.sum(), index=data[mask].index, dtype=object)
+                     data.loc[mask, name] = empty_series
+             else:
+                 bkey_values = data[bkey].values
+                 mapped_values = np.array([link.get(key, []) for key in bkey_values], dtype=object)
+                 data[name] = mapped_values
+
+         return data
+
+     async def _collect_bulk_qls(
+         self,
+         object_name: str,
+         data: pd.DataFrame,
+         relation: Dict[str, pd.DataFrame] = None,
+         chunk_size: int = 500,
+         enable_upsert: bool = False,
+         update_fields: Iterable[str] = None,
+         exclusive_fields: Iterable[str] = None,
+         match_fields: Iterable[str] = None,
+         insert: bool = True,
+         error_on_empty_link: bool = False
+     ) -> List[List[QueryWithArgs]]:
+         if object_name in self.objects:
+             obj = self.objects[object_name]
+         else:
+             raise ObjectNotExist(
+                 f'DeepModel object [{object_name}] does not exist in the current app; cannot insert data'
+             )
+         if obj.external:
+             raise ExternalObjectReadOnly('External objects are read-only')
+
+         structure = ObjectStructure(name=obj.name, structure=obj.fields.values())
+         relation = relation or {}
+         self._valid_data(data, object_name, relation, structure, check_required=insert)
+
+         bkey = await self._get_bkey(obj)
+         if bkey not in data.columns:
+             raise RequiredFieldUnfilled(f'Missing business key [{bkey}]')
+
+         # join the relation DataFrames onto data
+         data = self._merge_relation(data, relation, structure, bkey)
+         # split the self-link update info out of data
+         data, self_link_dfs = self._split_self_link(data, relation, structure, bkey)
+         field_info, tgt_main_field = await self._collect_bulk_field_info(
+             object_name, structure, data, relation
+         )
+         field_names = set(map(lambda f: f.name, field_info))
+         if insert:
+             if enable_upsert:
+                 self._valid_upsert(obj, field_names, bkey, exclusive_fields, update_fields)
+
+             exclusive_fields = set(exclusive_fields or {bkey}) & set(field_names)
+             update_fields = set(update_fields or (field_names - {bkey})) & set(field_names)
+             if enable_upsert and update_fields:
+                 bulk_ql = bulk_upsert_by_fields(
+                     object_name, field_info, tgt_main_field,
+                     exclusive_fields, update_fields, error_on_empty_link
+                 )
+             else:
+                 bulk_ql = bulk_insert_by_fields(object_name, field_info, tgt_main_field, error_on_empty_link)
+         else:
+             if missing := (set(match_fields or [bkey]) - set(field_names)):
+                 raise ValueError(f"match fields: {missing} are not present in the provided data")
+
+             match_fields = set(match_fields or [bkey]) & set(field_names)
+             if to_upd := (field_names - match_fields):
+                 bulk_ql = bulk_update_by_fields(
+                     object_name, field_info, tgt_main_field,
+                     match_fields, to_upd, error_on_empty_link
+                 )
+             else:
+                 bulk_ql = None
+         qls = []
+         self._ensure_client()
+         if chunk_size is None:
+             chunk_size = len(data)
+         for i in range(0, len(data), chunk_size):
+             part = structure.fit(data.iloc[i: i + chunk_size])
+             ql_chunk = []
+             # Ignore bulk_ql when only multi links are updated
+             if bulk_ql is not None:
+                 ql_chunk = [self._ql_payload(part, bulk_ql)]
+             for update_field, (update_df, main_field) in self_link_dfs.items():
+                 field = structure.fields[update_field]
+                 update_ql = bulk_update_by_fields(
+                     object_name, [field], {update_field: main_field},
+                     [bkey], [update_field]
+                 )
+                 update_part = update_df.iloc[i: i + chunk_size]
+                 ql_chunk.append(self._ql_payload(update_part, update_ql))
+             qls.append(ql_chunk)
+         return qls
+
      @txn_support
      async def insert_df(
          self,
@@ -1186,6 +1520,8 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
          enable_upsert: bool = False,
          update_fields: Iterable[str] = None,
          exclusive_fields: Iterable[str] = None,
+         commit_per_chunk: bool = False,
+         error_on_empty_link: bool = False,
      ) -> None:
          """Bulk-insert data described by DataFrame field info, executed as a transaction

@@ -1206,6 +1542,12 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
              exclusive_fields: exclusive fields for the update arm of the upsert
                  statement; the fields involved must appear in data or relation,
                  defaults to the business key
+             commit_per_chunk: whether to commit the transaction after each chunk,
+                 defaults to False, i.e. commit once after all data is inserted.
+                 Only takes effect outside a start_transaction context
+             error_on_empty_link: whether to raise when a link target value does not exist,
+                 defaults to False, i.e. link targets are not checked.
+                 When True, a missing link target raises an exception

          Notes:

@@ -1278,94 +1620,16 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
              logger.info("data is empty; no DML executed")
              return

-         if object_name in self.objects:
-             obj = self.objects[object_name]
-         else:
-             raise ObjectNotExist(
-                 f'DeepModel object [{object_name}] does not exist in the current app; cannot insert data'
-             )
-         if obj.external:
-             raise ExternalObjectReadOnly('External objects are read-only')
-
-         structure = ObjectStructure(name=obj.name, structure=obj.fields.values())
-
-         self._valid_data(data, object_name, relation, structure)
-
-         relation = relation or {}
-         bkey = await self._get_bkey(obj)
-         # join the relation DataFrames onto data
-         data = self._merge_relation(data, relation, structure, bkey)
-         # split the self-link update info out of data
-         data, self_link_dfs = self._split_self_link(data, relation, structure, bkey)
-
-         field_info = []
-         tgt_main_field = {}
-         # prepare the field info needed for bulk insert
-         for field in structure.fields.values():
-             if field.name not in data.columns:
-                 continue
-
-             field_info.append(field)
-
-             if not field.is_link:
-                 continue
-
-             is_multi = field.is_multi_link
-             name = field.name
-             # Links to another object: record the target object's info
-             if is_multi:
-                 link_props = set(relation[name].columns).intersection(field.props)
-             else:
-                 link_props = set(
-                     c[len(f'{name}@')::]
-                     for c in data.columns if c.startswith(f'{name}@')
-                 ).intersection(field.props)
-             tgt_bkey = await self._get_bkey(field.target, object_name, name)
-             tgt_main_field[name] = MainField(tgt_bkey, is_multi, link_props)
-
-         field_names = set(map(lambda f: f.name, field_info))
-         if enable_upsert:
-             self._valid_upsert(obj, field_names, bkey, exclusive_fields, update_fields)
-
-         exclusive_fields = set(exclusive_fields or {bkey}) & set(field_names)
-         update_fields = set(update_fields or (field_names - {bkey})) & set(field_names)
-         if enable_upsert and update_fields:
-             insert_ql = bulk_upsert_by_fields(
-                 object_name, field_info, tgt_main_field,
-                 exclusive_fields, update_fields
-             )
+         qls = await self._collect_bulk_qls(
+             object_name, data, relation, chunksize,
+             enable_upsert, update_fields, exclusive_fields,
+             insert=True, error_on_empty_link=error_on_empty_link
+         )
+         if commit_per_chunk:
+             for ql_chunk in qls:
+                 await self.execute(ql_chunk)
          else:
-             insert_ql = bulk_insert_by_fields(object_name, field_info, tgt_main_field)
-
-         qls = []
-         self._collect_qls(structure.fit(data), insert_ql, chunksize, qls)
-         if self_link_dfs:
-             for update_field, (update_df, main_field) in self_link_dfs.items():
-                 field = structure.fields[update_field]
-                 update_ql = bulk_update_by_fields(
-                     object_name, bkey, [field],
-                     {update_field: main_field}, [update_field]
-                 )
-                 self._collect_qls(update_df, update_ql, chunksize, qls)
-
-         await self.execute(qls)
-
-     def _collect_qls(
-         self,
-         data: pd.DataFrame,
-         ql: str,
-         chunksize: int,
-         qls: List[QueryWithArgs]
-     ):
-         for i in range(0, len(data), chunksize):
-             part = data.iloc[i: i + chunksize]
-             kw_name = self.alias.get(BATCH_INSERT_KW)
-             qls.append(QueryWithArgs(
-                 commands=ql.replace(
-                     f'${BATCH_INSERT_KW}', f'${kw_name}'
-                 ),
-                 kwargs={kw_name: part.to_json(orient='records', double_precision=15)}
-             ))
+             await self.execute(list(chain(*qls)))

      async def get_object(
          self,
@@ -1395,80 +1659,6 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):

          return format_obj(objs[0])

-     async def _get_bkey(
-         self,
-         obj: Union[ObjectTypeFrame, TargetField],
-         source: str = None,
-         name: str = None
-     ) -> str:
-         # Prefer the business key from the object structure's annotations
-         # when available; otherwise fall back to the API
-         if obj.info and BUSINESS_KEY in obj.info:
-             return obj.info[BUSINESS_KEY]
-         elif (code := obj.normalized_name) in self.model_objects:
-             return self.model_objects[code].businessKey
-
-         assert isinstance(obj, TargetField)
-         # Links to an object outside this app need a separate lookup
-         tgt = ObjectElement.construct_from(self.model_objects[source]).links[name]
-         tgt_model_info = await self.async_api.object.info(
-             app=tgt.targetApp, object_code=tgt.targetObjectCode
-         )
-         return tgt_model_info.businessKey
-
-     @staticmethod
-     def _split_self_link(data, relation, structure, bkey):
-         self_link_dfs = {}
-         for name in structure.self_link_fields:
-             field = structure.fields[name]
-             if (link_df := relation.get(name)) is not None:
-                 link_props = set(link_df.columns).intersection(field.props)
-                 self_link_dfs[name] = (
-                     structure.fit(data[[bkey, name]]),
-                     MainField(bkey, field.is_multi_link, link_props)
-                 )
-                 data = data.drop(columns=[name])
-             elif name in data.columns:
-                 link_prop_cols = []
-                 link_props = []
-
-                 for col in data.columns:
-                     if (
-                         col.startswith(f'{name}@')
-                         and ((prop_name := col[len(f'{name}@')::]) in field.props)
-                     ):
-                         link_prop_cols.append(col)
-                         link_props.append(prop_name)
-
-                 self_link_dfs[name] = (
-                     structure.fit(data[[bkey, name, *link_prop_cols]]),
-                     MainField(bkey, field.is_multi_link, link_props)
-                 )
-                 data = data.drop(columns=[name, *link_prop_cols])
-         return data, self_link_dfs
-
-     @staticmethod
-     def _merge_relation(data, relation, structure, bkey):
-         for name, link_df in relation.items():
-             if name not in structure.fields:
-                 continue
-             field = structure.fields[name]
-             valid_cols = list({'source', 'target', *field.props} & set(link_df.columns))
-             link_df = link_df[valid_cols]
-             # for fit only
-             temp_structure = ObjectStructure(
-                 field.type,
-                 [
-                     PtrInfo(name='source', target=TargetField(name='std::str')),
-                     PtrInfo(name='target', target=TargetField(name='std::str')),
-                     *[PtrInfo(**prop.dict()) for prop in field.properties]
-                 ]
-             )
-             link_df = temp_structure.fit(link_df)
-             link = link_df.groupby('source').apply(_format_link, link_name=name)
-             data = data.drop(columns=[name], errors='ignore')
-             data = data.join(link.to_frame(name), on=bkey)
-         return data
-
      async def insert_df_pg(
          self,
          object_name: str,
@@ -1584,6 +1774,49 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
          finally:
              await conn.close()

+     @txn_support
+     async def update_df(
+         self,
+         object_name: str,
+         data: pd.DataFrame,
+         relation: Dict[str, pd.DataFrame] = None,
+         chunksize: int = 500,
+         match_fields: Iterable[str] = None,
+         commit_per_chunk: bool = False,
+     ) -> None:
+         """Bulk-update data described by DataFrame field info, executed as a transaction
+
+         By default rows are matched on the business key, and every field other
+         than the business key becomes an update field.
+
+         Args:
+             object_name: name of the object being updated; must belong to the current app
+             data: the data to update; a single link property is provided
+                 as a column named link_name@link_property_name
+             relation: for multi links, supplies the link target info;
+                 keys are link field names, values are mapping DataFrames
+                 whose source column holds the business key of the updated object
+                 and whose target column holds the business key of the link target;
+                 link properties, if any, go in the remaining columns, named after the property
+             chunksize: maximum number of rows per statement
+             match_fields: fields the update matches on; the fields involved must
+                 appear in data or relation, defaults to the business key
+             commit_per_chunk: whether to commit the transaction after each chunk,
+                 defaults to False, i.e. commit once after all data is processed.
+                 Only takes effect outside a start_transaction context
+         """
+         if data.empty:
+             logger.info("data is empty; no DML executed")
+             return
+
+         qls = await self._collect_bulk_qls(
+             object_name, data, relation, chunksize,
+             match_fields=match_fields, insert=False
+         )
+         if commit_per_chunk:
+             for ql_chunk in qls:
+                 await self.execute(ql_chunk)
+         else:
+             await self.execute(list(chain(*qls)))
+
      @asynccontextmanager
      async def start_transaction(self, flatten: bool = False):
          """Start a transaction
@@ -1622,7 +1855,7 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):

      Important:

-         Only :func:`insert_df` and :func:`execute` may run inside a transaction
+         Only :func:`insert_df`, :func:`update_df` and :func:`execute` may run inside a transaction

      """
      try:
@@ -1660,12 +1893,18 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
              finally:
                  raise NotImplemented('Setting state info is not supported in non-direct mode')
          else:
-             bak_cli = self.client
+             if self._globals is None:
+                 bak_globals = self.client._options.state._globals
+             else:
+                 bak_globals = self._globals
              try:
-                 self.client = self.client.with_globals(**globals_)
+                 client = self.client
+                 client._options.state._globals = bak_globals
+                 client = client.with_globals(**globals_)
+                 self._globals = client._options.state._globals
                  yield
              finally:
-                 self.client = bak_cli
+                 self._globals = bak_globals

      @contextmanager
      def without_globals(self, *global_names):
@@ -1675,19 +1914,26 @@ class AsyncDeepModel(ElementBase[DeepModelAPI]):
              finally:
                  raise NotImplemented('Setting state info is not supported in non-direct mode')
          else:
-             bak_cli = self.client
+             if self._globals is None:
+                 bak_globals = self.client._options.state._globals
+             else:
+                 bak_globals = self._globals
              try:
-                 self.client = self.client.without_globals(*global_names)
+                 client = self.client
+                 client._options.state._globals = bak_globals
+                 client = client.without_globals(*global_names)
+                 self._globals = client._options.state._globals
                  yield
              finally:
-                 self.client = bak_cli
+                 self._globals = bak_globals


  class DeepModel(AsyncDeepModel, metaclass=SyncMeta):
      synchronize = (
          'query_object', 'query', 'query_df',
          'execute', 'get_object',
-         'insert_df', 'insert_df_pg'
+         'insert_df', 'insert_df_pg',
+         'update_df',
      )

      if TYPE_CHECKING:  # pragma: no cover
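Both globals context managers now swap only the tracked `_globals` snapshot instead of replacing the client object, so the per-thread clients stay intact. The diff names `without_globals` explicitly; a matching `with_globals` counterpart is implied by the preceding hunk. A hedged sketch under that assumption:

```python
from deepfos.element.deepmodel import DeepModel

dm = DeepModel()
user_global = f"{dm.spacemodule}::current_user_id"

# Assumes with_globals is the counterpart context manager of without_globals.
with dm.with_globals(**{user_global: "u-007"}):
    rows = dm.query("select Order { code }")  # runs with the override applied
with dm.without_globals(user_global):
    rows = dm.query("select Order { code }")  # runs with the global unset
```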
@@ -1723,6 +1969,8 @@ class DeepModel(AsyncDeepModel, metaclass=SyncMeta):
          enable_upsert: bool = False,
          update_fields: Iterable[str] = None,
          exclusive_fields: Iterable[str] = None,
+         commit_per_chunk: bool = False,
+         error_on_empty_link: bool = False,
      ) -> None:
          ...

@@ -1737,6 +1985,17 @@ class DeepModel(AsyncDeepModel, metaclass=SyncMeta):
      ) -> None:
          ...

+     def update_df(
+         self,
+         object_name: str,
+         data: pd.DataFrame,
+         relation: Dict[str, pd.DataFrame] = None,
+         chunksize: int = 500,
+         match_fields: Iterable[str] = None,
+         commit_per_chunk: bool = False,
+     ) -> None:
+         ...
+
      @contextmanager
      def start_transaction(self, flatten: bool = False):
          """Start a transaction