bella-openapi 1.0.3__py3-none-any.whl → 1.0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bella_openapi/entity/standard_domtree.py +49 -20
- {bella_openapi-1.0.3.dist-info → bella_openapi-1.0.3.2.dist-info}/METADATA +5 -1
- {bella_openapi-1.0.3.dist-info → bella_openapi-1.0.3.2.dist-info}/RECORD +6 -6
- {bella_openapi-1.0.3.dist-info → bella_openapi-1.0.3.2.dist-info}/WHEEL +0 -0
- {bella_openapi-1.0.3.dist-info → bella_openapi-1.0.3.2.dist-info}/licenses/LICENSE +0 -0
- {bella_openapi-1.0.3.dist-info → bella_openapi-1.0.3.2.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,7 @@ StandardDomTree - 新的DOM树协议定义
|
|
9
9
|
"""
|
10
10
|
|
11
11
|
from typing import List, Optional, Literal, Any, Union
|
12
|
-
from pydantic import BaseModel, Field
|
12
|
+
from pydantic import BaseModel, Field, root_validator
|
13
13
|
import tiktoken
|
14
14
|
|
15
15
|
# 布局类型映射表:将旧解析器的布局类型映射到新的标准类型
|
@@ -92,6 +92,23 @@ class StandardNode(BaseModel):
|
|
92
92
|
path: Optional[List[int]] = Field(default_factory=list) # 编号的层级信息,例如:1.2.1
|
93
93
|
element: Optional[Union[StandardElement, StandardImageElement, StandardTableElement]] = None # 元素信息,当前节点的元素详情
|
94
94
|
children: Optional[List["StandardNode"]] = Field(default_factory=list) # 子节点信息,当前节点的所有子节点
|
95
|
+
|
96
|
+
@root_validator(pre=True)
|
97
|
+
def parse_element(cls, values):
|
98
|
+
"""根据element的type字段正确实例化element"""
|
99
|
+
if 'element' in values and values['element'] is not None:
|
100
|
+
element_data = values['element']
|
101
|
+
# 只有当element是字典时才需要转换,如果已经是正确的实例则保持不变
|
102
|
+
if isinstance(element_data, dict):
|
103
|
+
element_type = element_data.get('type', '')
|
104
|
+
if element_type == 'Figure':
|
105
|
+
values['element'] = StandardImageElement(**element_data)
|
106
|
+
elif element_type == 'Table':
|
107
|
+
values['element'] = StandardTableElement(**element_data)
|
108
|
+
else:
|
109
|
+
values['element'] = StandardElement(**element_data)
|
110
|
+
# 如果element已经是正确的实例类型,则保持不变
|
111
|
+
return values
|
95
112
|
|
96
113
|
|
97
114
|
class StandardDomTree(BaseModel):
|
@@ -400,19 +417,22 @@ class StandardDomTree(BaseModel):
|
|
400
417
|
url=element['image_link']
|
401
418
|
)
|
402
419
|
|
403
|
-
# 创建
|
404
|
-
|
420
|
+
# 创建StandardImageElement实例
|
421
|
+
image_element = StandardImageElement(
|
422
|
+
type=element_type,
|
423
|
+
positions=positions,
|
424
|
+
name="",
|
425
|
+
description="",
|
426
|
+
text=text,
|
427
|
+
image=image,
|
428
|
+
)
|
429
|
+
|
430
|
+
# 使用construct方法跳过validator,保持正确的element类型
|
431
|
+
standard_node = StandardNode.construct(
|
405
432
|
summary="",
|
406
433
|
tokens=0, # 先设置为 0,后面再计算
|
407
434
|
path=[], # 初始化为空列表,后续再计算
|
408
|
-
element=StandardImageElement(
|
409
|
-
type=element_type,
|
410
|
-
positions=positions,
|
411
|
-
name="",
|
412
|
-
description="",
|
413
|
-
text=text,
|
414
|
-
image=image,
|
415
|
-
),
|
435
|
+
element=image_element,
|
416
436
|
children=[]
|
417
437
|
)
|
418
438
|
elif element_type == "Table":
|
@@ -447,17 +467,21 @@ class StandardDomTree(BaseModel):
|
|
447
467
|
# 将所有单元格的文本合并,用于计算 token 数量
|
448
468
|
text = " ".join(cell_texts)
|
449
469
|
|
450
|
-
|
470
|
+
# 创建StandardTableElement实例
|
471
|
+
table_element = StandardTableElement(
|
472
|
+
type=element_type,
|
473
|
+
positions=positions,
|
474
|
+
name="",
|
475
|
+
description="",
|
476
|
+
rows=rows
|
477
|
+
)
|
478
|
+
|
479
|
+
# 使用construct方法跳过validator,保持正确的element类型
|
480
|
+
standard_node = StandardNode.construct(
|
451
481
|
summary="",
|
452
482
|
tokens=0, # 先设置为 0,后面再计算
|
453
483
|
path=[], # 初始化为空列表,后续再计算
|
454
|
-
element=StandardTableElement(
|
455
|
-
type=element_type,
|
456
|
-
positions=positions,
|
457
|
-
name="",
|
458
|
-
description="",
|
459
|
-
rows=rows
|
460
|
-
),
|
484
|
+
element=table_element,
|
461
485
|
children=[]
|
462
486
|
)
|
463
487
|
else:
|
@@ -510,4 +534,9 @@ class StandardDomTree(BaseModel):
|
|
510
534
|
# 计算标记列表的长度,即标记的数量
|
511
535
|
token_count = len(tokens)
|
512
536
|
# 返回标记的数量
|
513
|
-
return token_count
|
537
|
+
return token_count
|
538
|
+
|
539
|
+
|
540
|
+
# 更新forward references
|
541
|
+
StandardNode.update_forward_refs()
|
542
|
+
Cell.update_forward_refs()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: bella-openapi
|
3
|
-
Version: 1.0.3
|
3
|
+
Version: 1.0.3.2
|
4
4
|
Summary: client for openapi service.
|
5
5
|
Home-page:
|
6
6
|
Author: ['tangxiaolong', 'fanqiangwei', 'zhangxiaojia', 'liumin', 'wangyukun']
|
@@ -12,6 +12,8 @@ License-File: LICENSE
|
|
12
12
|
Requires-Dist: httpx<=0.26.0,>=0.10.0
|
13
13
|
Requires-Dist: Werkzeug==3.0.1
|
14
14
|
Requires-Dist: tiktoken>=0.5.0
|
15
|
+
Requires-Dist: pydantic==1.10.14
|
16
|
+
Requires-Dist: fastapi==0.110.0
|
15
17
|
Dynamic: author
|
16
18
|
Dynamic: classifier
|
17
19
|
Dynamic: description
|
@@ -259,3 +261,5 @@ get接口地址:/v1/openapi/log/{requestId}
|
|
259
261
|
* rename 包名
|
260
262
|
* 1.0.2
|
261
263
|
* 修复bug
|
264
|
+
* 1.0.3
|
265
|
+
* 新增standard domtree
|
@@ -14,11 +14,11 @@ bella_openapi/bella_trace/trace_requests.py,sha256=ADA8J_gbC3TwUo5LWQ3c_yTmCSZRa
|
|
14
14
|
bella_openapi/console/__init__.py,sha256=uSfr5v6JLRSqTlftjK_ZU1pnbkEyxAPbuQbMyYX_phk,64
|
15
15
|
bella_openapi/console/models.py,sha256=Hh1UuYHIxFtF9r5QK-pSJPFrSqbZUHv6spLvPbCeX08,1274
|
16
16
|
bella_openapi/entity/__init__.py,sha256=zzsYYg859pzPSgx1Py2kxB2ozQ0tt4OtTatBtpm2bAw,512
|
17
|
-
bella_openapi/entity/standard_domtree.py,sha256=
|
17
|
+
bella_openapi/entity/standard_domtree.py,sha256=6b-490HP6wt8Z4kU6e3kwuNp5UdNtiNmTML3akNGx5U,21574
|
18
18
|
bella_openapi/middleware/__init__.py,sha256=XWvZG1xO30ZXIn10YVYthmT1BV-9fonMEP_jVRZbAlQ,157
|
19
19
|
bella_openapi/middleware/context_middleware.py,sha256=YawQyKAxMzvlDs_MxcuQKh90pP6VoMKzCBDS94qmlzQ,3870
|
20
|
-
bella_openapi-1.0.3.dist-info/licenses/LICENSE,sha256=O-0zMbcEi6wXz1DiSdVgzMlQjJcNqNe5KDv08uYzqR0,1055
|
21
|
-
bella_openapi-1.0.3.dist-info/METADATA,sha256=
|
22
|
-
bella_openapi-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
23
|
-
bella_openapi-1.0.3.dist-info/top_level.txt,sha256=EZuq3F6tKeF-vmZQi6_S2XzmES7SPW7HAbGN1Uv9vN8,14
|
24
|
-
bella_openapi-1.0.3.dist-info/RECORD,,
|
20
|
+
bella_openapi-1.0.3.2.dist-info/licenses/LICENSE,sha256=O-0zMbcEi6wXz1DiSdVgzMlQjJcNqNe5KDv08uYzqR0,1055
|
21
|
+
bella_openapi-1.0.3.2.dist-info/METADATA,sha256=YAppGGwIIymFj1Q5QVFgHK3Q31s2t0KV2Z7FO-4rLwo,9480
|
22
|
+
bella_openapi-1.0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
23
|
+
bella_openapi-1.0.3.2.dist-info/top_level.txt,sha256=EZuq3F6tKeF-vmZQi6_S2XzmES7SPW7HAbGN1Uv9vN8,14
|
24
|
+
bella_openapi-1.0.3.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|