bella-openapi 1.0.3.1__py3-none-any.whl → 1.0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bella_openapi/entity/standard_domtree.py +66 -36
- {bella_openapi-1.0.3.1.dist-info → bella_openapi-1.0.4.1.dist-info}/METADATA +5 -2
- {bella_openapi-1.0.3.1.dist-info → bella_openapi-1.0.4.1.dist-info}/RECORD +6 -6
- {bella_openapi-1.0.3.1.dist-info → bella_openapi-1.0.4.1.dist-info}/WHEEL +0 -0
- {bella_openapi-1.0.3.1.dist-info → bella_openapi-1.0.4.1.dist-info}/licenses/LICENSE +0 -0
- {bella_openapi-1.0.3.1.dist-info → bella_openapi-1.0.4.1.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,7 @@ StandardDomTree - 新的DOM树协议定义
|
|
9
9
|
"""
|
10
10
|
|
11
11
|
from typing import List, Optional, Literal, Any, Union
|
12
|
-
from pydantic import BaseModel, Field
|
12
|
+
from pydantic import BaseModel, Field, root_validator
|
13
13
|
import tiktoken
|
14
14
|
|
15
15
|
# 布局类型映射表:将旧解析器的布局类型映射到新的标准类型
|
@@ -77,12 +77,26 @@ class StandardTableElement(StandardBaseElement):
|
|
77
77
|
name: Optional[str] = None # 如果类型是Table、Figure为其名字
|
78
78
|
description: Optional[str] = None # 如果类型是Table、Figure为其描述
|
79
79
|
rows: List[StandardRow] = Field(default_factory=list) # 表格行
|
80
|
+
|
81
|
+
@root_validator(pre=True)
|
82
|
+
def validate_table_type(cls, values):
|
83
|
+
"""确保只有type='Table'的数据才能创建StandardTableElement"""
|
84
|
+
if values.get('type') != 'Table':
|
85
|
+
raise ValueError(f"StandardTableElement只能用于type='Table'的数据,当前type='{values.get('type')}'")
|
86
|
+
return values
|
80
87
|
|
81
88
|
|
82
89
|
class StandardImageElement(StandardElement):
|
83
90
|
name: Optional[str] = None # 如果类型是Table、Figure为其名字
|
84
91
|
description: Optional[str] = None # 如果类型是Table、Figure为其描述
|
85
92
|
image: Optional[StandardImage] = None # 图片信息
|
93
|
+
|
94
|
+
@root_validator(pre=True)
|
95
|
+
def validate_image_type(cls, values):
|
96
|
+
"""确保只有type='Figure'的数据才能创建StandardImageElement"""
|
97
|
+
if values.get('type') != 'Figure':
|
98
|
+
raise ValueError(f"StandardImageElement只能用于type='Figure'的数据,当前type='{values.get('type')}'")
|
99
|
+
return values
|
86
100
|
|
87
101
|
|
88
102
|
class StandardNode(BaseModel):
|
@@ -90,8 +104,9 @@ class StandardNode(BaseModel):
|
|
90
104
|
summary: Optional[str] = None # 摘要,文档的简要概述
|
91
105
|
tokens: Optional[int] = None # token预估数量,文档中的token数量估计
|
92
106
|
path: Optional[List[int]] = Field(default_factory=list) # 编号的层级信息,例如:1.2.1
|
93
|
-
element: Optional[Union[
|
107
|
+
element: Optional[Union[StandardTableElement, StandardImageElement, StandardElement]] = None # 元素信息,当前节点的元素详情
|
94
108
|
children: Optional[List["StandardNode"]] = Field(default_factory=list) # 子节点信息,当前节点的所有子节点
|
109
|
+
|
95
110
|
|
96
111
|
|
97
112
|
class StandardDomTree(BaseModel):
|
@@ -129,7 +144,7 @@ class StandardDomTree(BaseModel):
|
|
129
144
|
# 添加表格名称
|
130
145
|
if node.element.name:
|
131
146
|
markdown_res += f"**{node.element.name}**\n\n"
|
132
|
-
table_md = self.
|
147
|
+
table_md = self._list_to_html_table(node.element.rows)
|
133
148
|
markdown_res += f"{table_md}\n\n"
|
134
149
|
# 添加表格描述
|
135
150
|
if node.element.description:
|
@@ -168,25 +183,28 @@ class StandardDomTree(BaseModel):
|
|
168
183
|
quoted_lines = ['> ' + line for line in lines]
|
169
184
|
return '\n'.join(quoted_lines)
|
170
185
|
|
171
|
-
def
|
172
|
-
"""将表格行转换为
|
186
|
+
def _list_to_html_table(self, rows: List[StandardRow]) -> str:
|
187
|
+
"""将表格行转换为HTML表格"""
|
173
188
|
if not rows:
|
174
189
|
return ""
|
175
190
|
|
176
|
-
|
177
|
-
|
178
|
-
# 添加表头
|
179
|
-
if rows:
|
180
|
-
header_cells = [cell.text or "" for cell in rows[0].cells]
|
181
|
-
markdown_table += "| " + " | ".join(header_cells) + " |\n"
|
182
|
-
markdown_table += "| " + " | ".join(["---"] * len(header_cells)) + " |\n"
|
183
|
-
|
184
|
-
# 添加数据行
|
191
|
+
html_text = "<table>"
|
185
192
|
for row in rows:
|
186
|
-
|
187
|
-
|
193
|
+
html_text += "<tr>"
|
194
|
+
for cell in row.cells:
|
195
|
+
# 从path中提取rowspan和colspan信息
|
196
|
+
if len(cell.path) >= 4:
|
197
|
+
start_row, end_row, start_col, end_col = cell.path[:4]
|
198
|
+
rowspan = end_row - start_row + 1
|
199
|
+
colspan = end_col - start_col + 1
|
200
|
+
else:
|
201
|
+
rowspan = colspan = 1
|
188
202
|
|
189
|
-
|
203
|
+
cell_text = cell.text or ""
|
204
|
+
html_text += f"<td rowspan='{rowspan}' colspan='{colspan}'>{cell_text}</td>"
|
205
|
+
html_text += "</tr>"
|
206
|
+
html_text += "</table>"
|
207
|
+
return html_text
|
190
208
|
|
191
209
|
@classmethod
|
192
210
|
def from_domtree_dict(cls, domtree: dict, file_info):
|
@@ -397,19 +415,22 @@ class StandardDomTree(BaseModel):
|
|
397
415
|
url=element['image_link']
|
398
416
|
)
|
399
417
|
|
400
|
-
# 创建
|
401
|
-
|
418
|
+
# 创建StandardImageElement实例
|
419
|
+
image_element = StandardImageElement(
|
420
|
+
type=element_type,
|
421
|
+
positions=positions,
|
422
|
+
name="",
|
423
|
+
description="",
|
424
|
+
text=text,
|
425
|
+
image=image,
|
426
|
+
)
|
427
|
+
|
428
|
+
# 使用construct方法跳过validator,保持正确的element类型
|
429
|
+
standard_node = StandardNode.construct(
|
402
430
|
summary="",
|
403
431
|
tokens=0, # 先设置为 0,后面再计算
|
404
432
|
path=[], # 初始化为空列表,后续再计算
|
405
|
-
element=
|
406
|
-
type=element_type,
|
407
|
-
positions=positions,
|
408
|
-
name="",
|
409
|
-
description="",
|
410
|
-
text=text,
|
411
|
-
image=image,
|
412
|
-
),
|
433
|
+
element=image_element,
|
413
434
|
children=[]
|
414
435
|
)
|
415
436
|
elif element_type == "Table":
|
@@ -444,17 +465,21 @@ class StandardDomTree(BaseModel):
|
|
444
465
|
# 将所有单元格的文本合并,用于计算 token 数量
|
445
466
|
text = " ".join(cell_texts)
|
446
467
|
|
447
|
-
|
468
|
+
# 创建StandardTableElement实例
|
469
|
+
table_element = StandardTableElement(
|
470
|
+
type=element_type,
|
471
|
+
positions=positions,
|
472
|
+
name="",
|
473
|
+
description="",
|
474
|
+
rows=rows
|
475
|
+
)
|
476
|
+
|
477
|
+
# 使用construct方法跳过validator,保持正确的element类型
|
478
|
+
standard_node = StandardNode.construct(
|
448
479
|
summary="",
|
449
480
|
tokens=0, # 先设置为 0,后面再计算
|
450
481
|
path=[], # 初始化为空列表,后续再计算
|
451
|
-
element=
|
452
|
-
type=element_type,
|
453
|
-
positions=positions,
|
454
|
-
name="",
|
455
|
-
description="",
|
456
|
-
rows=rows
|
457
|
-
),
|
482
|
+
element=table_element,
|
458
483
|
children=[]
|
459
484
|
)
|
460
485
|
else:
|
@@ -507,4 +532,9 @@ class StandardDomTree(BaseModel):
|
|
507
532
|
# 计算标记列表的长度,即标记的数量
|
508
533
|
token_count = len(tokens)
|
509
534
|
# 返回标记的数量
|
510
|
-
return token_count
|
535
|
+
return token_count
|
536
|
+
|
537
|
+
|
538
|
+
# 更新forward references
|
539
|
+
StandardNode.update_forward_refs()
|
540
|
+
Cell.update_forward_refs()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: bella-openapi
|
3
|
-
Version: 1.0.3.1
|
3
|
+
Version: 1.0.4.1
|
4
4
|
Summary: client for openapi service.
|
5
5
|
Home-page:
|
6
6
|
Author: ['tangxiaolong', 'fanqiangwei', 'zhangxiaojia', 'liumin', 'wangyukun']
|
@@ -12,7 +12,8 @@ License-File: LICENSE
|
|
12
12
|
Requires-Dist: httpx<=0.26.0,>=0.10.0
|
13
13
|
Requires-Dist: Werkzeug==3.0.1
|
14
14
|
Requires-Dist: tiktoken>=0.5.0
|
15
|
-
Requires-Dist: pydantic==
|
15
|
+
Requires-Dist: pydantic==1.10.14
|
16
|
+
Requires-Dist: fastapi==0.110.0
|
16
17
|
Dynamic: author
|
17
18
|
Dynamic: classifier
|
18
19
|
Dynamic: description
|
@@ -260,3 +261,5 @@ get接口地址:/v1/openapi/log/{requestId}
|
|
260
261
|
* rename 包名
|
261
262
|
* 1.0.2
|
262
263
|
* 修复bug
|
264
|
+
* 1.0.3
|
265
|
+
* 新增standard domtree
|
@@ -14,11 +14,11 @@ bella_openapi/bella_trace/trace_requests.py,sha256=ADA8J_gbC3TwUo5LWQ3c_yTmCSZRa
|
|
14
14
|
bella_openapi/console/__init__.py,sha256=uSfr5v6JLRSqTlftjK_ZU1pnbkEyxAPbuQbMyYX_phk,64
|
15
15
|
bella_openapi/console/models.py,sha256=Hh1UuYHIxFtF9r5QK-pSJPFrSqbZUHv6spLvPbCeX08,1274
|
16
16
|
bella_openapi/entity/__init__.py,sha256=zzsYYg859pzPSgx1Py2kxB2ozQ0tt4OtTatBtpm2bAw,512
|
17
|
-
bella_openapi/entity/standard_domtree.py,sha256=
|
17
|
+
bella_openapi/entity/standard_domtree.py,sha256=X6C9azEVahiz7OMFBUC3RLDZEKv1pMiQDD1DT8ILPh0,21375
|
18
18
|
bella_openapi/middleware/__init__.py,sha256=XWvZG1xO30ZXIn10YVYthmT1BV-9fonMEP_jVRZbAlQ,157
|
19
19
|
bella_openapi/middleware/context_middleware.py,sha256=YawQyKAxMzvlDs_MxcuQKh90pP6VoMKzCBDS94qmlzQ,3870
|
20
|
-
bella_openapi-1.0.
|
21
|
-
bella_openapi-1.0.
|
22
|
-
bella_openapi-1.0.
|
23
|
-
bella_openapi-1.0.
|
24
|
-
bella_openapi-1.0.
|
20
|
+
bella_openapi-1.0.4.1.dist-info/licenses/LICENSE,sha256=O-0zMbcEi6wXz1DiSdVgzMlQjJcNqNe5KDv08uYzqR0,1055
|
21
|
+
bella_openapi-1.0.4.1.dist-info/METADATA,sha256=ly-PHCzIuwpyNxhajy5HNbBI4KGHpjnJAwB9wg6VeZQ,9480
|
22
|
+
bella_openapi-1.0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
23
|
+
bella_openapi-1.0.4.1.dist-info/top_level.txt,sha256=EZuq3F6tKeF-vmZQi6_S2XzmES7SPW7HAbGN1Uv9vN8,14
|
24
|
+
bella_openapi-1.0.4.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|