bella-openapi 1.0.2.3__py3-none-any.whl → 1.0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bella_openapi/__init__.py CHANGED
@@ -3,7 +3,19 @@ from .log import operation_log, submit_log
3
3
  from .openapi_contexvar import trace_id_context, caller_id_context, request_url_context
4
4
  from .auth_billing import ErrorInfo, async_authenticate_decorator_args, authenticate_user, print_context, \
5
5
  get_context, set_context, clean_context, report
6
- from .domtree import StandardDomTree, StandardNode
6
+ from .entity import (
7
+ StandardDomTree,
8
+ StandardNode,
9
+ SourceFile,
10
+ StandardPosition,
11
+ StandardImage,
12
+ Cell,
13
+ StandardRow,
14
+ StandardBaseElement,
15
+ StandardElement,
16
+ StandardTableElement,
17
+ StandardImageElement
18
+ )
7
19
 
8
20
  __all__ = ["validate_token", "operation_log",
9
21
  "support_model",
@@ -22,5 +34,14 @@ __all__ = ["validate_token", "operation_log",
22
34
  "clean_context",
23
35
  "report",
24
36
  "StandardDomTree",
25
- "StandardNode"
37
+ "StandardNode",
38
+ "SourceFile",
39
+ "StandardPosition",
40
+ "StandardImage",
41
+ "Cell",
42
+ "StandardRow",
43
+ "StandardBaseElement",
44
+ "StandardElement",
45
+ "StandardTableElement",
46
+ "StandardImageElement"
26
47
  ]
@@ -0,0 +1,27 @@
1
+ from .standard_domtree import (
2
+ StandardDomTree,
3
+ StandardNode,
4
+ SourceFile,
5
+ StandardPosition,
6
+ StandardImage,
7
+ Cell,
8
+ StandardRow,
9
+ StandardBaseElement,
10
+ StandardElement,
11
+ StandardTableElement,
12
+ StandardImageElement
13
+ )
14
+
15
+ __all__ = [
16
+ "StandardDomTree",
17
+ "StandardNode",
18
+ "SourceFile",
19
+ "StandardPosition",
20
+ "StandardImage",
21
+ "Cell",
22
+ "StandardRow",
23
+ "StandardBaseElement",
24
+ "StandardElement",
25
+ "StandardTableElement",
26
+ "StandardImageElement"
27
+ ]
@@ -10,8 +10,7 @@ StandardDomTree - 新的DOM树协议定义
10
10
 
11
11
  from typing import List, Optional, Literal, Any, Union
12
12
  from pydantic import BaseModel, Field
13
-
14
- from bella_openapi.domtree.utils import count_tokens
13
+ import tiktoken
15
14
 
16
15
  # 布局类型映射表:将旧解析器的布局类型映射到新的标准类型
17
16
  # 注意:这个映射表是临时的,后续计划统一布局类型定义
@@ -33,6 +32,7 @@ layout_type_mapping = {
33
32
  }
34
33
 
35
34
 
35
+
36
36
  class SourceFile(BaseModel):
37
37
  id: str # 文件ID,唯一标识符,类型为string
38
38
  name: str # 文件名,文档的名称,类型为string
@@ -431,7 +431,7 @@ class StandardDomTree(BaseModel):
431
431
  cell_data['end_col']],
432
432
  text=cell_text,
433
433
  # 目前只会有一个元素,且是Text类型,Path重新从头编号,相对cell是root
434
- nodes=[StandardNode(summary="", tokens=count_tokens(cell_text), path=[1], children=[],
434
+ nodes=[StandardNode(summary="", tokens=cls.count_tokens(cell_text), path=[1], children=[],
435
435
  element=StandardElement(
436
436
  type='Text',
437
437
  positions=[],
@@ -482,7 +482,7 @@ class StandardDomTree(BaseModel):
482
482
  standard_node.children.append(standard_child)
483
483
 
484
484
  # 计算 token 数量:自身 text 的 token 数量 + 子节点 token 数量
485
- tokens = count_tokens(text)
485
+ tokens = cls.count_tokens(text)
486
486
  for child in standard_node.children:
487
487
  tokens += child.tokens
488
488
 
@@ -490,3 +490,24 @@ class StandardDomTree(BaseModel):
490
490
  standard_node.tokens = tokens
491
491
 
492
492
  return standard_node
493
+
494
+ @classmethod
495
+ def count_tokens(cls, text: str, model: str = "gpt-4") -> int:
496
+ """
497
+ 计算文本的token数量
498
+
499
+ Args:
500
+ text: 要计算的文本
501
+ model: 使用的模型名称,默认为gpt-4
502
+
503
+ Returns:
504
+ int: token数量
505
+ """
506
+ if not text:
507
+ return 0
508
+ encoding = tiktoken.encoding_for_model(model)
509
+ tokens = encoding.encode(text)
510
+ # 计算标记列表的长度,即标记的数量
511
+ token_count = len(tokens)
512
+ # 返回标记的数量
513
+ return token_count
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bella-openapi
3
- Version: 1.0.2.3
3
+ Version: 1.0.2.5
4
4
  Summary: client for openapi service.
5
5
  Home-page:
6
6
  Author: ['tangxiaolong', 'fanqiangwei', 'zhangxiaojia', 'liumin', 'wangyukun']
@@ -1,4 +1,4 @@
1
- bella_openapi/__init__.py,sha256=NPsPxysa625xRIKgL6je4q1ZSJfnVar4Mb-2RljNwfs,1006
1
+ bella_openapi/__init__.py,sha256=Iq8YjMYBNWJvQvlu9gFy8uxG8Xf06jPaCyZvXtAW2jA,1462
2
2
  bella_openapi/auth_billing.py,sha256=Hn0KS8GuG48etnvnd1Faej4IfFXD3tjzalUzDnpZh7Q,3520
3
3
  bella_openapi/authorize.py,sha256=cO6J-wx9dmmkDAeqpXT7QlyCr13hO-HSC5SWQSw2gZw,2150
4
4
  bella_openapi/config.py,sha256=Dn8vnToDaOesPGboauxCCwNrW5awQLeSkmDjNjXS4bQ,319
@@ -13,13 +13,12 @@ bella_openapi/bella_trace/record_log.py,sha256=qSZXp_VTzIzMVlQNnZKLIbmyGvggFSQL5
13
13
  bella_openapi/bella_trace/trace_requests.py,sha256=ADA8J_gbC3TwUo5LWQ3c_yTmCSZRaWzq1FC0iUOnst0,1370
14
14
  bella_openapi/console/__init__.py,sha256=uSfr5v6JLRSqTlftjK_ZU1pnbkEyxAPbuQbMyYX_phk,64
15
15
  bella_openapi/console/models.py,sha256=Hh1UuYHIxFtF9r5QK-pSJPFrSqbZUHv6spLvPbCeX08,1274
16
- bella_openapi/domtree/__init__.py,sha256=qxFd6d9WZ4ThWQAFtHAhSseo2h2FB2-5KZWyGOooAbo,107
17
- bella_openapi/domtree/standard_domtree.py,sha256=lIJP2gReyF2RgAxtyncSB6xl_XAzKCpOb-FTNaf-MIw,19952
18
- bella_openapi/domtree/utils.py,sha256=-ItZYh9Gj8QyOkZzjCC5xWPYU-FkzJllGC0oUO21Kp4,394
16
+ bella_openapi/entity/__init__.py,sha256=zzsYYg859pzPSgx1Py2kxB2ozQ0tt4OtTatBtpm2bAw,512
17
+ bella_openapi/entity/standard_domtree.py,sha256=EzeYH_UH8MfcVucOcviof8qAIZ5BULVu2DO2EBN7F-c,20510
19
18
  bella_openapi/middleware/__init__.py,sha256=XWvZG1xO30ZXIn10YVYthmT1BV-9fonMEP_jVRZbAlQ,157
20
19
  bella_openapi/middleware/context_middleware.py,sha256=YawQyKAxMzvlDs_MxcuQKh90pP6VoMKzCBDS94qmlzQ,3870
21
- bella_openapi-1.0.2.3.dist-info/licenses/LICENSE,sha256=O-0zMbcEi6wXz1DiSdVgzMlQjJcNqNe5KDv08uYzqR0,1055
22
- bella_openapi-1.0.2.3.dist-info/METADATA,sha256=PbC6CexkWl5ZOR1_0dXPtD6HpydmJ9H_TsOMRH0Kp_E,9377
23
- bella_openapi-1.0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- bella_openapi-1.0.2.3.dist-info/top_level.txt,sha256=EZuq3F6tKeF-vmZQi6_S2XzmES7SPW7HAbGN1Uv9vN8,14
25
- bella_openapi-1.0.2.3.dist-info/RECORD,,
20
+ bella_openapi-1.0.2.5.dist-info/licenses/LICENSE,sha256=O-0zMbcEi6wXz1DiSdVgzMlQjJcNqNe5KDv08uYzqR0,1055
21
+ bella_openapi-1.0.2.5.dist-info/METADATA,sha256=ohCbeW5mv4uFMn3NgoscDQxEBKW7zi168nRfIbgltc0,9377
22
+ bella_openapi-1.0.2.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
+ bella_openapi-1.0.2.5.dist-info/top_level.txt,sha256=EZuq3F6tKeF-vmZQi6_S2XzmES7SPW7HAbGN1Uv9vN8,14
24
+ bella_openapi-1.0.2.5.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- from .standard_domtree import StandardDomTree, StandardNode
2
-
3
- __all__ = ["StandardDomTree", "StandardNode"]
@@ -1,12 +0,0 @@
1
- import tiktoken
2
-
3
- # 自研模型均用gpt-4计算(可能有误差,可忽略)
4
- def count_tokens(text: str, model: str = "gpt-4") -> int:
5
- if not text:
6
- return 0
7
- encoding = tiktoken.encoding_for_model(model)
8
- tokens = encoding.encode(text)
9
- # 计算标记列表的长度,即标记的数量
10
- token_count = len(tokens)
11
- # 返回标记的数量
12
- return token_count