bella-openapi 1.0.2.4__tar.gz → 1.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {bella_openapi-1.0.2.4/src/bella_openapi.egg-info → bella_openapi-1.0.3}/PKG-INFO +1 -1
  2. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/setup.py +1 -1
  3. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/__init__.py +2 -13
  4. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/entity/standard_domtree.py +44 -33
  5. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3/src/bella_openapi.egg-info}/PKG-INFO +1 -1
  6. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/LICENSE +0 -0
  7. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/README.md +0 -0
  8. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/setup.cfg +0 -0
  9. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/auth_billing.py +0 -0
  10. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/authorize.py +0 -0
  11. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/bella_trace/__init__.py +0 -0
  12. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/bella_trace/_context.py +0 -0
  13. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/bella_trace/fastapi_interceptor.py +0 -0
  14. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/bella_trace/record_log.py +0 -0
  15. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/bella_trace/trace_requests.py +0 -0
  16. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/config.py +0 -0
  17. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/console/__init__.py +0 -0
  18. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/console/models.py +0 -0
  19. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/entity/__init__.py +0 -0
  20. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/exception.py +0 -0
  21. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/log.py +0 -0
  22. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/middleware/__init__.py +0 -0
  23. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/middleware/context_middleware.py +0 -0
  24. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/openapi_contexvar.py +0 -0
  25. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/schema.py +0 -0
  26. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi.egg-info/SOURCES.txt +0 -0
  27. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi.egg-info/dependency_links.txt +0 -0
  28. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi.egg-info/not-zip-safe +0 -0
  29. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi.egg-info/requires.txt +0 -0
  30. {bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bella-openapi
3
- Version: 1.0.2.4
3
+ Version: 1.0.3
4
4
  Summary: client for openapi service.
5
5
  Home-page:
6
6
  Author: ['tangxiaolong', 'fanqiangwei', 'zhangxiaojia', 'liumin', 'wangyukun']
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
3
3
 
4
4
  SHORT = "client for openapi service."
5
5
 
6
- __version__ = "1.0.2.4"
6
+ __version__ = "1.0.3"
7
7
  __author__ = ["tangxiaolong", "fanqiangwei", "zhangxiaojia", 'liumin', 'wangyukun']
8
8
  __email__ = ''
9
9
  readme_path = 'README.md'
@@ -3,19 +3,8 @@ from .log import operation_log, submit_log
3
3
  from .openapi_contexvar import trace_id_context, caller_id_context, request_url_context
4
4
  from .auth_billing import ErrorInfo, async_authenticate_decorator_args, authenticate_user, print_context, \
5
5
  get_context, set_context, clean_context, report
6
- from .entity import (
7
- StandardDomTree,
8
- StandardNode,
9
- SourceFile,
10
- StandardPosition,
11
- StandardImage,
12
- Cell,
13
- StandardRow,
14
- StandardBaseElement,
15
- StandardElement,
16
- StandardTableElement,
17
- StandardImageElement
18
- )
6
+ from .entity import StandardDomTree, StandardNode, SourceFile, StandardPosition, StandardImage, Cell, \
7
+ StandardRow, StandardBaseElement, StandardElement, StandardTableElement, StandardImageElement
19
8
 
20
9
  __all__ = ["validate_token", "operation_log",
21
10
  "support_model",
@@ -31,16 +31,7 @@ layout_type_mapping = {
31
31
  "TableNote": "Text", # 目前实际解析出来没有
32
32
  }
33
33
 
34
- # 自研模型均用gpt-4计算(可能有误差,可忽略)
35
- def count_tokens(text: str, model: str = "gpt-4") -> int:
36
- if not text:
37
- return 0
38
- encoding = tiktoken.encoding_for_model(model)
39
- tokens = encoding.encode(text)
40
- # 计算标记列表的长度,即标记的数量
41
- token_count = len(tokens)
42
- # 返回标记的数量
43
- return token_count
34
+
44
35
 
45
36
  class SourceFile(BaseModel):
46
37
  id: str # 文件ID,唯一标识符,类型为string
@@ -315,18 +306,16 @@ class StandardDomTree(BaseModel):
315
306
  # 检查前一个节点
316
307
  if i > 0:
317
308
  prev_sibling = node.children[i - 1]
318
- if prev_sibling.element and prev_sibling.element.type == target_type:
319
- # 找到对应类型的前一个兄弟节点,合并节点
320
- if cls._merge_nodes(prev_sibling, current, target_type):
321
- merged = True
309
+ # 找到对应类型的前一个兄弟节点,合并节点
310
+ merged = ( prev_sibling.element and prev_sibling.element.type == target_type and
311
+ cls._merge_nodes(prev_sibling, current, target_type))
322
312
 
323
313
  # 如果没有与前一个节点合并,检查后一个节点
324
314
  if not merged and i < len(node.children) - 1:
325
315
  next_sibling = node.children[i + 1]
326
- if next_sibling.element and next_sibling.element.type == target_type:
327
- # 找到对应类型的后一个兄弟节点,合并节点
328
- if cls._merge_nodes(next_sibling, current, target_type):
329
- merged = True
316
+ # 找到对应类型的后一个兄弟节点,合并节点
317
+ merged = (next_sibling.element and next_sibling.element.type == target_type and
318
+ cls._merge_nodes(next_sibling, current, target_type))
330
319
 
331
320
  # 如果没有找到对应类型的兄弟节点,将当前节点类型改为 Text
332
321
  if not merged:
@@ -357,23 +346,25 @@ class StandardDomTree(BaseModel):
357
346
  Returns:
358
347
  bool: 是否成功合并
359
348
  """
360
- if node_type == 'Figure' and isinstance(target_node.element, StandardImageElement):
361
- # FigureName 的文本作为 Figure 的 name
349
+ # 定义节点类型与元素类型的映射
350
+ type_element_mapping = {
351
+ 'Figure': StandardImageElement,
352
+ 'Table': StandardTableElement
353
+ }
354
+
355
+ can_merge = (node_type in type_element_mapping and
356
+ isinstance(target_node.element, type_element_mapping[node_type]))
357
+
358
+ # 检查节点类型是否支持且目标节点元素类型匹配
359
+ if can_merge:
360
+ # 将源节点的文本作为目标节点的 name
362
361
  target_node.element.name = source_node.element.text
363
362
  # 更新 tokens 计数
364
363
  target_node.tokens += source_node.tokens
365
- # 将 FigureName 的位置添加到 Figure 中
364
+ # 将源节点的位置添加到目标节点中
366
365
  target_node.element.positions += source_node.element.positions
367
- return True
368
- elif node_type == 'Table' and isinstance(target_node.element, StandardTableElement):
369
- # 将 TableName 的文本作为 Table 的 name
370
- target_node.element.name = source_node.element.text
371
- # 更新 tokens 计数
372
- target_node.tokens += source_node.tokens
373
- # 将 Table 的位置添加到 Figure 中
374
- target_node.element.positions += source_node.element.positions
375
- return True
376
- return False
366
+
367
+ return can_merge
377
368
 
378
369
  @classmethod
379
370
  def _from_domtree_node_to_base_info(cls, node: dict) -> Optional[StandardNode]:
@@ -440,7 +431,7 @@ class StandardDomTree(BaseModel):
440
431
  cell_data['end_col']],
441
432
  text=cell_text,
442
433
  # 目前只会有一个元素,且是Text类型,Path重新从头编号,相对cell是root
443
- nodes=[StandardNode(summary="", tokens=count_tokens(cell_text), path=[1], children=[],
434
+ nodes=[StandardNode(summary="", tokens=cls.count_tokens(cell_text), path=[1], children=[],
444
435
  element=StandardElement(
445
436
  type='Text',
446
437
  positions=[],
@@ -491,7 +482,7 @@ class StandardDomTree(BaseModel):
491
482
  standard_node.children.append(standard_child)
492
483
 
493
484
  # 计算 token 数量:自身 text 的 token 数量 + 子节点 token 数量
494
- tokens = count_tokens(text)
485
+ tokens = cls.count_tokens(text)
495
486
  for child in standard_node.children:
496
487
  tokens += child.tokens
497
488
 
@@ -500,3 +491,23 @@ class StandardDomTree(BaseModel):
500
491
 
501
492
  return standard_node
502
493
 
494
+ @classmethod
495
+ def count_tokens(cls, text: str) -> int:
496
+ """
497
+ 计算文本的token数量
498
+
499
+ Args:
500
+ text: 要计算的文本
501
+
502
+ Returns:
503
+ int: token数量
504
+ """
505
+ model = "gpt-4" # 使用模型默认为gpt-4
506
+ if not text:
507
+ return 0
508
+ encoding = tiktoken.encoding_for_model(model)
509
+ tokens = encoding.encode(text)
510
+ # 计算标记列表的长度,即标记的数量
511
+ token_count = len(tokens)
512
+ # 返回标记的数量
513
+ return token_count
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bella-openapi
3
- Version: 1.0.2.4
3
+ Version: 1.0.3
4
4
  Summary: client for openapi service.
5
5
  Home-page:
6
6
  Author: ['tangxiaolong', 'fanqiangwei', 'zhangxiaojia', 'liumin', 'wangyukun']
File without changes
File without changes
File without changes