PyPI - bella-openapi - Versions diffs - 1.0.2.4__tar.gz → 1.0.3__tar.gz - Mend

bella-openapi 1.0.2.4.tar.gz → 1.0.3.tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (30) hide show

{bella_openapi-1.0.2.4/src/bella_openapi.egg-info → bella_openapi-1.0.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bella-openapi
-Version: 1.0.2.4
+Version: 1.0.3
 Summary: client for openapi service.
 Home-page:
 Author: ['tangxiaolong', 'fanqiangwei', 'zhangxiaojia', 'liumin', 'wangyukun']

{bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/setup.py RENAMED Viewed

@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
 SHORT = "client for openapi service."
-__version__ = "1.0.2.4"
+__version__ = "1.0.3"
 __author__ = ["tangxiaolong", "fanqiangwei", "zhangxiaojia", 'liumin', 'wangyukun']
 __email__ = ''
 readme_path = 'README.md'

{bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/__init__.py RENAMED Viewed

@@ -3,19 +3,8 @@ from .log import operation_log, submit_log
 from .openapi_contexvar import trace_id_context, caller_id_context, request_url_context
 from .auth_billing import ErrorInfo, async_authenticate_decorator_args, authenticate_user, print_context, \
 get_context, set_context, clean_context, report
-from .entity import (
-    StandardDomTree,
-    StandardNode,
-    SourceFile,
-    StandardPosition,
-    StandardImage,
-    Cell,
-    StandardRow,
-    StandardBaseElement,
-    StandardElement,
-    StandardTableElement,
-    StandardImageElement
-)
+from .entity import StandardDomTree, StandardNode, SourceFile, StandardPosition, StandardImage, Cell, \
+    StandardRow, StandardBaseElement, StandardElement, StandardTableElement, StandardImageElement
 __all__ = ["validate_token", "operation_log",
            "support_model",

{bella_openapi-1.0.2.4 → bella_openapi-1.0.3}/src/bella_openapi/entity/standard_domtree.py RENAMED Viewed

@@ -31,16 +31,7 @@ layout_type_mapping = {
     "TableNote": "Text",  # 目前实际解析出来没有
 }
-# 自研模型均用gpt-4计算（可能有误差，可忽略）
-def count_tokens(text: str, model: str = "gpt-4") -> int:
-    if not text:
-        return 0
-    encoding = tiktoken.encoding_for_model(model)
-    tokens = encoding.encode(text)
-    # 计算标记列表的长度，即标记的数量
-    token_count = len(tokens)
-    # 返回标记的数量
-    return token_count
 class SourceFile(BaseModel):
     id: str  # 文件ID，唯一标识符，类型为string
@@ -315,18 +306,16 @@ class StandardDomTree(BaseModel):
                 # 检查前一个节点
                 if i > 0:
                     prev_sibling = node.children[i - 1]
-                    if prev_sibling.element and prev_sibling.element.type == target_type:
-                        # 找到对应类型的前一个兄弟节点，合并节点
-                        if cls._merge_nodes(prev_sibling, current, target_type):
-                            merged = True
+                    # 找到对应类型的前一个兄弟节点，合并节点
+                    merged = ( prev_sibling.element and prev_sibling.element.type == target_type and
+                               cls._merge_nodes(prev_sibling, current, target_type))
                 # 如果没有与前一个节点合并，检查后一个节点
                 if not merged and i < len(node.children) - 1:
                     next_sibling = node.children[i + 1]
-                    if next_sibling.element and next_sibling.element.type == target_type:
-                        # 找到对应类型的后一个兄弟节点，合并节点
-                        if cls._merge_nodes(next_sibling, current, target_type):
-                            merged = True
+                    # 找到对应类型的后一个兄弟节点，合并节点
+                    merged = (next_sibling.element and next_sibling.element.type == target_type and
+                              cls._merge_nodes(next_sibling, current, target_type))
                 # 如果没有找到对应类型的兄弟节点，将当前节点类型改为 Text
                 if not merged:
@@ -357,23 +346,25 @@ class StandardDomTree(BaseModel):
         Returns:
             bool: 是否成功合并
         """
-        if node_type == 'Figure' and isinstance(target_node.element, StandardImageElement):
-            # 将 FigureName 的文本作为 Figure 的 name
+        # 定义节点类型与元素类型的映射
+        type_element_mapping = {
+            'Figure': StandardImageElement,
+            'Table': StandardTableElement
+        }
+        can_merge = (node_type in type_element_mapping and
+            isinstance(target_node.element, type_element_mapping[node_type]))
+        # 检查节点类型是否支持且目标节点元素类型匹配
+        if can_merge:
+            # 将源节点的文本作为目标节点的 name
             target_node.element.name = source_node.element.text
             # 更新 tokens 计数
             target_node.tokens += source_node.tokens
-            # 将 FigureName 的位置添加到 Figure 中
+            # 将源节点的位置添加到目标节点中
             target_node.element.positions += source_node.element.positions
-            return True
-        elif node_type == 'Table' and isinstance(target_node.element, StandardTableElement):
-            # 将 TableName 的文本作为 Table 的 name
-            target_node.element.name = source_node.element.text
-            # 更新 tokens 计数
-            target_node.tokens += source_node.tokens
-            # 将 Table 的位置添加到 Figure 中
-            target_node.element.positions += source_node.element.positions
-            return True
-        return False
+        return can_merge
     @classmethod
     def _from_domtree_node_to_base_info(cls, node: dict) -> Optional[StandardNode]:
@@ -440,7 +431,7 @@ class StandardDomTree(BaseModel):
                                       cell_data['end_col']],
                                 text=cell_text,
                                 # 目前只会有一个元素,且是Text类型，Path重新从头编号，相对cell是root
-                                nodes=[StandardNode(summary="", tokens=count_tokens(cell_text), path=[1], children=[],
+                                nodes=[StandardNode(summary="", tokens=cls.count_tokens(cell_text), path=[1], children=[],
                                                     element=StandardElement(
                                                         type='Text',
                                                         positions=[],
@@ -491,7 +482,7 @@ class StandardDomTree(BaseModel):
                     standard_node.children.append(standard_child)
         # 计算 token 数量：自身 text 的 token 数量 + 子节点 token 数量
-        tokens = count_tokens(text)
+        tokens = cls.count_tokens(text)
         for child in standard_node.children:
             tokens += child.tokens
@@ -500,3 +491,23 @@ class StandardDomTree(BaseModel):
         return standard_node
+    @classmethod
+    def count_tokens(cls, text: str) -> int:
+        """
+        计算文本的token数量
+        Args:
+            text: 要计算的文本
+        Returns:
+            int: token数量
+        """
+        model = "gpt-4" # 使用模型默认为gpt-4
+        if not text:
+            return 0
+        encoding = tiktoken.encoding_for_model(model)
+        tokens = encoding.encode(text)
+        # 计算标记列表的长度，即标记的数量
+        token_count = len(tokens)
+        # 返回标记的数量
+        return token_count

{bella_openapi-1.0.2.4 → bella_openapi-1.0.3/src/bella_openapi.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: bella-openapi
-Version: 1.0.2.4
+Version: 1.0.3
 Summary: client for openapi service.
 Home-page:
 Author: ['tangxiaolong', 'fanqiangwei', 'zhangxiaojia', 'liumin', 'wangyukun']