PyPI - dingo-python - Versions diffs - 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

dingo-python 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

dingo/config/input_args.py +11 -1
dingo/exec/local.py +2 -1
dingo/io/output/__init__.py +1 -0
dingo/io/output/result_info.py +16 -0
dingo/model/llm/compare/llm_html_extract_compare.py +17 -2
dingo/model/llm/compare/llm_html_extract_compare_v2.py +1 -1
dingo/model/llm/compare/llm_html_extract_compare_v3.py +221 -0
dingo/model/llm/hhh/llm_text_3h.py +1 -1
dingo/model/llm/llm_classify_qr.py +4 -2
dingo/model/llm/llm_custom_metric.py +211 -0
dingo/model/llm/llm_document_parsing_ocr.py +6 -2
dingo/model/llm/llm_factcheck_public.py +1 -1
dingo/model/llm/llm_keyword_matcher.py +1 -1
dingo/model/llm/llm_scout.py +1 -1
dingo/model/llm/mineru/vlm_document_parsing.py +4 -8
dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py +4 -8
dingo/model/llm/rag/llm_rag_answer_relevancy.py +1 -1
dingo/model/llm/rag/llm_rag_chunk_quality.py +99 -0
dingo/model/llm/rag/llm_rag_context_precision.py +1 -1
dingo/model/llm/rag/llm_rag_context_recall.py +1 -1
dingo/model/llm/rag/llm_rag_faithfulness.py +1 -1
dingo/model/llm/vlm_image_relevant.py +9 -52
dingo/model/llm/vlm_layout_quality.py +3 -54
dingo/model/model.py +37 -24
dingo/model/rule/rule_common.py +76 -0
dingo/model/rule/rule_image.py +41 -32
dingo/model/rule/scibase/__init__.py +1 -0
dingo/model/rule/scibase/rule_quanliang.py +655 -0
dingo/run/cli.py +22 -1
dingo/utils/image_loader.py +141 -0
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/METADATA +22 -1
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/RECORD +36 -30
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/WHEEL +0 -0
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/entry_points.txt +0 -0
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/licenses/LICENSE +0 -0
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/top_level.txt +0 -0

dingo/model/llm/mineru/vlm_document_parsing.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import base64
 import json
 from typing import List
@@ -7,11 +6,12 @@ from dingo.io.output.eval_detail import EvalDetail
 from dingo.model import Model
 from dingo.model.llm.base_openai import BaseOpenAI
 from dingo.utils import log
+from dingo.utils.image_loader import ImageLoader
 @Model.llm_register("VLMDocumentParsing")
 class VLMDocumentParsing(BaseOpenAI):
-    _required_fields = [RequiredField.CONTENT, RequiredField.IMAGE]
+    _required_fields = [RequiredField.IMAGE, RequiredField.CONTENT]
     prompt = r"""
             *角色*
         你是一名严谨细致的文档转换质量评估助手。
@@ -174,18 +174,14 @@ class VLMDocumentParsing(BaseOpenAI):
     @classmethod
     def build_messages(cls, input_data: Data) -> List:
-        if isinstance(input_data.image[0], str):
-            with open(input_data.image[0], "rb") as image_file:
-                base64_image = base64.b64encode(image_file.read()).decode('utf-8')
-        else:
-            base64_image = input_data.image[0]
+        image_url = ImageLoader.encode_for_api(input_data.image)
         messages = [
             {
                 "role": "user",
                 "content": [
                     {"type": "text", "text": cls.prompt},
-                    {"type": "image_url", "image_url": {"url": base64_image}},
+                    {"type": "image_url", "image_url": {"url": image_url}},
                     {"type": "text", "text": f"Markdown:\n{input_data.content}"}
                 ]
             }

dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import base64
 import json
 import re
 from typing import List
@@ -8,6 +7,7 @@ from dingo.io.output.eval_detail import EvalDetail
 from dingo.model import Model
 from dingo.model.llm.base_openai import BaseOpenAI
 from dingo.utils import log
+from dingo.utils.image_loader import ImageLoader
 @Model.llm_register("VLMDocumentParsingOCRTrain")
@@ -86,22 +86,18 @@ class VLMDocumentParsingOCRTrain(BaseOpenAI):
         ```
         """
-    _required_fields = [RequiredField.CONTENT, RequiredField.IMAGE]
+    _required_fields = [RequiredField.IMAGE, RequiredField.CONTENT]
     @classmethod
     def build_messages(cls, input_data: Data) -> List:
-        if isinstance(input_data.image[0], str):
-            with open(input_data.image[0], "rb") as image_file:
-                base64_image = base64.b64encode(image_file.read()).decode('utf-8')
-        else:
-            base64_image = input_data.image[0]
+        image_url = ImageLoader.encode_for_api(input_data.image)
         messages = [
             {
                 "role": "user",
                 "content": [
                     {"type": "text", "text": cls.prompt},
-                    {"type": "image_url", "image_url": {"url": base64_image}},
+                    {"type": "image_url", "image_url": {"url": image_url}},
                     {"type": "text", "text": f"Markdown:\n{input_data.content}"}
                 ]
             }

dingo/model/llm/rag/llm_rag_answer_relevancy.py CHANGED Viewed

@@ -43,7 +43,7 @@ class LLMRAGAnswerRelevancy(BaseOpenAI):
         "source_frameworks": "Ragas"
     }
-    _required_fields = [RequiredField.CONTENT, RequiredField.PROMPT]
+    _required_fields = [RequiredField.PROMPT, RequiredField.CONTENT]
     question_generation_prompt = """Task: Generate a question for the given answer and identify if the answer is noncommittal.

dingo/model/llm/rag/llm_rag_chunk_quality.py ADDED Viewed

@@ -0,0 +1,99 @@
+from dingo.io.input import RequiredField
+from dingo.model import Model
+from dingo.model.llm.text_quality.base_text_quality import BaseTextQuality
+@Model.llm_register("LLMChunkQuality")
+class LLMChunkQuality(BaseTextQuality):
+    # Metadata for documentation generation
+    _metric_info = {
+        "category": "RAG Retrieved Evidence Chunk Quality Metrics",
+        "metric_name": "LLMChunkQuality",
+        "description": "Assesses retrieved citation chunks referenced by LLM answers, detecting start-boundary truncation and duplicated leading text that can weaken grounded generation",
+        "examples": "examples/rag/sdk_chunk_eval.py"
+    }
+    _required_fields = [RequiredField.CONTENT]
+    prompt = """
+# Role
+You are a data quality evaluator for RAG evidence chunks that are cited by LLM answers.
+# Goal
+Determine whether this retrieved chunk is reliable as citation evidence for grounded LLM answers.
+Focus on start-boundary corruption and duplicate-leading content that can materially harm retrieval-to-generation quality, not minor imperfections.
+# Quality Dimensions
+## 1. Completeness
+**Impact**: Broken starts prevent models from learning proper chunk boundaries and coherent text patterns.
+**Check for**:
+- **Error_Start_Text_Truncation**: The beginning text is truncated (letters, words, Chinese characters, or other languages)
+  **Common corruption patterns**:
+  - Leading letter truncation, e.g.:
+    "e with agroforestry and green manure-based technologies can significantly enhance financial profits."
+  - Leading word truncation, e.g.:
+    "osition of noble gases in this ionized reservoir depends on ionization energy and plasma temperature."
+  - Leading Chinese character truncation, e.g.:
+    "烈。可以说,在中国历史上,这是一个大动荡的时期,更是一个大融合、大发展的时期。"
+- **Error_Start_Punctuation_Truncation**: The beginning punctuation is truncated
+  **Common corruption patterns**:
+  - Truncated ending punctuation from the previous sentence, e.g.:
+    ". Due to the inhibitory effects from module 2, the firing rate of these diverged bumps are very low."
+  - Truncated punctuation from the middle of the previous sentence, e.g.:
+    ", 23.27±14.57; M/F, 30/9) were found of ALL-T origin. Their specimens were mainly bone marrow $(\\Nu=26$ ) and peripheral blood $(\\Nu{=}13$ ) and subjected for molecular analysis irrespective of their CD5 expression."
+- **Error_Start_Inline_Formula_Truncation**: Inline formula at the beginning is truncated
+  **Common corruption patterns**:
+  - Truncation of inline formulas wrapped by single "$", e.g.:
+    "-}1100^{\\circ}\\mathrm{C}$ there is relatively no loss in weight on heating."
+- **Error_Start_Interline_Formula_Truncation**: Interline formula at the beginning is truncated
+  **Common corruption patterns**:
+  - Truncation of interline formulas wrapped by double "$$", e.g.:
+    "q_{D N}=-0,01\\cdot T+2,41;\n$$\n\n$q_{D N}-$ denitrifikacijos greitis, $\\mathrm{\\mgN/gVDBSM\\cdoth}$ ;"
+---
+## 2. Similarity
+**Impact**: Repeated content severely reduces learning efficiency and increases memorization risk.
+**Check for**:
+- **Error_Start_Text_Duplicate**: Repeated text at the beginning
+  **Common corruption patterns**:
+  - Start-position duplicate text, e.g.:
+    "4. Diefendorf, Barbara. From Penitence to Charity: Pious Women and the Catholic Reformation in Paris\n\n. Diefendorf, Barbara. From Penitence to Charity: Pious Women and the Catholic Reformation in Paris. New York: Oxford University Press, 2004. Di Filippo Bareggi, Claudia."
+---
+# Workflow
+1. **Quick scan**: Is the text generally readable and structurally complete?
+2. **Identify category**: If there is an issue, which dimension is most severely affected?
+3. **Validate impact**: Will this issue materially damage model training?
+4. **Assign labels**:
+   - Score: 1 (suitable) or 0 (unsuitable)
+   - Type: `Good` or one of `Completeness`, `Similarity`
+   - Name: Specific error type (from above)
+   - Reason: Brief explanation (1-2 sentences)
+---
+# Output Format
+Return JSON only: {"score": 0/1, "type": "", "name": "", "reason": ""}
+# Examples
+**Example 1 (Good - Simple)**:
+Input: "The Pythagorean theorem states that $a^2 + b^2 = c^2$ for right triangles."
+Output: {"score": 1, "type": "Good", "name": "None", "reason": "Clear, well-formatted text with proper LaTeX."}
+**Example 2 (Bad - Completeness, punctuation truncation)**:
+Input: ", and the patient was diagnosed with IE due to methicillin-resistant Staphylococcus aureus infection\n\n."
+Output: {"score": 0, "type": "Completeness", "name": "Error_Start_Punctuation_Truncation", "reason": "The beginning is incomplete and starts from truncated punctuation."}
+---
+# Input content to evaluate:
+"""

dingo/model/llm/rag/llm_rag_context_precision.py CHANGED Viewed

@@ -43,7 +43,7 @@ class LLMRAGContextPrecision(BaseOpenAI):
         "source_frameworks": "Ragas"
     }
-    _required_fields = [RequiredField.CONTENT, RequiredField.CONTEXT, RequiredField.PROMPT]
+    _required_fields = [RequiredField.PROMPT, RequiredField.CONTEXT, RequiredField.CONTENT]
     @classmethod
     def context_precision_prompt(cls, question: str, context: str, answer: str) -> str:

dingo/model/llm/rag/llm_rag_context_recall.py CHANGED Viewed

@@ -47,7 +47,7 @@ class LLMRAGContextRecall(BaseOpenAI):
         "source_frameworks": "Ragas + DeepEval"
     }
-    _required_fields = [RequiredField.CONTENT, RequiredField.CONTEXT, RequiredField.PROMPT]
+    _required_fields = [RequiredField.PROMPT, RequiredField.CONTEXT, RequiredField.CONTENT]
     prompt = """上下文召回评估提示词，用于分类陈述归因"""
     @staticmethod

dingo/model/llm/rag/llm_rag_faithfulness.py CHANGED Viewed

@@ -43,7 +43,7 @@ class LLMRAGFaithfulness(BaseOpenAI):
         "source_frameworks": "Ragas + DeepEval"
     }
-    _required_fields = [RequiredField.CONTENT, RequiredField.CONTEXT, RequiredField.PROMPT]
+    _required_fields = [RequiredField.PROMPT, RequiredField.CONTEXT, RequiredField.CONTENT]
     @staticmethod
     def statement_generator_prompt(question: str, answer: str) -> str:

dingo/model/llm/vlm_image_relevant.py CHANGED Viewed

@@ -1,15 +1,14 @@
-import base64
-import os
 from typing import List
 from dingo.io.input import Data, RequiredField
 from dingo.model import Model
 from dingo.model.llm.base_openai import BaseOpenAI
+from dingo.utils.image_loader import ImageLoader
 @Model.llm_register("VLMImageRelevant")
 class VLMImageRelevant(BaseOpenAI):
-    _required_fields = [RequiredField.PROMPT, RequiredField.CONTENT]
+    _required_fields = [RequiredField.IMAGE]
     prompt = """
     你是一个专业的图像对比分析系统。请对比分析两张图片的一致性和相关性。
@@ -42,57 +41,15 @@ class VLMImageRelevant(BaseOpenAI):
     输出格式必须为JSON：{"score": 评分, "reason": "原因说明"}
     """
-    @classmethod
-    def _encode_image(cls, image_path: str) -> str:
-        """
-        Encode a local image file to base64 data URL format.
-        If the input is already a URL, return it as is.
-        This method follows Python's standard path resolution:
-        - Relative paths are resolved relative to the current working directory
-        - Absolute paths are used as-is
-        - URLs (http://, https://, data:) are passed through unchanged
-        Args:
-            image_path: Local file path (absolute or relative) or URL
-        Returns:
-            Base64 data URL for local files, or original URL for web resources
-        Raises:
-            FileNotFoundError: If a local file path does not exist
-            RuntimeError: If the file cannot be read
-        """
-        # Pass through URLs unchanged
-        if image_path.startswith(('http://', 'https://', 'data:')):
-            return image_path
-        # Standard file path handling (relative or absolute)
-        if not os.path.isfile(image_path):
-            raise FileNotFoundError(
-                f"Image file not found: '{image_path}'\n"
-                f"Current working directory: {os.getcwd()}\n"
-                f"Absolute path would be: {os.path.abspath(image_path)}\n"
-                f"Ensure the path is correct relative to your current working directory."
-            )
-        try:
-            with open(image_path, "rb") as image_file:
-                base64_image = base64.b64encode(image_file.read()).decode('utf-8')
-                # Determine MIME type from file extension
-                ext = os.path.splitext(image_path)[1].lower()
-                mime_type = 'image/jpeg' if ext in ['.jpg', '.jpeg'] else f'image/{ext[1:]}'
-                return f"data:{mime_type};base64,{base64_image}"
-        except Exception as e:
-            raise RuntimeError(
-                f"Failed to read image file '{image_path}': {e}"
-            )
     @classmethod
     def build_messages(cls, input_data: Data) -> List:
-        # Encode images if they are local file paths
-        image_url_1 = cls._encode_image(input_data.prompt)
-        image_url_2 = cls._encode_image(input_data.content)
+        if not input_data.image or len(input_data.image) < 2:
+            raise ValueError(
+                "VLMImageRelevant requires exactly 2 images in the image field, "
+                f"got {len(input_data.image) if input_data.image else 0}."
+            )
+        image_url_1 = ImageLoader.encode_for_api(input_data.image[0])
+        image_url_2 = ImageLoader.encode_for_api(input_data.image[1])
         messages = [
             {

dingo/model/llm/vlm_layout_quality.py CHANGED Viewed

@@ -1,6 +1,4 @@
-import base64
 import json
-import os
 from typing import List
 from dingo.io.input import Data, RequiredField
@@ -8,11 +6,12 @@ from dingo.io.output.eval_detail import EvalDetail
 from dingo.model import Model
 from dingo.model.llm.base_openai import BaseOpenAI
 from dingo.utils import log
+from dingo.utils.image_loader import ImageLoader
 @Model.llm_register("VLMLayoutQuality")
 class VLMLayoutQuality(BaseOpenAI):
-    _required_fields = [RequiredField.CONTENT, RequiredField.IMAGE]
+    _required_fields = [RequiredField.IMAGE, RequiredField.CONTENT]
     prompt = r"""
      # 角色
     你是一名严谨细致的布局检测模型专家，你的任务是审查一个布局检测模型输出的蒙版图片，。由于没有标准的正确答案，你需要运用你对通用文档结构、排版惯例和逻辑关系的深刻理解，来识别并标记模型预测中的所有错误。
@@ -119,59 +118,9 @@ class VLMLayoutQuality(BaseOpenAI):
     {{ bbox_typr_list }}
         """
-    @classmethod
-    def _encode_image(cls, image_path: str) -> str:
-        """
-        Encode a local image file to base64 data URL format.
-        If the input is already a URL, return it as is.
-        This method follows Python's standard path resolution:
-        - Relative paths are resolved relative to the current working directory
-        - Absolute paths are used as-is
-        - URLs (http://, https://, data:) are passed through unchanged
-        Args:
-            image_path: Local file path (absolute or relative) or URL
-        Returns:
-            Base64 data URL for local files, or original URL for web resources
-        Raises:
-            FileNotFoundError: If a local file path does not exist
-            RuntimeError: If the file cannot be read
-        """
-        # Pass through URLs unchanged
-        if image_path.startswith('data:'):
-            return image_path
-        if image_path.startswith(("http://", "https://", 'data:')):
-            return image_path
-        # Standard file path handling (relative or absolute)
-        if not os.path.isfile(image_path):
-            raise FileNotFoundError(
-                f"Image file not found: '{image_path}'\n"
-                f"Current working directory: {os.getcwd()}\n"
-                f"Absolute path would be: {os.path.abspath(image_path)}\n"
-                f"Ensure the path is correct relative to your current working directory."
-            )
-        try:
-            with open(image_path, "rb") as image_file:
-                base64_image = base64.b64encode(image_file.read()).decode('utf-8')
-                # Determine MIME type from file extension
-                ext = os.path.splitext(image_path)[1].lower()
-                mime_type = 'image/jpeg' if ext in ['.jpg', '.jpeg'] else f'image/{ext[1:]}'
-                return f"data:{mime_type};base64,{base64_image}"
-        except Exception as e:
-            raise RuntimeError(
-                f"Failed to read image file '{image_path}': {e}"
-            )
     @classmethod
     def build_messages(cls, input_data: Data) -> List:
-        if isinstance(input_data.image[0], str):
-            image_base64 = cls._encode_image(input_data.image[0])
+        image_base64 = ImageLoader.encode_for_api(input_data.image)
         bboxs = eval(input_data.content)

dingo/model/model.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import importlib
 import inspect
 import os
-from typing import Callable, Dict, List, Optional
+from typing import Callable, Dict, List
 from pydantic import BaseModel
@@ -22,13 +22,19 @@ class Model:
     module_loaded = False
     # group
-    rule_groups: Dict[str, List[Callable]] = {}  # such as: {'default': [<class.RuleAlphaWords>]}
+    rule_groups: Dict[
+        str, List[Callable]
+    ] = {}  # such as: {'default': [<class.RuleAlphaWords>]}
     # metric map
-    rule_metric_type_map: Dict[str, List[Callable]] = {}   # such as: {'QUALITY_INEFFECTIVENESS': [<class.RuleAlphaWords>]}
+    rule_metric_type_map: Dict[
+        str, List[Callable]
+    ] = {}  # such as: {'QUALITY_INEFFECTIVENESS': [<class.RuleAlphaWords>]}
     # other map
-    rule_name_map: Dict[str, BaseRule] = {}  # such as: {'RuleAlphaWords': <class.RuleAlphaWords>}
+    rule_name_map: Dict[
+        str, BaseRule
+    ] = {}  # such as: {'RuleAlphaWords': <class.RuleAlphaWords>}
     llm_name_map: Dict[str, BaseLLM] = {}
     def __init__(self):
@@ -61,10 +67,10 @@ class Model:
     def get_group(cls, group_name) -> Dict[str, List]:
         res = {}
         if group_name not in Model.rule_groups:
-            raise KeyError('no such group: ' + group_name)
+            raise KeyError("no such group: " + group_name)
         if group_name in Model.rule_groups:
             log.debug(f"[Load rule group {group_name}]")
-            res['rule'] = Model.rule_groups[group_name]
+            res["rule"] = Model.rule_groups[group_name]
         return res
     @classmethod
@@ -75,6 +81,7 @@ class Model:
             metric_type (str): The metric type (quality map).
             group (List[str]): The group names.
         """
         def decorator(root_class):
             # group
             for group_name in group:
@@ -101,6 +108,7 @@ class Model:
         Args:
             llm_id (str): Name of llm model class.
         """
         def decorator(root_class):
             cls.llm_name_map[llm_id] = root_class
@@ -117,30 +125,34 @@ class Model:
             return
         this_module_directory = os.path.dirname(os.path.abspath(__file__))
         # rule auto register
-        for file in os.listdir(os.path.join(this_module_directory, 'rule')):
-            path = os.path.join(this_module_directory, 'rule', file)
-            if os.path.isfile(path) and file.endswith('.py') and not file == '__init__.py':
+        for file in os.listdir(os.path.join(this_module_directory, "rule")):
+            path = os.path.join(this_module_directory, "rule", file)
+            if (
+                os.path.isfile(path)
+                and file.endswith(".py")
+                and not file == "__init__.py"
+            ):
                 try:
-                    importlib.import_module('dingo.model.rule.' + file.split('.')[0])
+                    importlib.import_module("dingo.model.rule." + file.split(".")[0])
                 except ModuleNotFoundError as e:
                     log.debug(e)
         # llm auto register - 递归扫描子目录
-        llm_base_dir = os.path.join(this_module_directory, 'llm')
+        llm_base_dir = os.path.join(this_module_directory, "llm")
         for root, dirs, files in os.walk(llm_base_dir):
             # 跳过 __pycache__ 目录
-            dirs[:] = [d for d in dirs if d != '__pycache__']
+            dirs[:] = [d for d in dirs if d != "__pycache__"]
             for file in files:
-                if file.endswith('.py') and file != '__init__.py':
+                if file.endswith(".py") and file != "__init__.py":
                     # 计算相对于 llm 目录的模块路径
                     rel_path = os.path.relpath(root, llm_base_dir)
-                    if rel_path == '.':
-                        module_name = f'dingo.model.llm.{file[:-3]}'
+                    if rel_path == ".":
+                        module_name = f"dingo.model.llm.{file[:-3]}"
                     else:
                         # 将路径分隔符转换为点
-                        rel_module = rel_path.replace(os.sep, '.')
-                        module_name = f'dingo.model.llm.{rel_module}.{file[:-3]}'
+                        rel_module = rel_path.replace(os.sep, ".")
+                        module_name = f"dingo.model.llm.{rel_module}.{file[:-3]}"
                     try:
                         importlib.import_module(module_name)
@@ -148,7 +160,7 @@ class Model:
                         log.debug(e)
                     except ImportError as e:
                         log.debug("=" * 30 + " ImportError " + "=" * 30)
-                        log.debug(f'module {module_name} not imported because: \n{e}')
+                        log.debug(f"module {module_name} not imported because: \n{e}")
                         log.debug("=" * 73)
         cls.module_loaded = True
@@ -157,20 +169,21 @@ class Model:
     def set_config_rule(cls, rule: BaseRule, rule_config: EvaluatorRuleArgs):
         if not rule_config:
             return
-        config_default = getattr(rule, 'dynamic_config')
+        config_default = rule.dynamic_config.model_copy(deep=True)
         # Iterate over rule_config fields using Pydantic's model_dump()
         for k, v in rule_config.model_dump().items():
             if v is not None:
                 setattr(config_default, k, v)
-        setattr(rule, 'dynamic_config', config_default)
+        setattr(rule, "dynamic_config", config_default)
     @classmethod
     def set_config_llm(cls, llm: BaseLLM, llm_config: EvaluatorLLMArgs):
         if not llm_config:
             return
-        config_default = getattr(llm, 'dynamic_config')
-        # Iterate over llm_config fields using Pydantic's model_dump()
-        for k, v in llm_config.model_dump().items():
+        config_default = llm.dynamic_config.model_copy(deep=True)
+        # Preserve nested Pydantic config objects while still applying extra fields.
+        config_items = dict(llm_config)
+        for k, v in config_items.items():
             if v is not None:
                 setattr(config_default, k, v)
-        setattr(llm, 'dynamic_config', config_default)
+        setattr(llm, "dynamic_config", config_default)

dingo/model/rule/rule_common.py CHANGED Viewed

@@ -2678,6 +2678,82 @@ class RulePIIDetection(BaseRule):
         return res
+@Model.rule_register("QUALITY_BAD_EFFECTIVENESS", [""])
+class RuleDictConsistency(BaseRule):
+    """Compare two dict fields and report mismatched keys."""
+    _metric_info = {
+        "category": "Rule-Based TEXT Quality Metrics",
+        "quality_dimension": "EFFECTIVENESS",
+        "metric_name": "RuleDictConsistency",
+        "description": "Checks whether metadata and context dict are consistent by key/value equality",
+        "evaluation_results": ""
+    }
+    _required_fields = [RequiredField.METADATA, RequiredField.CONTEXT]
+    dynamic_config = EvaluatorRuleArgs(parameters={"ignore_order": True})
+    @classmethod
+    def _normalize_value(cls, value, ignore_order: bool):
+        """Normalize nested values for configurable order-aware comparison."""
+        if isinstance(value, dict):
+            return {
+                key: cls._normalize_value(value[key], ignore_order)
+                for key in sorted(value.keys(), key=lambda x: str(x))
+            }
+        if isinstance(value, (list, tuple)):
+            normalized = [cls._normalize_value(item, ignore_order) for item in value]
+            if ignore_order:
+                return sorted(normalized, key=lambda x: repr(x))
+            return normalized
+        if isinstance(value, set):
+            normalized = [cls._normalize_value(item, ignore_order) for item in value]
+            return sorted(normalized, key=lambda x: repr(x))
+        return value
+    @classmethod
+    def eval(cls, input_data: Data) -> EvalDetail:
+        res = EvalDetail(metric=cls.__name__)
+        left_dict = getattr(input_data, "metadata", None)
+        right_dict = getattr(input_data, "context", None)
+        parameters = cls.dynamic_config.parameters or {}
+        ignore_order = parameters.get("ignore_order", True)
+        if not isinstance(left_dict, dict) or not isinstance(right_dict, dict):
+            res.status = True
+            res.label = [f"{cls.metric_type}.{cls.__name__}", "INVALID_DICT_FIELD"]
+            res.reason = [
+                "metadata/context must both be dict, "
+                f"got metadata={type(left_dict).__name__}, context={type(right_dict).__name__}"
+            ]
+            return res
+        diff_keys = []
+        all_keys = set(left_dict.keys()) | set(right_dict.keys())
+        for key in sorted(all_keys, key=lambda x: str(x)):
+            if key not in left_dict or key not in right_dict:
+                diff_keys.append(str(key))
+                continue
+            left_value = cls._normalize_value(left_dict[key], ignore_order)
+            right_value = cls._normalize_value(right_dict[key], ignore_order)
+            if left_value != right_value:
+                diff_keys.append(str(key))
+        if diff_keys:
+            res.status = True
+            res.label = [
+                f"{cls.metric_type}.{cls.__name__}.{key}" for key in diff_keys
+            ]
+            res.reason = [f"Inconsistent keys: {', '.join(diff_keys)}"]
+        else:
+            res.label = [QualityLabel.QUALITY_GOOD]
+        return res
 if __name__ == "__main__":
     data = Data(data_id="", prompt="", content="\n \n \n \n hello \n \n ")
     tmp = RuleEnterAndSpace().eval(data)

dingo-python 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl

dingo-python 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl