dingo-python 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.
Files changed (36)
  1. dingo/config/input_args.py +11 -1
  2. dingo/exec/local.py +2 -1
  3. dingo/io/output/__init__.py +1 -0
  4. dingo/io/output/result_info.py +16 -0
  5. dingo/model/llm/compare/llm_html_extract_compare.py +17 -2
  6. dingo/model/llm/compare/llm_html_extract_compare_v2.py +1 -1
  7. dingo/model/llm/compare/llm_html_extract_compare_v3.py +221 -0
  8. dingo/model/llm/hhh/llm_text_3h.py +1 -1
  9. dingo/model/llm/llm_classify_qr.py +4 -2
  10. dingo/model/llm/llm_custom_metric.py +211 -0
  11. dingo/model/llm/llm_document_parsing_ocr.py +6 -2
  12. dingo/model/llm/llm_factcheck_public.py +1 -1
  13. dingo/model/llm/llm_keyword_matcher.py +1 -1
  14. dingo/model/llm/llm_scout.py +1 -1
  15. dingo/model/llm/mineru/vlm_document_parsing.py +4 -8
  16. dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py +4 -8
  17. dingo/model/llm/rag/llm_rag_answer_relevancy.py +1 -1
  18. dingo/model/llm/rag/llm_rag_chunk_quality.py +99 -0
  19. dingo/model/llm/rag/llm_rag_context_precision.py +1 -1
  20. dingo/model/llm/rag/llm_rag_context_recall.py +1 -1
  21. dingo/model/llm/rag/llm_rag_faithfulness.py +1 -1
  22. dingo/model/llm/vlm_image_relevant.py +9 -52
  23. dingo/model/llm/vlm_layout_quality.py +3 -54
  24. dingo/model/model.py +37 -24
  25. dingo/model/rule/rule_common.py +76 -0
  26. dingo/model/rule/rule_image.py +41 -32
  27. dingo/model/rule/scibase/__init__.py +1 -0
  28. dingo/model/rule/scibase/rule_quanliang.py +655 -0
  29. dingo/run/cli.py +22 -1
  30. dingo/utils/image_loader.py +141 -0
  31. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/METADATA +22 -1
  32. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/RECORD +36 -30
  33. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/WHEEL +0 -0
  34. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/entry_points.txt +0 -0
  35. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/licenses/LICENSE +0 -0
  36. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,3 @@
1
- import base64
2
1
  import json
3
2
  from typing import List
4
3
 
@@ -7,11 +6,12 @@ from dingo.io.output.eval_detail import EvalDetail
7
6
  from dingo.model import Model
8
7
  from dingo.model.llm.base_openai import BaseOpenAI
9
8
  from dingo.utils import log
9
+ from dingo.utils.image_loader import ImageLoader
10
10
 
11
11
 
12
12
  @Model.llm_register("VLMDocumentParsing")
13
13
  class VLMDocumentParsing(BaseOpenAI):
14
- _required_fields = [RequiredField.CONTENT, RequiredField.IMAGE]
14
+ _required_fields = [RequiredField.IMAGE, RequiredField.CONTENT]
15
15
  prompt = r"""
16
16
  *角色*
17
17
  你是一名严谨细致的文档转换质量评估助手。
@@ -174,18 +174,14 @@ class VLMDocumentParsing(BaseOpenAI):
174
174
 
175
175
  @classmethod
176
176
  def build_messages(cls, input_data: Data) -> List:
177
- if isinstance(input_data.image[0], str):
178
- with open(input_data.image[0], "rb") as image_file:
179
- base64_image = base64.b64encode(image_file.read()).decode('utf-8')
180
- else:
181
- base64_image = input_data.image[0]
177
+ image_url = ImageLoader.encode_for_api(input_data.image)
182
178
 
183
179
  messages = [
184
180
  {
185
181
  "role": "user",
186
182
  "content": [
187
183
  {"type": "text", "text": cls.prompt},
188
- {"type": "image_url", "image_url": {"url": base64_image}},
184
+ {"type": "image_url", "image_url": {"url": image_url}},
189
185
  {"type": "text", "text": f"Markdown:\n{input_data.content}"}
190
186
  ]
191
187
  }
@@ -1,4 +1,3 @@
1
- import base64
2
1
  import json
3
2
  import re
4
3
  from typing import List
@@ -8,6 +7,7 @@ from dingo.io.output.eval_detail import EvalDetail
8
7
  from dingo.model import Model
9
8
  from dingo.model.llm.base_openai import BaseOpenAI
10
9
  from dingo.utils import log
10
+ from dingo.utils.image_loader import ImageLoader
11
11
 
12
12
 
13
13
  @Model.llm_register("VLMDocumentParsingOCRTrain")
@@ -86,22 +86,18 @@ class VLMDocumentParsingOCRTrain(BaseOpenAI):
86
86
  ```
87
87
  """
88
88
 
89
- _required_fields = [RequiredField.CONTENT, RequiredField.IMAGE]
89
+ _required_fields = [RequiredField.IMAGE, RequiredField.CONTENT]
90
90
 
91
91
  @classmethod
92
92
  def build_messages(cls, input_data: Data) -> List:
93
- if isinstance(input_data.image[0], str):
94
- with open(input_data.image[0], "rb") as image_file:
95
- base64_image = base64.b64encode(image_file.read()).decode('utf-8')
96
- else:
97
- base64_image = input_data.image[0]
93
+ image_url = ImageLoader.encode_for_api(input_data.image)
98
94
 
99
95
  messages = [
100
96
  {
101
97
  "role": "user",
102
98
  "content": [
103
99
  {"type": "text", "text": cls.prompt},
104
- {"type": "image_url", "image_url": {"url": base64_image}},
100
+ {"type": "image_url", "image_url": {"url": image_url}},
105
101
  {"type": "text", "text": f"Markdown:\n{input_data.content}"}
106
102
  ]
107
103
  }
@@ -43,7 +43,7 @@ class LLMRAGAnswerRelevancy(BaseOpenAI):
43
43
  "source_frameworks": "Ragas"
44
44
  }
45
45
 
46
- _required_fields = [RequiredField.CONTENT, RequiredField.PROMPT]
46
+ _required_fields = [RequiredField.PROMPT, RequiredField.CONTENT]
47
47
 
48
48
  question_generation_prompt = """Task: Generate a question for the given answer and identify if the answer is noncommittal.
49
49
 
@@ -0,0 +1,99 @@
1
+ from dingo.io.input import RequiredField
2
+ from dingo.model import Model
3
+ from dingo.model.llm.text_quality.base_text_quality import BaseTextQuality
4
+
5
+
6
+ @Model.llm_register("LLMChunkQuality")
7
+ class LLMChunkQuality(BaseTextQuality):
8
+ # Metadata for documentation generation
9
+ _metric_info = {
10
+ "category": "RAG Retrieved Evidence Chunk Quality Metrics",
11
+ "metric_name": "LLMChunkQuality",
12
+ "description": "Assesses retrieved citation chunks referenced by LLM answers, detecting start-boundary truncation and duplicated leading text that can weaken grounded generation",
13
+ "examples": "examples/rag/sdk_chunk_eval.py"
14
+ }
15
+ _required_fields = [RequiredField.CONTENT]
16
+ prompt = """
17
+ # Role
18
+ You are a data quality evaluator for RAG evidence chunks that are cited by LLM answers.
19
+
20
+ # Goal
21
+ Determine whether this retrieved chunk is reliable as citation evidence for grounded LLM answers.
22
+ Focus on start-boundary corruption and duplicate-leading content that can materially harm retrieval-to-generation quality, not minor imperfections.
23
+
24
+ # Quality Dimensions
25
+
26
+ ## 1. Completeness
27
+ **Impact**: Broken starts prevent models from learning proper chunk boundaries and coherent text patterns.
28
+
29
+ **Check for**:
30
+ - **Error_Start_Text_Truncation**: The beginning text is truncated (letters, words, Chinese characters, or other languages)
31
+ **Common corruption patterns**:
32
+ - Leading letter truncation, e.g.:
33
+ "e with agroforestry and green manure-based technologies can significantly enhance financial profits."
34
+ - Leading word truncation, e.g.:
35
+ "osition of noble gases in this ionized reservoir depends on ionization energy and plasma temperature."
36
+ - Leading Chinese character truncation, e.g.:
37
+ "烈。可以说,在中国历史上,这是一个大动荡的时期,更是一个大融合、大发展的时期。"
38
+
39
+ - **Error_Start_Punctuation_Truncation**: The beginning punctuation is truncated
40
+ **Common corruption patterns**:
41
+ - Truncated ending punctuation from the previous sentence, e.g.:
42
+ ". Due to the inhibitory effects from module 2, the firing rate of these diverged bumps are very low."
43
+ - Truncated punctuation from the middle of the previous sentence, e.g.:
44
+ ", 23.27±14.57; M/F, 30/9) were found of ALL-T origin. Their specimens were mainly bone marrow $(\\Nu=26$ ) and peripheral blood $(\\Nu{=}13$ ) and subjected for molecular analysis irrespective of their CD5 expression."
45
+
46
+ - **Error_Start_Inline_Formula_Truncation**: Inline formula at the beginning is truncated
47
+ **Common corruption patterns**:
48
+ - Truncation of inline formulas wrapped by single "$", e.g.:
49
+ "-}1100^{\\circ}\\mathrm{C}$ there is relatively no loss in weight on heating."
50
+
51
+ - **Error_Start_Interline_Formula_Truncation**: Interline formula at the beginning is truncated
52
+ **Common corruption patterns**:
53
+ - Truncation of interline formulas wrapped by double "$$", e.g.:
54
+ "q_{D N}=-0,01\\cdot T+2,41;\n$$\n\n$q_{D N}-$ denitrifikacijos greitis, $\\mathrm{\\mgN/gVDBSM\\cdoth}$ ;"
55
+
56
+ ---
57
+
58
+ ## 2. Similarity
59
+ **Impact**: Repeated content severely reduces learning efficiency and increases memorization risk.
60
+
61
+ **Check for**:
62
+ - **Error_Start_Text_Duplicate**: Repeated text at the beginning
63
+ **Common corruption patterns**:
64
+ - Start-position duplicate text, e.g.:
65
+ "4. Diefendorf, Barbara. From Penitence to Charity: Pious Women and the Catholic Reformation in Paris\n\n. Diefendorf, Barbara. From Penitence to Charity: Pious Women and the Catholic Reformation in Paris. New York: Oxford University Press, 2004. Di Filippo Bareggi, Claudia."
66
+
67
+ ---
68
+
69
+ # Workflow
70
+
71
+ 1. **Quick scan**: Is the text generally readable and structurally complete?
72
+ 2. **Identify category**: If there is an issue, which dimension is most severely affected?
73
+ 3. **Validate impact**: Will this issue materially damage model training?
74
+ 4. **Assign labels**:
75
+ - Score: 1 (suitable) or 0 (unsuitable)
76
+ - Type: `Good` or one of `Completeness`, `Similarity`
77
+ - Name: Specific error type (from above)
78
+ - Reason: Brief explanation (1-2 sentences)
79
+
80
+ ---
81
+
82
+ # Output Format
83
+ Return JSON only: {"score": 0/1, "type": "", "name": "", "reason": ""}
84
+
85
+ # Examples
86
+
87
+ **Example 1 (Good - Simple)**:
88
+ Input: "The Pythagorean theorem states that $a^2 + b^2 = c^2$ for right triangles."
89
+ Output: {"score": 1, "type": "Good", "name": "None", "reason": "Clear, well-formatted text with proper LaTeX."}
90
+
91
+ **Example 2 (Bad - Completeness, punctuation truncation)**:
92
+ Input: ", and the patient was diagnosed with IE due to methicillin-resistant Staphylococcus aureus infection\n\n."
93
+ Output: {"score": 0, "type": "Completeness", "name": "Error_Start_Punctuation_Truncation", "reason": "The beginning is incomplete and starts from truncated punctuation."}
94
+
95
+ ---
96
+
97
+ # Input content to evaluate:
98
+
99
+ """
@@ -43,7 +43,7 @@ class LLMRAGContextPrecision(BaseOpenAI):
43
43
  "source_frameworks": "Ragas"
44
44
  }
45
45
 
46
- _required_fields = [RequiredField.CONTENT, RequiredField.CONTEXT, RequiredField.PROMPT]
46
+ _required_fields = [RequiredField.PROMPT, RequiredField.CONTEXT, RequiredField.CONTENT]
47
47
 
48
48
  @classmethod
49
49
  def context_precision_prompt(cls, question: str, context: str, answer: str) -> str:
@@ -47,7 +47,7 @@ class LLMRAGContextRecall(BaseOpenAI):
47
47
  "source_frameworks": "Ragas + DeepEval"
48
48
  }
49
49
 
50
- _required_fields = [RequiredField.CONTENT, RequiredField.CONTEXT, RequiredField.PROMPT]
50
+ _required_fields = [RequiredField.PROMPT, RequiredField.CONTEXT, RequiredField.CONTENT]
51
51
  prompt = """上下文召回评估提示词,用于分类陈述归因"""
52
52
 
53
53
  @staticmethod
@@ -43,7 +43,7 @@ class LLMRAGFaithfulness(BaseOpenAI):
43
43
  "source_frameworks": "Ragas + DeepEval"
44
44
  }
45
45
 
46
- _required_fields = [RequiredField.CONTENT, RequiredField.CONTEXT, RequiredField.PROMPT]
46
+ _required_fields = [RequiredField.PROMPT, RequiredField.CONTEXT, RequiredField.CONTENT]
47
47
 
48
48
  @staticmethod
49
49
  def statement_generator_prompt(question: str, answer: str) -> str:
@@ -1,15 +1,14 @@
1
- import base64
2
- import os
3
1
  from typing import List
4
2
 
5
3
  from dingo.io.input import Data, RequiredField
6
4
  from dingo.model import Model
7
5
  from dingo.model.llm.base_openai import BaseOpenAI
6
+ from dingo.utils.image_loader import ImageLoader
8
7
 
9
8
 
10
9
  @Model.llm_register("VLMImageRelevant")
11
10
  class VLMImageRelevant(BaseOpenAI):
12
- _required_fields = [RequiredField.PROMPT, RequiredField.CONTENT]
11
+ _required_fields = [RequiredField.IMAGE]
13
12
  prompt = """
14
13
  你是一个专业的图像对比分析系统。请对比分析两张图片的一致性和相关性。
15
14
 
@@ -42,57 +41,15 @@ class VLMImageRelevant(BaseOpenAI):
42
41
  输出格式必须为JSON:{"score": 评分, "reason": "原因说明"}
43
42
  """
44
43
 
45
- @classmethod
46
- def _encode_image(cls, image_path: str) -> str:
47
- """
48
- Encode a local image file to base64 data URL format.
49
- If the input is already a URL, return it as is.
50
-
51
- This method follows Python's standard path resolution:
52
- - Relative paths are resolved relative to the current working directory
53
- - Absolute paths are used as-is
54
- - URLs (http://, https://, data:) are passed through unchanged
55
-
56
- Args:
57
- image_path: Local file path (absolute or relative) or URL
58
-
59
- Returns:
60
- Base64 data URL for local files, or original URL for web resources
61
-
62
- Raises:
63
- FileNotFoundError: If a local file path does not exist
64
- RuntimeError: If the file cannot be read
65
- """
66
- # Pass through URLs unchanged
67
- if image_path.startswith(('http://', 'https://', 'data:')):
68
- return image_path
69
-
70
- # Standard file path handling (relative or absolute)
71
- if not os.path.isfile(image_path):
72
- raise FileNotFoundError(
73
- f"Image file not found: '{image_path}'\n"
74
- f"Current working directory: {os.getcwd()}\n"
75
- f"Absolute path would be: {os.path.abspath(image_path)}\n"
76
- f"Ensure the path is correct relative to your current working directory."
77
- )
78
-
79
- try:
80
- with open(image_path, "rb") as image_file:
81
- base64_image = base64.b64encode(image_file.read()).decode('utf-8')
82
- # Determine MIME type from file extension
83
- ext = os.path.splitext(image_path)[1].lower()
84
- mime_type = 'image/jpeg' if ext in ['.jpg', '.jpeg'] else f'image/{ext[1:]}'
85
- return f"data:{mime_type};base64,{base64_image}"
86
- except Exception as e:
87
- raise RuntimeError(
88
- f"Failed to read image file '{image_path}': {e}"
89
- )
90
-
91
44
  @classmethod
92
45
  def build_messages(cls, input_data: Data) -> List:
93
- # Encode images if they are local file paths
94
- image_url_1 = cls._encode_image(input_data.prompt)
95
- image_url_2 = cls._encode_image(input_data.content)
46
+ if not input_data.image or len(input_data.image) < 2:
47
+ raise ValueError(
48
+ "VLMImageRelevant requires exactly 2 images in the image field, "
49
+ f"got {len(input_data.image) if input_data.image else 0}."
50
+ )
51
+ image_url_1 = ImageLoader.encode_for_api(input_data.image[0])
52
+ image_url_2 = ImageLoader.encode_for_api(input_data.image[1])
96
53
 
97
54
  messages = [
98
55
  {
@@ -1,6 +1,4 @@
1
- import base64
2
1
  import json
3
- import os
4
2
  from typing import List
5
3
 
6
4
  from dingo.io.input import Data, RequiredField
@@ -8,11 +6,12 @@ from dingo.io.output.eval_detail import EvalDetail
8
6
  from dingo.model import Model
9
7
  from dingo.model.llm.base_openai import BaseOpenAI
10
8
  from dingo.utils import log
9
+ from dingo.utils.image_loader import ImageLoader
11
10
 
12
11
 
13
12
  @Model.llm_register("VLMLayoutQuality")
14
13
  class VLMLayoutQuality(BaseOpenAI):
15
- _required_fields = [RequiredField.CONTENT, RequiredField.IMAGE]
14
+ _required_fields = [RequiredField.IMAGE, RequiredField.CONTENT]
16
15
  prompt = r"""
17
16
  # 角色
18
17
  你是一名严谨细致的布局检测模型专家,你的任务是审查一个布局检测模型输出的蒙版图片,。由于没有标准的正确答案,你需要运用你对通用文档结构、排版惯例和逻辑关系的深刻理解,来识别并标记模型预测中的所有错误。
@@ -119,59 +118,9 @@ class VLMLayoutQuality(BaseOpenAI):
119
118
  {{ bbox_typr_list }}
120
119
  """
121
120
 
122
- @classmethod
123
- def _encode_image(cls, image_path: str) -> str:
124
- """
125
- Encode a local image file to base64 data URL format.
126
- If the input is already a URL, return it as is.
127
-
128
- This method follows Python's standard path resolution:
129
- - Relative paths are resolved relative to the current working directory
130
- - Absolute paths are used as-is
131
- - URLs (http://, https://, data:) are passed through unchanged
132
-
133
- Args:
134
- image_path: Local file path (absolute or relative) or URL
135
-
136
- Returns:
137
- Base64 data URL for local files, or original URL for web resources
138
-
139
- Raises:
140
- FileNotFoundError: If a local file path does not exist
141
- RuntimeError: If the file cannot be read
142
- """
143
- # Pass through URLs unchanged
144
- if image_path.startswith('data:'):
145
- return image_path
146
-
147
- if image_path.startswith(("http://", "https://", 'data:')):
148
- return image_path
149
-
150
- # Standard file path handling (relative or absolute)
151
- if not os.path.isfile(image_path):
152
- raise FileNotFoundError(
153
- f"Image file not found: '{image_path}'\n"
154
- f"Current working directory: {os.getcwd()}\n"
155
- f"Absolute path would be: {os.path.abspath(image_path)}\n"
156
- f"Ensure the path is correct relative to your current working directory."
157
- )
158
-
159
- try:
160
- with open(image_path, "rb") as image_file:
161
- base64_image = base64.b64encode(image_file.read()).decode('utf-8')
162
- # Determine MIME type from file extension
163
- ext = os.path.splitext(image_path)[1].lower()
164
- mime_type = 'image/jpeg' if ext in ['.jpg', '.jpeg'] else f'image/{ext[1:]}'
165
- return f"data:{mime_type};base64,{base64_image}"
166
- except Exception as e:
167
- raise RuntimeError(
168
- f"Failed to read image file '{image_path}': {e}"
169
- )
170
-
171
121
  @classmethod
172
122
  def build_messages(cls, input_data: Data) -> List:
173
- if isinstance(input_data.image[0], str):
174
- image_base64 = cls._encode_image(input_data.image[0])
123
+ image_base64 = ImageLoader.encode_for_api(input_data.image)
175
124
 
176
125
  bboxs = eval(input_data.content)
177
126
 
dingo/model/model.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import importlib
2
2
  import inspect
3
3
  import os
4
- from typing import Callable, Dict, List, Optional
4
+ from typing import Callable, Dict, List
5
5
 
6
6
  from pydantic import BaseModel
7
7
 
@@ -22,13 +22,19 @@ class Model:
22
22
  module_loaded = False
23
23
 
24
24
  # group
25
- rule_groups: Dict[str, List[Callable]] = {} # such as: {'default': [<class.RuleAlphaWords>]}
25
+ rule_groups: Dict[
26
+ str, List[Callable]
27
+ ] = {} # such as: {'default': [<class.RuleAlphaWords>]}
26
28
 
27
29
  # metric map
28
- rule_metric_type_map: Dict[str, List[Callable]] = {} # such as: {'QUALITY_INEFFECTIVENESS': [<class.RuleAlphaWords>]}
30
+ rule_metric_type_map: Dict[
31
+ str, List[Callable]
32
+ ] = {} # such as: {'QUALITY_INEFFECTIVENESS': [<class.RuleAlphaWords>]}
29
33
 
30
34
  # other map
31
- rule_name_map: Dict[str, BaseRule] = {} # such as: {'RuleAlphaWords': <class.RuleAlphaWords>}
35
+ rule_name_map: Dict[
36
+ str, BaseRule
37
+ ] = {} # such as: {'RuleAlphaWords': <class.RuleAlphaWords>}
32
38
  llm_name_map: Dict[str, BaseLLM] = {}
33
39
 
34
40
  def __init__(self):
@@ -61,10 +67,10 @@ class Model:
61
67
  def get_group(cls, group_name) -> Dict[str, List]:
62
68
  res = {}
63
69
  if group_name not in Model.rule_groups:
64
- raise KeyError('no such group: ' + group_name)
70
+ raise KeyError("no such group: " + group_name)
65
71
  if group_name in Model.rule_groups:
66
72
  log.debug(f"[Load rule group {group_name}]")
67
- res['rule'] = Model.rule_groups[group_name]
73
+ res["rule"] = Model.rule_groups[group_name]
68
74
  return res
69
75
 
70
76
  @classmethod
@@ -75,6 +81,7 @@ class Model:
75
81
  metric_type (str): The metric type (quality map).
76
82
  group (List[str]): The group names.
77
83
  """
84
+
78
85
  def decorator(root_class):
79
86
  # group
80
87
  for group_name in group:
@@ -101,6 +108,7 @@ class Model:
101
108
  Args:
102
109
  llm_id (str): Name of llm model class.
103
110
  """
111
+
104
112
  def decorator(root_class):
105
113
  cls.llm_name_map[llm_id] = root_class
106
114
 
@@ -117,30 +125,34 @@ class Model:
117
125
  return
118
126
  this_module_directory = os.path.dirname(os.path.abspath(__file__))
119
127
  # rule auto register
120
- for file in os.listdir(os.path.join(this_module_directory, 'rule')):
121
- path = os.path.join(this_module_directory, 'rule', file)
122
- if os.path.isfile(path) and file.endswith('.py') and not file == '__init__.py':
128
+ for file in os.listdir(os.path.join(this_module_directory, "rule")):
129
+ path = os.path.join(this_module_directory, "rule", file)
130
+ if (
131
+ os.path.isfile(path)
132
+ and file.endswith(".py")
133
+ and not file == "__init__.py"
134
+ ):
123
135
  try:
124
- importlib.import_module('dingo.model.rule.' + file.split('.')[0])
136
+ importlib.import_module("dingo.model.rule." + file.split(".")[0])
125
137
  except ModuleNotFoundError as e:
126
138
  log.debug(e)
127
139
 
128
140
  # llm auto register - 递归扫描子目录
129
- llm_base_dir = os.path.join(this_module_directory, 'llm')
141
+ llm_base_dir = os.path.join(this_module_directory, "llm")
130
142
  for root, dirs, files in os.walk(llm_base_dir):
131
143
  # 跳过 __pycache__ 目录
132
- dirs[:] = [d for d in dirs if d != '__pycache__']
144
+ dirs[:] = [d for d in dirs if d != "__pycache__"]
133
145
 
134
146
  for file in files:
135
- if file.endswith('.py') and file != '__init__.py':
147
+ if file.endswith(".py") and file != "__init__.py":
136
148
  # 计算相对于 llm 目录的模块路径
137
149
  rel_path = os.path.relpath(root, llm_base_dir)
138
- if rel_path == '.':
139
- module_name = f'dingo.model.llm.{file[:-3]}'
150
+ if rel_path == ".":
151
+ module_name = f"dingo.model.llm.{file[:-3]}"
140
152
  else:
141
153
  # 将路径分隔符转换为点
142
- rel_module = rel_path.replace(os.sep, '.')
143
- module_name = f'dingo.model.llm.{rel_module}.{file[:-3]}'
154
+ rel_module = rel_path.replace(os.sep, ".")
155
+ module_name = f"dingo.model.llm.{rel_module}.{file[:-3]}"
144
156
 
145
157
  try:
146
158
  importlib.import_module(module_name)
@@ -148,7 +160,7 @@ class Model:
148
160
  log.debug(e)
149
161
  except ImportError as e:
150
162
  log.debug("=" * 30 + " ImportError " + "=" * 30)
151
- log.debug(f'module {module_name} not imported because: \n{e}')
163
+ log.debug(f"module {module_name} not imported because: \n{e}")
152
164
  log.debug("=" * 73)
153
165
 
154
166
  cls.module_loaded = True
@@ -157,20 +169,21 @@ class Model:
157
169
  def set_config_rule(cls, rule: BaseRule, rule_config: EvaluatorRuleArgs):
158
170
  if not rule_config:
159
171
  return
160
- config_default = getattr(rule, 'dynamic_config')
172
+ config_default = rule.dynamic_config.model_copy(deep=True)
161
173
  # Iterate over rule_config fields using Pydantic's model_dump()
162
174
  for k, v in rule_config.model_dump().items():
163
175
  if v is not None:
164
176
  setattr(config_default, k, v)
165
- setattr(rule, 'dynamic_config', config_default)
177
+ setattr(rule, "dynamic_config", config_default)
166
178
 
167
179
  @classmethod
168
180
  def set_config_llm(cls, llm: BaseLLM, llm_config: EvaluatorLLMArgs):
169
181
  if not llm_config:
170
182
  return
171
- config_default = getattr(llm, 'dynamic_config')
172
- # Iterate over llm_config fields using Pydantic's model_dump()
173
- for k, v in llm_config.model_dump().items():
183
+ config_default = llm.dynamic_config.model_copy(deep=True)
184
+ # Preserve nested Pydantic config objects while still applying extra fields.
185
+ config_items = dict(llm_config)
186
+ for k, v in config_items.items():
174
187
  if v is not None:
175
188
  setattr(config_default, k, v)
176
- setattr(llm, 'dynamic_config', config_default)
189
+ setattr(llm, "dynamic_config", config_default)
@@ -2678,6 +2678,82 @@ class RulePIIDetection(BaseRule):
2678
2678
  return res
2679
2679
 
2680
2680
 
2681
+ @Model.rule_register("QUALITY_BAD_EFFECTIVENESS", [""])
2682
+ class RuleDictConsistency(BaseRule):
2683
+ """Compare two dict fields and report mismatched keys."""
2684
+
2685
+ _metric_info = {
2686
+ "category": "Rule-Based TEXT Quality Metrics",
2687
+ "quality_dimension": "EFFECTIVENESS",
2688
+ "metric_name": "RuleDictConsistency",
2689
+ "description": "Checks whether metadata and context dict are consistent by key/value equality",
2690
+ "evaluation_results": ""
2691
+ }
2692
+
2693
+ _required_fields = [RequiredField.METADATA, RequiredField.CONTEXT]
2694
+ dynamic_config = EvaluatorRuleArgs(parameters={"ignore_order": True})
2695
+
2696
+ @classmethod
2697
+ def _normalize_value(cls, value, ignore_order: bool):
2698
+ """Normalize nested values for configurable order-aware comparison."""
2699
+ if isinstance(value, dict):
2700
+ return {
2701
+ key: cls._normalize_value(value[key], ignore_order)
2702
+ for key in sorted(value.keys(), key=lambda x: str(x))
2703
+ }
2704
+
2705
+ if isinstance(value, (list, tuple)):
2706
+ normalized = [cls._normalize_value(item, ignore_order) for item in value]
2707
+ if ignore_order:
2708
+ return sorted(normalized, key=lambda x: repr(x))
2709
+ return normalized
2710
+
2711
+ if isinstance(value, set):
2712
+ normalized = [cls._normalize_value(item, ignore_order) for item in value]
2713
+ return sorted(normalized, key=lambda x: repr(x))
2714
+
2715
+ return value
2716
+
2717
+ @classmethod
2718
+ def eval(cls, input_data: Data) -> EvalDetail:
2719
+ res = EvalDetail(metric=cls.__name__)
2720
+ left_dict = getattr(input_data, "metadata", None)
2721
+ right_dict = getattr(input_data, "context", None)
2722
+ parameters = cls.dynamic_config.parameters or {}
2723
+ ignore_order = parameters.get("ignore_order", True)
2724
+
2725
+ if not isinstance(left_dict, dict) or not isinstance(right_dict, dict):
2726
+ res.status = True
2727
+ res.label = [f"{cls.metric_type}.{cls.__name__}", "INVALID_DICT_FIELD"]
2728
+ res.reason = [
2729
+ "metadata/context must both be dict, "
2730
+ f"got metadata={type(left_dict).__name__}, context={type(right_dict).__name__}"
2731
+ ]
2732
+ return res
2733
+
2734
+ diff_keys = []
2735
+ all_keys = set(left_dict.keys()) | set(right_dict.keys())
2736
+ for key in sorted(all_keys, key=lambda x: str(x)):
2737
+ if key not in left_dict or key not in right_dict:
2738
+ diff_keys.append(str(key))
2739
+ continue
2740
+
2741
+ left_value = cls._normalize_value(left_dict[key], ignore_order)
2742
+ right_value = cls._normalize_value(right_dict[key], ignore_order)
2743
+ if left_value != right_value:
2744
+ diff_keys.append(str(key))
2745
+
2746
+ if diff_keys:
2747
+ res.status = True
2748
+ res.label = [
2749
+ f"{cls.metric_type}.{cls.__name__}.{key}" for key in diff_keys
2750
+ ]
2751
+ res.reason = [f"Inconsistent keys: {', '.join(diff_keys)}"]
2752
+ else:
2753
+ res.label = [QualityLabel.QUALITY_GOOD]
2754
+ return res
2755
+
2756
+
2681
2757
  if __name__ == "__main__":
2682
2758
  data = Data(data_id="", prompt="", content="\n \n \n \n hello \n \n ")
2683
2759
  tmp = RuleEnterAndSpace().eval(data)