dingo-python 2.2.1__py3-none-any.whl → 2.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dingo/config/input_args.py +2 -1
- dingo/model/llm/agent/agent_article_fact_checker.py +102 -29
- dingo/model/llm/agent/agent_fact_check.py +7 -9
- dingo/model/llm/agent/agent_hallucination.py +7 -9
- dingo/model/llm/agent/agent_wrapper.py +6 -6
- dingo/model/llm/agent/base_agent.py +5 -5
- dingo/model/llm/base_openai.py +4 -8
- dingo/model/llm/compare/llm_html_extract_compare_v2.py +11 -2
- dingo/model/llm/instruction_quality/llm_instruction_clarity.py +2 -2
- dingo/model/llm/instruction_quality/llm_task_difficulty.py +4 -4
- dingo/model/llm/rag/llm_rag_answer_relevancy.py +5 -12
- dingo/model/llm/rag/llm_rag_context_precision.py +2 -2
- dingo/model/llm/rag/llm_rag_context_recall.py +2 -2
- dingo/model/llm/rag/llm_rag_context_relevancy.py +2 -2
- dingo/model/llm/rag/llm_rag_faithfulness.py +2 -2
- dingo/model/llm/text_quality/base_text_quality.py +2 -7
- dingo/model/llm/text_quality/llm_text_equation.py +68 -0
- dingo/model/llm/text_quality/llm_text_quality_v5.py +45 -13
- dingo/model/llm/text_quality/llm_text_table.py +70 -0
- dingo/model/llm/vlm_layout_quality.py +2 -2
- {dingo_python-2.2.1.dist-info → dingo_python-2.2.2.dist-info}/METADATA +4 -1
- {dingo_python-2.2.1.dist-info → dingo_python-2.2.2.dist-info}/RECORD +26 -24
- {dingo_python-2.2.1.dist-info → dingo_python-2.2.2.dist-info}/WHEEL +0 -0
- {dingo_python-2.2.1.dist-info → dingo_python-2.2.2.dist-info}/entry_points.txt +0 -0
- {dingo_python-2.2.1.dist-info → dingo_python-2.2.2.dist-info}/licenses/LICENSE +0 -0
- {dingo_python-2.2.1.dist-info → dingo_python-2.2.2.dist-info}/top_level.txt +0 -0
dingo/config/input_args.py
CHANGED
|
@@ -102,10 +102,11 @@ class EmbeddingConfigArgs(BaseModel):
|
|
|
102
102
|
|
|
103
103
|
|
|
104
104
|
class EvaluatorLLMArgs(BaseModel):
|
|
105
|
+
model_config = {"extra": "allow"}
|
|
106
|
+
|
|
105
107
|
model: Optional[str] = None
|
|
106
108
|
key: Optional[str] = None
|
|
107
109
|
api_url: Optional[str] = None
|
|
108
|
-
parameters: Optional[dict] = None
|
|
109
110
|
embedding_config: Optional[EmbeddingConfigArgs] = None
|
|
110
111
|
|
|
111
112
|
|
|
@@ -343,21 +343,21 @@ class ArticleFactChecker(BaseAgent):
|
|
|
343
343
|
"config": {
|
|
344
344
|
"key": "your-openai-api-key",
|
|
345
345
|
"model": "gpt-4o-mini",
|
|
346
|
-
"
|
|
347
|
-
"
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
}
|
|
346
|
+
"agent_config": {
|
|
347
|
+
"max_iterations": 10,
|
|
348
|
+
"overall_timeout": 900,
|
|
349
|
+
"max_concurrent_claims": 5,
|
|
350
|
+
"tools": {
|
|
351
|
+
"claims_extractor": {
|
|
352
|
+
"api_key": "your-openai-api-key",
|
|
353
|
+
"max_claims": 50,
|
|
354
|
+
"claim_types": ["factual", "institutional", "statistical", "attribution"]
|
|
355
|
+
},
|
|
356
|
+
"tavily_search": {
|
|
357
|
+
"api_key": "your-tavily-api-key",
|
|
358
|
+
"max_results": 5
|
|
359
|
+
},
|
|
360
|
+
"arxiv_search": {"max_results": 5}
|
|
361
361
|
}
|
|
362
362
|
}
|
|
363
363
|
}
|
|
@@ -372,6 +372,9 @@ class ArticleFactChecker(BaseAgent):
|
|
|
372
372
|
]
|
|
373
373
|
max_iterations = 10 # Allow more iterations for comprehensive checking
|
|
374
374
|
max_concurrent_claims = 5 # Default parallel claim verification slots
|
|
375
|
+
overall_timeout = 900 # 15-minute wall-clock timeout for entire evaluation
|
|
376
|
+
_MIN_OVERALL_TIMEOUT = 30 # Floor: 30 seconds
|
|
377
|
+
_MAX_OVERALL_TIMEOUT = 7200 # Ceiling: 2 hours
|
|
375
378
|
|
|
376
379
|
_required_fields = [RequiredField.CONTENT] # Article text
|
|
377
380
|
|
|
@@ -394,8 +397,8 @@ class ArticleFactChecker(BaseAgent):
|
|
|
394
397
|
Returns:
|
|
395
398
|
Output directory path (created if needed), or None if saving is disabled.
|
|
396
399
|
"""
|
|
397
|
-
|
|
398
|
-
agent_cfg =
|
|
400
|
+
extra_params = cls.dynamic_config.model_extra
|
|
401
|
+
agent_cfg = extra_params.get('agent_config') or {}
|
|
399
402
|
|
|
400
403
|
explicit_path = agent_cfg.get('output_path')
|
|
401
404
|
if explicit_path:
|
|
@@ -816,24 +819,42 @@ class ArticleFactChecker(BaseAgent):
|
|
|
816
819
|
output_dir = cls._get_output_dir()
|
|
817
820
|
|
|
818
821
|
if cls.dynamic_config:
|
|
819
|
-
if cls.dynamic_config.
|
|
820
|
-
cls.dynamic_config.
|
|
821
|
-
cls.dynamic_config.parameters.setdefault("temperature", 0)
|
|
822
|
+
if 'temperature' not in cls.dynamic_config.model_extra:
|
|
823
|
+
cls.dynamic_config.temperature = 0
|
|
822
824
|
|
|
823
825
|
if output_dir and input_data.content:
|
|
824
826
|
cls._save_article_content(output_dir, input_data.content)
|
|
825
827
|
|
|
828
|
+
timeout = cls._get_overall_timeout()
|
|
829
|
+
|
|
830
|
+
async def _run_with_timeout() -> EvalDetail:
|
|
831
|
+
return await asyncio.wait_for(
|
|
832
|
+
cls._async_eval(input_data, start_time, output_dir),
|
|
833
|
+
timeout=timeout,
|
|
834
|
+
)
|
|
835
|
+
|
|
826
836
|
try:
|
|
827
|
-
return asyncio.run(
|
|
837
|
+
return asyncio.run(_run_with_timeout())
|
|
838
|
+
except asyncio.TimeoutError:
|
|
839
|
+
elapsed = time.time() - start_time
|
|
840
|
+
log.warning(f"ArticleFactChecker: overall timeout exceeded ({elapsed:.1f}s / {timeout:.0f}s limit)")
|
|
841
|
+
return cls._create_overall_timeout_result(elapsed, timeout)
|
|
828
842
|
except RuntimeError as e:
|
|
829
843
|
# Fallback when called inside an already-running event loop (e.g. Jupyter, tests)
|
|
830
844
|
if "cannot run" in str(e).lower() or "already running" in str(e).lower():
|
|
831
845
|
import concurrent.futures
|
|
832
846
|
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
|
833
|
-
future = pool.submit(
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
847
|
+
future = pool.submit(lambda: asyncio.run(_run_with_timeout()))
|
|
848
|
+
try:
|
|
849
|
+
# Extra margin so asyncio.wait_for fires before this outer timeout
|
|
850
|
+
return future.result(timeout=timeout + 30)
|
|
851
|
+
except (asyncio.TimeoutError, concurrent.futures.TimeoutError):
|
|
852
|
+
elapsed = time.time() - start_time
|
|
853
|
+
log.warning(
|
|
854
|
+
f"ArticleFactChecker: overall timeout exceeded "
|
|
855
|
+
f"({elapsed:.1f}s / {timeout:.0f}s limit, fallback path)"
|
|
856
|
+
)
|
|
857
|
+
return cls._create_overall_timeout_result(elapsed, timeout)
|
|
837
858
|
raise
|
|
838
859
|
|
|
839
860
|
# --- Two-Phase Async Architecture Methods ---
|
|
@@ -922,8 +943,8 @@ class ArticleFactChecker(BaseAgent):
|
|
|
922
943
|
"""
|
|
923
944
|
from dingo.model.llm.agent.tools.claims_extractor import ClaimsExtractor, ClaimsExtractorConfig
|
|
924
945
|
|
|
925
|
-
|
|
926
|
-
agent_cfg =
|
|
946
|
+
extra_params = cls.dynamic_config.model_extra
|
|
947
|
+
agent_cfg = extra_params.get('agent_config') or {}
|
|
927
948
|
extractor_cfg = agent_cfg.get('tools', {}).get('claims_extractor', {})
|
|
928
949
|
|
|
929
950
|
config_kwargs: Dict[str, Any] = {
|
|
@@ -1019,10 +1040,30 @@ class ArticleFactChecker(BaseAgent):
|
|
|
1019
1040
|
@classmethod
|
|
1020
1041
|
def _get_max_concurrent_claims(cls) -> int:
|
|
1021
1042
|
"""Read max_concurrent_claims from agent_config or use class default."""
|
|
1022
|
-
|
|
1023
|
-
agent_cfg =
|
|
1043
|
+
extra_params = cls.dynamic_config.model_extra
|
|
1044
|
+
agent_cfg = extra_params.get('agent_config') or {}
|
|
1024
1045
|
return agent_cfg.get('max_concurrent_claims', cls.max_concurrent_claims)
|
|
1025
1046
|
|
|
1047
|
+
@classmethod
|
|
1048
|
+
def _get_overall_timeout(cls) -> float:
|
|
1049
|
+
"""Read overall_timeout from agent_config or use class default (900s).
|
|
1050
|
+
|
|
1051
|
+
Returns:
|
|
1052
|
+
Positive timeout in seconds, clamped to [30, 7200].
|
|
1053
|
+
"""
|
|
1054
|
+
extra_params = cls.dynamic_config.model_extra
|
|
1055
|
+
agent_cfg = extra_params.get('agent_config') or {}
|
|
1056
|
+
raw = agent_cfg.get('overall_timeout', cls.overall_timeout)
|
|
1057
|
+
try:
|
|
1058
|
+
timeout = float(raw)
|
|
1059
|
+
except (TypeError, ValueError):
|
|
1060
|
+
log.warning(f"Invalid overall_timeout={raw!r}, using default {cls.overall_timeout}s")
|
|
1061
|
+
return float(cls.overall_timeout)
|
|
1062
|
+
clamped = max(cls._MIN_OVERALL_TIMEOUT, min(timeout, cls._MAX_OVERALL_TIMEOUT))
|
|
1063
|
+
if clamped != timeout:
|
|
1064
|
+
log.warning(f"overall_timeout={timeout} out of range, clamped to {clamped}s")
|
|
1065
|
+
return float(clamped)
|
|
1066
|
+
|
|
1026
1067
|
@classmethod
|
|
1027
1068
|
def _parse_claim_json_robust(cls, output: Optional[str]) -> Dict[str, Any]:
|
|
1028
1069
|
"""
|
|
@@ -1795,6 +1836,38 @@ Begin your systematic fact-checking process now.
|
|
|
1795
1836
|
]
|
|
1796
1837
|
return result
|
|
1797
1838
|
|
|
1839
|
+
@classmethod
|
|
1840
|
+
def _create_overall_timeout_result(cls, elapsed: float, timeout: float) -> EvalDetail:
|
|
1841
|
+
"""
|
|
1842
|
+
Create error result when overall wall-clock timeout is exceeded.
|
|
1843
|
+
|
|
1844
|
+
Args:
|
|
1845
|
+
elapsed: Actual elapsed time in seconds
|
|
1846
|
+
timeout: Configured timeout limit in seconds
|
|
1847
|
+
|
|
1848
|
+
Returns:
|
|
1849
|
+
EvalDetail with timeout error status
|
|
1850
|
+
"""
|
|
1851
|
+
minutes, seconds = divmod(int(timeout), 60)
|
|
1852
|
+
limit_str = f"{minutes}m{seconds}s" if minutes else f"{int(timeout)}s"
|
|
1853
|
+
result = EvalDetail(metric=cls.__name__)
|
|
1854
|
+
result.status = True
|
|
1855
|
+
result.label = [f"{QualityLabel.QUALITY_BAD_PREFIX}AGENT_OVERALL_TIMEOUT"]
|
|
1856
|
+
result.reason = [
|
|
1857
|
+
"Article Fact-Checking Failed: Overall Timeout Exceeded",
|
|
1858
|
+
"=" * 70,
|
|
1859
|
+
f"Execution exceeded the {int(timeout)}s ({limit_str}) wall-clock limit.",
|
|
1860
|
+
f"Elapsed time: {elapsed:.1f}s",
|
|
1861
|
+
"",
|
|
1862
|
+
"Recommendations:",
|
|
1863
|
+
f" 1. Increase overall_timeout (current: {int(timeout)}s) in agent_config",
|
|
1864
|
+
" 2. Reduce max_claims in claims_extractor config (e.g., 50 -> 20)",
|
|
1865
|
+
" 3. Use a faster model (e.g., gpt-4o-mini instead of gpt-4o)",
|
|
1866
|
+
" 4. Reduce max_concurrent_claims to lower API rate-limit pressure",
|
|
1867
|
+
" 5. Split long articles into shorter sections",
|
|
1868
|
+
]
|
|
1869
|
+
return result
|
|
1870
|
+
|
|
1798
1871
|
@classmethod
|
|
1799
1872
|
def plan_execution(cls, input_data: Data) -> List[Dict[str, Any]]:
|
|
1800
1873
|
"""
|
|
@@ -70,15 +70,13 @@ class AgentFactCheck(BaseAgent):
|
|
|
70
70
|
"key": "your-openai-api-key",
|
|
71
71
|
"api_url": "https://api.openai.com/v1",
|
|
72
72
|
"model": "gpt-4.1-mini-2025-04-14",
|
|
73
|
-
"
|
|
74
|
-
"
|
|
75
|
-
|
|
76
|
-
"
|
|
77
|
-
"
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
"search_depth": "advanced"
|
|
81
|
-
}
|
|
73
|
+
"agent_config": {
|
|
74
|
+
"max_iterations": 5,
|
|
75
|
+
"tools": {
|
|
76
|
+
"tavily_search": {
|
|
77
|
+
"api_key": "your-tavily-api-key",
|
|
78
|
+
"max_results": 5,
|
|
79
|
+
"search_depth": "advanced"
|
|
82
80
|
}
|
|
83
81
|
}
|
|
84
82
|
}
|
|
@@ -82,15 +82,13 @@ class AgentHallucination(BaseAgent):
|
|
|
82
82
|
"key": "your-openai-api-key",
|
|
83
83
|
"api_url": "https://api.openai.com/v1",
|
|
84
84
|
"model": "gpt-4.1-mini-2025-04-14",
|
|
85
|
-
"
|
|
86
|
-
"
|
|
87
|
-
|
|
88
|
-
"
|
|
89
|
-
"
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
"search_depth": "advanced"
|
|
93
|
-
}
|
|
85
|
+
"agent_config": {
|
|
86
|
+
"max_iterations": 3,
|
|
87
|
+
"tools": {
|
|
88
|
+
"tavily_search": {
|
|
89
|
+
"api_key": "your-tavily-api-key",
|
|
90
|
+
"max_results": 5,
|
|
91
|
+
"search_depth": "advanced"
|
|
94
92
|
}
|
|
95
93
|
}
|
|
96
94
|
}
|
|
@@ -327,22 +327,22 @@ class AgentWrapper:
|
|
|
327
327
|
)
|
|
328
328
|
|
|
329
329
|
# Extract parameters
|
|
330
|
-
|
|
330
|
+
extra_params = dynamic_config.model_extra
|
|
331
331
|
|
|
332
332
|
# Create ChatOpenAI instance
|
|
333
333
|
llm = ChatOpenAI(
|
|
334
334
|
api_key=dynamic_config.key,
|
|
335
335
|
base_url=dynamic_config.api_url,
|
|
336
336
|
model=dynamic_config.model or "gpt-4.1-mini",
|
|
337
|
-
temperature=
|
|
338
|
-
max_tokens=
|
|
339
|
-
top_p=
|
|
340
|
-
timeout=
|
|
337
|
+
temperature=extra_params.get("temperature", 0.3),
|
|
338
|
+
max_tokens=extra_params.get("max_tokens", 4096),
|
|
339
|
+
top_p=extra_params.get("top_p", 1.0),
|
|
340
|
+
timeout=extra_params.get("timeout", 30)
|
|
341
341
|
)
|
|
342
342
|
|
|
343
343
|
log.debug(
|
|
344
344
|
f"Created ChatOpenAI: model={dynamic_config.model}, "
|
|
345
|
-
f"temp={
|
|
345
|
+
f"temp={extra_params.get('temperature', 0.3)}"
|
|
346
346
|
)
|
|
347
347
|
|
|
348
348
|
return llm
|
|
@@ -146,7 +146,7 @@ class BaseAgent(BaseOpenAI):
|
|
|
146
146
|
Extract tool configuration from agent's dynamic_config.
|
|
147
147
|
|
|
148
148
|
Configuration is expected in:
|
|
149
|
-
dynamic_config.
|
|
149
|
+
dynamic_config.agent_config.tools.{tool_name}
|
|
150
150
|
|
|
151
151
|
Args:
|
|
152
152
|
tool_name: Name of the tool
|
|
@@ -154,8 +154,8 @@ class BaseAgent(BaseOpenAI):
|
|
|
154
154
|
Returns:
|
|
155
155
|
Dict of configuration values for the tool
|
|
156
156
|
"""
|
|
157
|
-
|
|
158
|
-
agent_config =
|
|
157
|
+
extra_params = cls.dynamic_config.model_extra
|
|
158
|
+
agent_config = extra_params.get('agent_config', {})
|
|
159
159
|
tools_config = agent_config.get('tools', {})
|
|
160
160
|
return tools_config.get(tool_name, {})
|
|
161
161
|
|
|
@@ -184,8 +184,8 @@ class BaseAgent(BaseOpenAI):
|
|
|
184
184
|
Returns:
|
|
185
185
|
Maximum number of iterations allowed
|
|
186
186
|
"""
|
|
187
|
-
|
|
188
|
-
agent_config =
|
|
187
|
+
extra_params = cls.dynamic_config.model_extra
|
|
188
|
+
agent_config = extra_params.get('agent_config', {})
|
|
189
189
|
return agent_config.get('max_iterations', cls.max_iterations)
|
|
190
190
|
|
|
191
191
|
@classmethod
|
dingo/model/llm/base_openai.py
CHANGED
|
@@ -82,22 +82,18 @@ class BaseOpenAI(BaseLLM):
|
|
|
82
82
|
else:
|
|
83
83
|
model_name = cls.client.models.list().data[0].id
|
|
84
84
|
|
|
85
|
-
|
|
86
|
-
cls.validate_config(
|
|
85
|
+
extra_params = cls.dynamic_config.model_extra
|
|
86
|
+
cls.validate_config(extra_params)
|
|
87
87
|
|
|
88
88
|
completions = cls.client.chat.completions.create(
|
|
89
89
|
model=model_name,
|
|
90
90
|
messages=messages,
|
|
91
|
-
|
|
92
|
-
top_p=params.get("top_p", 1) if params else 1,
|
|
93
|
-
max_tokens=params.get("max_tokens", 4000) if params else 4000,
|
|
94
|
-
presence_penalty=params.get("presence_penalty", 0) if params else 0,
|
|
95
|
-
frequency_penalty=params.get("frequency_penalty", 0) if params else 0,
|
|
91
|
+
**extra_params,
|
|
96
92
|
)
|
|
97
93
|
|
|
98
94
|
if completions.choices[0].finish_reason == "length":
|
|
99
95
|
raise ExceedMaxTokens(
|
|
100
|
-
f"Exceed max tokens: {
|
|
96
|
+
f"Exceed max tokens: {extra_params.get('max_tokens', 4000)}"
|
|
101
97
|
)
|
|
102
98
|
|
|
103
99
|
return str(completions.choices[0].message.content)
|
|
@@ -25,9 +25,17 @@ class LLMHtmlExtractCompareV2(BaseOpenAI):
|
|
|
25
25
|
输入数据要求:
|
|
26
26
|
- input_data.prompt: 工具A提取的文本
|
|
27
27
|
- input_data.content: 工具B提取的文本
|
|
28
|
-
- input_data.raw_data
|
|
28
|
+
- language: 可选,来自 input_data.language 或 raw_data["language"],缺省为 "en"("zh" / "en")
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
|
+
_metric_info = {
|
|
32
|
+
'category': 'Pretrain Text Quality Assessment Metrics',
|
|
33
|
+
'metric_name': 'LLMHtmlExtractCompareV2',
|
|
34
|
+
'description': 'Compares two HTML main-content extraction tools by computing text diffs and using LLM to judge which preserves more core information',
|
|
35
|
+
'paper_title': '',
|
|
36
|
+
'paper_url': '',
|
|
37
|
+
}
|
|
38
|
+
|
|
31
39
|
_required_fields = [RequiredField.CONTENT, RequiredField.PROMPT]
|
|
32
40
|
prompt = {
|
|
33
41
|
"content_en": r"""Please compare the following two texts, each extracted from the same webpage using different HTML parsing methods. Your task is to determine whether there is a difference in the core informational content between them.
|
|
@@ -174,7 +182,8 @@ C. Text A 包含的核心信息内容少于 Text B
|
|
|
174
182
|
text_tool_b = input_data.content
|
|
175
183
|
|
|
176
184
|
# 获取配置参数
|
|
177
|
-
|
|
185
|
+
raw_data = getattr(input_data, 'raw_data', {}) or {}
|
|
186
|
+
language = raw_data.get("language", getattr(input_data, 'language', "en"))
|
|
178
187
|
|
|
179
188
|
# 计算文本差异
|
|
180
189
|
diff_result = cls.extract_text_diff(text_tool_a, text_tool_b)
|
|
@@ -283,8 +283,8 @@ Output:
|
|
|
283
283
|
|
|
284
284
|
# 判断是否通过(默认阈值 6.0)
|
|
285
285
|
threshold = 6.0
|
|
286
|
-
if hasattr(cls, 'dynamic_config')
|
|
287
|
-
threshold = cls.dynamic_config.
|
|
286
|
+
if hasattr(cls, 'dynamic_config'):
|
|
287
|
+
threshold = cls.dynamic_config.model_extra.get('threshold', 6.0)
|
|
288
288
|
|
|
289
289
|
if score >= threshold:
|
|
290
290
|
result.status = False
|
|
@@ -321,14 +321,14 @@ Output:
|
|
|
321
321
|
|
|
322
322
|
# 难度评估没有"通过/不通过"的概念,只是描述性的
|
|
323
323
|
# 但为了兼容框架,我们设置一个合理的默认行为
|
|
324
|
-
# 可以通过
|
|
324
|
+
# 可以通过 config 中的 min_difficulty 和 max_difficulty 配置难度范围
|
|
325
325
|
result.status = False # 默认不标记为问题
|
|
326
326
|
result.label = [f"TASK_DIFFICULTY.{difficulty_level.upper()}"]
|
|
327
327
|
|
|
328
328
|
# 如果配置了难度范围要求,进行检查
|
|
329
|
-
if hasattr(cls, 'dynamic_config')
|
|
330
|
-
min_difficulty = cls.dynamic_config.
|
|
331
|
-
max_difficulty = cls.dynamic_config.
|
|
329
|
+
if hasattr(cls, 'dynamic_config'):
|
|
330
|
+
min_difficulty = cls.dynamic_config.model_extra.get('min_difficulty', 0)
|
|
331
|
+
max_difficulty = cls.dynamic_config.model_extra.get('max_difficulty', 10)
|
|
332
332
|
|
|
333
333
|
if difficulty_score < min_difficulty:
|
|
334
334
|
result.status = True
|
|
@@ -242,14 +242,8 @@ class LLMRAGAnswerRelevancy(BaseOpenAI):
|
|
|
242
242
|
|
|
243
243
|
try:
|
|
244
244
|
# 增加温度参数以提高问题生成的随机性
|
|
245
|
-
if hasattr(cls, 'dynamic_config') and cls.dynamic_config.
|
|
246
|
-
|
|
247
|
-
cls.dynamic_config.parameters['temperature'] = 0.7
|
|
248
|
-
else:
|
|
249
|
-
# 如果没有parameters,创建一个包含temperature的parameters
|
|
250
|
-
current_params = cls.dynamic_config.parameters or {}
|
|
251
|
-
current_params['temperature'] = 0.7
|
|
252
|
-
cls.dynamic_config.parameters = current_params
|
|
245
|
+
if hasattr(cls, 'dynamic_config') and 'temperature' not in cls.dynamic_config.model_extra:
|
|
246
|
+
cls.dynamic_config.temperature = 0.7
|
|
253
247
|
|
|
254
248
|
# 生成多个相关问题
|
|
255
249
|
generated_questions = cls.generate_multiple_questions(input_data, cls.strictness)
|
|
@@ -263,10 +257,9 @@ class LLMRAGAnswerRelevancy(BaseOpenAI):
|
|
|
263
257
|
|
|
264
258
|
# 根据分数判断是否通过,默认阈值为5
|
|
265
259
|
threshold = 5
|
|
266
|
-
if hasattr(cls, 'dynamic_config')
|
|
267
|
-
threshold = cls.dynamic_config.
|
|
268
|
-
|
|
269
|
-
cls.strictness = cls.dynamic_config.parameters.get('strictness', 3)
|
|
260
|
+
if hasattr(cls, 'dynamic_config'):
|
|
261
|
+
threshold = cls.dynamic_config.model_extra.get('threshold', 5)
|
|
262
|
+
cls.strictness = cls.dynamic_config.model_extra.get('strictness', 3)
|
|
270
263
|
|
|
271
264
|
# 构建详细的reason文本
|
|
272
265
|
all_reasons = []
|
|
@@ -256,8 +256,8 @@ class LLMRAGContextPrecision(BaseOpenAI):
|
|
|
256
256
|
|
|
257
257
|
# 根据分数判断是否通过,默认阈值为5
|
|
258
258
|
threshold = 5
|
|
259
|
-
if hasattr(cls, 'dynamic_config')
|
|
260
|
-
threshold = cls.dynamic_config.
|
|
259
|
+
if hasattr(cls, 'dynamic_config'):
|
|
260
|
+
threshold = cls.dynamic_config.model_extra.get('threshold', 5)
|
|
261
261
|
|
|
262
262
|
if score >= threshold:
|
|
263
263
|
result.status = False
|
|
@@ -215,8 +215,8 @@ class LLMRAGContextRecall(BaseOpenAI):
|
|
|
215
215
|
|
|
216
216
|
# 根据分数判断是否通过,默认阈值为5
|
|
217
217
|
threshold = 5
|
|
218
|
-
if hasattr(cls, 'dynamic_config')
|
|
219
|
-
threshold = cls.dynamic_config.
|
|
218
|
+
if hasattr(cls, 'dynamic_config'):
|
|
219
|
+
threshold = cls.dynamic_config.model_extra.get('threshold', 5)
|
|
220
220
|
|
|
221
221
|
if score >= threshold:
|
|
222
222
|
result.status = False
|
|
@@ -206,8 +206,8 @@ class LLMRAGContextRelevancy(BaseOpenAI):
|
|
|
206
206
|
|
|
207
207
|
# 根据分数判断是否通过,默认阈值为5
|
|
208
208
|
threshold = 5
|
|
209
|
-
if hasattr(cls, 'dynamic_config')
|
|
210
|
-
threshold = cls.dynamic_config.
|
|
209
|
+
if hasattr(cls, 'dynamic_config'):
|
|
210
|
+
threshold = cls.dynamic_config.model_extra.get('threshold', 5)
|
|
211
211
|
|
|
212
212
|
if score >= threshold:
|
|
213
213
|
result.status = False
|
|
@@ -290,8 +290,8 @@ class LLMRAGFaithfulness(BaseOpenAI):
|
|
|
290
290
|
|
|
291
291
|
# 根据分数判断是否通过,默认阈值为5
|
|
292
292
|
threshold = 5
|
|
293
|
-
if hasattr(cls, 'dynamic_config')
|
|
294
|
-
threshold = cls.dynamic_config.
|
|
293
|
+
if hasattr(cls, 'dynamic_config'):
|
|
294
|
+
threshold = cls.dynamic_config.model_extra.get('threshold', 5)
|
|
295
295
|
|
|
296
296
|
if score >= threshold:
|
|
297
297
|
result.status = False
|
|
@@ -47,16 +47,11 @@ class BaseTextQuality(BaseOpenAI):
|
|
|
47
47
|
response_json = json.loads(response)
|
|
48
48
|
response_model = ResponseScoreTypeNameReason(**response_json)
|
|
49
49
|
|
|
50
|
-
# Create EvalDetail with all required fields
|
|
51
|
-
# status = False for Good quality (no issues found)
|
|
52
|
-
# status = True for Bad quality (issues found)
|
|
53
|
-
is_good = response_model.type == "Good"
|
|
54
|
-
|
|
55
50
|
result = EvalDetail(
|
|
56
51
|
metric=cls.__name__,
|
|
57
|
-
status=
|
|
52
|
+
status=False if response_model.score == 1 else True,
|
|
58
53
|
score=response_model.score,
|
|
59
|
-
label=["QUALITY_GOOD"] if
|
|
54
|
+
label=["QUALITY_GOOD"] if response_model.score == 1 else [f"{response_model.type}.{response_model.name}"],
|
|
60
55
|
reason=[response_model.reason]
|
|
61
56
|
)
|
|
62
57
|
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from dingo.io.input import RequiredField
|
|
2
|
+
from dingo.model import Model
|
|
3
|
+
from dingo.model.llm.text_quality.base_text_quality import BaseTextQuality
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@Model.llm_register("LLMTextEquation")
|
|
7
|
+
class LLMTextEquation(BaseTextQuality):
|
|
8
|
+
# Metadata for documentation generation
|
|
9
|
+
_metric_info = {
|
|
10
|
+
"category": "Pretrain Text Quality Assessment Metrics",
|
|
11
|
+
"metric_name": "LLMTextQualityV5",
|
|
12
|
+
"description": "Impact-driven text quality evaluation for LLM pretraining, focusing on structural completeness, readability, diversity, and safety with quantitative thresholds",
|
|
13
|
+
"paper_title": "WanJuanSiLu: A High-Quality Open-Source Webtext Dataset for Low-Resource Languages",
|
|
14
|
+
"paper_url": "https://arxiv.org/abs/2501.14506",
|
|
15
|
+
"paper_authors": "Yu et al., 2025",
|
|
16
|
+
"examples": "examples/llm_and_rule/llm_local.py",
|
|
17
|
+
"evaluation_results": "docs/eval/prompt/redpajama_data_evaluated_by_prompt.md"
|
|
18
|
+
}
|
|
19
|
+
_required_fields = [RequiredField.CONTENT]
|
|
20
|
+
prompt = r"""
|
|
21
|
+
你是一个专业的数学、化学等学科的公式质检员。我会给你一个从文档中提取的 equation 类型元素(JSON 格式),请对其 text 字段进行质量检测。
|
|
22
|
+
|
|
23
|
+
## 检测维度
|
|
24
|
+
|
|
25
|
+
1. **语法问题**
|
|
26
|
+
- LaTeX 命令拼写错误(如 \frace 代替 \frac)
|
|
27
|
+
- 括号未正确配对闭合({}、[]、())
|
|
28
|
+
- 环境标签不匹配(如 \begin{} 与 \end{} 不对应)
|
|
29
|
+
|
|
30
|
+
2. **识别问题**
|
|
31
|
+
- 疑似 OCR 识别错误(如字母与符号混淆:x 与 ×、- 与 −、l 与 1、O 与 0 等)
|
|
32
|
+
- 公式内容明显残缺或截断
|
|
33
|
+
- 出现乱码或无意义字符
|
|
34
|
+
|
|
35
|
+
3. **语义问题**
|
|
36
|
+
- 公式结构不完整,无法表达完整的数学含义
|
|
37
|
+
- 运算符或符号使用明显不合数学规范
|
|
38
|
+
|
|
39
|
+
## 一级错误类型(type)
|
|
40
|
+
|
|
41
|
+
- `syntax`:语法问题
|
|
42
|
+
- `recognition`:识别问题
|
|
43
|
+
- `semantic`:语义问题
|
|
44
|
+
|
|
45
|
+
## 二级错误类型(name)
|
|
46
|
+
|
|
47
|
+
- `command_error`:LaTeX 命令拼写错误
|
|
48
|
+
- `bracket_mismatch`:括号未正确配对
|
|
49
|
+
- `env_mismatch`:环境标签不匹配
|
|
50
|
+
- `ocr_error`:OCR 字符识别错误
|
|
51
|
+
- `truncated_content`:公式残缺或截断
|
|
52
|
+
- `garbled_text`:乱码或无意义字符
|
|
53
|
+
- `incomplete_expression`:公式结构不完整
|
|
54
|
+
- `invalid_notation`:数学符号使用不规范
|
|
55
|
+
- `none`:无问题
|
|
56
|
+
|
|
57
|
+
## Output Format
|
|
58
|
+
|
|
59
|
+
Return JSON only: {"score": 0/1, "type": "", "name": "", "reason": ""}
|
|
60
|
+
|
|
61
|
+
score 类型必须为int;
|
|
62
|
+
score 为 1 表示通过,type 填 "Good",name 填 "None",reason 说明公式正常的依据;
|
|
63
|
+
score 为 0 表示不通过,type 和 name 填对应的错误类型,reason 说明判断依据并指出具体的问题位置或内容。
|
|
64
|
+
|
|
65
|
+
## Input content to evaluate:
|
|
66
|
+
|
|
67
|
+
"""
|
|
68
|
+
# process_response method is now inherited from BaseTextQuality
|
|
@@ -30,7 +30,27 @@ Evaluate whether this text is suitable for LLM pretraining. Focus on issues that
|
|
|
30
30
|
**Impact**: Broken structures prevent models from learning correct formatting patterns.
|
|
31
31
|
|
|
32
32
|
**Check for**:
|
|
33
|
-
- **Error_Formula**: Mathematical
|
|
33
|
+
- **Error_Formula**: Mathematical content with **broken syntax** OR **systematically stripped symbols/formulas**
|
|
34
|
+
|
|
35
|
+
Two failure modes:
|
|
36
|
+
|
|
37
|
+
**(A) Broken LaTeX syntax** — delimiters or environments are present but malformed:
|
|
38
|
+
- Delimiters unmatched: $ without closing $ (LaTeX context, not dollar signs)
|
|
39
|
+
- Environments unclosed: \\begin{{align}} without \\end{{align}}
|
|
40
|
+
- Syntax broken: \\frac{{a}}{{b missing closing }}
|
|
41
|
+
- HTML tags unclosed: <sub>text without </sub>
|
|
42
|
+
- Impact: Prevents >50% of mainstream parsers from rendering
|
|
43
|
+
|
|
44
|
+
**(B) Stripped mathematical content** — symbols/formulas systematically removed during extraction:
|
|
45
|
+
- Orphan hyphens from stripped Greek letters: "κ-solutions" → "-solutions", "ε-net" → "-net"
|
|
46
|
+
- Empty positions after connective words: "thus ;" or "the interval ;" where a formula was removed
|
|
47
|
+
- Sentences referencing variables/expressions that are absent: "a small number" (number missing), "we have ." (equation missing)
|
|
48
|
+
- Systematic loss: multiple occurrences throughout the text, not just one or two typos
|
|
49
|
+
- Impact: Mathematical text becomes incoherent; models learn broken academic writing patterns
|
|
50
|
+
|
|
51
|
+
Example (BAD — stripped symbols):
|
|
52
|
+
"Let be a -solution to the Ricci flow which is -noncollapsed. Ancient, in the sense that t ranges on the interval ; Bounded curvature, thus ;"
|
|
53
|
+
(Greek letters κ stripped from "κ-solution" and "κ-noncollapsed"; interval expression and inequality after "thus" removed entirely)
|
|
34
54
|
|
|
35
55
|
⚠️ **Normal patterns (DO NOT flag)**:
|
|
36
56
|
- Mixing inline ($...$) and display ($$...$$) formulas
|
|
@@ -38,31 +58,39 @@ Evaluate whether this text is suitable for LLM pretraining. Focus on issues that
|
|
|
38
58
|
- Line breaks with \\\\ in alignment environments
|
|
39
59
|
- HTML tags: <sub>x</sub>, <sup>2</sup> for subscripts/superscripts
|
|
40
60
|
- Mixing LaTeX and HTML in web-extracted content
|
|
41
|
-
|
|
42
|
-
✅ **Only flag when**:
|
|
43
|
-
- Delimiters unmatched: $ without closing $ (LaTeX context, not dollar signs)
|
|
44
|
-
- Environments unclosed: \\begin{{align}} without \\end{{align}}
|
|
45
|
-
- Syntax broken: \\frac{{a}}{{b missing closing }}
|
|
46
|
-
- HTML tags unclosed: <sub>text without </sub>
|
|
61
|
+
- Plain-text math without any LaTeX (e.g., "a^2 + b^2 = c^2" without $ delimiters) — this is fine as long as the expressions are actually present
|
|
47
62
|
|
|
48
63
|
⚠️ **Important**: Distinguish LaTeX $ from dollar signs ($100)
|
|
49
64
|
- Dollar sign: "$100", "$5.99" (followed by numbers) → NOT LaTeX
|
|
50
65
|
- LaTeX delimiter: "$x$", "$\\alpha$" (contains math symbols) → IS LaTeX
|
|
51
|
-
- Example: "The price is $100 and equation $x=y$ costs $50" has 4 dollar symbols but only 2 are LaTeX delimiters (and they match)
|
|
52
66
|
|
|
53
|
-
- Example (BAD): "$x^2 + y^2 is broken here $$a = b$$$"
|
|
67
|
+
- Example (BAD — broken delimiters): "$x^2 + y^2 is broken here $$a = b$$$"
|
|
54
68
|
(First LaTeX $ never closes, extra $ at end)
|
|
55
69
|
- Example (GOOD): "The item costs $100 and satisfies $x^2 + y^2 = z^2$ where price is $50"
|
|
56
70
|
(Dollar signs for money + proper LaTeX pair)
|
|
57
|
-
- Impact: Only flag errors that prevent >50% of mainstream parsers (pdflatex, MathJax, KaTeX, Pandoc, Jupyter) from rendering
|
|
58
71
|
|
|
59
72
|
- **Error_Table**: Table structures that are malformed or unreadable
|
|
60
73
|
- Example (BAD): Misaligned columns, missing headers, or garbled HTML tags
|
|
61
74
|
- Impact: Models cannot learn proper table representation
|
|
62
75
|
|
|
63
76
|
- **Error_Code**: Code blocks with formatting corruption
|
|
64
|
-
|
|
65
|
-
-
|
|
77
|
+
**Common corruption patterns**:
|
|
78
|
+
- Missing code fence (` ``` `): code appears as plain text without language block
|
|
79
|
+
- Lost indentation: Python/YAML code with all indentation stripped (flat lines)
|
|
80
|
+
- Broken identifiers: spaces injected into tokens, e.g. `sys .argv`, `pts .append`, `i[ 0]`
|
|
81
|
+
- Line numbers mixed with code, broken syntax highlighting markers
|
|
82
|
+
- Keywords wrapped in inline backticks instead of a fenced block, e.g. `` `import` sys ``
|
|
83
|
+
|
|
84
|
+
Example (BAD — indentation and identifiers destroyed):
|
|
85
|
+
```
|
|
86
|
+
`import` sys
|
|
87
|
+
pts = []
|
|
88
|
+
for i in range( 1,len(sys .argv), 2):
|
|
89
|
+
pts .append([int(sys .argv[i]), int(sys .argv[i +1])])
|
|
90
|
+
```
|
|
91
|
+
Correct version would have a code fence, proper indentation, and no spaces inside `sys.argv`.
|
|
92
|
+
|
|
93
|
+
- Impact: Teaches incorrect code syntax, broken tokenization patterns, and wrong indentation conventions
|
|
66
94
|
|
|
67
95
|
**Key Question**: "Can the model learn proper formatting from this structure?"
|
|
68
96
|
|
|
@@ -160,10 +188,14 @@ Output: {{"score": 1, "type": "Good", "name": "None", "reason": "Well-formed mul
|
|
|
160
188
|
Input: "The eigenstate $\\psi_n$ where <sub>n</sub> is quantum number and energy E<sup>2</sup> = m<sup>2</sup>c<sup>4</sup>"
|
|
161
189
|
Output: {{"score": 1, "type": "Good", "name": "None", "reason": "Normal mix of LaTeX and HTML tags from web content"}}
|
|
162
190
|
|
|
163
|
-
**Example 2 (Bad - Completeness)**:
|
|
191
|
+
**Example 2 (Bad - Completeness, broken delimiters)**:
|
|
164
192
|
Input: "The formula $x^2 + y^2 is broken here $$a = b$$$"
|
|
165
193
|
Output: {"score": 0, "type": "Completeness", "name": "Error_Formula", "reason": "Unmatched delimiters: first $ never closes, extra $ at end"}
|
|
166
194
|
|
|
195
|
+
**Example 2.5 (Bad - Completeness, stripped math)**:
|
|
196
|
+
Input: "Definition 1.(-solutions) A -solution is a Ricci flow which is -noncollapsed at every scale. Ancient, in the sense that t ranges on the interval ; Bounded curvature, thus ;"
|
|
197
|
+
Output: {{"score": 0, "type": "Completeness", "name": "Error_Formula", "reason": "Mathematical symbols systematically stripped: Greek letters removed ('-solutions' instead of 'κ-solutions'), formulas missing after 'the interval' and 'thus'"}}
|
|
198
|
+
|
|
167
199
|
**Example 3 (Bad - Effectiveness)**:
|
|
168
200
|
Input: "Theappleisredandtasty�withsomegarbledtext□□"
|
|
169
201
|
Output: {"score": 0, "type": "Effectiveness", "name": "Error_Garbled_Characters", "reason": "Contains encoding corruption (�, □) and missing spaces (>1% of text)"}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from dingo.io.input import RequiredField
|
|
2
|
+
from dingo.model import Model
|
|
3
|
+
from dingo.model.llm.text_quality.base_text_quality import BaseTextQuality
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@Model.llm_register("LLMTextTable")
|
|
7
|
+
class LLMTextTable(BaseTextQuality):
|
|
8
|
+
# Metadata for documentation generation
|
|
9
|
+
_metric_info = {
|
|
10
|
+
"category": "Pretrain Text Quality Assessment Metrics",
|
|
11
|
+
"metric_name": "LLMTextTable",
|
|
12
|
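+
"description": "LLM-based quality check for HTML tables extracted from documents, detecting structural errors (broken tags, row/column mismatch, empty tables), recognition errors (garbled text, OCR confusion, truncation), and semantic errors (incoherent or misaligned cell content)",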
|
|
13
|
+
"paper_title": "WanJuanSiLu: A High-Quality Open-Source Webtext Dataset for Low-Resource Languages",
|
|
14
|
+
"paper_url": "https://arxiv.org/abs/2501.14506",
|
|
15
|
+
"paper_authors": "Yu et al., 2025",
|
|
16
|
+
"examples": "examples/llm_and_rule/llm_local.py",
|
|
17
|
+
"evaluation_results": "docs/eval/prompt/redpajama_data_evaluated_by_prompt.md"
|
|
18
|
+
}
|
|
19
|
+
_required_fields = [RequiredField.CONTENT]
|
|
20
|
+
prompt = r"""
|
|
21
|
+
你是一个专业的表格数据质检员。我会给你一段从文档中提取的 HTML 表格(table_body 字段),请判断该表格是否存在质量问题。
|
|
22
|
+
|
|
23
|
+
## 检测维度
|
|
24
|
+
|
|
25
|
+
请从以下维度进行检查:
|
|
26
|
+
|
|
27
|
+
1. **结构问题**
|
|
28
|
+
- HTML 标签不完整或嵌套错误(<table>、<tr>、<td> 未正确闭合)
|
|
29
|
+
- 行列结构异常(某行 <td> 数量与其他行差异过大)
|
|
30
|
+
- 表格内容全部为空
|
|
31
|
+
|
|
32
|
+
2. **识别问题**
|
|
33
|
+
- 存在明显乱码或无意义字符
|
|
34
|
+
- 疑似 OCR 识别错误(如字母/数字混淆:0与O、1与l、S与5等)
|
|
35
|
+
- 文字截断或内容残缺
|
|
36
|
+
|
|
37
|
+
3. **语义问题**
|
|
38
|
+
- 单元格内容语义不连贯,无法理解表格表达的含义
|
|
39
|
+
- 行列关系混乱,内容错位
|
|
40
|
+
|
|
41
|
+
## 一级错误类型(type)
|
|
42
|
+
|
|
43
|
+
- `structure`:结构问题
|
|
44
|
+
- `recognition`:识别问题
|
|
45
|
+
- `semantic`:语义问题
|
|
46
|
+
|
|
47
|
+
## 二级错误类型(name)
|
|
48
|
+
|
|
49
|
+
- `tag_error`:标签不完整或嵌套错误
|
|
50
|
+
- `row_col_mismatch`:行列数量不一致
|
|
51
|
+
- `empty_table`:表格内容为空
|
|
52
|
+
- `garbled_text`:乱码或无意义字符
|
|
53
|
+
- `ocr_error`:OCR 字符识别错误
|
|
54
|
+
- `truncated_content`:文字截断或内容残缺
|
|
55
|
+
- `incoherent_semantics`:语义不连贯
|
|
56
|
+
- `misaligned_content`:内容错位
|
|
57
|
+
- `none`:无问题
|
|
58
|
+
|
|
59
|
+
## Output Format
|
|
60
|
+
|
|
61
|
+
Return JSON only: {"score": 0/1, "type": "", "name": "", "reason": ""}
|
|
62
|
+
|
|
63
|
+
score 类型必须为int;
|
|
64
|
+
score 为 1 表示通过,type 填 "Good",name 填 "None",reason 说明表格正常的依据;
|
|
65
|
+
score 为 0 表示不通过,type 和 name 填对应的错误类型,reason 说明判断依据并指出具体位置或内容。
|
|
66
|
+
|
|
67
|
+
## Input content to evaluate:
|
|
68
|
+
|
|
69
|
+
"""
|
|
70
|
+
# process_response method is now inherited from BaseTextQuality
|
|
@@ -201,8 +201,8 @@ class VLMLayoutQuality(BaseOpenAI):
|
|
|
201
201
|
else:
|
|
202
202
|
model_name = cls.client.models.list().data[0].id
|
|
203
203
|
|
|
204
|
-
|
|
205
|
-
cls.validate_config(
|
|
204
|
+
extra_params = cls.dynamic_config.model_extra
|
|
205
|
+
cls.validate_config(extra_params)
|
|
206
206
|
|
|
207
207
|
completions = cls.client.chat.completions.create(
|
|
208
208
|
model=model_name,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dingo-python
|
|
3
|
-
Version: 2.2.1
|
|
3
|
+
Version: 2.2.2
|
|
4
4
|
Summary: A Comprehensive AI Data Quality Evaluation Tool for Large Models
|
|
5
5
|
Home-page: https://github.com/MigoXLab/dingo
|
|
6
6
|
Author: Dingo
|
|
@@ -41,6 +41,7 @@ Provides-Extra: agent
|
|
|
41
41
|
Requires-Dist: langchain>=1.0.0; extra == "agent"
|
|
42
42
|
Requires-Dist: langchain-openai>=1.0.0; extra == "agent"
|
|
43
43
|
Requires-Dist: tavily-python>=0.3.0; extra == "agent"
|
|
44
|
+
Requires-Dist: arxiv>=2.4.0; extra == "agent"
|
|
44
45
|
Provides-Extra: hhem
|
|
45
46
|
Requires-Dist: transformers>=4.30.0; extra == "hhem"
|
|
46
47
|
Requires-Dist: torch>=1.12.0; extra == "hhem"
|
|
@@ -54,6 +55,7 @@ Requires-Dist: tokenizers>=0.13.0; extra == "all"
|
|
|
54
55
|
Requires-Dist: langchain>=1.0.0; extra == "all"
|
|
55
56
|
Requires-Dist: langchain-openai>=1.0.0; extra == "all"
|
|
56
57
|
Requires-Dist: tavily-python>=0.3.0; extra == "all"
|
|
58
|
+
Requires-Dist: arxiv>=2.4.0; extra == "all"
|
|
57
59
|
Dynamic: author
|
|
58
60
|
Dynamic: classifier
|
|
59
61
|
Dynamic: description
|
|
@@ -94,6 +96,7 @@ Dynamic: summary
|
|
|
94
96
|
<a href="https://mseep.ai/app/dataeval-dingo"><img src="https://mseep.net/pr/dataeval-dingo-badge.png" alt="MseeP.ai Security Assessment Badge" height="20"></a>
|
|
95
97
|
<a href="https://deepwiki.com/MigoXLab/dingo"><img src="https://deepwiki.com/badge.svg" alt="Ask DeepWiki"></a>
|
|
96
98
|
<a href="https://archestra.ai/mcp-catalog/dataeval__dingo"><img src="https://archestra.ai/mcp-catalog/api/badge/quality/DataEval/dingo" alt="Trust Score"></a>
|
|
99
|
+
<a href="https://clawhub.ai/e06084/dingo"><img src="https://img.shields.io/badge/ClawHub-Skill-orange?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHRleHQgeT0iMTgiIGZvbnQtc2l6ZT0iMTYiPvCfpp48L3RleHQ+PC9zdmc+" alt="ClawHub Skill"></a>
|
|
97
100
|
</p>
|
|
98
101
|
|
|
99
102
|
</div>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
dingo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
dingo/config/__init__.py,sha256=SaeOmGEUG0Hp5lqHxnHUTE_9ysN5KzA_Icilb9xY2mQ,349
|
|
3
|
-
dingo/config/input_args.py,sha256=
|
|
3
|
+
dingo/config/input_args.py,sha256=48LEVcj7qIwIIVcPM7I29Mpl2YWbCGwv-nRdtVeTYcc,4147
|
|
4
4
|
dingo/data/__init__.py,sha256=reCw4XQoInUTtvRW6c1wY_LH1EWJ7XpZDQcBCW61Lf8,214
|
|
5
5
|
dingo/data/converter/__init__.py,sha256=1MiG4H8Sg2sYHQmYdg0F9_1okP_YoMNHyQorPEAf6zw,91
|
|
6
6
|
dingo/data/converter/base.py,sha256=_WXa_plKj83iFgQyHABchGbX-dv3d17QuODua-bd83w,12820
|
|
@@ -38,7 +38,7 @@ dingo/model/model.py,sha256=4Y73hETATJVzwb9p62D0NV7STpDUIJo4Sx0_NYak68w,6106
|
|
|
38
38
|
dingo/model/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
39
|
dingo/model/llm/base.py,sha256=n5ZHJNoJ0XSeG2i6ydN3W6pUYSAJaQgirjT_CiaJUlU,384
|
|
40
40
|
dingo/model/llm/base_lmdeploy_apiclient.py,sha256=fTfSyqynGH-C29IijIU0euIWpc3BxoYXEEj-UEJWtCA,3457
|
|
41
|
-
dingo/model/llm/base_openai.py,sha256=
|
|
41
|
+
dingo/model/llm/base_openai.py,sha256=_qu96G0nVQcJdyashrbXGbTCTEuz6WkL1Uh4TjXNS28,7894
|
|
42
42
|
dingo/model/llm/llm_classify_qr.py,sha256=CpaXg1jaRRxBHJEpznV8qr4thVrK93vd5K65ubKp1QI,2568
|
|
43
43
|
dingo/model/llm/llm_classify_topic.py,sha256=AjSXi6KR5sVrkuxPfTYF-HoQlIMa-OshlJjFgdIE48w,5185
|
|
44
44
|
dingo/model/llm/llm_dataman_assessment.py,sha256=yxHn3wc106XqKKyAaslJ-ZQhMinFYnEPmaHLOcGsqss,5574
|
|
@@ -55,14 +55,14 @@ dingo/model/llm/llm_text_chaos.py,sha256=tuvhSyatcImj8ZRB6-Ah2F7lmfe41sEHVxHMEdr
|
|
|
55
55
|
dingo/model/llm/llm_text_code_list_issue.py,sha256=hEa6L-_uc-lp_0cTMl3KmlHjKxJiChcC4acGg9bQGCY,3392
|
|
56
56
|
dingo/model/llm/llm_text_kaoti.py,sha256=8-MAALqF-iBNSE3Qukh5Tt9LhM8BF4ywR7KhjbllItk,8047
|
|
57
57
|
dingo/model/llm/vlm_image_relevant.py,sha256=nuOqMSxfqHPdYzCXy-le3baX9-RIKnSdCoy1IFwK1Bk,4209
|
|
58
|
-
dingo/model/llm/vlm_layout_quality.py,sha256=
|
|
58
|
+
dingo/model/llm/vlm_layout_quality.py,sha256=hILcPWmxls0dr9XF1idSxmE4KjSfamJ-CJGp4sBfIVU,14160
|
|
59
59
|
dingo/model/llm/vlm_ocr_understanding.py,sha256=_fmcYWeoh4rNx7WrkVx8PhxJ8JEw-nNm-nxFunuiMt4,7810
|
|
60
60
|
dingo/model/llm/agent/__init__.py,sha256=gPo09JDUrctXbiqruFlR_rs1et0Nz1_Au3N_xAOTLTg,718
|
|
61
|
-
dingo/model/llm/agent/agent_article_fact_checker.py,sha256=
|
|
62
|
-
dingo/model/llm/agent/agent_fact_check.py,sha256=
|
|
63
|
-
dingo/model/llm/agent/agent_hallucination.py,sha256=
|
|
64
|
-
dingo/model/llm/agent/agent_wrapper.py,sha256=
|
|
65
|
-
dingo/model/llm/agent/base_agent.py,sha256=
|
|
61
|
+
dingo/model/llm/agent/agent_article_fact_checker.py,sha256=oS5O-LOYjc3CsTzqL6ui7jTxB0_amNHYVrFTV6NFvq8,80030
|
|
62
|
+
dingo/model/llm/agent/agent_fact_check.py,sha256=fRvDoULR-bL6VvXpuKWkGH3nKHyDFoXEMsEGnW3bOOQ,15942
|
|
63
|
+
dingo/model/llm/agent/agent_hallucination.py,sha256=p0OX2SSwoo2cQJouIjZJiHmFJdlNRvAqD7Bpwx6RrDo,16137
|
|
64
|
+
dingo/model/llm/agent/agent_wrapper.py,sha256=EI-nYEM0Jwn3wJZUYSiEy-820G_vKTHItSpInrH5qVg,12417
|
|
65
|
+
dingo/model/llm/agent/base_agent.py,sha256=Gedq9WrFVFkrS-jzjg_EZj6QeR6Bj-xABqIiOSozMnI,17079
|
|
66
66
|
dingo/model/llm/agent/langchain_adapter.py,sha256=lVYr5yRha3vIRPwqne1wzz0fB0hjsN-rC0NWExJHZ2c,7438
|
|
67
67
|
dingo/model/llm/agent/tools/__init__.py,sha256=HRUXnmkOdoC8XOq_YRg4yuhfeANNrDL5kI01L4C1cSc,590
|
|
68
68
|
dingo/model/llm/agent/tools/arxiv_search.py,sha256=TqkJfqsBTmxmmzL-Ijw5rBBZGCqr3rsvXoY77lzs-fc,19645
|
|
@@ -76,7 +76,7 @@ dingo/model/llm/compare/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG
|
|
|
76
76
|
dingo/model/llm/compare/llm_code_compare.py,sha256=ihWgJQ22Ycsa3kcY6K2_vN0dZzCR2HTyXv2LJnwPpiI,7077
|
|
77
77
|
dingo/model/llm/compare/llm_html_extract_compare.py,sha256=23sK61QbEe3ap6LQh9NmxmjrVpl_0G5qfQH2oYQuYr8,6443
|
|
78
78
|
dingo/model/llm/compare/llm_html_extract_compare_en.py,sha256=siPbaCcCWqvHNyHduul3wCI9lfSWC08f81JMZH-Ebh4,5427
|
|
79
|
-
dingo/model/llm/compare/llm_html_extract_compare_v2.py,sha256
|
|
79
|
+
dingo/model/llm/compare/llm_html_extract_compare_v2.py,sha256=-HWm4NLBTaQmF9fcjEs2V8IOHPo4f_hegdPxjZckpzA,11940
|
|
80
80
|
dingo/model/llm/compare/llm_math_compare.py,sha256=Jseu9i6cCQ0uXxEp7SovraKDxDZkRdsGwI0hfadbdoQ,7662
|
|
81
81
|
dingo/model/llm/compare/llm_table_compare.py,sha256=zw7JhFK1v-NZitOIRmTdbw-GdWkUGMyah5N1gFsdMF4,7564
|
|
82
82
|
dingo/model/llm/hhh/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -85,8 +85,8 @@ dingo/model/llm/hhh/llm_text_3h_harmless.py,sha256=BEllrmFzBfGJvC_gN4TOeit9FXAAT
|
|
|
85
85
|
dingo/model/llm/hhh/llm_text_3h_helpful.py,sha256=OrK3chIL6KOTnIHDZciICQNM5pURhv3KhtzuXlcjCWE,2397
|
|
86
86
|
dingo/model/llm/hhh/llm_text_3h_honest.py,sha256=4xgHwzbqfuK_HNB8qqwYI-YVXfaGy2c6U5i-wltnVdY,2151
|
|
87
87
|
dingo/model/llm/instruction_quality/__init__.py,sha256=PRFsZUG1oBA6EP74b5eQCs-gaLkG5_W2isOeX6TCcxM,708
|
|
88
|
-
dingo/model/llm/instruction_quality/llm_instruction_clarity.py,sha256=
|
|
89
|
-
dingo/model/llm/instruction_quality/llm_task_difficulty.py,sha256=
|
|
88
|
+
dingo/model/llm/instruction_quality/llm_instruction_clarity.py,sha256=sq_UdY_y0zRsow3KtyTDTlPgITSDZ5ludtVjhTKS-pA,11681
|
|
89
|
+
dingo/model/llm/instruction_quality/llm_task_difficulty.py,sha256=iSwYrbRYD33Gqu_2KEr7zSrJRAuge7qsuWuhQxXVZ1U,14712
|
|
90
90
|
dingo/model/llm/meta_rater/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
91
91
|
dingo/model/llm/meta_rater/llm_meta_rater_cleanliness.py,sha256=PzpHBzuVZJ4FRuBEsWdyJtFErW09-K3fpBU6GFp81Gk,6879
|
|
92
92
|
dingo/model/llm/meta_rater/llm_meta_rater_professionalism.py,sha256=o_J9KpFzYujMzvnYTNVdJD1Ka-icq-yvUwivmSnOgKc,7435
|
|
@@ -105,22 +105,24 @@ dingo/model/llm/minor_lan/llm_text_language_sr.py,sha256=ff-cV1XJHTy7UI27LbWN7b2
|
|
|
105
105
|
dingo/model/llm/minor_lan/llm_text_language_th.py,sha256=4t5yqwGysGFmqwYOvfNydsqAvJ4ZLe__iFyqYfPib6k,1232
|
|
106
106
|
dingo/model/llm/minor_lan/llm_text_language_vi.py,sha256=IihiHbONc1dCumJb1FI4c79sSP1kkcImbdSgopd-oBU,1244
|
|
107
107
|
dingo/model/llm/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
108
|
-
dingo/model/llm/rag/llm_rag_answer_relevancy.py,sha256=
|
|
109
|
-
dingo/model/llm/rag/llm_rag_context_precision.py,sha256=
|
|
110
|
-
dingo/model/llm/rag/llm_rag_context_recall.py,sha256=
|
|
111
|
-
dingo/model/llm/rag/llm_rag_context_relevancy.py,sha256=
|
|
112
|
-
dingo/model/llm/rag/llm_rag_faithfulness.py,sha256=
|
|
108
|
+
dingo/model/llm/rag/llm_rag_answer_relevancy.py,sha256=GAs0-vUURALZL825RHc8d6agrD08C5nO289KB1iJ7sE,11554
|
|
109
|
+
dingo/model/llm/rag/llm_rag_context_precision.py,sha256=7oX1hMDLlp4oD4aobATYl4TPCcJd6IVAR8RZO_3psmI,11294
|
|
110
|
+
dingo/model/llm/rag/llm_rag_context_recall.py,sha256=SkZx1UByl35y50d5Ckwk-6LPTOSCehgmlgVOycFhXPE,10485
|
|
111
|
+
dingo/model/llm/rag/llm_rag_context_relevancy.py,sha256=OjtFKufsbgDL2aIHKzuM3hHdCrYpdXKy9tsQbuBhbwU,7638
|
|
112
|
+
dingo/model/llm/rag/llm_rag_faithfulness.py,sha256=x2BkMu5SKybjUh0b3yiC6ZhHNXf_LM3JdK8gprf-R8U,9411
|
|
113
113
|
dingo/model/llm/security/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
114
|
dingo/model/llm/security/llm_security.py,sha256=f2PvPUGfm3hhvI4FFlnz_0G5a2pGZeA7-3mk8FLe0i8,1194
|
|
115
115
|
dingo/model/llm/security/llm_security_politics.py,sha256=dYMQagmIGJqnAPXY_LCHLH6R7JcJxDSnzxNtFzWbEF0,1314
|
|
116
116
|
dingo/model/llm/security/llm_security_prohibition.py,sha256=arPI0jIvHbMibrGReBBAadEiXVeLYXR_aVRukf3lZ0Y,591
|
|
117
117
|
dingo/model/llm/text_quality/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
|
-
dingo/model/llm/text_quality/base_text_quality.py,sha256=
|
|
118
|
+
dingo/model/llm/text_quality/base_text_quality.py,sha256=dI1Byf2jR3KENX0ieoLVSnWysuLeL_lb5gomxbQ4Ozo,1858
|
|
119
|
+
dingo/model/llm/text_quality/llm_text_equation.py,sha256=zTsAO0NwSuCjUJNEPJ4mN2lfYrhcsac5AQOsDuRIgT0,2753
|
|
119
120
|
dingo/model/llm/text_quality/llm_text_quality_v2.py,sha256=Kgu-hkvhXDT5PzG4a2TSkzQZBb8fvlYHy188GY2XzCI,3523
|
|
120
121
|
dingo/model/llm/text_quality/llm_text_quality_v3.py,sha256=Tqg6AcdSmnTsyra5wQ_LzBmmfYEoi0NPhlRWdajtN_4,5341
|
|
121
122
|
dingo/model/llm/text_quality/llm_text_quality_v4.py,sha256=EEn3CDTnp44cW3cmvXs2AGIV_PoK9_ySLA-F3ToQDKs,4659
|
|
122
|
-
dingo/model/llm/text_quality/llm_text_quality_v5.py,sha256=
|
|
123
|
+
dingo/model/llm/text_quality/llm_text_quality_v5.py,sha256=zGXV6yodZnNonjtXXPLH1NUBOkKeLWYug3i1klmDiQc,10010
|
|
123
124
|
dingo/model/llm/text_quality/llm_text_repeat.py,sha256=4vVO1L2jIXMIRT7UOAW5R2wMN4KQfGdXC9H23wE6VvQ,1764
|
|
125
|
+
dingo/model/llm/text_quality/llm_text_table.py,sha256=E09Ye2pkCGn1NO050_PuFjmXREW7NK3JqJAbv2U1WpQ,2720
|
|
124
126
|
dingo/model/llm/text_quality/llm_text_unread_issue.py,sha256=wklbD5Znt7LVTiXB-DH8-Yvsb7OQnh25ThA2mqDVYkQ,3669
|
|
125
127
|
dingo/model/llm/text_quality/llm_text_word_stick.py,sha256=yqGAu3WHPXiECrpmPJd1GJproBrqzlZLcXpS9HkzWx4,3551
|
|
126
128
|
dingo/model/response/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -146,9 +148,9 @@ dingo/utils/__init__.py,sha256=d8nJluje6i4z_Bb1rcXJSmEoAhyn1mkqEXJEOEdaMy4,51
|
|
|
146
148
|
dingo/utils/exception.py,sha256=fh58dSLSmYSnwW4MQXg-Jfai2QcZfDruTaYGbaWk7Wc,446
|
|
147
149
|
dingo/utils/log_util/__init__.py,sha256=VfzAAHUV8RuN-QaySahfAPfhM__-myigUlKx7ywVerA,717
|
|
148
150
|
dingo/utils/log_util/logger.py,sha256=spGK0w22UgXsCcArd1rpt2teLPy7QPlIuvBaKYioHdY,1414
|
|
149
|
-
dingo_python-2.2.
|
|
150
|
-
dingo_python-2.2.
|
|
151
|
-
dingo_python-2.2.
|
|
152
|
-
dingo_python-2.2.
|
|
153
|
-
dingo_python-2.2.
|
|
154
|
-
dingo_python-2.2.
|
|
151
|
+
dingo_python-2.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
152
|
+
dingo_python-2.2.2.dist-info/METADATA,sha256=tBPvDUcDxzjjBcM170bnA7fYVeWvWuW7M4IbIMK054Y,27086
|
|
153
|
+
dingo_python-2.2.2.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
|
154
|
+
dingo_python-2.2.2.dist-info/entry_points.txt,sha256=Vo_p8qSVnOENdy1uubqxJRppZIpiQ753JG3WPAUeYps,45
|
|
155
|
+
dingo_python-2.2.2.dist-info/top_level.txt,sha256=gSXQSLowu_WOQRi75wK3qyjbHxeN5PqsaA4ChGmJdek,6
|
|
156
|
+
dingo_python-2.2.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|