dingo-python 2.2.1__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
- dingo/config/input_args.py +13 -2
- dingo/exec/local.py +2 -1
- dingo/io/output/__init__.py +1 -0
- dingo/io/output/result_info.py +16 -0
- dingo/model/llm/agent/agent_article_fact_checker.py +102 -29
- dingo/model/llm/agent/agent_fact_check.py +7 -9
- dingo/model/llm/agent/agent_hallucination.py +7 -9
- dingo/model/llm/agent/agent_wrapper.py +6 -6
- dingo/model/llm/agent/base_agent.py +5 -5
- dingo/model/llm/base_openai.py +4 -8
- dingo/model/llm/compare/llm_html_extract_compare.py +17 -2
- dingo/model/llm/compare/llm_html_extract_compare_v2.py +12 -3
- dingo/model/llm/compare/llm_html_extract_compare_v3.py +221 -0
- dingo/model/llm/hhh/llm_text_3h.py +1 -1
- dingo/model/llm/instruction_quality/llm_instruction_clarity.py +2 -2
- dingo/model/llm/instruction_quality/llm_task_difficulty.py +4 -4
- dingo/model/llm/llm_classify_qr.py +4 -2
- dingo/model/llm/llm_custom_metric.py +211 -0
- dingo/model/llm/llm_document_parsing_ocr.py +6 -2
- dingo/model/llm/llm_factcheck_public.py +1 -1
- dingo/model/llm/llm_keyword_matcher.py +1 -1
- dingo/model/llm/llm_scout.py +1 -1
- dingo/model/llm/mineru/vlm_document_parsing.py +4 -8
- dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py +4 -8
- dingo/model/llm/rag/llm_rag_answer_relevancy.py +6 -13
- dingo/model/llm/rag/llm_rag_chunk_quality.py +99 -0
- dingo/model/llm/rag/llm_rag_context_precision.py +3 -3
- dingo/model/llm/rag/llm_rag_context_recall.py +3 -3
- dingo/model/llm/rag/llm_rag_context_relevancy.py +2 -2
- dingo/model/llm/rag/llm_rag_faithfulness.py +3 -3
- dingo/model/llm/text_quality/base_text_quality.py +2 -7
- dingo/model/llm/text_quality/llm_text_equation.py +68 -0
- dingo/model/llm/text_quality/llm_text_quality_v5.py +45 -13
- dingo/model/llm/text_quality/llm_text_table.py +70 -0
- dingo/model/llm/vlm_image_relevant.py +9 -52
- dingo/model/llm/vlm_layout_quality.py +5 -56
- dingo/model/model.py +37 -24
- dingo/model/rule/rule_common.py +76 -0
- dingo/model/rule/rule_image.py +41 -32
- dingo/model/rule/scibase/__init__.py +1 -0
- dingo/model/rule/scibase/rule_quanliang.py +655 -0
- dingo/run/cli.py +22 -1
- dingo/utils/image_loader.py +141 -0
- {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/METADATA +25 -1
- {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/RECORD +49 -41
- {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/WHEEL +0 -0
- {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/entry_points.txt +0 -0
- {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/licenses/LICENSE +0 -0
- {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/top_level.txt +0 -0
dingo/config/input_args.py
CHANGED
|
@@ -87,6 +87,8 @@ class ExecutorArgs(BaseModel):
|
|
|
87
87
|
|
|
88
88
|
|
|
89
89
|
class EvaluatorRuleArgs(BaseModel):
|
|
90
|
+
model_config = {"extra": "forbid"}
|
|
91
|
+
|
|
90
92
|
threshold: Optional[float] = None
|
|
91
93
|
pattern: Optional[str] = None
|
|
92
94
|
key_list: Optional[List[str]] = None
|
|
@@ -101,16 +103,25 @@ class EmbeddingConfigArgs(BaseModel):
|
|
|
101
103
|
api_url: Optional[str] = None
|
|
102
104
|
|
|
103
105
|
|
|
106
|
+
class CustomLLMMetricArgs(BaseModel):
|
|
107
|
+
metric: str
|
|
108
|
+
description: Optional[str] = ""
|
|
109
|
+
criteria: List[str]
|
|
110
|
+
input_fields: List[str]
|
|
111
|
+
|
|
112
|
+
|
|
104
113
|
class EvaluatorLLMArgs(BaseModel):
|
|
114
|
+
model_config = {"extra": "allow"}
|
|
115
|
+
|
|
105
116
|
model: Optional[str] = None
|
|
106
117
|
key: Optional[str] = None
|
|
107
118
|
api_url: Optional[str] = None
|
|
108
|
-
parameters: Optional[dict] = None
|
|
109
119
|
embedding_config: Optional[EmbeddingConfigArgs] = None
|
|
120
|
+
custom_metric: Optional[CustomLLMMetricArgs] = None
|
|
110
121
|
|
|
111
122
|
|
|
112
123
|
class EvalPiplineConfig(BaseModel):
|
|
113
|
-
"""Single evaluator configuration item"""
|
|
124
|
+
"""Single evaluator configuration item."""
|
|
114
125
|
name: str
|
|
115
126
|
config: Optional[EvaluatorRuleArgs | EvaluatorLLMArgs] = None
|
|
116
127
|
|
dingo/exec/local.py
CHANGED
|
@@ -178,8 +178,9 @@ class LocalExecutor(ExecProto):
|
|
|
178
178
|
Model.set_config_rule(model, e_c_i.config)
|
|
179
179
|
elif eval_type == 'llm':
|
|
180
180
|
model_cls = Model.llm_name_map.get(e_c_i.name)
|
|
181
|
-
model = model_cls()
|
|
181
|
+
model = model_cls()
|
|
182
182
|
Model.set_config_llm(model, e_c_i.config)
|
|
183
|
+
Model.set_config_llm(model_cls, e_c_i.config)
|
|
183
184
|
else:
|
|
184
185
|
raise ValueError(f"Error eval_type: {eval_type}")
|
|
185
186
|
|
dingo/io/output/__init__.py
CHANGED
dingo/io/output/result_info.py
CHANGED
|
@@ -33,6 +33,19 @@ class ResultInfo(BaseModel):
|
|
|
33
33
|
Returns:
|
|
34
34
|
包含原始数据和dingo_result的字典
|
|
35
35
|
"""
|
|
36
|
+
def move_conflict_field(field_name: str):
|
|
37
|
+
if field_name not in self.raw_data:
|
|
38
|
+
return
|
|
39
|
+
|
|
40
|
+
index = 1
|
|
41
|
+
while True:
|
|
42
|
+
backup_field = f'{field_name}_old_v{index}'
|
|
43
|
+
if backup_field not in self.raw_data:
|
|
44
|
+
self.raw_data[backup_field] = self.raw_data[field_name]
|
|
45
|
+
del self.raw_data[field_name]
|
|
46
|
+
return
|
|
47
|
+
index += 1
|
|
48
|
+
|
|
36
49
|
dingo_result = {
|
|
37
50
|
'eval_status': self.eval_status,
|
|
38
51
|
'eval_details': {
|
|
@@ -40,5 +53,8 @@ class ResultInfo(BaseModel):
|
|
|
40
53
|
for k, v in self.eval_details.items()
|
|
41
54
|
},
|
|
42
55
|
}
|
|
56
|
+
move_conflict_field('dingo_id')
|
|
57
|
+
move_conflict_field('dingo_result')
|
|
58
|
+
self.raw_data['dingo_id'] = self.dingo_id
|
|
43
59
|
self.raw_data['dingo_result'] = dingo_result
|
|
44
60
|
return self.raw_data
|
|
@@ -343,21 +343,21 @@ class ArticleFactChecker(BaseAgent):
|
|
|
343
343
|
"config": {
|
|
344
344
|
"key": "your-openai-api-key",
|
|
345
345
|
"model": "gpt-4o-mini",
|
|
346
|
-
"
|
|
347
|
-
"
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
}
|
|
346
|
+
"agent_config": {
|
|
347
|
+
"max_iterations": 10,
|
|
348
|
+
"overall_timeout": 900,
|
|
349
|
+
"max_concurrent_claims": 5,
|
|
350
|
+
"tools": {
|
|
351
|
+
"claims_extractor": {
|
|
352
|
+
"api_key": "your-openai-api-key",
|
|
353
|
+
"max_claims": 50,
|
|
354
|
+
"claim_types": ["factual", "institutional", "statistical", "attribution"]
|
|
355
|
+
},
|
|
356
|
+
"tavily_search": {
|
|
357
|
+
"api_key": "your-tavily-api-key",
|
|
358
|
+
"max_results": 5
|
|
359
|
+
},
|
|
360
|
+
"arxiv_search": {"max_results": 5}
|
|
361
361
|
}
|
|
362
362
|
}
|
|
363
363
|
}
|
|
@@ -372,6 +372,9 @@ class ArticleFactChecker(BaseAgent):
|
|
|
372
372
|
]
|
|
373
373
|
max_iterations = 10 # Allow more iterations for comprehensive checking
|
|
374
374
|
max_concurrent_claims = 5 # Default parallel claim verification slots
|
|
375
|
+
overall_timeout = 900 # 15-minute wall-clock timeout for entire evaluation
|
|
376
|
+
_MIN_OVERALL_TIMEOUT = 30 # Floor: 30 seconds
|
|
377
|
+
_MAX_OVERALL_TIMEOUT = 7200 # Ceiling: 2 hours
|
|
375
378
|
|
|
376
379
|
_required_fields = [RequiredField.CONTENT] # Article text
|
|
377
380
|
|
|
@@ -394,8 +397,8 @@ class ArticleFactChecker(BaseAgent):
|
|
|
394
397
|
Returns:
|
|
395
398
|
Output directory path (created if needed), or None if saving is disabled.
|
|
396
399
|
"""
|
|
397
|
-
|
|
398
|
-
agent_cfg =
|
|
400
|
+
extra_params = cls.dynamic_config.model_extra
|
|
401
|
+
agent_cfg = extra_params.get('agent_config') or {}
|
|
399
402
|
|
|
400
403
|
explicit_path = agent_cfg.get('output_path')
|
|
401
404
|
if explicit_path:
|
|
@@ -816,24 +819,42 @@ class ArticleFactChecker(BaseAgent):
|
|
|
816
819
|
output_dir = cls._get_output_dir()
|
|
817
820
|
|
|
818
821
|
if cls.dynamic_config:
|
|
819
|
-
if cls.dynamic_config.
|
|
820
|
-
cls.dynamic_config.
|
|
821
|
-
cls.dynamic_config.parameters.setdefault("temperature", 0)
|
|
822
|
+
if 'temperature' not in cls.dynamic_config.model_extra:
|
|
823
|
+
cls.dynamic_config.temperature = 0
|
|
822
824
|
|
|
823
825
|
if output_dir and input_data.content:
|
|
824
826
|
cls._save_article_content(output_dir, input_data.content)
|
|
825
827
|
|
|
828
|
+
timeout = cls._get_overall_timeout()
|
|
829
|
+
|
|
830
|
+
async def _run_with_timeout() -> EvalDetail:
|
|
831
|
+
return await asyncio.wait_for(
|
|
832
|
+
cls._async_eval(input_data, start_time, output_dir),
|
|
833
|
+
timeout=timeout,
|
|
834
|
+
)
|
|
835
|
+
|
|
826
836
|
try:
|
|
827
|
-
return asyncio.run(
|
|
837
|
+
return asyncio.run(_run_with_timeout())
|
|
838
|
+
except asyncio.TimeoutError:
|
|
839
|
+
elapsed = time.time() - start_time
|
|
840
|
+
log.warning(f"ArticleFactChecker: overall timeout exceeded ({elapsed:.1f}s / {timeout:.0f}s limit)")
|
|
841
|
+
return cls._create_overall_timeout_result(elapsed, timeout)
|
|
828
842
|
except RuntimeError as e:
|
|
829
843
|
# Fallback when called inside an already-running event loop (e.g. Jupyter, tests)
|
|
830
844
|
if "cannot run" in str(e).lower() or "already running" in str(e).lower():
|
|
831
845
|
import concurrent.futures
|
|
832
846
|
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
|
|
833
|
-
future = pool.submit(
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
847
|
+
future = pool.submit(lambda: asyncio.run(_run_with_timeout()))
|
|
848
|
+
try:
|
|
849
|
+
# Extra margin so asyncio.wait_for fires before this outer timeout
|
|
850
|
+
return future.result(timeout=timeout + 30)
|
|
851
|
+
except (asyncio.TimeoutError, concurrent.futures.TimeoutError):
|
|
852
|
+
elapsed = time.time() - start_time
|
|
853
|
+
log.warning(
|
|
854
|
+
f"ArticleFactChecker: overall timeout exceeded "
|
|
855
|
+
f"({elapsed:.1f}s / {timeout:.0f}s limit, fallback path)"
|
|
856
|
+
)
|
|
857
|
+
return cls._create_overall_timeout_result(elapsed, timeout)
|
|
837
858
|
raise
|
|
838
859
|
|
|
839
860
|
# --- Two-Phase Async Architecture Methods ---
|
|
@@ -922,8 +943,8 @@ class ArticleFactChecker(BaseAgent):
|
|
|
922
943
|
"""
|
|
923
944
|
from dingo.model.llm.agent.tools.claims_extractor import ClaimsExtractor, ClaimsExtractorConfig
|
|
924
945
|
|
|
925
|
-
|
|
926
|
-
agent_cfg =
|
|
946
|
+
extra_params = cls.dynamic_config.model_extra
|
|
947
|
+
agent_cfg = extra_params.get('agent_config') or {}
|
|
927
948
|
extractor_cfg = agent_cfg.get('tools', {}).get('claims_extractor', {})
|
|
928
949
|
|
|
929
950
|
config_kwargs: Dict[str, Any] = {
|
|
@@ -1019,10 +1040,30 @@ class ArticleFactChecker(BaseAgent):
|
|
|
1019
1040
|
@classmethod
|
|
1020
1041
|
def _get_max_concurrent_claims(cls) -> int:
|
|
1021
1042
|
"""Read max_concurrent_claims from agent_config or use class default."""
|
|
1022
|
-
|
|
1023
|
-
agent_cfg =
|
|
1043
|
+
extra_params = cls.dynamic_config.model_extra
|
|
1044
|
+
agent_cfg = extra_params.get('agent_config') or {}
|
|
1024
1045
|
return agent_cfg.get('max_concurrent_claims', cls.max_concurrent_claims)
|
|
1025
1046
|
|
|
1047
|
+
@classmethod
|
|
1048
|
+
def _get_overall_timeout(cls) -> float:
|
|
1049
|
+
"""Read overall_timeout from agent_config or use class default (900s).
|
|
1050
|
+
|
|
1051
|
+
Returns:
|
|
1052
|
+
Positive timeout in seconds, clamped to [30, 7200].
|
|
1053
|
+
"""
|
|
1054
|
+
extra_params = cls.dynamic_config.model_extra
|
|
1055
|
+
agent_cfg = extra_params.get('agent_config') or {}
|
|
1056
|
+
raw = agent_cfg.get('overall_timeout', cls.overall_timeout)
|
|
1057
|
+
try:
|
|
1058
|
+
timeout = float(raw)
|
|
1059
|
+
except (TypeError, ValueError):
|
|
1060
|
+
log.warning(f"Invalid overall_timeout={raw!r}, using default {cls.overall_timeout}s")
|
|
1061
|
+
return float(cls.overall_timeout)
|
|
1062
|
+
clamped = max(cls._MIN_OVERALL_TIMEOUT, min(timeout, cls._MAX_OVERALL_TIMEOUT))
|
|
1063
|
+
if clamped != timeout:
|
|
1064
|
+
log.warning(f"overall_timeout={timeout} out of range, clamped to {clamped}s")
|
|
1065
|
+
return float(clamped)
|
|
1066
|
+
|
|
1026
1067
|
@classmethod
|
|
1027
1068
|
def _parse_claim_json_robust(cls, output: Optional[str]) -> Dict[str, Any]:
|
|
1028
1069
|
"""
|
|
@@ -1795,6 +1836,38 @@ Begin your systematic fact-checking process now.
|
|
|
1795
1836
|
]
|
|
1796
1837
|
return result
|
|
1797
1838
|
|
|
1839
|
+
@classmethod
|
|
1840
|
+
def _create_overall_timeout_result(cls, elapsed: float, timeout: float) -> EvalDetail:
|
|
1841
|
+
"""
|
|
1842
|
+
Create error result when overall wall-clock timeout is exceeded.
|
|
1843
|
+
|
|
1844
|
+
Args:
|
|
1845
|
+
elapsed: Actual elapsed time in seconds
|
|
1846
|
+
timeout: Configured timeout limit in seconds
|
|
1847
|
+
|
|
1848
|
+
Returns:
|
|
1849
|
+
EvalDetail with timeout error status
|
|
1850
|
+
"""
|
|
1851
|
+
minutes, seconds = divmod(int(timeout), 60)
|
|
1852
|
+
limit_str = f"{minutes}m{seconds}s" if minutes else f"{int(timeout)}s"
|
|
1853
|
+
result = EvalDetail(metric=cls.__name__)
|
|
1854
|
+
result.status = True
|
|
1855
|
+
result.label = [f"{QualityLabel.QUALITY_BAD_PREFIX}AGENT_OVERALL_TIMEOUT"]
|
|
1856
|
+
result.reason = [
|
|
1857
|
+
"Article Fact-Checking Failed: Overall Timeout Exceeded",
|
|
1858
|
+
"=" * 70,
|
|
1859
|
+
f"Execution exceeded the {int(timeout)}s ({limit_str}) wall-clock limit.",
|
|
1860
|
+
f"Elapsed time: {elapsed:.1f}s",
|
|
1861
|
+
"",
|
|
1862
|
+
"Recommendations:",
|
|
1863
|
+
f" 1. Increase overall_timeout (current: {int(timeout)}s) in agent_config",
|
|
1864
|
+
" 2. Reduce max_claims in claims_extractor config (e.g., 50 -> 20)",
|
|
1865
|
+
" 3. Use a faster model (e.g., gpt-4o-mini instead of gpt-4o)",
|
|
1866
|
+
" 4. Reduce max_concurrent_claims to lower API rate-limit pressure",
|
|
1867
|
+
" 5. Split long articles into shorter sections",
|
|
1868
|
+
]
|
|
1869
|
+
return result
|
|
1870
|
+
|
|
1798
1871
|
@classmethod
|
|
1799
1872
|
def plan_execution(cls, input_data: Data) -> List[Dict[str, Any]]:
|
|
1800
1873
|
"""
|
|
@@ -70,15 +70,13 @@ class AgentFactCheck(BaseAgent):
|
|
|
70
70
|
"key": "your-openai-api-key",
|
|
71
71
|
"api_url": "https://api.openai.com/v1",
|
|
72
72
|
"model": "gpt-4.1-mini-2025-04-14",
|
|
73
|
-
"
|
|
74
|
-
"
|
|
75
|
-
|
|
76
|
-
"
|
|
77
|
-
"
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
"search_depth": "advanced"
|
|
81
|
-
}
|
|
73
|
+
"agent_config": {
|
|
74
|
+
"max_iterations": 5,
|
|
75
|
+
"tools": {
|
|
76
|
+
"tavily_search": {
|
|
77
|
+
"api_key": "your-tavily-api-key",
|
|
78
|
+
"max_results": 5,
|
|
79
|
+
"search_depth": "advanced"
|
|
82
80
|
}
|
|
83
81
|
}
|
|
84
82
|
}
|
|
@@ -82,15 +82,13 @@ class AgentHallucination(BaseAgent):
|
|
|
82
82
|
"key": "your-openai-api-key",
|
|
83
83
|
"api_url": "https://api.openai.com/v1",
|
|
84
84
|
"model": "gpt-4.1-mini-2025-04-14",
|
|
85
|
-
"
|
|
86
|
-
"
|
|
87
|
-
|
|
88
|
-
"
|
|
89
|
-
"
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
"search_depth": "advanced"
|
|
93
|
-
}
|
|
85
|
+
"agent_config": {
|
|
86
|
+
"max_iterations": 3,
|
|
87
|
+
"tools": {
|
|
88
|
+
"tavily_search": {
|
|
89
|
+
"api_key": "your-tavily-api-key",
|
|
90
|
+
"max_results": 5,
|
|
91
|
+
"search_depth": "advanced"
|
|
94
92
|
}
|
|
95
93
|
}
|
|
96
94
|
}
|
|
@@ -327,22 +327,22 @@ class AgentWrapper:
|
|
|
327
327
|
)
|
|
328
328
|
|
|
329
329
|
# Extract parameters
|
|
330
|
-
|
|
330
|
+
extra_params = dynamic_config.model_extra
|
|
331
331
|
|
|
332
332
|
# Create ChatOpenAI instance
|
|
333
333
|
llm = ChatOpenAI(
|
|
334
334
|
api_key=dynamic_config.key,
|
|
335
335
|
base_url=dynamic_config.api_url,
|
|
336
336
|
model=dynamic_config.model or "gpt-4.1-mini",
|
|
337
|
-
temperature=
|
|
338
|
-
max_tokens=
|
|
339
|
-
top_p=
|
|
340
|
-
timeout=
|
|
337
|
+
temperature=extra_params.get("temperature", 0.3),
|
|
338
|
+
max_tokens=extra_params.get("max_tokens", 4096),
|
|
339
|
+
top_p=extra_params.get("top_p", 1.0),
|
|
340
|
+
timeout=extra_params.get("timeout", 30)
|
|
341
341
|
)
|
|
342
342
|
|
|
343
343
|
log.debug(
|
|
344
344
|
f"Created ChatOpenAI: model={dynamic_config.model}, "
|
|
345
|
-
f"temp={
|
|
345
|
+
f"temp={extra_params.get('temperature', 0.3)}"
|
|
346
346
|
)
|
|
347
347
|
|
|
348
348
|
return llm
|
|
@@ -146,7 +146,7 @@ class BaseAgent(BaseOpenAI):
|
|
|
146
146
|
Extract tool configuration from agent's dynamic_config.
|
|
147
147
|
|
|
148
148
|
Configuration is expected in:
|
|
149
|
-
dynamic_config.
|
|
149
|
+
dynamic_config.agent_config.tools.{tool_name}
|
|
150
150
|
|
|
151
151
|
Args:
|
|
152
152
|
tool_name: Name of the tool
|
|
@@ -154,8 +154,8 @@ class BaseAgent(BaseOpenAI):
|
|
|
154
154
|
Returns:
|
|
155
155
|
Dict of configuration values for the tool
|
|
156
156
|
"""
|
|
157
|
-
|
|
158
|
-
agent_config =
|
|
157
|
+
extra_params = cls.dynamic_config.model_extra
|
|
158
|
+
agent_config = extra_params.get('agent_config', {})
|
|
159
159
|
tools_config = agent_config.get('tools', {})
|
|
160
160
|
return tools_config.get(tool_name, {})
|
|
161
161
|
|
|
@@ -184,8 +184,8 @@ class BaseAgent(BaseOpenAI):
|
|
|
184
184
|
Returns:
|
|
185
185
|
Maximum number of iterations allowed
|
|
186
186
|
"""
|
|
187
|
-
|
|
188
|
-
agent_config =
|
|
187
|
+
extra_params = cls.dynamic_config.model_extra
|
|
188
|
+
agent_config = extra_params.get('agent_config', {})
|
|
189
189
|
return agent_config.get('max_iterations', cls.max_iterations)
|
|
190
190
|
|
|
191
191
|
@classmethod
|
dingo/model/llm/base_openai.py
CHANGED
|
@@ -82,22 +82,18 @@ class BaseOpenAI(BaseLLM):
|
|
|
82
82
|
else:
|
|
83
83
|
model_name = cls.client.models.list().data[0].id
|
|
84
84
|
|
|
85
|
-
|
|
86
|
-
cls.validate_config(
|
|
85
|
+
extra_params = cls.dynamic_config.model_extra
|
|
86
|
+
cls.validate_config(extra_params)
|
|
87
87
|
|
|
88
88
|
completions = cls.client.chat.completions.create(
|
|
89
89
|
model=model_name,
|
|
90
90
|
messages=messages,
|
|
91
|
-
|
|
92
|
-
top_p=params.get("top_p", 1) if params else 1,
|
|
93
|
-
max_tokens=params.get("max_tokens", 4000) if params else 4000,
|
|
94
|
-
presence_penalty=params.get("presence_penalty", 0) if params else 0,
|
|
95
|
-
frequency_penalty=params.get("frequency_penalty", 0) if params else 0,
|
|
91
|
+
**extra_params,
|
|
96
92
|
)
|
|
97
93
|
|
|
98
94
|
if completions.choices[0].finish_reason == "length":
|
|
99
95
|
raise ExceedMaxTokens(
|
|
100
|
-
f"Exceed max tokens: {
|
|
96
|
+
f"Exceed max tokens: {extra_params.get('max_tokens', 4000)}"
|
|
101
97
|
)
|
|
102
98
|
|
|
103
99
|
return str(completions.choices[0].message.content)
|
|
@@ -95,13 +95,28 @@ class LLMHtmlExtractCompare(BaseOpenAI):
|
|
|
95
95
|
|
|
96
96
|
@classmethod
|
|
97
97
|
def build_messages(cls, input_data: Data) -> List:
|
|
98
|
+
raw_data = getattr(input_data, "raw_data", None) or {}
|
|
99
|
+
# Backward-compatible input handling:
|
|
100
|
+
# - Preferred: raw_data["magic_md"] and raw_data["content"] (legacy dataset schema)
|
|
101
|
+
# - Fallback: input_data.prompt (tool A) and input_data.reference (tool B)
|
|
102
|
+
# - Last resort: input_data.prompt (tool A) and input_data.extra fields if provided
|
|
103
|
+
tool_a_md = raw_data.get("magic_md", None) or getattr(input_data, "prompt", None)
|
|
104
|
+
tool_b_md = raw_data.get("content", None) or getattr(input_data, "reference", None)
|
|
105
|
+
|
|
106
|
+
if tool_a_md is None or tool_b_md is None:
|
|
107
|
+
raise ValueError(
|
|
108
|
+
"LLMHtmlExtractCompare requires Tool A and Tool B markdown. "
|
|
109
|
+
"Provide raw_data['magic_md'] and raw_data['content'], or provide Data.prompt (tool A) "
|
|
110
|
+
"and Data.reference (tool B)."
|
|
111
|
+
)
|
|
112
|
+
|
|
98
113
|
messages = [
|
|
99
114
|
{
|
|
100
115
|
"role": "user",
|
|
101
116
|
"content": cls.prompt.format(
|
|
102
117
|
input_data.content,
|
|
103
|
-
|
|
104
|
-
|
|
118
|
+
tool_a_md,
|
|
119
|
+
tool_b_md,
|
|
105
120
|
),
|
|
106
121
|
}
|
|
107
122
|
]
|
|
@@ -25,10 +25,18 @@ class LLMHtmlExtractCompareV2(BaseOpenAI):
|
|
|
25
25
|
输入数据要求:
|
|
26
26
|
- input_data.prompt: 工具A提取的文本
|
|
27
27
|
- input_data.content: 工具B提取的文本
|
|
28
|
-
- input_data.raw_data
|
|
28
|
+
- language: 可选,来自 input_data.language 或 raw_data["language"],缺省为 "en"("zh" / "en")
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
_metric_info = {
|
|
32
|
+
'category': 'Pretrain Text Quality Assessment Metrics',
|
|
33
|
+
'metric_name': 'LLMHtmlExtractCompareV2',
|
|
34
|
+
'description': 'Compares two HTML main-content extraction tools by computing text diffs and using LLM to judge which preserves more core information',
|
|
35
|
+
'paper_title': '',
|
|
36
|
+
'paper_url': '',
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
_required_fields = [RequiredField.PROMPT, RequiredField.CONTENT]
|
|
32
40
|
prompt = {
|
|
33
41
|
"content_en": r"""Please compare the following two texts, each extracted from the same webpage using different HTML parsing methods. Your task is to determine whether there is a difference in the core informational content between them.
|
|
34
42
|
|
|
@@ -174,7 +182,8 @@ C. Text A 包含的核心信息内容少于 Text B
|
|
|
174
182
|
text_tool_b = input_data.content
|
|
175
183
|
|
|
176
184
|
# 获取配置参数
|
|
177
|
-
|
|
185
|
+
raw_data = getattr(input_data, 'raw_data', {}) or {}
|
|
186
|
+
language = raw_data.get("language", getattr(input_data, 'language', "en"))
|
|
178
187
|
|
|
179
188
|
# 计算文本差异
|
|
180
189
|
diff_result = cls.extract_text_diff(text_tool_a, text_tool_b)
|