dingo-python 2.2.1__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. dingo/config/input_args.py +13 -2
  2. dingo/exec/local.py +2 -1
  3. dingo/io/output/__init__.py +1 -0
  4. dingo/io/output/result_info.py +16 -0
  5. dingo/model/llm/agent/agent_article_fact_checker.py +102 -29
  6. dingo/model/llm/agent/agent_fact_check.py +7 -9
  7. dingo/model/llm/agent/agent_hallucination.py +7 -9
  8. dingo/model/llm/agent/agent_wrapper.py +6 -6
  9. dingo/model/llm/agent/base_agent.py +5 -5
  10. dingo/model/llm/base_openai.py +4 -8
  11. dingo/model/llm/compare/llm_html_extract_compare.py +17 -2
  12. dingo/model/llm/compare/llm_html_extract_compare_v2.py +12 -3
  13. dingo/model/llm/compare/llm_html_extract_compare_v3.py +221 -0
  14. dingo/model/llm/hhh/llm_text_3h.py +1 -1
  15. dingo/model/llm/instruction_quality/llm_instruction_clarity.py +2 -2
  16. dingo/model/llm/instruction_quality/llm_task_difficulty.py +4 -4
  17. dingo/model/llm/llm_classify_qr.py +4 -2
  18. dingo/model/llm/llm_custom_metric.py +211 -0
  19. dingo/model/llm/llm_document_parsing_ocr.py +6 -2
  20. dingo/model/llm/llm_factcheck_public.py +1 -1
  21. dingo/model/llm/llm_keyword_matcher.py +1 -1
  22. dingo/model/llm/llm_scout.py +1 -1
  23. dingo/model/llm/mineru/vlm_document_parsing.py +4 -8
  24. dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py +4 -8
  25. dingo/model/llm/rag/llm_rag_answer_relevancy.py +6 -13
  26. dingo/model/llm/rag/llm_rag_chunk_quality.py +99 -0
  27. dingo/model/llm/rag/llm_rag_context_precision.py +3 -3
  28. dingo/model/llm/rag/llm_rag_context_recall.py +3 -3
  29. dingo/model/llm/rag/llm_rag_context_relevancy.py +2 -2
  30. dingo/model/llm/rag/llm_rag_faithfulness.py +3 -3
  31. dingo/model/llm/text_quality/base_text_quality.py +2 -7
  32. dingo/model/llm/text_quality/llm_text_equation.py +68 -0
  33. dingo/model/llm/text_quality/llm_text_quality_v5.py +45 -13
  34. dingo/model/llm/text_quality/llm_text_table.py +70 -0
  35. dingo/model/llm/vlm_image_relevant.py +9 -52
  36. dingo/model/llm/vlm_layout_quality.py +5 -56
  37. dingo/model/model.py +37 -24
  38. dingo/model/rule/rule_common.py +76 -0
  39. dingo/model/rule/rule_image.py +41 -32
  40. dingo/model/rule/scibase/__init__.py +1 -0
  41. dingo/model/rule/scibase/rule_quanliang.py +655 -0
  42. dingo/run/cli.py +22 -1
  43. dingo/utils/image_loader.py +141 -0
  44. {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/METADATA +25 -1
  45. {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/RECORD +49 -41
  46. {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/WHEEL +0 -0
  47. {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/entry_points.txt +0 -0
  48. {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/licenses/LICENSE +0 -0
  49. {dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/top_level.txt +0 -0
@@ -87,6 +87,8 @@ class ExecutorArgs(BaseModel):
87
87
 
88
88
 
89
89
  class EvaluatorRuleArgs(BaseModel):
90
+ model_config = {"extra": "forbid"}
91
+
90
92
  threshold: Optional[float] = None
91
93
  pattern: Optional[str] = None
92
94
  key_list: Optional[List[str]] = None
@@ -101,16 +103,25 @@ class EmbeddingConfigArgs(BaseModel):
101
103
  api_url: Optional[str] = None
102
104
 
103
105
 
106
+ class CustomLLMMetricArgs(BaseModel):
107
+ metric: str
108
+ description: Optional[str] = ""
109
+ criteria: List[str]
110
+ input_fields: List[str]
111
+
112
+
104
113
  class EvaluatorLLMArgs(BaseModel):
114
+ model_config = {"extra": "allow"}
115
+
105
116
  model: Optional[str] = None
106
117
  key: Optional[str] = None
107
118
  api_url: Optional[str] = None
108
- parameters: Optional[dict] = None
109
119
  embedding_config: Optional[EmbeddingConfigArgs] = None
120
+ custom_metric: Optional[CustomLLMMetricArgs] = None
110
121
 
111
122
 
112
123
  class EvalPiplineConfig(BaseModel):
113
- """Single evaluator configuration item"""
124
+ """Single evaluator configuration item."""
114
125
  name: str
115
126
  config: Optional[EvaluatorRuleArgs | EvaluatorLLMArgs] = None
116
127
 
dingo/exec/local.py CHANGED
@@ -178,8 +178,9 @@ class LocalExecutor(ExecProto):
178
178
  Model.set_config_rule(model, e_c_i.config)
179
179
  elif eval_type == 'llm':
180
180
  model_cls = Model.llm_name_map.get(e_c_i.name)
181
- model = model_cls() # 实例化类为对象,避免多线程配置覆盖
181
+ model = model_cls()
182
182
  Model.set_config_llm(model, e_c_i.config)
183
+ Model.set_config_llm(model_cls, e_c_i.config)
183
184
  else:
184
185
  raise ValueError(f"Error eval_type: {eval_type}")
185
186
 
@@ -1,2 +1,3 @@
1
+ # from dingo.io.output.benchmark_report import BenchmarkReport # noqa E402.
1
2
  from dingo.io.output.result_info import ResultInfo # noqa E402.
2
3
  from dingo.io.output.summary_model import SummaryModel # noqa E402.
@@ -33,6 +33,19 @@ class ResultInfo(BaseModel):
33
33
  Returns:
34
34
  包含原始数据和dingo_result的字典
35
35
  """
36
+ def move_conflict_field(field_name: str):
37
+ if field_name not in self.raw_data:
38
+ return
39
+
40
+ index = 1
41
+ while True:
42
+ backup_field = f'{field_name}_old_v{index}'
43
+ if backup_field not in self.raw_data:
44
+ self.raw_data[backup_field] = self.raw_data[field_name]
45
+ del self.raw_data[field_name]
46
+ return
47
+ index += 1
48
+
36
49
  dingo_result = {
37
50
  'eval_status': self.eval_status,
38
51
  'eval_details': {
@@ -40,5 +53,8 @@ class ResultInfo(BaseModel):
40
53
  for k, v in self.eval_details.items()
41
54
  },
42
55
  }
56
+ move_conflict_field('dingo_id')
57
+ move_conflict_field('dingo_result')
58
+ self.raw_data['dingo_id'] = self.dingo_id
43
59
  self.raw_data['dingo_result'] = dingo_result
44
60
  return self.raw_data
@@ -343,21 +343,21 @@ class ArticleFactChecker(BaseAgent):
343
343
  "config": {
344
344
  "key": "your-openai-api-key",
345
345
  "model": "gpt-4o-mini",
346
- "parameters": {
347
- "agent_config": {
348
- "max_iterations": 10,
349
- "tools": {
350
- "claims_extractor": {
351
- "api_key": "your-openai-api-key",
352
- "max_claims": 50,
353
- "claim_types": ["factual", "institutional", "statistical", "attribution"]
354
- },
355
- "tavily_search": {
356
- "api_key": "your-tavily-api-key",
357
- "max_results": 5
358
- },
359
- "arxiv_search": {"max_results": 5}
360
- }
346
+ "agent_config": {
347
+ "max_iterations": 10,
348
+ "overall_timeout": 900,
349
+ "max_concurrent_claims": 5,
350
+ "tools": {
351
+ "claims_extractor": {
352
+ "api_key": "your-openai-api-key",
353
+ "max_claims": 50,
354
+ "claim_types": ["factual", "institutional", "statistical", "attribution"]
355
+ },
356
+ "tavily_search": {
357
+ "api_key": "your-tavily-api-key",
358
+ "max_results": 5
359
+ },
360
+ "arxiv_search": {"max_results": 5}
361
361
  }
362
362
  }
363
363
  }
@@ -372,6 +372,9 @@ class ArticleFactChecker(BaseAgent):
372
372
  ]
373
373
  max_iterations = 10 # Allow more iterations for comprehensive checking
374
374
  max_concurrent_claims = 5 # Default parallel claim verification slots
375
+ overall_timeout = 900 # 15-minute wall-clock timeout for entire evaluation
376
+ _MIN_OVERALL_TIMEOUT = 30 # Floor: 30 seconds
377
+ _MAX_OVERALL_TIMEOUT = 7200 # Ceiling: 2 hours
375
378
 
376
379
  _required_fields = [RequiredField.CONTENT] # Article text
377
380
 
@@ -394,8 +397,8 @@ class ArticleFactChecker(BaseAgent):
394
397
  Returns:
395
398
  Output directory path (created if needed), or None if saving is disabled.
396
399
  """
397
- params = cls.dynamic_config.parameters or {}
398
- agent_cfg = params.get('agent_config') or {}
400
+ extra_params = cls.dynamic_config.model_extra
401
+ agent_cfg = extra_params.get('agent_config') or {}
399
402
 
400
403
  explicit_path = agent_cfg.get('output_path')
401
404
  if explicit_path:
@@ -816,24 +819,42 @@ class ArticleFactChecker(BaseAgent):
816
819
  output_dir = cls._get_output_dir()
817
820
 
818
821
  if cls.dynamic_config:
819
- if cls.dynamic_config.parameters is None:
820
- cls.dynamic_config.parameters = {}
821
- cls.dynamic_config.parameters.setdefault("temperature", 0)
822
+ if 'temperature' not in cls.dynamic_config.model_extra:
823
+ cls.dynamic_config.temperature = 0
822
824
 
823
825
  if output_dir and input_data.content:
824
826
  cls._save_article_content(output_dir, input_data.content)
825
827
 
828
+ timeout = cls._get_overall_timeout()
829
+
830
+ async def _run_with_timeout() -> EvalDetail:
831
+ return await asyncio.wait_for(
832
+ cls._async_eval(input_data, start_time, output_dir),
833
+ timeout=timeout,
834
+ )
835
+
826
836
  try:
827
- return asyncio.run(cls._async_eval(input_data, start_time, output_dir))
837
+ return asyncio.run(_run_with_timeout())
838
+ except asyncio.TimeoutError:
839
+ elapsed = time.time() - start_time
840
+ log.warning(f"ArticleFactChecker: overall timeout exceeded ({elapsed:.1f}s / {timeout:.0f}s limit)")
841
+ return cls._create_overall_timeout_result(elapsed, timeout)
828
842
  except RuntimeError as e:
829
843
  # Fallback when called inside an already-running event loop (e.g. Jupyter, tests)
830
844
  if "cannot run" in str(e).lower() or "already running" in str(e).lower():
831
845
  import concurrent.futures
832
846
  with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
833
- future = pool.submit(
834
- lambda: asyncio.run(cls._async_eval(input_data, start_time, output_dir))
835
- )
836
- return future.result()
847
+ future = pool.submit(lambda: asyncio.run(_run_with_timeout()))
848
+ try:
849
+ # Extra margin so asyncio.wait_for fires before this outer timeout
850
+ return future.result(timeout=timeout + 30)
851
+ except (asyncio.TimeoutError, concurrent.futures.TimeoutError):
852
+ elapsed = time.time() - start_time
853
+ log.warning(
854
+ f"ArticleFactChecker: overall timeout exceeded "
855
+ f"({elapsed:.1f}s / {timeout:.0f}s limit, fallback path)"
856
+ )
857
+ return cls._create_overall_timeout_result(elapsed, timeout)
837
858
  raise
838
859
 
839
860
  # --- Two-Phase Async Architecture Methods ---
@@ -922,8 +943,8 @@ class ArticleFactChecker(BaseAgent):
922
943
  """
923
944
  from dingo.model.llm.agent.tools.claims_extractor import ClaimsExtractor, ClaimsExtractorConfig
924
945
 
925
- params = cls.dynamic_config.parameters or {}
926
- agent_cfg = params.get('agent_config') or {}
946
+ extra_params = cls.dynamic_config.model_extra
947
+ agent_cfg = extra_params.get('agent_config') or {}
927
948
  extractor_cfg = agent_cfg.get('tools', {}).get('claims_extractor', {})
928
949
 
929
950
  config_kwargs: Dict[str, Any] = {
@@ -1019,10 +1040,30 @@ class ArticleFactChecker(BaseAgent):
1019
1040
  @classmethod
1020
1041
  def _get_max_concurrent_claims(cls) -> int:
1021
1042
  """Read max_concurrent_claims from agent_config or use class default."""
1022
- params = cls.dynamic_config.parameters or {}
1023
- agent_cfg = params.get('agent_config') or {}
1043
+ extra_params = cls.dynamic_config.model_extra
1044
+ agent_cfg = extra_params.get('agent_config') or {}
1024
1045
  return agent_cfg.get('max_concurrent_claims', cls.max_concurrent_claims)
1025
1046
 
1047
+ @classmethod
1048
+ def _get_overall_timeout(cls) -> float:
1049
+ """Read overall_timeout from agent_config or use class default (900s).
1050
+
1051
+ Returns:
1052
+ Positive timeout in seconds, clamped to [30, 7200].
1053
+ """
1054
+ extra_params = cls.dynamic_config.model_extra
1055
+ agent_cfg = extra_params.get('agent_config') or {}
1056
+ raw = agent_cfg.get('overall_timeout', cls.overall_timeout)
1057
+ try:
1058
+ timeout = float(raw)
1059
+ except (TypeError, ValueError):
1060
+ log.warning(f"Invalid overall_timeout={raw!r}, using default {cls.overall_timeout}s")
1061
+ return float(cls.overall_timeout)
1062
+ clamped = max(cls._MIN_OVERALL_TIMEOUT, min(timeout, cls._MAX_OVERALL_TIMEOUT))
1063
+ if clamped != timeout:
1064
+ log.warning(f"overall_timeout={timeout} out of range, clamped to {clamped}s")
1065
+ return float(clamped)
1066
+
1026
1067
  @classmethod
1027
1068
  def _parse_claim_json_robust(cls, output: Optional[str]) -> Dict[str, Any]:
1028
1069
  """
@@ -1795,6 +1836,38 @@ Begin your systematic fact-checking process now.
1795
1836
  ]
1796
1837
  return result
1797
1838
 
1839
+ @classmethod
1840
+ def _create_overall_timeout_result(cls, elapsed: float, timeout: float) -> EvalDetail:
1841
+ """
1842
+ Create error result when overall wall-clock timeout is exceeded.
1843
+
1844
+ Args:
1845
+ elapsed: Actual elapsed time in seconds
1846
+ timeout: Configured timeout limit in seconds
1847
+
1848
+ Returns:
1849
+ EvalDetail with timeout error status
1850
+ """
1851
+ minutes, seconds = divmod(int(timeout), 60)
1852
+ limit_str = f"{minutes}m{seconds}s" if minutes else f"{int(timeout)}s"
1853
+ result = EvalDetail(metric=cls.__name__)
1854
+ result.status = True
1855
+ result.label = [f"{QualityLabel.QUALITY_BAD_PREFIX}AGENT_OVERALL_TIMEOUT"]
1856
+ result.reason = [
1857
+ "Article Fact-Checking Failed: Overall Timeout Exceeded",
1858
+ "=" * 70,
1859
+ f"Execution exceeded the {int(timeout)}s ({limit_str}) wall-clock limit.",
1860
+ f"Elapsed time: {elapsed:.1f}s",
1861
+ "",
1862
+ "Recommendations:",
1863
+ f" 1. Increase overall_timeout (current: {int(timeout)}s) in agent_config",
1864
+ " 2. Reduce max_claims in claims_extractor config (e.g., 50 -> 20)",
1865
+ " 3. Use a faster model (e.g., gpt-4o-mini instead of gpt-4o)",
1866
+ " 4. Reduce max_concurrent_claims to lower API rate-limit pressure",
1867
+ " 5. Split long articles into shorter sections",
1868
+ ]
1869
+ return result
1870
+
1798
1871
  @classmethod
1799
1872
  def plan_execution(cls, input_data: Data) -> List[Dict[str, Any]]:
1800
1873
  """
@@ -70,15 +70,13 @@ class AgentFactCheck(BaseAgent):
70
70
  "key": "your-openai-api-key",
71
71
  "api_url": "https://api.openai.com/v1",
72
72
  "model": "gpt-4.1-mini-2025-04-14",
73
- "parameters": {
74
- "agent_config": {
75
- "max_iterations": 5,
76
- "tools": {
77
- "tavily_search": {
78
- "api_key": "your-tavily-api-key",
79
- "max_results": 5,
80
- "search_depth": "advanced"
81
- }
73
+ "agent_config": {
74
+ "max_iterations": 5,
75
+ "tools": {
76
+ "tavily_search": {
77
+ "api_key": "your-tavily-api-key",
78
+ "max_results": 5,
79
+ "search_depth": "advanced"
82
80
  }
83
81
  }
84
82
  }
@@ -82,15 +82,13 @@ class AgentHallucination(BaseAgent):
82
82
  "key": "your-openai-api-key",
83
83
  "api_url": "https://api.openai.com/v1",
84
84
  "model": "gpt-4.1-mini-2025-04-14",
85
- "parameters": {
86
- "agent_config": {
87
- "max_iterations": 3,
88
- "tools": {
89
- "tavily_search": {
90
- "api_key": "your-tavily-api-key",
91
- "max_results": 5,
92
- "search_depth": "advanced"
93
- }
85
+ "agent_config": {
86
+ "max_iterations": 3,
87
+ "tools": {
88
+ "tavily_search": {
89
+ "api_key": "your-tavily-api-key",
90
+ "max_results": 5,
91
+ "search_depth": "advanced"
94
92
  }
95
93
  }
96
94
  }
@@ -327,22 +327,22 @@ class AgentWrapper:
327
327
  )
328
328
 
329
329
  # Extract parameters
330
- params = dynamic_config.parameters or {}
330
+ extra_params = dynamic_config.model_extra
331
331
 
332
332
  # Create ChatOpenAI instance
333
333
  llm = ChatOpenAI(
334
334
  api_key=dynamic_config.key,
335
335
  base_url=dynamic_config.api_url,
336
336
  model=dynamic_config.model or "gpt-4.1-mini",
337
- temperature=params.get("temperature", 0.3),
338
- max_tokens=params.get("max_tokens", 4096),
339
- top_p=params.get("top_p", 1.0),
340
- timeout=params.get("timeout", 30)
337
+ temperature=extra_params.get("temperature", 0.3),
338
+ max_tokens=extra_params.get("max_tokens", 4096),
339
+ top_p=extra_params.get("top_p", 1.0),
340
+ timeout=extra_params.get("timeout", 30)
341
341
  )
342
342
 
343
343
  log.debug(
344
344
  f"Created ChatOpenAI: model={dynamic_config.model}, "
345
- f"temp={params.get('temperature', 0.3)}"
345
+ f"temp={extra_params.get('temperature', 0.3)}"
346
346
  )
347
347
 
348
348
  return llm
@@ -146,7 +146,7 @@ class BaseAgent(BaseOpenAI):
146
146
  Extract tool configuration from agent's dynamic_config.
147
147
 
148
148
  Configuration is expected in:
149
- dynamic_config.parameters.agent_config.tools.{tool_name}
149
+ dynamic_config.agent_config.tools.{tool_name}
150
150
 
151
151
  Args:
152
152
  tool_name: Name of the tool
@@ -154,8 +154,8 @@ class BaseAgent(BaseOpenAI):
154
154
  Returns:
155
155
  Dict of configuration values for the tool
156
156
  """
157
- params = cls.dynamic_config.parameters or {}
158
- agent_config = params.get('agent_config', {})
157
+ extra_params = cls.dynamic_config.model_extra
158
+ agent_config = extra_params.get('agent_config', {})
159
159
  tools_config = agent_config.get('tools', {})
160
160
  return tools_config.get(tool_name, {})
161
161
 
@@ -184,8 +184,8 @@ class BaseAgent(BaseOpenAI):
184
184
  Returns:
185
185
  Maximum number of iterations allowed
186
186
  """
187
- params = cls.dynamic_config.parameters or {}
188
- agent_config = params.get('agent_config', {})
187
+ extra_params = cls.dynamic_config.model_extra
188
+ agent_config = extra_params.get('agent_config', {})
189
189
  return agent_config.get('max_iterations', cls.max_iterations)
190
190
 
191
191
  @classmethod
@@ -82,22 +82,18 @@ class BaseOpenAI(BaseLLM):
82
82
  else:
83
83
  model_name = cls.client.models.list().data[0].id
84
84
 
85
- params = cls.dynamic_config.parameters
86
- cls.validate_config(params)
85
+ extra_params = cls.dynamic_config.model_extra
86
+ cls.validate_config(extra_params)
87
87
 
88
88
  completions = cls.client.chat.completions.create(
89
89
  model=model_name,
90
90
  messages=messages,
91
- temperature=params.get("temperature", 0.3) if params else 0.3,
92
- top_p=params.get("top_p", 1) if params else 1,
93
- max_tokens=params.get("max_tokens", 4000) if params else 4000,
94
- presence_penalty=params.get("presence_penalty", 0) if params else 0,
95
- frequency_penalty=params.get("frequency_penalty", 0) if params else 0,
91
+ **extra_params,
96
92
  )
97
93
 
98
94
  if completions.choices[0].finish_reason == "length":
99
95
  raise ExceedMaxTokens(
100
- f"Exceed max tokens: {params.get('max_tokens', 4000) if params else 4000}"
96
+ f"Exceed max tokens: {extra_params.get('max_tokens', 4000)}"
101
97
  )
102
98
 
103
99
  return str(completions.choices[0].message.content)
@@ -95,13 +95,28 @@ class LLMHtmlExtractCompare(BaseOpenAI):
95
95
 
96
96
  @classmethod
97
97
  def build_messages(cls, input_data: Data) -> List:
98
+ raw_data = getattr(input_data, "raw_data", None) or {}
99
+ # Backward-compatible input handling:
100
+ # - Preferred: raw_data["magic_md"] and raw_data["content"] (legacy dataset schema)
101
+ # - Fallback: input_data.prompt (tool A) and input_data.reference (tool B)
102
+ # - Last resort: input_data.prompt (tool A) and input_data.extra fields if provided
103
+ tool_a_md = raw_data.get("magic_md", None) or getattr(input_data, "prompt", None)
104
+ tool_b_md = raw_data.get("content", None) or getattr(input_data, "reference", None)
105
+
106
+ if tool_a_md is None or tool_b_md is None:
107
+ raise ValueError(
108
+ "LLMHtmlExtractCompare requires Tool A and Tool B markdown. "
109
+ "Provide raw_data['magic_md'] and raw_data['content'], or provide Data.prompt (tool A) "
110
+ "and Data.reference (tool B)."
111
+ )
112
+
98
113
  messages = [
99
114
  {
100
115
  "role": "user",
101
116
  "content": cls.prompt.format(
102
117
  input_data.content,
103
- input_data.raw_data["magic_md"],
104
- input_data.raw_data["content"],
118
+ tool_a_md,
119
+ tool_b_md,
105
120
  ),
106
121
  }
107
122
  ]
@@ -25,10 +25,18 @@ class LLMHtmlExtractCompareV2(BaseOpenAI):
25
25
  输入数据要求:
26
26
  - input_data.prompt: 工具A提取的文本
27
27
  - input_data.content: 工具B提取的文本
28
- - input_data.raw_data.get("language", "en"): 语言类型 ("zh" "en")
28
+ - language: 可选,来自 input_data.language 或 raw_data["language"],缺省为 "en""zh" / "en"
29
29
  """
30
30
 
31
- _required_fields = [RequiredField.CONTENT, RequiredField.PROMPT]
31
+ _metric_info = {
32
+ 'category': 'Pretrain Text Quality Assessment Metrics',
33
+ 'metric_name': 'LLMHtmlExtractCompareV2',
34
+ 'description': 'Compares two HTML main-content extraction tools by computing text diffs and using LLM to judge which preserves more core information',
35
+ 'paper_title': '',
36
+ 'paper_url': '',
37
+ }
38
+
39
+ _required_fields = [RequiredField.PROMPT, RequiredField.CONTENT]
32
40
  prompt = {
33
41
  "content_en": r"""Please compare the following two texts, each extracted from the same webpage using different HTML parsing methods. Your task is to determine whether there is a difference in the core informational content between them.
34
42
 
@@ -174,7 +182,8 @@ C. Text A 包含的核心信息内容少于 Text B
174
182
  text_tool_b = input_data.content
175
183
 
176
184
  # 获取配置参数
177
- language = input_data.raw_data.get("language", "en")
185
+ raw_data = getattr(input_data, 'raw_data', {}) or {}
186
+ language = raw_data.get("language", getattr(input_data, 'language', "en"))
178
187
 
179
188
  # 计算文本差异
180
189
  diff_result = cls.extract_text_diff(text_tool_a, text_tool_b)