PyPI - dingo-python - Versions diffs - 2.2.1__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

dingo-python 2.2.1__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

dingo/config/input_args.py +13 -2
dingo/exec/local.py +2 -1
dingo/io/output/__init__.py +1 -0
dingo/io/output/result_info.py +16 -0
dingo/model/llm/agent/agent_article_fact_checker.py +102 -29
dingo/model/llm/agent/agent_fact_check.py +7 -9
dingo/model/llm/agent/agent_hallucination.py +7 -9
dingo/model/llm/agent/agent_wrapper.py +6 -6
dingo/model/llm/agent/base_agent.py +5 -5
dingo/model/llm/base_openai.py +4 -8
dingo/model/llm/compare/llm_html_extract_compare.py +17 -2
dingo/model/llm/compare/llm_html_extract_compare_v2.py +12 -3
dingo/model/llm/compare/llm_html_extract_compare_v3.py +221 -0
dingo/model/llm/hhh/llm_text_3h.py +1 -1
dingo/model/llm/instruction_quality/llm_instruction_clarity.py +2 -2
dingo/model/llm/instruction_quality/llm_task_difficulty.py +4 -4
dingo/model/llm/llm_classify_qr.py +4 -2
dingo/model/llm/llm_custom_metric.py +211 -0
dingo/model/llm/llm_document_parsing_ocr.py +6 -2
dingo/model/llm/llm_factcheck_public.py +1 -1
dingo/model/llm/llm_keyword_matcher.py +1 -1
dingo/model/llm/llm_scout.py +1 -1
dingo/model/llm/mineru/vlm_document_parsing.py +4 -8
dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py +4 -8
dingo/model/llm/rag/llm_rag_answer_relevancy.py +6 -13
dingo/model/llm/rag/llm_rag_chunk_quality.py +99 -0
dingo/model/llm/rag/llm_rag_context_precision.py +3 -3
dingo/model/llm/rag/llm_rag_context_recall.py +3 -3
dingo/model/llm/rag/llm_rag_context_relevancy.py +2 -2
dingo/model/llm/rag/llm_rag_faithfulness.py +3 -3
dingo/model/llm/text_quality/base_text_quality.py +2 -7
dingo/model/llm/text_quality/llm_text_equation.py +68 -0
dingo/model/llm/text_quality/llm_text_quality_v5.py +45 -13
dingo/model/llm/text_quality/llm_text_table.py +70 -0
dingo/model/llm/vlm_image_relevant.py +9 -52
dingo/model/llm/vlm_layout_quality.py +5 -56
dingo/model/model.py +37 -24
dingo/model/rule/rule_common.py +76 -0
dingo/model/rule/rule_image.py +41 -32
dingo/model/rule/scibase/__init__.py +1 -0
dingo/model/rule/scibase/rule_quanliang.py +655 -0
dingo/run/cli.py +22 -1
dingo/utils/image_loader.py +141 -0
{dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/METADATA +25 -1
{dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/RECORD +49 -41
{dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/WHEEL +0 -0
{dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/entry_points.txt +0 -0
{dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/licenses/LICENSE +0 -0
{dingo_python-2.2.1.dist-info → dingo_python-2.3.0.dist-info}/top_level.txt +0 -0

dingo/config/input_args.py CHANGED Viewed

@@ -87,6 +87,8 @@ class ExecutorArgs(BaseModel):
 class EvaluatorRuleArgs(BaseModel):
+    model_config = {"extra": "forbid"}
     threshold: Optional[float] = None
     pattern: Optional[str] = None
     key_list: Optional[List[str]] = None
@@ -101,16 +103,25 @@ class EmbeddingConfigArgs(BaseModel):
     api_url: Optional[str] = None
+class CustomLLMMetricArgs(BaseModel):
+    metric: str
+    description: Optional[str] = ""
+    criteria: List[str]
+    input_fields: List[str]
 class EvaluatorLLMArgs(BaseModel):
+    model_config = {"extra": "allow"}
     model: Optional[str] = None
     key: Optional[str] = None
     api_url: Optional[str] = None
-    parameters: Optional[dict] = None
     embedding_config: Optional[EmbeddingConfigArgs] = None
+    custom_metric: Optional[CustomLLMMetricArgs] = None
 class EvalPiplineConfig(BaseModel):
-    """Single evaluator configuration item"""
+    """Single evaluator configuration item."""
     name: str
     config: Optional[EvaluatorRuleArgs | EvaluatorLLMArgs] = None

dingo/exec/local.py CHANGED Viewed

@@ -178,8 +178,9 @@ class LocalExecutor(ExecProto):
                 Model.set_config_rule(model, e_c_i.config)
             elif eval_type == 'llm':
                 model_cls = Model.llm_name_map.get(e_c_i.name)
-                model = model_cls()  # 实例化类为对象，避免多线程配置覆盖
+                model = model_cls()
                 Model.set_config_llm(model, e_c_i.config)
+                Model.set_config_llm(model_cls, e_c_i.config)
             else:
                 raise ValueError(f"Error eval_type: {eval_type}")

dingo/io/output/__init__.py CHANGED Viewed

@@ -1,2 +1,3 @@
+# from dingo.io.output.benchmark_report import BenchmarkReport  # noqa E402.
 from dingo.io.output.result_info import ResultInfo  # noqa E402.
 from dingo.io.output.summary_model import SummaryModel  # noqa E402.

dingo/io/output/result_info.py CHANGED Viewed

@@ -33,6 +33,19 @@ class ResultInfo(BaseModel):
         Returns:
             包含原始数据和dingo_result的字典
         """
+        def move_conflict_field(field_name: str):
+            if field_name not in self.raw_data:
+                return
+            index = 1
+            while True:
+                backup_field = f'{field_name}_old_v{index}'
+                if backup_field not in self.raw_data:
+                    self.raw_data[backup_field] = self.raw_data[field_name]
+                    del self.raw_data[field_name]
+                    return
+                index += 1
         dingo_result = {
             'eval_status': self.eval_status,
             'eval_details': {
@@ -40,5 +53,8 @@ class ResultInfo(BaseModel):
                 for k, v in self.eval_details.items()
             },
         }
+        move_conflict_field('dingo_id')
+        move_conflict_field('dingo_result')
+        self.raw_data['dingo_id'] = self.dingo_id
         self.raw_data['dingo_result'] = dingo_result
         return self.raw_data

dingo/model/llm/agent/agent_article_fact_checker.py CHANGED Viewed

@@ -343,21 +343,21 @@ class ArticleFactChecker(BaseAgent):
             "config": {
                 "key": "your-openai-api-key",
                 "model": "gpt-4o-mini",
-                "parameters": {
-                    "agent_config": {
-                        "max_iterations": 10,
-                        "tools": {
-                            "claims_extractor": {
-                                "api_key": "your-openai-api-key",
-                                "max_claims": 50,
-                                "claim_types": ["factual", "institutional", "statistical", "attribution"]
-                            },
-                            "tavily_search": {
-                                "api_key": "your-tavily-api-key",
-                                "max_results": 5
-                            },
-                            "arxiv_search": {"max_results": 5}
-                        }
+                "agent_config": {
+                    "max_iterations": 10,
+                    "overall_timeout": 900,
+                    "max_concurrent_claims": 5,
+                    "tools": {
+                        "claims_extractor": {
+                            "api_key": "your-openai-api-key",
+                            "max_claims": 50,
+                            "claim_types": ["factual", "institutional", "statistical", "attribution"]
+                        },
+                        "tavily_search": {
+                            "api_key": "your-tavily-api-key",
+                            "max_results": 5
+                        },
+                        "arxiv_search": {"max_results": 5}
                     }
                 }
             }
@@ -372,6 +372,9 @@ class ArticleFactChecker(BaseAgent):
     ]
     max_iterations = 10  # Allow more iterations for comprehensive checking
     max_concurrent_claims = 5  # Default parallel claim verification slots
+    overall_timeout = 900       # 15-minute wall-clock timeout for entire evaluation
+    _MIN_OVERALL_TIMEOUT = 30   # Floor: 30 seconds
+    _MAX_OVERALL_TIMEOUT = 7200  # Ceiling: 2 hours
     _required_fields = [RequiredField.CONTENT]  # Article text
@@ -394,8 +397,8 @@ class ArticleFactChecker(BaseAgent):
         Returns:
             Output directory path (created if needed), or None if saving is disabled.
         """
-        params = cls.dynamic_config.parameters or {}
-        agent_cfg = params.get('agent_config') or {}
+        extra_params = cls.dynamic_config.model_extra
+        agent_cfg = extra_params.get('agent_config') or {}
         explicit_path = agent_cfg.get('output_path')
         if explicit_path:
@@ -816,24 +819,42 @@ class ArticleFactChecker(BaseAgent):
         output_dir = cls._get_output_dir()
         if cls.dynamic_config:
-            if cls.dynamic_config.parameters is None:
-                cls.dynamic_config.parameters = {}
-            cls.dynamic_config.parameters.setdefault("temperature", 0)
+            if 'temperature' not in cls.dynamic_config.model_extra:
+                cls.dynamic_config.temperature = 0
         if output_dir and input_data.content:
             cls._save_article_content(output_dir, input_data.content)
+        timeout = cls._get_overall_timeout()
+        async def _run_with_timeout() -> EvalDetail:
+            return await asyncio.wait_for(
+                cls._async_eval(input_data, start_time, output_dir),
+                timeout=timeout,
+            )
         try:
-            return asyncio.run(cls._async_eval(input_data, start_time, output_dir))
+            return asyncio.run(_run_with_timeout())
+        except asyncio.TimeoutError:
+            elapsed = time.time() - start_time
+            log.warning(f"ArticleFactChecker: overall timeout exceeded ({elapsed:.1f}s / {timeout:.0f}s limit)")
+            return cls._create_overall_timeout_result(elapsed, timeout)
         except RuntimeError as e:
             # Fallback when called inside an already-running event loop (e.g. Jupyter, tests)
             if "cannot run" in str(e).lower() or "already running" in str(e).lower():
                 import concurrent.futures
                 with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-                    future = pool.submit(
-                        lambda: asyncio.run(cls._async_eval(input_data, start_time, output_dir))
-                    )
-                    return future.result()
+                    future = pool.submit(lambda: asyncio.run(_run_with_timeout()))
+                    try:
+                        # Extra margin so asyncio.wait_for fires before this outer timeout
+                        return future.result(timeout=timeout + 30)
+                    except (asyncio.TimeoutError, concurrent.futures.TimeoutError):
+                        elapsed = time.time() - start_time
+                        log.warning(
+                            f"ArticleFactChecker: overall timeout exceeded "
+                            f"({elapsed:.1f}s / {timeout:.0f}s limit, fallback path)"
+                        )
+                        return cls._create_overall_timeout_result(elapsed, timeout)
             raise
     # --- Two-Phase Async Architecture Methods ---
@@ -922,8 +943,8 @@ class ArticleFactChecker(BaseAgent):
         """
         from dingo.model.llm.agent.tools.claims_extractor import ClaimsExtractor, ClaimsExtractorConfig
-        params = cls.dynamic_config.parameters or {}
-        agent_cfg = params.get('agent_config') or {}
+        extra_params = cls.dynamic_config.model_extra
+        agent_cfg = extra_params.get('agent_config') or {}
         extractor_cfg = agent_cfg.get('tools', {}).get('claims_extractor', {})
         config_kwargs: Dict[str, Any] = {
@@ -1019,10 +1040,30 @@ class ArticleFactChecker(BaseAgent):
     @classmethod
     def _get_max_concurrent_claims(cls) -> int:
         """Read max_concurrent_claims from agent_config or use class default."""
-        params = cls.dynamic_config.parameters or {}
-        agent_cfg = params.get('agent_config') or {}
+        extra_params = cls.dynamic_config.model_extra
+        agent_cfg = extra_params.get('agent_config') or {}
         return agent_cfg.get('max_concurrent_claims', cls.max_concurrent_claims)
+    @classmethod
+    def _get_overall_timeout(cls) -> float:
+        """Read overall_timeout from agent_config or use class default (900s).
+        Returns:
+            Positive timeout in seconds, clamped to [30, 7200].
+        """
+        extra_params = cls.dynamic_config.model_extra
+        agent_cfg = extra_params.get('agent_config') or {}
+        raw = agent_cfg.get('overall_timeout', cls.overall_timeout)
+        try:
+            timeout = float(raw)
+        except (TypeError, ValueError):
+            log.warning(f"Invalid overall_timeout={raw!r}, using default {cls.overall_timeout}s")
+            return float(cls.overall_timeout)
+        clamped = max(cls._MIN_OVERALL_TIMEOUT, min(timeout, cls._MAX_OVERALL_TIMEOUT))
+        if clamped != timeout:
+            log.warning(f"overall_timeout={timeout} out of range, clamped to {clamped}s")
+        return float(clamped)
     @classmethod
     def _parse_claim_json_robust(cls, output: Optional[str]) -> Dict[str, Any]:
         """
@@ -1795,6 +1836,38 @@ Begin your systematic fact-checking process now.
         ]
         return result
+    @classmethod
+    def _create_overall_timeout_result(cls, elapsed: float, timeout: float) -> EvalDetail:
+        """
+        Create error result when overall wall-clock timeout is exceeded.
+        Args:
+            elapsed: Actual elapsed time in seconds
+            timeout: Configured timeout limit in seconds
+        Returns:
+            EvalDetail with timeout error status
+        """
+        minutes, seconds = divmod(int(timeout), 60)
+        limit_str = f"{minutes}m{seconds}s" if minutes else f"{int(timeout)}s"
+        result = EvalDetail(metric=cls.__name__)
+        result.status = True
+        result.label = [f"{QualityLabel.QUALITY_BAD_PREFIX}AGENT_OVERALL_TIMEOUT"]
+        result.reason = [
+            "Article Fact-Checking Failed: Overall Timeout Exceeded",
+            "=" * 70,
+            f"Execution exceeded the {int(timeout)}s ({limit_str}) wall-clock limit.",
+            f"Elapsed time: {elapsed:.1f}s",
+            "",
+            "Recommendations:",
+            f"  1. Increase overall_timeout (current: {int(timeout)}s) in agent_config",
+            "  2. Reduce max_claims in claims_extractor config (e.g., 50 -> 20)",
+            "  3. Use a faster model (e.g., gpt-4o-mini instead of gpt-4o)",
+            "  4. Reduce max_concurrent_claims to lower API rate-limit pressure",
+            "  5. Split long articles into shorter sections",
+        ]
+        return result
     @classmethod
     def plan_execution(cls, input_data: Data) -> List[Dict[str, Any]]:
         """

dingo/model/llm/agent/agent_fact_check.py CHANGED Viewed

@@ -70,15 +70,13 @@ class AgentFactCheck(BaseAgent):
             "key": "your-openai-api-key",
             "api_url": "https://api.openai.com/v1",
             "model": "gpt-4.1-mini-2025-04-14",
-            "parameters": {
-                "agent_config": {
-                    "max_iterations": 5,
-                    "tools": {
-                        "tavily_search": {
-                            "api_key": "your-tavily-api-key",
-                            "max_results": 5,
-                            "search_depth": "advanced"
-                        }
+            "agent_config": {
+                "max_iterations": 5,
+                "tools": {
+                    "tavily_search": {
+                        "api_key": "your-tavily-api-key",
+                        "max_results": 5,
+                        "search_depth": "advanced"
                     }
                 }
             }

dingo/model/llm/agent/agent_hallucination.py CHANGED Viewed

@@ -82,15 +82,13 @@ class AgentHallucination(BaseAgent):
             "key": "your-openai-api-key",
             "api_url": "https://api.openai.com/v1",
             "model": "gpt-4.1-mini-2025-04-14",
-            "parameters": {
-                "agent_config": {
-                    "max_iterations": 3,
-                    "tools": {
-                        "tavily_search": {
-                            "api_key": "your-tavily-api-key",
-                            "max_results": 5,
-                            "search_depth": "advanced"
-                        }
+            "agent_config": {
+                "max_iterations": 3,
+                "tools": {
+                    "tavily_search": {
+                        "api_key": "your-tavily-api-key",
+                        "max_results": 5,
+                        "search_depth": "advanced"
                     }
                 }
             }

dingo/model/llm/agent/agent_wrapper.py CHANGED Viewed

@@ -327,22 +327,22 @@ class AgentWrapper:
             )
         # Extract parameters
-        params = dynamic_config.parameters or {}
+        extra_params = dynamic_config.model_extra
         # Create ChatOpenAI instance
         llm = ChatOpenAI(
             api_key=dynamic_config.key,
             base_url=dynamic_config.api_url,
             model=dynamic_config.model or "gpt-4.1-mini",
-            temperature=params.get("temperature", 0.3),
-            max_tokens=params.get("max_tokens", 4096),
-            top_p=params.get("top_p", 1.0),
-            timeout=params.get("timeout", 30)
+            temperature=extra_params.get("temperature", 0.3),
+            max_tokens=extra_params.get("max_tokens", 4096),
+            top_p=extra_params.get("top_p", 1.0),
+            timeout=extra_params.get("timeout", 30)
         )
         log.debug(
             f"Created ChatOpenAI: model={dynamic_config.model}, "
-            f"temp={params.get('temperature', 0.3)}"
+            f"temp={extra_params.get('temperature', 0.3)}"
         )
         return llm

dingo/model/llm/agent/base_agent.py CHANGED Viewed

@@ -146,7 +146,7 @@ class BaseAgent(BaseOpenAI):
         Extract tool configuration from agent's dynamic_config.
         Configuration is expected in:
-        dynamic_config.parameters.agent_config.tools.{tool_name}
+        dynamic_config.agent_config.tools.{tool_name}
         Args:
             tool_name: Name of the tool
@@ -154,8 +154,8 @@ class BaseAgent(BaseOpenAI):
         Returns:
             Dict of configuration values for the tool
         """
-        params = cls.dynamic_config.parameters or {}
-        agent_config = params.get('agent_config', {})
+        extra_params = cls.dynamic_config.model_extra
+        agent_config = extra_params.get('agent_config', {})
         tools_config = agent_config.get('tools', {})
         return tools_config.get(tool_name, {})
@@ -184,8 +184,8 @@ class BaseAgent(BaseOpenAI):
         Returns:
             Maximum number of iterations allowed
         """
-        params = cls.dynamic_config.parameters or {}
-        agent_config = params.get('agent_config', {})
+        extra_params = cls.dynamic_config.model_extra
+        agent_config = extra_params.get('agent_config', {})
         return agent_config.get('max_iterations', cls.max_iterations)
     @classmethod

dingo/model/llm/base_openai.py CHANGED Viewed

@@ -82,22 +82,18 @@ class BaseOpenAI(BaseLLM):
         else:
             model_name = cls.client.models.list().data[0].id
-        params = cls.dynamic_config.parameters
-        cls.validate_config(params)
+        extra_params = cls.dynamic_config.model_extra
+        cls.validate_config(extra_params)
         completions = cls.client.chat.completions.create(
             model=model_name,
             messages=messages,
-            temperature=params.get("temperature", 0.3) if params else 0.3,
-            top_p=params.get("top_p", 1) if params else 1,
-            max_tokens=params.get("max_tokens", 4000) if params else 4000,
-            presence_penalty=params.get("presence_penalty", 0) if params else 0,
-            frequency_penalty=params.get("frequency_penalty", 0) if params else 0,
+            **extra_params,
         )
         if completions.choices[0].finish_reason == "length":
             raise ExceedMaxTokens(
-                f"Exceed max tokens: {params.get('max_tokens', 4000) if params else 4000}"
+                f"Exceed max tokens: {extra_params.get('max_tokens', 4000)}"
             )
         return str(completions.choices[0].message.content)

dingo/model/llm/compare/llm_html_extract_compare.py CHANGED Viewed

@@ -95,13 +95,28 @@ class LLMHtmlExtractCompare(BaseOpenAI):
     @classmethod
     def build_messages(cls, input_data: Data) -> List:
+        raw_data = getattr(input_data, "raw_data", None) or {}
+        # Backward-compatible input handling:
+        # - Preferred: raw_data["magic_md"] and raw_data["content"] (legacy dataset schema)
+        # - Fallback: input_data.prompt (tool A) and input_data.reference (tool B)
+        # - Last resort: input_data.prompt (tool A) and input_data.extra fields if provided
+        tool_a_md = raw_data.get("magic_md", None) or getattr(input_data, "prompt", None)
+        tool_b_md = raw_data.get("content", None) or getattr(input_data, "reference", None)
+        if tool_a_md is None or tool_b_md is None:
+            raise ValueError(
+                "LLMHtmlExtractCompare requires Tool A and Tool B markdown. "
+                "Provide raw_data['magic_md'] and raw_data['content'], or provide Data.prompt (tool A) "
+                "and Data.reference (tool B)."
+            )
         messages = [
             {
                 "role": "user",
                 "content": cls.prompt.format(
                     input_data.content,
-                    input_data.raw_data["magic_md"],
-                    input_data.raw_data["content"],
+                    tool_a_md,
+                    tool_b_md,
                 ),
             }
         ]

dingo/model/llm/compare/llm_html_extract_compare_v2.py CHANGED Viewed

@@ -25,10 +25,18 @@ class LLMHtmlExtractCompareV2(BaseOpenAI):
     输入数据要求：
     - input_data.prompt: 工具A提取的文本
     - input_data.content: 工具B提取的文本
-    - input_data.raw_data.get("language", "en"): 语言类型 ("zh" 或 "en")
+    - language: 可选，来自 input_data.language 或 raw_data["language"]，缺省为 "en"（"zh" / "en"）
     """
-    _required_fields = [RequiredField.CONTENT, RequiredField.PROMPT]
+    _metric_info = {
+        'category': 'Pretrain Text Quality Assessment Metrics',
+        'metric_name': 'LLMHtmlExtractCompareV2',
+        'description': 'Compares two HTML main-content extraction tools by computing text diffs and using LLM to judge which preserves more core information',
+        'paper_title': '',
+        'paper_url': '',
+    }
+    _required_fields = [RequiredField.PROMPT, RequiredField.CONTENT]
     prompt = {
         "content_en": r"""Please compare the following two texts, each extracted from the same webpage using different HTML parsing methods. Your task is to determine whether there is a difference in the core informational content between them.
@@ -174,7 +182,8 @@ C. Text A 包含的核心信息内容少于 Text B
         text_tool_b = input_data.content
         # 获取配置参数
-        language = input_data.raw_data.get("language", "en")
+        raw_data = getattr(input_data, 'raw_data', {}) or {}
+        language = raw_data.get("language", getattr(input_data, 'language', "en"))
         # 计算文本差异
         diff_result = cls.extract_text_diff(text_tool_a, text_tool_b)

dingo-python 2.2.1__py3-none-any.whl → 2.3.0__py3-none-any.whl

dingo-python 2.2.1__py3-none-any.whl → 2.3.0__py3-none-any.whl