PyPI - dingo-python - Versions diffs - 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

dingo-python 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

dingo/config/input_args.py +11 -1
dingo/exec/local.py +2 -1
dingo/io/output/__init__.py +1 -0
dingo/io/output/result_info.py +16 -0
dingo/model/llm/compare/llm_html_extract_compare.py +17 -2
dingo/model/llm/compare/llm_html_extract_compare_v2.py +1 -1
dingo/model/llm/compare/llm_html_extract_compare_v3.py +221 -0
dingo/model/llm/hhh/llm_text_3h.py +1 -1
dingo/model/llm/llm_classify_qr.py +4 -2
dingo/model/llm/llm_custom_metric.py +211 -0
dingo/model/llm/llm_document_parsing_ocr.py +6 -2
dingo/model/llm/llm_factcheck_public.py +1 -1
dingo/model/llm/llm_keyword_matcher.py +1 -1
dingo/model/llm/llm_scout.py +1 -1
dingo/model/llm/mineru/vlm_document_parsing.py +4 -8
dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py +4 -8
dingo/model/llm/rag/llm_rag_answer_relevancy.py +1 -1
dingo/model/llm/rag/llm_rag_chunk_quality.py +99 -0
dingo/model/llm/rag/llm_rag_context_precision.py +1 -1
dingo/model/llm/rag/llm_rag_context_recall.py +1 -1
dingo/model/llm/rag/llm_rag_faithfulness.py +1 -1
dingo/model/llm/vlm_image_relevant.py +9 -52
dingo/model/llm/vlm_layout_quality.py +3 -54
dingo/model/model.py +37 -24
dingo/model/rule/rule_common.py +76 -0
dingo/model/rule/rule_image.py +41 -32
dingo/model/rule/scibase/__init__.py +1 -0
dingo/model/rule/scibase/rule_quanliang.py +655 -0
dingo/run/cli.py +22 -1
dingo/utils/image_loader.py +141 -0
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/METADATA +22 -1
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/RECORD +36 -30
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/WHEEL +0 -0
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/entry_points.txt +0 -0
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/licenses/LICENSE +0 -0
{dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/top_level.txt +0 -0

dingo/model/rule/rule_image.py CHANGED Viewed

@@ -15,6 +15,7 @@ from dingo.io.input import Data, RequiredField
 from dingo.io.output.eval_detail import EvalDetail, QualityLabel
 from dingo.model.model import Model
 from dingo.model.rule.base import BaseRule
+from dingo.utils.image_loader import ImageLoader
 @Model.rule_register("QUALITY_BAD_IMG_EFFECTIVENESS", ["img"])
@@ -39,10 +40,7 @@ class RuleImageValid(BaseRule):
     @classmethod
     def eval(cls, input_data: Data) -> EvalDetail:
         res = EvalDetail(metric=cls.__name__)
-        if isinstance(input_data.image[0], str):
-            img = Image.open(input_data.image[0])
-        else:
-            img = input_data.image[0]
+        img = ImageLoader.load_pil(input_data.image)
         img_new = img.convert("RGB")
         img_np = np.asarray(img_new)
         if np.all(img_np == (255, 255, 255)) or np.all(img_np == (0, 0, 0)):
@@ -76,10 +74,7 @@ class RuleImageSizeValid(BaseRule):
     @classmethod
     def eval(cls, input_data: Data) -> EvalDetail:
         res = EvalDetail(metric=cls.__name__)
-        if isinstance(input_data.image[0], str):
-            img = Image.open(input_data.image[0])
-        else:
-            img = input_data.image[0]
+        img = ImageLoader.load_pil(input_data.image)
         width, height = img.size
         aspect_ratio = width / height
         if aspect_ratio > 4 or aspect_ratio < 0.25:
@@ -119,10 +114,7 @@ class RuleImageQuality(BaseRule):
         import torch
         res = EvalDetail(metric=cls.__name__)
-        if isinstance(input_data.image[0], str):
-            img = Image.open(input_data.image[0])
-        else:
-            img = input_data.image[0]
+        img = ImageLoader.load_pil(input_data.image)
         device = (
             torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
         )
@@ -140,7 +132,12 @@ class RuleImageQuality(BaseRule):
 @Model.rule_register("QUALITY_BAD_IMG_SIMILARITY", [])
 class RuleImageRepeat(BaseRule):
-    """Check for duplicate images using PHash and CNN methods."""
+    """Check for duplicate images using PHash and CNN methods.
+    NOTE: This is a directory-level evaluator. Unlike other image evaluators,
+    it takes a directory path via the ``content`` field (not ``image``).
+    The directory is scanned for duplicate images using PHash and CNN.
+    """
     # Metadata for documentation generation
     _metric_info = {
@@ -227,10 +224,7 @@ class RuleImageTextSimilarity(BaseRule):
         res = EvalDetail(metric=cls.__name__)
         if not input_data.image or not input_data.content:
             return res
-        if isinstance(input_data.image[0], str):
-            img = Image.open(input_data.image[0])
-        else:
-            img = input_data.image[0]
+        img = ImageLoader.load_pil(input_data.image)
         tokenized_texts = word_tokenize(input_data.content)
         if cls.dynamic_config.refer_path is None:
             similar_tool_path = download_similar_tool()
@@ -265,16 +259,23 @@ class RuleImageArtimuse(BaseRule):
         "evaluation_results": ""
     }
-    _required_fields = [RequiredField.CONTENT]
+    _required_fields = [RequiredField.IMAGE]
     dynamic_config = EvaluatorRuleArgs(threshold=6, refer_path=['https://artimuse.intern-ai.org.cn/'])
     @classmethod
     def eval(cls, input_data: Data) -> EvalDetail:
         try:
+            img_url = input_data.image
+            if isinstance(img_url, (list, tuple)):
+                img_url = img_url[0] if img_url else None
+            if not isinstance(img_url, str) or not img_url.startswith(("http://", "https://")):
+                raise ValueError(
+                    f"RuleImageArtimuse requires an HTTP/HTTPS image URL, got: {type(img_url).__name__}"
+                )
             response_create_task = requests.post(
                 cls.dynamic_config.refer_path[0] + 'api/v1/task/create_task',
                 json={
-                    "img_url": input_data.content,
+                    "img_url": img_url,
                     "style": 1
                 },
                 headers={
@@ -357,7 +358,11 @@ class RuleImageLabelOverlap(BaseRule):
             # 2. 解析输入数据
             content = input_data.content
-            image_path = input_data.image[0] if (input_data.image and len(input_data.image) > 0) else None
+            raw_image = input_data.image
+            if isinstance(raw_image, (list, tuple)):
+                image_source = raw_image[0] if raw_image else None
+            else:
+                image_source = raw_image if raw_image else None
             # 3. 解析标注内容
             if isinstance(content, str):
@@ -385,11 +390,11 @@ class RuleImageLabelOverlap(BaseRule):
                 res.label = ["LabelOverlap_Fail.EmptyAnnotations"]
                 res.reason = ["annotations为空"]
                 return res
-            if not image_path or not os.path.exists(image_path):
+            if not image_source:
                 res = EvalDetail(metric=cls.__name__)
                 res.status = False
                 res.label = ["LabelOverlap_Fail.InvalidImagePath"]
-                res.reason = [f"图片路径无效：{image_path}"]
+                res.reason = [f"图片路径无效：{image_source}"]
                 return res
             # 5. 提取边界框并计算重叠
@@ -492,7 +497,7 @@ class RuleImageLabelOverlap(BaseRule):
                 logging.info(f"开始保存图像到: {vis_path}")
                 # 生成可视化图像
-                img = Image.open(image_path).convert("RGB")
+                img = ImageLoader.load_pil(image_source).convert("RGB")
                 draw = ImageDraw.Draw(img)
                 # 绘制边界框
@@ -627,14 +632,18 @@ class RuleImageLabelVisualization(BaseRule):
             # --------------------------
             # 提取核心数据
             content = input_data.content  # 标注数据（str或dict）
-            image_path = input_data.image[0] if (input_data.image and len(input_data.image) > 0) else None
+            raw_image = input_data.image
+            if isinstance(raw_image, (list, tuple)):
+                image_source = raw_image[0] if raw_image else None
+            else:
+                image_source = raw_image if raw_image else None
-            # 验证图片路径有效性
-            if not image_path or not os.path.exists(image_path):
+            # 验证图片源有效性
+            if not image_source:
                 res = EvalDetail(metric=cls.__name__)
                 res.status = False
                 res.label = ["LabelVisualization_Fail.InvalidImagePath"]
-                res.reason = [f"图片路径无效/不存在：{image_path}"]
+                res.reason = [f"图片路径无效/不存在：{image_source}"]
                 return res
             # 解析标注内容
@@ -687,7 +696,7 @@ class RuleImageLabelVisualization(BaseRule):
             # 4. 绘制标注并保存可视化图像
             # --------------------------
             # 打开原始图像
-            img = Image.open(image_path).convert("RGB")
+            img = ImageLoader.load_pil(image_source).convert("RGB")
             draw = ImageDraw.Draw(img)
             # 调用内部函数绘制标注
@@ -698,7 +707,7 @@ class RuleImageLabelVisualization(BaseRule):
                 output_dir = Path(cls.dynamic_config.refer_path[0]).resolve()
                 output_dir.mkdir(parents=True, exist_ok=True)
                 # 生成文件名
-                img_basename = Path(image_path).name
+                img_basename = Path(str(image_source)).name
                 vis_filename = f"visual_{img_basename}"
                 vis_path = str(output_dir / vis_filename)
             except Exception as path_error:
@@ -707,7 +716,7 @@ class RuleImageLabelVisualization(BaseRule):
                 import tempfile
                 output_dir = Path(tempfile.gettempdir()) / "dingo_visualization"
                 output_dir.mkdir(parents=True, exist_ok=True)
-                img_basename = Path(image_path).name
+                img_basename = Path(str(image_source)).name
                 vis_filename = f"visual_{img_basename}"
                 vis_path = str(output_dir / vis_filename)
@@ -740,8 +749,8 @@ class RuleImageLabelVisualization(BaseRule):
 if __name__ == "__main__":
     data = Data(
         data_id='1',
-        content="https://openxlab.oss-cn-shanghai.aliyuncs.com/artimuse/upload/ef39eef6-2b40-4ea3-8285-934684734298-"
-                "stsupload-1753254621827-dog.jpg"
+        image=["https://openxlab.oss-cn-shanghai.aliyuncs.com/artimuse/upload/ef39eef6-2b40-4ea3-8285-934684734298-"
+               "stsupload-1753254621827-dog.jpg"]
     )
     res = RuleImageArtimuse.eval(data)
     print(res)

dingo/model/rule/scibase/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Quanliang/scibase rule implementations."""

dingo-python 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl

dingo-python 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl