dingo-python 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. dingo/config/input_args.py +11 -1
  2. dingo/exec/local.py +2 -1
  3. dingo/io/output/__init__.py +1 -0
  4. dingo/io/output/result_info.py +16 -0
  5. dingo/model/llm/compare/llm_html_extract_compare.py +17 -2
  6. dingo/model/llm/compare/llm_html_extract_compare_v2.py +1 -1
  7. dingo/model/llm/compare/llm_html_extract_compare_v3.py +221 -0
  8. dingo/model/llm/hhh/llm_text_3h.py +1 -1
  9. dingo/model/llm/llm_classify_qr.py +4 -2
  10. dingo/model/llm/llm_custom_metric.py +211 -0
  11. dingo/model/llm/llm_document_parsing_ocr.py +6 -2
  12. dingo/model/llm/llm_factcheck_public.py +1 -1
  13. dingo/model/llm/llm_keyword_matcher.py +1 -1
  14. dingo/model/llm/llm_scout.py +1 -1
  15. dingo/model/llm/mineru/vlm_document_parsing.py +4 -8
  16. dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py +4 -8
  17. dingo/model/llm/rag/llm_rag_answer_relevancy.py +1 -1
  18. dingo/model/llm/rag/llm_rag_chunk_quality.py +99 -0
  19. dingo/model/llm/rag/llm_rag_context_precision.py +1 -1
  20. dingo/model/llm/rag/llm_rag_context_recall.py +1 -1
  21. dingo/model/llm/rag/llm_rag_faithfulness.py +1 -1
  22. dingo/model/llm/vlm_image_relevant.py +9 -52
  23. dingo/model/llm/vlm_layout_quality.py +3 -54
  24. dingo/model/model.py +37 -24
  25. dingo/model/rule/rule_common.py +76 -0
  26. dingo/model/rule/rule_image.py +41 -32
  27. dingo/model/rule/scibase/__init__.py +1 -0
  28. dingo/model/rule/scibase/rule_quanliang.py +655 -0
  29. dingo/run/cli.py +22 -1
  30. dingo/utils/image_loader.py +141 -0
  31. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/METADATA +22 -1
  32. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/RECORD +36 -30
  33. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/WHEEL +0 -0
  34. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/entry_points.txt +0 -0
  35. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/licenses/LICENSE +0 -0
  36. {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,7 @@ from dingo.io.input import Data, RequiredField
15
15
  from dingo.io.output.eval_detail import EvalDetail, QualityLabel
16
16
  from dingo.model.model import Model
17
17
  from dingo.model.rule.base import BaseRule
18
+ from dingo.utils.image_loader import ImageLoader
18
19
 
19
20
 
20
21
  @Model.rule_register("QUALITY_BAD_IMG_EFFECTIVENESS", ["img"])
@@ -39,10 +40,7 @@ class RuleImageValid(BaseRule):
39
40
  @classmethod
40
41
  def eval(cls, input_data: Data) -> EvalDetail:
41
42
  res = EvalDetail(metric=cls.__name__)
42
- if isinstance(input_data.image[0], str):
43
- img = Image.open(input_data.image[0])
44
- else:
45
- img = input_data.image[0]
43
+ img = ImageLoader.load_pil(input_data.image)
46
44
  img_new = img.convert("RGB")
47
45
  img_np = np.asarray(img_new)
48
46
  if np.all(img_np == (255, 255, 255)) or np.all(img_np == (0, 0, 0)):
@@ -76,10 +74,7 @@ class RuleImageSizeValid(BaseRule):
76
74
  @classmethod
77
75
  def eval(cls, input_data: Data) -> EvalDetail:
78
76
  res = EvalDetail(metric=cls.__name__)
79
- if isinstance(input_data.image[0], str):
80
- img = Image.open(input_data.image[0])
81
- else:
82
- img = input_data.image[0]
77
+ img = ImageLoader.load_pil(input_data.image)
83
78
  width, height = img.size
84
79
  aspect_ratio = width / height
85
80
  if aspect_ratio > 4 or aspect_ratio < 0.25:
@@ -119,10 +114,7 @@ class RuleImageQuality(BaseRule):
119
114
  import torch
120
115
 
121
116
  res = EvalDetail(metric=cls.__name__)
122
- if isinstance(input_data.image[0], str):
123
- img = Image.open(input_data.image[0])
124
- else:
125
- img = input_data.image[0]
117
+ img = ImageLoader.load_pil(input_data.image)
126
118
  device = (
127
119
  torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
128
120
  )
@@ -140,7 +132,12 @@ class RuleImageQuality(BaseRule):
140
132
 
141
133
  @Model.rule_register("QUALITY_BAD_IMG_SIMILARITY", [])
142
134
  class RuleImageRepeat(BaseRule):
143
- """Check for duplicate images using PHash and CNN methods."""
135
+ """Check for duplicate images using PHash and CNN methods.
136
+
137
+ NOTE: This is a directory-level evaluator. Unlike other image evaluators,
138
+ it takes a directory path via the ``content`` field (not ``image``).
139
+ The directory is scanned for duplicate images using PHash and CNN.
140
+ """
144
141
 
145
142
  # Metadata for documentation generation
146
143
  _metric_info = {
@@ -227,10 +224,7 @@ class RuleImageTextSimilarity(BaseRule):
227
224
  res = EvalDetail(metric=cls.__name__)
228
225
  if not input_data.image or not input_data.content:
229
226
  return res
230
- if isinstance(input_data.image[0], str):
231
- img = Image.open(input_data.image[0])
232
- else:
233
- img = input_data.image[0]
227
+ img = ImageLoader.load_pil(input_data.image)
234
228
  tokenized_texts = word_tokenize(input_data.content)
235
229
  if cls.dynamic_config.refer_path is None:
236
230
  similar_tool_path = download_similar_tool()
@@ -265,16 +259,23 @@ class RuleImageArtimuse(BaseRule):
265
259
  "evaluation_results": ""
266
260
  }
267
261
 
268
- _required_fields = [RequiredField.CONTENT]
262
+ _required_fields = [RequiredField.IMAGE]
269
263
  dynamic_config = EvaluatorRuleArgs(threshold=6, refer_path=['https://artimuse.intern-ai.org.cn/'])
270
264
 
271
265
  @classmethod
272
266
  def eval(cls, input_data: Data) -> EvalDetail:
273
267
  try:
268
+ img_url = input_data.image
269
+ if isinstance(img_url, (list, tuple)):
270
+ img_url = img_url[0] if img_url else None
271
+ if not isinstance(img_url, str) or not img_url.startswith(("http://", "https://")):
272
+ raise ValueError(
273
+ f"RuleImageArtimuse requires an HTTP/HTTPS image URL, got: {type(img_url).__name__}"
274
+ )
274
275
  response_create_task = requests.post(
275
276
  cls.dynamic_config.refer_path[0] + 'api/v1/task/create_task',
276
277
  json={
277
- "img_url": input_data.content,
278
+ "img_url": img_url,
278
279
  "style": 1
279
280
  },
280
281
  headers={
@@ -357,7 +358,11 @@ class RuleImageLabelOverlap(BaseRule):
357
358
 
358
359
  # 2. 解析输入数据
359
360
  content = input_data.content
360
- image_path = input_data.image[0] if (input_data.image and len(input_data.image) > 0) else None
361
+ raw_image = input_data.image
362
+ if isinstance(raw_image, (list, tuple)):
363
+ image_source = raw_image[0] if raw_image else None
364
+ else:
365
+ image_source = raw_image if raw_image else None
361
366
 
362
367
  # 3. 解析标注内容
363
368
  if isinstance(content, str):
@@ -385,11 +390,11 @@ class RuleImageLabelOverlap(BaseRule):
385
390
  res.label = ["LabelOverlap_Fail.EmptyAnnotations"]
386
391
  res.reason = ["annotations为空"]
387
392
  return res
388
- if not image_path or not os.path.exists(image_path):
393
+ if not image_source:
389
394
  res = EvalDetail(metric=cls.__name__)
390
395
  res.status = False
391
396
  res.label = ["LabelOverlap_Fail.InvalidImagePath"]
392
- res.reason = [f"图片路径无效:{image_path}"]
397
+ res.reason = [f"图片路径无效:{image_source}"]
393
398
  return res
394
399
 
395
400
  # 5. 提取边界框并计算重叠
@@ -492,7 +497,7 @@ class RuleImageLabelOverlap(BaseRule):
492
497
  logging.info(f"开始保存图像到: {vis_path}")
493
498
 
494
499
  # 生成可视化图像
495
- img = Image.open(image_path).convert("RGB")
500
+ img = ImageLoader.load_pil(image_source).convert("RGB")
496
501
  draw = ImageDraw.Draw(img)
497
502
 
498
503
  # 绘制边界框
@@ -627,14 +632,18 @@ class RuleImageLabelVisualization(BaseRule):
627
632
  # --------------------------
628
633
  # 提取核心数据
629
634
  content = input_data.content # 标注数据(str或dict)
630
- image_path = input_data.image[0] if (input_data.image and len(input_data.image) > 0) else None
635
+ raw_image = input_data.image
636
+ if isinstance(raw_image, (list, tuple)):
637
+ image_source = raw_image[0] if raw_image else None
638
+ else:
639
+ image_source = raw_image if raw_image else None
631
640
 
632
- # 验证图片路径有效性
633
- if not image_path or not os.path.exists(image_path):
641
+ # 验证图片源有效性
642
+ if not image_source:
634
643
  res = EvalDetail(metric=cls.__name__)
635
644
  res.status = False
636
645
  res.label = ["LabelVisualization_Fail.InvalidImagePath"]
637
- res.reason = [f"图片路径无效/不存在:{image_path}"]
646
+ res.reason = [f"图片路径无效/不存在:{image_source}"]
638
647
  return res
639
648
 
640
649
  # 解析标注内容
@@ -687,7 +696,7 @@ class RuleImageLabelVisualization(BaseRule):
687
696
  # 4. 绘制标注并保存可视化图像
688
697
  # --------------------------
689
698
  # 打开原始图像
690
- img = Image.open(image_path).convert("RGB")
699
+ img = ImageLoader.load_pil(image_source).convert("RGB")
691
700
  draw = ImageDraw.Draw(img)
692
701
 
693
702
  # 调用内部函数绘制标注
@@ -698,7 +707,7 @@ class RuleImageLabelVisualization(BaseRule):
698
707
  output_dir = Path(cls.dynamic_config.refer_path[0]).resolve()
699
708
  output_dir.mkdir(parents=True, exist_ok=True)
700
709
  # 生成文件名
701
- img_basename = Path(image_path).name
710
+ img_basename = Path(str(image_source)).name
702
711
  vis_filename = f"visual_{img_basename}"
703
712
  vis_path = str(output_dir / vis_filename)
704
713
  except Exception as path_error:
@@ -707,7 +716,7 @@ class RuleImageLabelVisualization(BaseRule):
707
716
  import tempfile
708
717
  output_dir = Path(tempfile.gettempdir()) / "dingo_visualization"
709
718
  output_dir.mkdir(parents=True, exist_ok=True)
710
- img_basename = Path(image_path).name
719
+ img_basename = Path(str(image_source)).name
711
720
  vis_filename = f"visual_{img_basename}"
712
721
  vis_path = str(output_dir / vis_filename)
713
722
 
@@ -740,8 +749,8 @@ class RuleImageLabelVisualization(BaseRule):
740
749
  if __name__ == "__main__":
741
750
  data = Data(
742
751
  data_id='1',
743
- content="https://openxlab.oss-cn-shanghai.aliyuncs.com/artimuse/upload/ef39eef6-2b40-4ea3-8285-934684734298-"
744
- "stsupload-1753254621827-dog.jpg"
752
+ image=["https://openxlab.oss-cn-shanghai.aliyuncs.com/artimuse/upload/ef39eef6-2b40-4ea3-8285-934684734298-"
753
+ "stsupload-1753254621827-dog.jpg"]
745
754
  )
746
755
  res = RuleImageArtimuse.eval(data)
747
756
  print(res)
@@ -0,0 +1 @@
1
+ """Quanliang/scibase rule implementations."""