dingo-python 2.2.2__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- dingo/config/input_args.py +11 -1
- dingo/exec/local.py +2 -1
- dingo/io/output/__init__.py +1 -0
- dingo/io/output/result_info.py +16 -0
- dingo/model/llm/compare/llm_html_extract_compare.py +17 -2
- dingo/model/llm/compare/llm_html_extract_compare_v2.py +1 -1
- dingo/model/llm/compare/llm_html_extract_compare_v3.py +221 -0
- dingo/model/llm/hhh/llm_text_3h.py +1 -1
- dingo/model/llm/llm_classify_qr.py +4 -2
- dingo/model/llm/llm_custom_metric.py +211 -0
- dingo/model/llm/llm_document_parsing_ocr.py +6 -2
- dingo/model/llm/llm_factcheck_public.py +1 -1
- dingo/model/llm/llm_keyword_matcher.py +1 -1
- dingo/model/llm/llm_scout.py +1 -1
- dingo/model/llm/mineru/vlm_document_parsing.py +4 -8
- dingo/model/llm/mineru/vlm_document_parsing_ocr_train.py +4 -8
- dingo/model/llm/rag/llm_rag_answer_relevancy.py +1 -1
- dingo/model/llm/rag/llm_rag_chunk_quality.py +99 -0
- dingo/model/llm/rag/llm_rag_context_precision.py +1 -1
- dingo/model/llm/rag/llm_rag_context_recall.py +1 -1
- dingo/model/llm/rag/llm_rag_faithfulness.py +1 -1
- dingo/model/llm/vlm_image_relevant.py +9 -52
- dingo/model/llm/vlm_layout_quality.py +3 -54
- dingo/model/model.py +37 -24
- dingo/model/rule/rule_common.py +76 -0
- dingo/model/rule/rule_image.py +41 -32
- dingo/model/rule/scibase/__init__.py +1 -0
- dingo/model/rule/scibase/rule_quanliang.py +655 -0
- dingo/run/cli.py +22 -1
- dingo/utils/image_loader.py +141 -0
- {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/METADATA +22 -1
- {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/RECORD +36 -30
- {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/WHEEL +0 -0
- {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/entry_points.txt +0 -0
- {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/licenses/LICENSE +0 -0
- {dingo_python-2.2.2.dist-info → dingo_python-2.3.0.dist-info}/top_level.txt +0 -0
dingo/model/rule/rule_image.py
CHANGED
|
@@ -15,6 +15,7 @@ from dingo.io.input import Data, RequiredField
|
|
|
15
15
|
from dingo.io.output.eval_detail import EvalDetail, QualityLabel
|
|
16
16
|
from dingo.model.model import Model
|
|
17
17
|
from dingo.model.rule.base import BaseRule
|
|
18
|
+
from dingo.utils.image_loader import ImageLoader
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
@Model.rule_register("QUALITY_BAD_IMG_EFFECTIVENESS", ["img"])
|
|
@@ -39,10 +40,7 @@ class RuleImageValid(BaseRule):
|
|
|
39
40
|
@classmethod
|
|
40
41
|
def eval(cls, input_data: Data) -> EvalDetail:
|
|
41
42
|
res = EvalDetail(metric=cls.__name__)
|
|
42
|
-
|
|
43
|
-
img = Image.open(input_data.image[0])
|
|
44
|
-
else:
|
|
45
|
-
img = input_data.image[0]
|
|
43
|
+
img = ImageLoader.load_pil(input_data.image)
|
|
46
44
|
img_new = img.convert("RGB")
|
|
47
45
|
img_np = np.asarray(img_new)
|
|
48
46
|
if np.all(img_np == (255, 255, 255)) or np.all(img_np == (0, 0, 0)):
|
|
@@ -76,10 +74,7 @@ class RuleImageSizeValid(BaseRule):
|
|
|
76
74
|
@classmethod
|
|
77
75
|
def eval(cls, input_data: Data) -> EvalDetail:
|
|
78
76
|
res = EvalDetail(metric=cls.__name__)
|
|
79
|
-
|
|
80
|
-
img = Image.open(input_data.image[0])
|
|
81
|
-
else:
|
|
82
|
-
img = input_data.image[0]
|
|
77
|
+
img = ImageLoader.load_pil(input_data.image)
|
|
83
78
|
width, height = img.size
|
|
84
79
|
aspect_ratio = width / height
|
|
85
80
|
if aspect_ratio > 4 or aspect_ratio < 0.25:
|
|
@@ -119,10 +114,7 @@ class RuleImageQuality(BaseRule):
|
|
|
119
114
|
import torch
|
|
120
115
|
|
|
121
116
|
res = EvalDetail(metric=cls.__name__)
|
|
122
|
-
|
|
123
|
-
img = Image.open(input_data.image[0])
|
|
124
|
-
else:
|
|
125
|
-
img = input_data.image[0]
|
|
117
|
+
img = ImageLoader.load_pil(input_data.image)
|
|
126
118
|
device = (
|
|
127
119
|
torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
|
128
120
|
)
|
|
@@ -140,7 +132,12 @@ class RuleImageQuality(BaseRule):
|
|
|
140
132
|
|
|
141
133
|
@Model.rule_register("QUALITY_BAD_IMG_SIMILARITY", [])
|
|
142
134
|
class RuleImageRepeat(BaseRule):
|
|
143
|
-
"""Check for duplicate images using PHash and CNN methods.
|
|
135
|
+
"""Check for duplicate images using PHash and CNN methods.
|
|
136
|
+
|
|
137
|
+
NOTE: This is a directory-level evaluator. Unlike other image evaluators,
|
|
138
|
+
it takes a directory path via the ``content`` field (not ``image``).
|
|
139
|
+
The directory is scanned for duplicate images using PHash and CNN.
|
|
140
|
+
"""
|
|
144
141
|
|
|
145
142
|
# Metadata for documentation generation
|
|
146
143
|
_metric_info = {
|
|
@@ -227,10 +224,7 @@ class RuleImageTextSimilarity(BaseRule):
|
|
|
227
224
|
res = EvalDetail(metric=cls.__name__)
|
|
228
225
|
if not input_data.image or not input_data.content:
|
|
229
226
|
return res
|
|
230
|
-
|
|
231
|
-
img = Image.open(input_data.image[0])
|
|
232
|
-
else:
|
|
233
|
-
img = input_data.image[0]
|
|
227
|
+
img = ImageLoader.load_pil(input_data.image)
|
|
234
228
|
tokenized_texts = word_tokenize(input_data.content)
|
|
235
229
|
if cls.dynamic_config.refer_path is None:
|
|
236
230
|
similar_tool_path = download_similar_tool()
|
|
@@ -265,16 +259,23 @@ class RuleImageArtimuse(BaseRule):
|
|
|
265
259
|
"evaluation_results": ""
|
|
266
260
|
}
|
|
267
261
|
|
|
268
|
-
_required_fields = [RequiredField.
|
|
262
|
+
_required_fields = [RequiredField.IMAGE]
|
|
269
263
|
dynamic_config = EvaluatorRuleArgs(threshold=6, refer_path=['https://artimuse.intern-ai.org.cn/'])
|
|
270
264
|
|
|
271
265
|
@classmethod
|
|
272
266
|
def eval(cls, input_data: Data) -> EvalDetail:
|
|
273
267
|
try:
|
|
268
|
+
img_url = input_data.image
|
|
269
|
+
if isinstance(img_url, (list, tuple)):
|
|
270
|
+
img_url = img_url[0] if img_url else None
|
|
271
|
+
if not isinstance(img_url, str) or not img_url.startswith(("http://", "https://")):
|
|
272
|
+
raise ValueError(
|
|
273
|
+
f"RuleImageArtimuse requires an HTTP/HTTPS image URL, got: {type(img_url).__name__}"
|
|
274
|
+
)
|
|
274
275
|
response_create_task = requests.post(
|
|
275
276
|
cls.dynamic_config.refer_path[0] + 'api/v1/task/create_task',
|
|
276
277
|
json={
|
|
277
|
-
"img_url":
|
|
278
|
+
"img_url": img_url,
|
|
278
279
|
"style": 1
|
|
279
280
|
},
|
|
280
281
|
headers={
|
|
@@ -357,7 +358,11 @@ class RuleImageLabelOverlap(BaseRule):
|
|
|
357
358
|
|
|
358
359
|
# 2. 解析输入数据
|
|
359
360
|
content = input_data.content
|
|
360
|
-
|
|
361
|
+
raw_image = input_data.image
|
|
362
|
+
if isinstance(raw_image, (list, tuple)):
|
|
363
|
+
image_source = raw_image[0] if raw_image else None
|
|
364
|
+
else:
|
|
365
|
+
image_source = raw_image if raw_image else None
|
|
361
366
|
|
|
362
367
|
# 3. 解析标注内容
|
|
363
368
|
if isinstance(content, str):
|
|
@@ -385,11 +390,11 @@ class RuleImageLabelOverlap(BaseRule):
|
|
|
385
390
|
res.label = ["LabelOverlap_Fail.EmptyAnnotations"]
|
|
386
391
|
res.reason = ["annotations为空"]
|
|
387
392
|
return res
|
|
388
|
-
if not
|
|
393
|
+
if not image_source:
|
|
389
394
|
res = EvalDetail(metric=cls.__name__)
|
|
390
395
|
res.status = False
|
|
391
396
|
res.label = ["LabelOverlap_Fail.InvalidImagePath"]
|
|
392
|
-
res.reason = [f"图片路径无效:{
|
|
397
|
+
res.reason = [f"图片路径无效:{image_source}"]
|
|
393
398
|
return res
|
|
394
399
|
|
|
395
400
|
# 5. 提取边界框并计算重叠
|
|
@@ -492,7 +497,7 @@ class RuleImageLabelOverlap(BaseRule):
|
|
|
492
497
|
logging.info(f"开始保存图像到: {vis_path}")
|
|
493
498
|
|
|
494
499
|
# 生成可视化图像
|
|
495
|
-
img =
|
|
500
|
+
img = ImageLoader.load_pil(image_source).convert("RGB")
|
|
496
501
|
draw = ImageDraw.Draw(img)
|
|
497
502
|
|
|
498
503
|
# 绘制边界框
|
|
@@ -627,14 +632,18 @@ class RuleImageLabelVisualization(BaseRule):
|
|
|
627
632
|
# --------------------------
|
|
628
633
|
# 提取核心数据
|
|
629
634
|
content = input_data.content # 标注数据(str或dict)
|
|
630
|
-
|
|
635
|
+
raw_image = input_data.image
|
|
636
|
+
if isinstance(raw_image, (list, tuple)):
|
|
637
|
+
image_source = raw_image[0] if raw_image else None
|
|
638
|
+
else:
|
|
639
|
+
image_source = raw_image if raw_image else None
|
|
631
640
|
|
|
632
|
-
#
|
|
633
|
-
if not
|
|
641
|
+
# 验证图片源有效性
|
|
642
|
+
if not image_source:
|
|
634
643
|
res = EvalDetail(metric=cls.__name__)
|
|
635
644
|
res.status = False
|
|
636
645
|
res.label = ["LabelVisualization_Fail.InvalidImagePath"]
|
|
637
|
-
res.reason = [f"图片路径无效/不存在:{
|
|
646
|
+
res.reason = [f"图片路径无效/不存在:{image_source}"]
|
|
638
647
|
return res
|
|
639
648
|
|
|
640
649
|
# 解析标注内容
|
|
@@ -687,7 +696,7 @@ class RuleImageLabelVisualization(BaseRule):
|
|
|
687
696
|
# 4. 绘制标注并保存可视化图像
|
|
688
697
|
# --------------------------
|
|
689
698
|
# 打开原始图像
|
|
690
|
-
img =
|
|
699
|
+
img = ImageLoader.load_pil(image_source).convert("RGB")
|
|
691
700
|
draw = ImageDraw.Draw(img)
|
|
692
701
|
|
|
693
702
|
# 调用内部函数绘制标注
|
|
@@ -698,7 +707,7 @@ class RuleImageLabelVisualization(BaseRule):
|
|
|
698
707
|
output_dir = Path(cls.dynamic_config.refer_path[0]).resolve()
|
|
699
708
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
700
709
|
# 生成文件名
|
|
701
|
-
img_basename = Path(
|
|
710
|
+
img_basename = Path(str(image_source)).name
|
|
702
711
|
vis_filename = f"visual_{img_basename}"
|
|
703
712
|
vis_path = str(output_dir / vis_filename)
|
|
704
713
|
except Exception as path_error:
|
|
@@ -707,7 +716,7 @@ class RuleImageLabelVisualization(BaseRule):
|
|
|
707
716
|
import tempfile
|
|
708
717
|
output_dir = Path(tempfile.gettempdir()) / "dingo_visualization"
|
|
709
718
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
710
|
-
img_basename = Path(
|
|
719
|
+
img_basename = Path(str(image_source)).name
|
|
711
720
|
vis_filename = f"visual_{img_basename}"
|
|
712
721
|
vis_path = str(output_dir / vis_filename)
|
|
713
722
|
|
|
@@ -740,8 +749,8 @@ class RuleImageLabelVisualization(BaseRule):
|
|
|
740
749
|
if __name__ == "__main__":
|
|
741
750
|
data = Data(
|
|
742
751
|
data_id='1',
|
|
743
|
-
|
|
744
|
-
|
|
752
|
+
image=["https://openxlab.oss-cn-shanghai.aliyuncs.com/artimuse/upload/ef39eef6-2b40-4ea3-8285-934684734298-"
|
|
753
|
+
"stsupload-1753254621827-dog.jpg"]
|
|
745
754
|
)
|
|
746
755
|
res = RuleImageArtimuse.eval(data)
|
|
747
756
|
print(res)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Quanliang/scibase rule implementations."""
|