dataset-toolkit 0.1.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataset_toolkit-0.1.2/dataset_toolkit.egg-info → dataset_toolkit-0.2.0}/PKG-INFO +39 -1
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/README.md +38 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/__init__.py +9 -2
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/exporters/yolo_exporter.py +4 -2
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/loaders/local_loader.py +145 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/models.py +3 -1
- dataset_toolkit-0.2.0/dataset_toolkit/processors/__init__.py +9 -0
- dataset_toolkit-0.2.0/dataset_toolkit/processors/evaluator.py +535 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0/dataset_toolkit.egg-info}/PKG-INFO +39 -1
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit.egg-info/SOURCES.txt +2 -0
- dataset_toolkit-0.2.0/examples/evaluation_example.py +250 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/pyproject.toml +1 -1
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/setup.py +1 -1
- dataset_toolkit-0.1.2/dataset_toolkit/utils/__init__.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/LICENSE +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/MANIFEST.in +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/exporters/__init__.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/exporters/coco_exporter.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/exporters/txt_exporter.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/loaders/__init__.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/pipeline.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/processors/merger.py +0 -0
- {dataset_toolkit-0.1.2/dataset_toolkit/processors → dataset_toolkit-0.2.0/dataset_toolkit/utils}/__init__.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/utils/coords.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit.egg-info/dependency_links.txt +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit.egg-info/requires.txt +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit.egg-info/top_level.txt +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/examples/basic_usage.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/requirements.txt +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/setup.cfg +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/tests/__init__.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/tests/conftest.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/tests/test_exporters.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/tests/test_loaders.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/tests/test_processors.py +0 -0
- {dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/tests/test_pypi_test.py +0 -0
{dataset_toolkit-0.1.2/dataset_toolkit.egg-info → dataset_toolkit-0.2.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataset-toolkit
-Version: 0.1.2
+Version: 0.2.0
 Summary: A toolkit for loading, processing, and exporting computer-vision datasets
 Home-page: https://github.com/yourusername/dataset-toolkit
 Author: wenxiang.han

@@ -42,6 +42,7 @@ Dynamic: requires-python
 - 📤 **Flexible export**: export to multiple formats such as COCO JSON and TXT
 - 🛠️ **Utility functions**: practical helpers such as coordinate conversion
 - 📦 **Standardized data model**: a unified internal representation that is easy to extend
+- 📊 **Model evaluation**: a complete object-detection model evaluation system (v0.2.0+)
 
 ## 📦 Installation
 
@@ -121,6 +122,43 @@ result = (pipeline
     .execute())
 ```
 
+### Model evaluation (v0.2.0+)
+
+```python
+from dataset_toolkit import (
+    load_yolo_from_local,
+    load_predictions_from_streamlined,
+    Evaluator
+)
+
+# 1. Load the GT and the prediction results
+gt_dataset = load_yolo_from_local("/data/test/labels", {0: 'parcel'})
+pred_dataset = load_predictions_from_streamlined(
+    "/results/predictions",
+    categories={0: 'parcel'},
+    image_dir="/data/test/images"
+)
+
+# 2. Create the evaluator
+evaluator = Evaluator(
+    positive_gt=gt_dataset,
+    positive_pred=pred_dataset,
+    iou_threshold=0.5
+)
+
+# 3. Compute metrics
+metrics = evaluator.calculate_metrics(confidence_threshold=0.5)
+print(f"Precision: {metrics['precision']:.4f}")
+print(f"Recall: {metrics['recall']:.4f}")
+print(f"F1-Score: {metrics['f1']:.4f}")
+
+# 4. Find the optimal threshold
+optimal = evaluator.find_optimal_threshold(metric='f1')
+print(f"Optimal threshold: {optimal['optimal_threshold']}")
+```
+
+See [EVALUATION_GUIDE.md](EVALUATION_GUIDE.md) for detailed documentation
+
 ## 📚 API documentation
 
 ### Data loaders
{dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/README.md

@@ -9,6 +9,7 @@
 - 📤 **Flexible export**: export to multiple formats such as COCO JSON and TXT
 - 🛠️ **Utility functions**: practical helpers such as coordinate conversion
 - 📦 **Standardized data model**: a unified internal representation that is easy to extend
+- 📊 **Model evaluation**: a complete object-detection model evaluation system (v0.2.0+)
 
 ## 📦 Installation
 
@@ -88,6 +89,43 @@ result = (pipeline
     .execute())
 ```
 
+### Model evaluation (v0.2.0+)
+
+```python
+from dataset_toolkit import (
+    load_yolo_from_local,
+    load_predictions_from_streamlined,
+    Evaluator
+)
+
+# 1. Load the GT and the prediction results
+gt_dataset = load_yolo_from_local("/data/test/labels", {0: 'parcel'})
+pred_dataset = load_predictions_from_streamlined(
+    "/results/predictions",
+    categories={0: 'parcel'},
+    image_dir="/data/test/images"
+)
+
+# 2. Create the evaluator
+evaluator = Evaluator(
+    positive_gt=gt_dataset,
+    positive_pred=pred_dataset,
+    iou_threshold=0.5
+)
+
+# 3. Compute metrics
+metrics = evaluator.calculate_metrics(confidence_threshold=0.5)
+print(f"Precision: {metrics['precision']:.4f}")
+print(f"Recall: {metrics['recall']:.4f}")
+print(f"F1-Score: {metrics['f1']:.4f}")
+
+# 4. Find the optimal threshold
+optimal = evaluator.find_optimal_threshold(metric='f1')
+print(f"Optimal threshold: {optimal['optimal_threshold']}")
+```
+
+See [EVALUATION_GUIDE.md](EVALUATION_GUIDE.md) for detailed documentation
+
 ## 📚 API documentation
 
 ### Data loaders
{dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/__init__.py

@@ -15,7 +15,7 @@ Dataset Toolkit - a toolkit for processing computer-vision datasets
     >>> export_to_coco(dataset, "output.json")
 """
 
-__version__ = "0.1.2"
+__version__ = "0.2.0"
 __author__ = "wenxiang.han"
 __email__ = "wenxiang.han@anker-in.com"
 
@@ -28,13 +28,18 @@ from dataset_toolkit.models import (
 
 from dataset_toolkit.loaders.local_loader import (
     load_yolo_from_local,
-    load_csv_result_from_local
+    load_csv_result_from_local,
+    load_predictions_from_streamlined
 )
 
 from dataset_toolkit.processors.merger import (
     merge_datasets
 )
 
+from dataset_toolkit.processors.evaluator import (
+    Evaluator
+)
+
 from dataset_toolkit.exporters.coco_exporter import (
     export_to_coco
 )
@@ -69,9 +74,11 @@ __all__ = [
     # Loaders
     "load_yolo_from_local",
     "load_csv_result_from_local",
+    "load_predictions_from_streamlined",
 
     # Processors
     "merge_datasets",
+    "Evaluator",
 
     # Exporters
     "export_to_coco",
{dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/exporters/yolo_exporter.py

@@ -146,8 +146,10 @@ def export_to_yolo_and_txt(
                 rel_path = os.path.relpath(img_in_yolo, txt_path.parent)
                 f.write(f"{rel_path}\n")
             else:
-                #
-
+                # Absolute path (normalized, but without resolving symlinks)
+                # os.path.normpath normalizes the path, stripping ".." segments etc.
+                normalized_path = os.path.normpath(str(img_in_yolo.absolute()))
+                f.write(f"{normalized_path}\n")
 
     print(f"✓ txt list generated: {len(dataset.images)} lines")
 
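The replacement writes absolute image paths through `os.path.normpath` on `Path.absolute()` rather than `Path.resolve()`: `normpath` is a purely lexical cleanup that removes `..` segments, while `resolve()` would additionally expand symlinks and rewrite a symlinked dataset root to its physical location. A minimal sketch of the difference (the `/data` symlink is a hypothetical example, not part of the package):

```python
import os
from pathlib import Path

p = Path("/data/train/../train/images/img1.jpg")

# Lexical normalization only: ".." is collapsed, symlinks are left alone.
print(os.path.normpath(str(p.absolute())))  # /data/train/images/img1.jpg

# resolve() also follows symlinks; if /data -> /mnt/nvme/data, the output
# becomes /mnt/nvme/data/train/images/img1.jpg instead.
print(p.resolve())
```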
{dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/loaders/local_loader.py

@@ -186,4 +186,149 @@ def load_csv_result_from_local(dataset_path: str, categories: Dict[int, str] = None
 
     print(f"Loading finished. Found {image_count} images and {len(dataset.categories)} categories.")
     print(f"Category mapping: {dataset.categories}")
+    return dataset
+
+
+def load_predictions_from_streamlined(
+    predictions_dir: str,
+    categories: Dict[int, str],
+    image_dir: str = None
+) -> Dataset:
+    """
+    Load a prediction dataset from a streamlined inference results directory.
+
+    Prediction file format (one detection per line):
+    class_id,confidence,center_x,center_y,width,height
+    e.g.: 0,0.934679,354.00,388.00,274.00,102.00
+
+    Args:
+        predictions_dir: directory containing the prediction txt files
+        categories: category mapping dict {class_id: class_name}
+        image_dir: image directory (optional, used to read image sizes);
+                   if not provided, sibling directories of the prediction files are searched
+
+    Returns:
+        Dataset: prediction dataset object with dataset_type='pred'
+    """
+    pred_path = Path(predictions_dir)
+
+    if not pred_path.is_dir():
+        raise FileNotFoundError(f"Predictions directory does not exist: {pred_path}")
+
+    # Try to locate the image directory automatically
+    if image_dir is None:
+        # Try common image directory locations
+        possible_image_dirs = [
+            pred_path.parent / 'images',
+            pred_path.parent.parent / 'images',
+        ]
+        for possible_dir in possible_image_dirs:
+            if possible_dir.is_dir():
+                image_dir = str(possible_dir)
+                print(f"Image directory found automatically: {image_dir}")
+                break
+
+    dataset = Dataset(
+        name=pred_path.name,
+        categories=categories,
+        dataset_type="pred"
+    )
+
+    supported_extensions = ['.jpg', '.jpeg', '.png']
+    txt_files = list(pred_path.glob('*.txt'))
+
+    print(f"Loading predictions: {pred_path.name}...")
+    print(f"Found {len(txt_files)} prediction files")
+
+    loaded_count = 0
+    skipped_count = 0
+
+    for txt_file in txt_files:
+        # Image file name corresponding to the prediction file (same stem assumed)
+        image_base_name = txt_file.stem
+
+        # Try to find the matching image file
+        image_path = None
+        img_width, img_height = None, None
+
+        if image_dir:
+            image_dir_path = Path(image_dir)
+            for ext in supported_extensions:
+                potential_image = image_dir_path / (image_base_name + ext)
+                if potential_image.exists():
+                    image_path = str(potential_image.resolve())
+                    try:
+                        with Image.open(potential_image) as img:
+                            img_width, img_height = img.size
+                    except IOError:
+                        print(f"Warning: cannot open image {potential_image}")
+                    break
+
+        # If no image was found, fall back to defaults
+        if image_path is None:
+            # Assume a default image path and size
+            image_path = f"unknown/{image_base_name}.jpg"
+            img_width, img_height = 640, 640  # default size
+            if image_dir:
+                skipped_count += 1
+
+        # Create the image annotation object
+        image_annotation = ImageAnnotation(
+            image_id=image_base_name + '.jpg',
+            path=image_path,
+            width=img_width,
+            height=img_height
+        )
+
+        # Read the prediction results
+        try:
+            with open(txt_file, 'r') as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+
+                    # Parse format: class_id,confidence,center_x,center_y,width,height
+                    parts = line.split(',')
+                    if len(parts) != 6:
+                        print(f"Warning: malformed line skipped: {txt_file} -> '{line}'")
+                        continue
+
+                    try:
+                        class_id = int(parts[0])
+                        confidence = float(parts[1])
+                        center_x = float(parts[2])
+                        center_y = float(parts[3])
+                        width = float(parts[4])
+                        height = float(parts[5])
+
+                        # Convert to [x_min, y_min, width, height] format
+                        x_min = center_x - width / 2
+                        y_min = center_y - height / 2
+
+                        annotation = Annotation(
+                            category_id=class_id,
+                            bbox=[x_min, y_min, width, height],
+                            confidence=confidence
+                        )
+                        image_annotation.annotations.append(annotation)
+
+                    except (ValueError, IndexError) as e:
+                        print(f"Warning: parse error, line skipped: {txt_file} -> '{line}' ({e})")
+                        continue
+
+        except Exception as e:
+            print(f"Warning: failed to read file, skipped: {txt_file} ({e})")
+            continue
+
+        dataset.images.append(image_annotation)
+        loaded_count += 1
+
+    print(f"Loading finished. Successfully loaded {loaded_count} prediction files")
+    if skipped_count > 0:
+        print(f"Warning: {skipped_count} files had no matching image; default size used")
+
+    total_detections = sum(len(img.annotations) for img in dataset.images)
+    print(f"Total detections: {total_detections}")
+
     return dataset
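For reference, a minimal call to the new loader might look as follows; the paths and the single-class mapping are hypothetical, and each txt file is expected to contain one `class_id,confidence,center_x,center_y,width,height` line per detection (pixel coordinates, box center plus size):

```python
from dataset_toolkit import load_predictions_from_streamlined

# Hypothetical layout: /results/predictions/img_001.txt, img_002.txt, ...
# each line e.g. "0,0.934679,354.00,388.00,274.00,102.00"
preds = load_predictions_from_streamlined(
    "/results/predictions",
    categories={0: "parcel"},
    image_dir="/data/test/images",  # optional; if omitted, sibling 'images' dirs are probed
)
print(len(preds.images), preds.dataset_type)  # e.g. "120 pred"
```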
{dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit/models.py

@@ -24,4 +24,6 @@ class Dataset:
     """Represents a complete dataset object, the system's standardized internal representation."""
     name: str
     images: List[ImageAnnotation] = field(default_factory=list)
-    categories: Dict[int, str] = field(default_factory=dict)
+    categories: Dict[int, str] = field(default_factory=dict)
+    dataset_type: str = "train"  # 'train', 'gt', 'pred'
+    metadata: Dict = field(default_factory=dict)  # descriptive information only; no processing parameters
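Because both new fields carry defaults, existing `Dataset(name=..., images=..., categories=...)` call sites keep working; `dataset_type` and `metadata` can also be set after construction, as the new example script does. A small sketch (values are illustrative):

```python
from dataset_toolkit.models import Dataset

ds = Dataset(name="test_val", categories={0: "parcel"})
ds.dataset_type = "gt"                       # defaults to "train"
ds.metadata = {"test_purpose": "positive"}   # descriptive info only
```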
dataset_toolkit-0.2.0/dataset_toolkit/processors/evaluator.py

@@ -0,0 +1,535 @@ (new file; all lines added, '+' markers omitted)

# dataset_toolkit/processors/evaluator.py
from typing import Dict, List, Optional, Tuple
from dataset_toolkit.models import Dataset, Annotation, ImageAnnotation


class Evaluator:
    """
    Evaluator: supports separate evaluation of a positive (detection) set and a negative (false-positive) set.

    Compares GT and Pred datasets and computes Precision, Recall, F1, FPPI, etc.
    Supports dynamic evaluation at different confidence thresholds without reloading the data.
    """

    def __init__(
        self,
        positive_gt: Dataset,
        positive_pred: Dataset,
        negative_gt: Optional[Dataset] = None,
        negative_pred: Optional[Dataset] = None,
        iou_threshold: float = 0.5
    ):
        """
        Initialize the evaluator.

        Args:
            positive_gt: positive-set GT (required) - test set containing target objects
            positive_pred: positive-set predictions (required) - predictions on the positive set
            negative_gt: negative-set GT (optional) - GT for background images without targets
            negative_pred: negative-set predictions (optional) - predictions on the negative set
            iou_threshold: IoU threshold for deciding whether a detection matches a GT box (default 0.5)
        """
        self.positive_gt = positive_gt
        self.positive_pred = positive_pred
        self.negative_gt = negative_gt
        self.negative_pred = negative_pred
        self.iou_threshold = iou_threshold

        # Validate the datasets
        self._validate_datasets()

    def _validate_datasets(self):
        """Validate the datasets."""
        if self.positive_gt is None or self.positive_pred is None:
            raise ValueError("Positive-set GT and Pred are required")

        if len(self.positive_gt.images) == 0:
            raise ValueError("Positive-set GT is empty")

        if len(self.positive_pred.images) == 0:
            raise ValueError("Positive-set Pred is empty")

        # Check that the category mappings match
        if self.positive_gt.categories != self.positive_pred.categories:
            print("Warning: GT and Pred category mappings differ")
            print(f"  GT categories: {self.positive_gt.categories}")
            print(f"  Pred categories: {self.positive_pred.categories}")

    def calculate_metrics(
        self,
        confidence_threshold: float = 0.5,
        class_id: Optional[int] = None,
        calculate_fppi: bool = True
    ) -> Dict:
        """
        Compute the full set of evaluation metrics.

        Args:
            confidence_threshold: confidence threshold (passed in dynamically, not stored in the dataset)
            class_id: category ID to evaluate; None means all categories
            calculate_fppi: whether to compute FPPI (requires negative_pred)

        Returns:
            A dict with all metrics:
            - tp, fp, fn: True/False Positives/Negatives
            - precision, recall, f1
            - fppi: False Positives Per Image (if computed)
            - confidence_threshold, iou_threshold: thresholds used
            - positive_set_size, negative_set_size: dataset sizes
        """
        metrics = {}

        # 1. Precision, Recall, F1 from the positive set
        positive_metrics = self._calculate_positive_metrics(
            confidence_threshold, class_id
        )
        metrics.update(positive_metrics)

        # 2. FPPI from the negative set
        if calculate_fppi and self.negative_pred is not None:
            fppi_metrics = self._calculate_fppi_metrics(
                confidence_threshold, class_id
            )
            metrics.update(fppi_metrics)
        else:
            metrics['fppi'] = None
            metrics['fppi_note'] = "no negative set provided" if self.negative_pred is None else "FPPI not computed"

        # 3. Add the configuration info
        metrics['confidence_threshold'] = confidence_threshold
        metrics['iou_threshold'] = self.iou_threshold
        metrics['positive_set_size'] = len(self.positive_gt.images)
        metrics['negative_set_size'] = (
            len(self.negative_pred.images)
            if self.negative_pred else 0
        )

        return metrics

    def _calculate_positive_metrics(
        self,
        confidence_threshold: float,
        class_id: Optional[int] = None
    ) -> Dict:
        """
        Compute Precision, Recall, F1 from the positive set.

        Args:
            confidence_threshold: confidence threshold
            class_id: category ID to evaluate

        Returns:
            A dict with TP, FP, FN, Precision, Recall, F1
        """
        # 1. Filter the predictions
        filtered_preds = self._filter_predictions(
            self.positive_pred,
            confidence_threshold,
            class_id
        )

        # 2. Match GT and Pred
        tp = 0  # True Positives
        fp = 0  # False Positives
        fn = 0  # False Negatives

        matched_gt = set()  # matched GT boxes, stored as (image_id, gt_index)

        # Iterate over the images
        for img_gt in self.positive_gt.images:
            # GT annotations for this image
            gt_anns = [
                ann for ann in img_gt.annotations
                if class_id is None or ann.category_id == class_id
            ]

            # Predictions for this image
            img_preds = filtered_preds.get(img_gt.image_id, [])

            # Match predictions against GT
            for pred in img_preds:
                best_iou = 0
                best_gt_idx = -1

                for i, gt_ann in enumerate(gt_anns):
                    if (img_gt.image_id, i) not in matched_gt:
                        iou = self._calculate_iou(pred.bbox, gt_ann.bbox)
                        if iou > best_iou:
                            best_iou = iou
                            best_gt_idx = i

                if best_iou >= self.iou_threshold:
                    tp += 1
                    matched_gt.add((img_gt.image_id, best_gt_idx))
                else:
                    fp += 1

            # Count unmatched GT boxes (False Negatives)
            fn += len([
                i for i in range(len(gt_anns))
                if (img_gt.image_id, i) not in matched_gt
            ])

        # 3. Compute the metrics
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1 = (2 * precision * recall / (precision + recall)
              if (precision + recall) > 0 else 0.0)

        return {
            'tp': tp,
            'fp': fp,
            'fn': fn,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'positive_set_note': f"based on {len(self.positive_gt.images)} positive-set images"
        }

    def _calculate_fppi_metrics(
        self,
        confidence_threshold: float,
        class_id: Optional[int] = None
    ) -> Dict:
        """
        Compute FPPI (False Positives Per Image) from the negative set.

        Args:
            confidence_threshold: confidence threshold
            class_id: category ID to evaluate

        Returns:
            A dict with the FPPI-related metrics
        """
        # 1. Filter the predictions
        filtered_preds = self._filter_predictions(
            self.negative_pred,
            confidence_threshold,
            class_id
        )

        # 2. Count the false positives
        total_fp = 0
        fp_per_image = []

        for img_pred in self.negative_pred.images:
            img_preds = filtered_preds.get(img_pred.image_id, [])

            # On the negative set every detection is a False Positive,
            # but if negative_gt is provided we can judge more precisely
            if self.negative_gt is not None:
                # Find the corresponding GT image
                gt_img = next(
                    (img for img in self.negative_gt.images
                     if img.image_id == img_pred.image_id),
                    None
                )

                if gt_img is not None:
                    # GT annotations
                    gt_anns = [
                        ann for ann in gt_img.annotations
                        if class_id is None or ann.category_id == class_id
                    ]

                    # Match predictions against GT; unmatched ones are FPs
                    matched = set()
                    fp_count = 0
                    for pred in img_preds:
                        is_matched = False
                        for i, gt_ann in enumerate(gt_anns):
                            if i not in matched:
                                iou = self._calculate_iou(pred.bbox, gt_ann.bbox)
                                if iou >= self.iou_threshold:
                                    matched.add(i)
                                    is_matched = True
                                    break
                        if not is_matched:
                            fp_count += 1

                    total_fp += fp_count
                    fp_per_image.append(fp_count)
                else:
                    # No corresponding GT: every detection is an FP
                    fp_count = len(img_preds)
                    total_fp += fp_count
                    fp_per_image.append(fp_count)
            else:
                # No negative_gt provided: assume every detection is an FP
                fp_count = len(img_preds)
                total_fp += fp_count
                fp_per_image.append(fp_count)

        # 3. Compute FPPI
        num_images = len(self.negative_pred.images)
        fppi = total_fp / num_images if num_images > 0 else 0.0

        return {
            'fppi': fppi,
            'total_false_positives': total_fp,
            'negative_set_size': num_images,
            'fppi_note': f"based on {num_images} negative-set images",
            'max_fp_per_image': max(fp_per_image) if fp_per_image else 0,
            'min_fp_per_image': min(fp_per_image) if fp_per_image else 0,
            'avg_fp_per_image': total_fp / num_images if num_images > 0 else 0.0
        }

    def _filter_predictions(
        self,
        pred_dataset: Dataset,
        confidence_threshold: float,
        class_id: Optional[int] = None
    ) -> Dict[str, List[Annotation]]:
        """
        Filter predictions by confidence threshold and category.

        Args:
            pred_dataset: prediction dataset
            confidence_threshold: confidence threshold
            class_id: category ID to keep

        Returns:
            A {image_id: [annotations]} dict
        """
        filtered = {}
        for img in pred_dataset.images:
            img_preds = [
                ann for ann in img.annotations
                if ann.confidence >= confidence_threshold
                and (class_id is None or ann.category_id == class_id)
            ]
            if img_preds:
                filtered[img.image_id] = img_preds
        return filtered

    def _calculate_iou(self, bbox1: List[float], bbox2: List[float]) -> float:
        """
        Compute the IoU (Intersection over Union) of two bounding boxes.

        Args:
            bbox1, bbox2: bounding boxes in [x_min, y_min, width, height] format

        Returns:
            IoU value (0.0 to 1.0)
        """
        # bbox format: [x_min, y_min, width, height]
        x1_min, y1_min, w1, h1 = bbox1
        x2_min, y2_min, w2, h2 = bbox2

        x1_max = x1_min + w1
        y1_max = y1_min + h1
        x2_max = x2_min + w2
        y2_max = y2_min + h2

        # Intersection
        inter_x_min = max(x1_min, x2_min)
        inter_y_min = max(y1_min, y2_min)
        inter_x_max = min(x1_max, x2_max)
        inter_y_max = min(y1_max, y2_max)

        if inter_x_max <= inter_x_min or inter_y_max <= inter_y_min:
            return 0.0

        inter_area = (inter_x_max - inter_x_min) * (inter_y_max - inter_y_min)

        # Union
        area1 = w1 * h1
        area2 = w2 * h2
        union_area = area1 + area2 - inter_area

        return inter_area / union_area if union_area > 0 else 0.0

    def calculate_pr_curve(
        self,
        thresholds: Optional[List[float]] = None,
        class_id: Optional[int] = None
    ) -> List[Dict]:
        """
        Compute the PR curve (Precision-Recall at different confidence thresholds).

        Args:
            thresholds: confidence thresholds to test; None uses the defaults
            class_id: category ID to evaluate

        Returns:
            A list of metrics, one entry per threshold
        """
        if thresholds is None:
            thresholds = [i/10 for i in range(1, 10)]  # 0.1 to 0.9

        pr_points = []
        for threshold in thresholds:
            # Same data, evaluated at a different threshold
            metrics = self.calculate_metrics(
                confidence_threshold=threshold,
                class_id=class_id,
                calculate_fppi=False  # the PR curve does not need FPPI
            )
            pr_points.append({
                'threshold': threshold,
                'precision': metrics['precision'],
                'recall': metrics['recall'],
                'f1': metrics['f1'],
                'tp': metrics['tp'],
                'fp': metrics['fp'],
                'fn': metrics['fn']
            })

        return pr_points

    def find_optimal_threshold(
        self,
        metric: str = 'f1',
        class_id: Optional[int] = None,
        thresholds: Optional[List[float]] = None
    ) -> Dict:
        """
        Find the confidence threshold that maximizes the given metric.

        Args:
            metric: metric to optimize ('precision', 'recall', 'f1')
            class_id: category ID to evaluate
            thresholds: thresholds to test

        Returns:
            The optimal threshold and all metrics at that threshold
        """
        if metric not in ['precision', 'recall', 'f1']:
            raise ValueError(f"Unsupported metric: {metric}")

        pr_curve = self.calculate_pr_curve(thresholds, class_id)

        # Find the point that maximizes the given metric
        best_point = max(pr_curve, key=lambda x: x[metric])

        return {
            'optimal_threshold': best_point['threshold'],
            'optimized_metric': metric,
            'metrics': best_point
        }

    def find_threshold_with_constraint(
        self,
        target_metric: str,
        constraint_metric: str,
        constraint_value: float,
        class_id: Optional[int] = None,
        thresholds: Optional[List[float]] = None
    ) -> Optional[Dict]:
        """
        Find the optimal threshold under a constraint.

        For example: the threshold with the highest Recall subject to FPPI < 0.01.

        Args:
            target_metric: metric to optimize ('precision', 'recall', 'f1')
            constraint_metric: constraint metric ('fppi', 'precision', 'recall')
            constraint_value: constraint value (e.g. fppi < 0.01)
            class_id: category ID to evaluate
            thresholds: thresholds to test

        Returns:
            The optimal threshold and its metrics, or None if no threshold satisfies the constraint
        """
        if thresholds is None:
            thresholds = [i/100 for i in range(1, 100)]  # 0.01 to 0.99

        best_threshold = None
        best_value = 0
        best_metrics = None

        for threshold in thresholds:
            metrics = self.calculate_metrics(
                confidence_threshold=threshold,
                class_id=class_id,
                calculate_fppi=(constraint_metric == 'fppi')
            )

            # Check the constraint
            constraint_satisfied = False
            if constraint_metric == 'fppi':
                if metrics['fppi'] is not None and metrics['fppi'] <= constraint_value:
                    constraint_satisfied = True
            elif constraint_metric in metrics:
                if metrics[constraint_metric] >= constraint_value:
                    constraint_satisfied = True

            # If the constraint holds, check whether this is the best so far
            if constraint_satisfied:
                if metrics[target_metric] > best_value:
                    best_value = metrics[target_metric]
                    best_threshold = threshold
                    best_metrics = metrics

        if best_threshold is None:
            return None

        return {
            'optimal_threshold': best_threshold,
            'target_metric': target_metric,
            'target_value': best_value,
            'constraint': f"{constraint_metric} <= {constraint_value}",
            'metrics': best_metrics
        }

    def generate_report(
        self,
        confidence_threshold: float = 0.5,
        class_id: Optional[int] = None
    ) -> str:
        """
        Generate an evaluation report.

        Args:
            confidence_threshold: confidence threshold
            class_id: category ID to evaluate

        Returns:
            A formatted evaluation report string
        """
        metrics = self.calculate_metrics(confidence_threshold, class_id)

        class_name = "all categories"
        if class_id is not None and class_id in self.positive_gt.categories:
            class_name = f"category {class_id} ({self.positive_gt.categories[class_id]})"

        report = f"""
{'='*60}
Evaluation report - {class_name}
{'='*60}

Configuration:
  Confidence threshold: {metrics['confidence_threshold']}
  IoU threshold: {metrics['iou_threshold']}
  Positive set size: {metrics['positive_set_size']} images
  Negative set size: {metrics['negative_set_size']} images

Positive-set metrics (Precision & Recall):
  True Positives (TP): {metrics['tp']}
  False Positives (FP): {metrics['fp']}
  False Negatives (FN): {metrics['fn']}

  Precision: {metrics['precision']:.4f} ({metrics['precision']*100:.2f}%)
  Recall: {metrics['recall']:.4f} ({metrics['recall']*100:.2f}%)
  F1-Score: {metrics['f1']:.4f}
"""

        if metrics['fppi'] is not None:
            report += f"""
Negative-set metrics (FPPI):
  FPPI (False Positives Per Image): {metrics['fppi']:.6f}
  Total false positives: {metrics.get('total_false_positives', 'N/A')}
  Average FPs per image: {metrics.get('avg_fp_per_image', 'N/A'):.2f}
  Max FPs in a single image: {metrics.get('max_fp_per_image', 'N/A')}
  Min FPs in a single image: {metrics.get('min_fp_per_image', 'N/A')}
"""
        else:
            report += f"""
Negative-set metrics (FPPI):
  {metrics['fppi_note']}
"""

        report += f"\n{'='*60}\n"

        return report
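A synthetic smoke test helps pin down the matching semantics: with one GT box and one prediction shifted 10 px to the right, the intersection is 40×50 = 2000 and the union 2500 + 2500 - 2000 = 3000, so IoU ≈ 0.67, above the 0.5 threshold, and the pair counts as a single TP. The sketch below assumes the `ImageAnnotation`/`Annotation` constructors used by `local_loader.py` above; `confidence` is passed explicitly for the GT box since the full `Annotation` signature is not shown in the diff:

```python
from dataset_toolkit.models import Dataset, ImageAnnotation, Annotation
from dataset_toolkit.processors.evaluator import Evaluator

gt_img = ImageAnnotation(image_id="a.jpg", path="a.jpg", width=640, height=640)
gt_img.annotations.append(Annotation(category_id=0, bbox=[100, 100, 50, 50], confidence=1.0))

pred_img = ImageAnnotation(image_id="a.jpg", path="a.jpg", width=640, height=640)
# Shifted 10 px right: IoU = 2000/3000 ≈ 0.67 >= 0.5
pred_img.annotations.append(Annotation(category_id=0, bbox=[110, 100, 50, 50], confidence=0.9))

gt = Dataset(name="gt", images=[gt_img], categories={0: "parcel"}, dataset_type="gt")
pred = Dataset(name="pred", images=[pred_img], categories={0: "parcel"}, dataset_type="pred")

m = Evaluator(gt, pred, iou_threshold=0.5).calculate_metrics(confidence_threshold=0.5)
assert (m["tp"], m["fp"], m["fn"]) == (1, 0, 0)
print(m["precision"], m["recall"], m["f1"])  # 1.0 1.0 1.0
```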
{dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0/dataset_toolkit.egg-info}/PKG-INFO

Identical to the PKG-INFO diff above (+39 -1): the version bump to 0.2.0, the new model-evaluation feature bullet, and the "Model evaluation (v0.2.0+)" quick-start section.
{dataset_toolkit-0.1.2 → dataset_toolkit-0.2.0}/dataset_toolkit.egg-info/SOURCES.txt

@@ -19,10 +19,12 @@ dataset_toolkit/exporters/yolo_exporter.py
 dataset_toolkit/loaders/__init__.py
 dataset_toolkit/loaders/local_loader.py
 dataset_toolkit/processors/__init__.py
+dataset_toolkit/processors/evaluator.py
 dataset_toolkit/processors/merger.py
 dataset_toolkit/utils/__init__.py
 dataset_toolkit/utils/coords.py
 examples/basic_usage.py
+examples/evaluation_example.py
 tests/__init__.py
 tests/conftest.py
 tests/test_exporters.py
dataset_toolkit-0.2.0/examples/evaluation_example.py

@@ -0,0 +1,250 @@ (new file; all lines added, '+' markers omitted)

"""
Evaluation system usage example.

Demonstrates how to use dataset_toolkit for model evaluation:
1. Load the GT datasets (positive set and negative set)
2. Load the prediction results
3. Compute evaluation metrics (Precision, Recall, F1, FPPI)
4. Test different confidence thresholds
5. Find the optimal threshold
"""

import sys
from pathlib import Path

# Add the project path
sys.path.insert(0, str(Path(__file__).parent.parent))

from dataset_toolkit import (
    load_yolo_from_local,
    load_predictions_from_streamlined,
    Evaluator
)


def main():
    print("="*60)
    print("Evaluation system usage example")
    print("="*60)

    # ============================================================
    # 1. Configure the paths (adjust to your setup)
    # ============================================================

    # Positive-set paths
    positive_gt_path = "/opt/dlami/nvme/workspace_wenxiang/parcel/test_val/labels"
    positive_pred_path = "/opt/dlami/nvme/workspace_wenxiang/ai_train/onnx_infer/detections/results/streamlined_test"
    positive_image_path = "/opt/dlami/nvme/workspace_wenxiang/parcel/test_val/images"

    # Negative-set paths (optional)
    negative_pred_path = None  # set the path if a negative set is available
    negative_image_path = None

    # Category mapping
    categories = {0: 'parcel'}

    # ============================================================
    # 2. Load the datasets
    # ============================================================

    print("\nStep 1: loading datasets...")
    print("-" * 60)

    # Load the positive-set GT
    print("\nLoading positive-set GT...")
    gt_positive = load_yolo_from_local(
        positive_gt_path,
        categories=categories
    )
    gt_positive.dataset_type = "gt"
    gt_positive.metadata = {
        "test_purpose": "positive",
        "description": "test set containing target objects"
    }
    print(f"✓ Positive-set GT: {len(gt_positive.images)} images")

    # Load the positive-set predictions
    print("\nLoading positive-set predictions...")
    pred_positive = load_predictions_from_streamlined(
        positive_pred_path,
        categories=categories,
        image_dir=positive_image_path
    )
    pred_positive.dataset_type = "pred"
    pred_positive.metadata = {
        "test_purpose": "positive",
        "model_name": "yolov8_parcel"
    }
    print(f"✓ Positive-set Pred: {len(pred_positive.images)} images")

    # Load the negative-set predictions (if any)
    pred_negative = None
    if negative_pred_path:
        print("\nLoading negative-set predictions...")
        pred_negative = load_predictions_from_streamlined(
            negative_pred_path,
            categories=categories,
            image_dir=negative_image_path
        )
        pred_negative.dataset_type = "pred"
        pred_negative.metadata = {
            "test_purpose": "negative",
            "model_name": "yolov8_parcel"
        }
        print(f"✓ Negative-set Pred: {len(pred_negative.images)} images")
    else:
        print("\nNo negative set provided; only Precision/Recall/F1 will be computed")

    # ============================================================
    # 3. Create the evaluator
    # ============================================================

    print("\nStep 2: creating the evaluator...")
    print("-" * 60)

    evaluator = Evaluator(
        positive_gt=gt_positive,
        positive_pred=pred_positive,
        negative_pred=pred_negative,
        iou_threshold=0.5
    )
    print("✓ Evaluator created")

    # ============================================================
    # 4. Compute metrics at a single threshold
    # ============================================================

    print("\nStep 3: computing metrics (confidence threshold = 0.5)...")
    print("-" * 60)

    metrics = evaluator.calculate_metrics(
        confidence_threshold=0.5,
        class_id=0  # evaluate the parcel category only
    )

    print(f"\nPositive-set metrics:")
    print(f"  TP: {metrics['tp']}, FP: {metrics['fp']}, FN: {metrics['fn']}")
    print(f"  Precision: {metrics['precision']:.4f} ({metrics['precision']*100:.2f}%)")
    print(f"  Recall: {metrics['recall']:.4f} ({metrics['recall']*100:.2f}%)")
    print(f"  F1-Score: {metrics['f1']:.4f}")

    if metrics['fppi'] is not None:
        print(f"\nNegative-set metrics:")
        print(f"  FPPI: {metrics['fppi']:.6f}")
        print(f"  Total false positives: {metrics['total_false_positives']}")

    # ============================================================
    # 5. Test multiple thresholds
    # ============================================================

    print("\nStep 4: testing multiple confidence thresholds...")
    print("-" * 60)

    test_thresholds = [0.3, 0.4, 0.5, 0.6, 0.7]

    print(f"\n{'Threshold':<10} {'Precision':<12} {'Recall':<12} {'F1':<12} {'FPPI':<12}")
    print("-" * 60)

    for threshold in test_thresholds:
        m = evaluator.calculate_metrics(
            confidence_threshold=threshold,
            class_id=0
        )
        fppi_str = f"{m['fppi']:.6f}" if m['fppi'] is not None else "N/A"
        print(f"{threshold:<10.2f} {m['precision']:<12.4f} {m['recall']:<12.4f} "
              f"{m['f1']:<12.4f} {fppi_str:<12}")

    # ============================================================
    # 6. Find the optimal threshold
    # ============================================================

    print("\nStep 5: finding the optimal threshold...")
    print("-" * 60)

    # Threshold with the highest F1
    optimal = evaluator.find_optimal_threshold(
        metric='f1',
        class_id=0
    )

    print(f"\nOptimal threshold (max F1):")
    print(f"  Threshold: {optimal['optimal_threshold']}")
    print(f"  Precision: {optimal['metrics']['precision']:.4f}")
    print(f"  Recall: {optimal['metrics']['recall']:.4f}")
    print(f"  F1-Score: {optimal['metrics']['f1']:.4f}")

    # With a negative set, find the optimal threshold under an FPPI constraint
    if pred_negative:
        constrained = evaluator.find_threshold_with_constraint(
            target_metric='recall',
            constraint_metric='fppi',
            constraint_value=0.01,  # FPPI < 0.01
            class_id=0
        )

        if constrained:
            print(f"\nOptimal threshold (max Recall subject to FPPI < 0.01):")
            print(f"  Threshold: {constrained['optimal_threshold']}")
            print(f"  Recall: {constrained['target_value']:.4f}")
            print(f"  FPPI: {constrained['metrics']['fppi']:.6f}")
        else:
            print(f"\nWarning: no threshold satisfies the FPPI < 0.01 constraint")

    # ============================================================
    # 7. Generate the full report
    # ============================================================

    print("\nStep 6: generating the full evaluation report...")
    print("-" * 60)

    report = evaluator.generate_report(
        confidence_threshold=0.5,
        class_id=0
    )
    print(report)

    # ============================================================
    # 8. Compute PR-curve data (usable for plotting)
    # ============================================================

    print("\nStep 7: computing PR-curve data...")
    print("-" * 60)

    pr_curve = evaluator.calculate_pr_curve(
        thresholds=[i/10 for i in range(1, 10)],
        class_id=0
    )

    print(f"\nPR-curve data points: {len(pr_curve)}")
    print(f"{'Threshold':<10} {'Precision':<12} {'Recall':<12}")
    print("-" * 40)
    for point in pr_curve[:5]:  # show the first 5 only
        print(f"{point['threshold']:<10.2f} {point['precision']:<12.4f} {point['recall']:<12.4f}")
    print("...")

    print("\n" + "="*60)
    print("Evaluation finished!")
    print("="*60)

    # The PR-curve data can be saved or plotted:
    # import matplotlib.pyplot as plt
    # precisions = [p['precision'] for p in pr_curve]
    # recalls = [p['recall'] for p in pr_curve]
    # plt.plot(recalls, precisions)
    # plt.xlabel('Recall')
    # plt.ylabel('Precision')
    # plt.title('PR Curve')
    # plt.savefig('pr_curve.png')


if __name__ == '__main__':
    try:
        main()
    except FileNotFoundError as e:
        print(f"\nError: {e}")
        print("\nPlease adjust the path configuration in this script to point at your datasets.")
    except Exception as e:
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
All remaining files listed above are renamed from dataset_toolkit-0.1.2 to dataset_toolkit-0.2.0 without content changes.