intellif-aihub 0.1.19__tar.gz → 0.1.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of intellif-aihub might be problematic. Click here for more details.
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/PKG-INFO +1 -1
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/pyproject.toml +1 -1
- intellif_aihub-0.1.21/src/aihub/__init__.py +1 -0
- intellif_aihub-0.1.21/src/aihub/models/eval.py +81 -0
- intellif_aihub-0.1.21/src/aihub/services/eval.py +172 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/PKG-INFO +1 -1
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/SOURCES.txt +1 -0
- intellif_aihub-0.1.21/tests/test_eval.py +520 -0
- intellif_aihub-0.1.19/src/aihub/__init__.py +0 -1
- intellif_aihub-0.1.19/src/aihub/models/eval.py +0 -26
- intellif_aihub-0.1.19/src/aihub/services/eval.py +0 -75
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/LICENSE +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/README.md +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/setup.cfg +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/client.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/exceptions.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/__init__.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/artifact.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/common.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/data_warehouse.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/dataset_management.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/document_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/labelfree.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/model_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/model_training_platform.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/quota_schedule_management.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/tag_resource_management.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/task_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/user_system.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/workflow_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/__init__.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/artifact.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/data_warehouse.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/dataset_management.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/document_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/labelfree.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/model_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/model_training_platform.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/quota_schedule_management.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/reporter.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/tag_resource_management.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/task_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/user_system.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/workflow_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/__init__.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/di.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/download.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/http.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/s3.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/dependency_links.txt +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/requires.txt +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/top_level.txt +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_artifact.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_data_warehouse.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_dataset_management.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_di.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_document_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_labelfree.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_model_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_model_training_platform.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_quota_schedule_management.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_s3.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_tag_resource_management.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_task_center.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_user_system.py +0 -0
- {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_workflow_center.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.21"
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*-coding:utf-8 -*-
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseEvalReq(BaseModel):
    """Fields shared by every "create evaluation" request."""

    # Identifier of the run the evaluation belongs to (required).
    run_id: str = Field(..., description="运行ID")
    # Evaluation kind (required here; the description names 'llm' and 'cv').
    type: str = Field(..., description="评测类型,支持 'llm' 和 'cv'")
    # Location of the inference output artifact (required).
    prediction_artifact_path: str = Field(..., description="推理产物的路径")
    # Requesting user; falls back to 0 when omitted.
    user_id: int = Field(default=0, description="用户ID,默认0")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CreateLLMEvalReq(BaseEvalReq):
    """Request body for creating an LLM-type evaluation."""

    # Overrides the base field so the kind defaults to "llm".
    type: str = Field("llm", description="评测类型,固定为 'llm'")
    # Dataset the evaluation was run against (required).
    dataset_id: int = Field(..., description="数据集ID")
    # Specific version of that dataset (required).
    dataset_version_id: int = Field(..., description="数据集版本ID")
    # Location of the evaluation result artifact (required).
    evaled_artifact_path: str = Field(..., description="评测结果产物的路径")
    # Free-form evaluation report content (required).
    report: Dict = Field(..., description="评测报告")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CreateCVEvalReq(BaseEvalReq):
    """Request body for creating a CV-type evaluation."""

    # Overrides the base field so the kind defaults to "cv".
    type: str = Field("cv", description="评测类型,固定为 'cv'")
    # Location of the metrics artifact (required).
    metrics_artifact_path: str = Field(..., description="指标产物的路径")
    # Location of the ground-truth artifact (required).
    ground_truth_artifact_path: str = Field(..., description="真实标签产物的路径")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class EvalRun(BaseModel):
    """Evaluation run entity as parsed from backend responses."""

    # "model_id" and "model_name" start with "model_", which pydantic v2
    # releases whose default protected namespace is "model_" report as a naming
    # conflict when the class is defined. Clearing the protected namespaces
    # silences that warning without changing validation or serialization.
    model_config = {"protected_namespaces": ()}

    # Identity and ownership of the evaluation run.
    id: int = Field(description="评测的运行ID")
    name: str = Field(description="评测名称")
    description: str = Field(description="评测描述")
    user_id: int = Field(description="用户ID")

    # Model under evaluation.
    model_id: int = Field(description="模型ID")
    model_name: str = Field(description="模型名称")

    # Dataset (and version) the evaluation used.
    dataset_id: int = Field(description="数据集ID")
    dataset_version_id: int = Field(description="数据集版本ID")
    dataset_name: str = Field(description="数据集名称")

    # Status string and artifact locations.
    status: str = Field(description="状态")
    prediction_artifact_path: str = Field(description="推理产物路径")
    evaled_artifact_path: str = Field(description="评测结果产物路径")
    run_id: str = Field(description="运行ID")

    # Summaries default to an empty dict when the backend omits them.
    dataset_summary: Dict = Field(default_factory=dict, description="数据集摘要")
    metrics_summary: Dict = Field(default_factory=dict, description="指标摘要")
    viz_summary: Dict = Field(default_factory=dict, description="可视化摘要")
    # Optional evaluation configuration; None when absent.
    eval_config: Optional[Dict] = Field(default=None, description="评测配置")

    # Creation / update times as integers (unit not visible here; the tests use
    # epoch-second-like values — confirm against the backend).
    created_at: int = Field(description="创建时间")
    updated_at: int = Field(description="更新时间")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class CreateEvalResp(BaseModel):
    """Payload returned by the backend after an evaluation is created."""

    # The created run; the alias is the same as the attribute name.
    eval_run: EvalRun = Field(..., alias="eval_run", description="评测运行信息")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class ListEvalReq(BaseModel):
    """Query parameters for listing evaluation runs.

    Every filter is optional and defaults to ``None``.
    """

    # "model_id" and "model_ids" start with "model_", which pydantic v2
    # releases whose default protected namespace is "model_" report as a naming
    # conflict when the class is defined. Clearing the protected namespaces
    # silences that warning without changing validation or serialization.
    model_config = {"protected_namespaces": ()}

    # Paging.
    page_size: int = Field(20, description="页面大小")
    page_num: int = Field(1, description="页码")

    # Single-value filters.
    status: Optional[str] = Field(None, description="状态过滤")
    name: Optional[str] = Field(None, description="名称过滤")
    model_id: Optional[int] = Field(None, description="模型ID过滤")
    dataset_id: Optional[int] = Field(None, description="数据集ID过滤")
    dataset_version_id: Optional[int] = Field(None, description="数据集版本ID过滤")
    run_id: Optional[str] = Field(None, description="运行ID过滤")
    user_id: Optional[int] = Field(None, description="用户ID过滤")

    # Multi-value filters, carried as a single string.
    model_ids: Optional[str] = Field(None, description="模型ID列表过滤")
    dataset_ids: Optional[str] = Field(None, description="数据集ID列表过滤")
    dataset_version_ids: Optional[str] = Field(None, description="数据集版本ID列表过滤")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ListEvalResp(BaseModel):
    """One page of evaluation runs returned by the list endpoint."""

    # Total count reported by the backend.
    total: int = Field(..., description="总数")
    # Paging values echoed in the response.
    page_size: int = Field(..., description="页面大小")
    page_num: int = Field(..., description="页码")
    # Runs contained in this page.
    data: List[EvalRun] = Field(..., description="评测运行列表")
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding:utf-8 -*-
|
|
3
|
+
"""评测平台服务模块
|
|
4
|
+
|
|
5
|
+
本模块围绕 **“模型评测(Run → Report)”** 提供能力:
|
|
6
|
+
|
|
7
|
+
- **创建评测任务 / 评测报告**
|
|
8
|
+
- **获取评测任务列表**
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import httpx
|
|
12
|
+
|
|
13
|
+
from ..exceptions import APIError
|
|
14
|
+
from ..models.common import APIWrapper
|
|
15
|
+
from ..models.eval import CreateLLMEvalReq, CreateCVEvalReq, CreateEvalResp, ListEvalReq, ListEvalResp
|
|
16
|
+
|
|
17
|
+
_BASE = "/eval-platform/api/v1"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class EvalService:
    """Evaluation service: create evaluation runs and list existing ones."""

    def __init__(self, http: httpx.Client):
        """Store the HTTP client and build the low-level endpoint wrapper.

        Args:
            http: Client used for every request made by this service; it is
                also passed to the dataset service inside ``create``.
        """
        self._http = http
        self._eval = _Eval(http)

    def create(
        self,
        dataset_version_name: str,
        prediction_artifact_path: str,
        evaled_artifact_path: str,
        report_json: dict,
        run_id: str,
        user_id: int = 0,
    ) -> int:
        """Create an LLM-type evaluation run (evaluation report).

        The dataset version is first looked up by name so that its dataset ID
        and version ID can be sent with the request.

        Args:
            dataset_version_name: Name of the dataset version to resolve.
            prediction_artifact_path: Path of the inference result artifact.
            evaled_artifact_path: Path of the evaluation result artifact.
            report_json: Report content.
            run_id: Run ID.
            user_id: User ID, defaults to 0.

        Returns:
            ID of the created evaluation run.
        """
        # Resolved at call time rather than at module import; the unit tests
        # patch the class on its defining module and rely on this.
        from .dataset_management import DatasetManagementService

        dataset_service = DatasetManagementService(self._http)
        dataset_version = dataset_service.get_dataset_version_by_name(
            dataset_version_name
        )
        payload = CreateLLMEvalReq(
            dataset_id=dataset_version.dataset_id,
            dataset_version_id=dataset_version.id,
            evaled_artifact_path=evaled_artifact_path,
            prediction_artifact_path=prediction_artifact_path,
            report=report_json,
            run_id=run_id,
            user_id=user_id,
        )

        return self._eval.create(payload)

    def create_cv_run(
        self,
        run_id: str,
        prediction_artifact_path: str,
        metrics_artifact_path: str,
        ground_truth_artifact_path: str,
        user_id: int = 0,
    ) -> int:
        """Create a CV-type evaluation run.

        Args:
            run_id: Run ID.
            prediction_artifact_path: Path of the inference output artifact.
            metrics_artifact_path: Path of the metrics artifact.
            ground_truth_artifact_path: Path of the ground-truth artifact.
            user_id: User ID, defaults to 0.

        Returns:
            ID of the created evaluation run.
        """
        payload = CreateCVEvalReq(
            run_id=run_id,
            prediction_artifact_path=prediction_artifact_path,
            metrics_artifact_path=metrics_artifact_path,
            ground_truth_artifact_path=ground_truth_artifact_path,
            user_id=user_id,
        )

        return self._eval.create(payload)

    def list(
        self,
        page_size: int = 20,
        page_num: int = 1,
        status: str = None,
        name: str = None,
        model_id: int = None,
        dataset_id: int = None,
        dataset_version_id: int = None,
        run_id: str = None,
        user_id: int = None,
        model_ids: str = None,
        dataset_ids: str = None,
        dataset_version_ids: str = None,
    ) -> ListEvalResp:
        """List evaluation runs.

        A filter left as ``None`` is not applied.

        Args:
            page_size: Page size, defaults to 20.
            page_num: Page number, defaults to 1.
            status: Status filter.
            name: Name filter.
            model_id: Model ID filter.
            dataset_id: Dataset ID filter.
            dataset_version_id: Dataset version ID filter.
            run_id: Run ID filter.
            user_id: User ID filter.
            model_ids: Model ID list filter (comma separated).
            dataset_ids: Dataset ID list filter (comma separated).
            dataset_version_ids: Dataset version ID list filter (comma
                separated).

        Returns:
            The page of evaluation runs returned by the backend.
        """
        payload = ListEvalReq(
            page_size=page_size,
            page_num=page_num,
            status=status,
            name=name,
            model_id=model_id,
            dataset_id=dataset_id,
            dataset_version_id=dataset_version_id,
            run_id=run_id,
            user_id=user_id,
            model_ids=model_ids,
            dataset_ids=dataset_ids,
            dataset_version_ids=dataset_version_ids,
        )

        return self._eval.list(payload)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class _Eval:
    """Low-level wrapper around the evaluation platform ``/run/`` endpoint."""

    def __init__(self, http: httpx.Client):
        """Remember the HTTP client used for all requests."""
        self._http = http

    def create(self, payload) -> int:
        """POST a create request and return the ID of the created run.

        Args:
            payload: Request model; its ``model_dump()`` is sent as JSON.

        Returns:
            ``id`` of the ``eval_run`` contained in the response.

        Raises:
            APIError: If the response envelope carries a non-zero code.
        """
        body = payload.model_dump()
        response = self._http.post(f"{_BASE}/run/", json=body)
        envelope = APIWrapper[CreateEvalResp].model_validate(response.json())
        if envelope.code == 0:
            return envelope.data.eval_run.id
        raise APIError(f"backend code {envelope.code}: {envelope.msg}")

    def list(self, payload: ListEvalReq) -> ListEvalResp:
        """GET the run list, sending only the parameters that are set.

        Args:
            payload: Paging values and filters; entries equal to ``None`` are
                left out of the query string.

        Returns:
            The ``data`` part of the response envelope.

        Raises:
            APIError: If the response envelope carries a non-zero code.
        """
        # Drop unset filters so they never reach the query string.
        query = {
            key: val for key, val in payload.model_dump().items() if val is not None
        }

        response = self._http.get(f"{_BASE}/run/", params=query)
        envelope = APIWrapper[ListEvalResp].model_validate(response.json())
        if envelope.code == 0:
            return envelope.data
        raise APIError(f"backend code {envelope.code}: {envelope.msg}")
|
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*-coding:utf-8 -*-
|
|
3
|
+
import sys
|
|
4
|
+
import os
|
|
5
|
+
import unittest
|
|
6
|
+
import uuid
|
|
7
|
+
from unittest.mock import Mock, patch
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
|
|
11
|
+
from aihub.services.eval import EvalService
|
|
12
|
+
from aihub.models.eval import ListEvalResp, EvalRun, CreateEvalResp
|
|
13
|
+
from aihub.models.common import APIWrapper
|
|
14
|
+
|
|
15
|
+
# Connection settings for optional runs against a live server. They are read
# from the environment so that no host address or bearer token is committed
# with the test suite; the tests visible in this module run against a mocked
# HTTP client and do not read them.
BASE_URL = os.environ.get("AIHUB_BASE_URL", "")
TOKEN = os.environ.get("AIHUB_TOKEN", "")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestEvalService(unittest.TestCase):
|
|
20
|
+
|
|
21
|
+
def setUp(self):
    """Create an ``EvalService`` that talks to a mocked ``httpx.Client``."""
    mocked_client = Mock(spec=httpx.Client)
    self.http_client = mocked_client
    self.eval_service = EvalService(mocked_client)
|
|
24
|
+
|
|
25
|
+
def test_list_eval_runs_default(self):
    """List with no arguments: the run is parsed and only paging is sent."""
    run_record = {
        "id": 1,
        "name": "test_eval",
        "description": "Test evaluation",
        "user_id": 1,
        "model_id": 1,
        "model_name": "test_model",
        "dataset_id": 1,
        "dataset_version_id": 1,
        "dataset_name": "test_dataset",
        "status": "completed",
        "prediction_artifact_path": "/path/to/prediction",
        "evaled_artifact_path": "/path/to/eval",
        "run_id": "test_run_123",
        "dataset_summary": {},
        "metrics_summary": {"accuracy": 0.95},
        "viz_summary": {},
        "eval_config": {"metric": "accuracy"},
        "created_at": 1640995200,
        "updated_at": 1640995200,
    }
    page = {"total": 1, "page_size": 20, "page_num": 1, "data": [run_record]}
    envelope = {"code": 0, "msg": None, "data": page}

    # The mocked client answers GET with a response whose json() is the envelope.
    self.http_client.get.return_value = Mock(**{"json.return_value": envelope})

    listing = self.eval_service.list()

    self.assertIsInstance(listing, ListEvalResp)
    self.assertEqual(listing.total, 1)
    self.assertEqual(listing.page_size, 20)
    self.assertEqual(listing.page_num, 1)
    self.assertEqual(len(listing.data), 1)
    first_run = listing.data[0]
    self.assertEqual(first_run.id, 1)
    self.assertEqual(first_run.name, "test_eval")

    # Unset filters must not appear in the query parameters.
    self.http_client.get.assert_called_once_with(
        "/eval-platform/api/v1/run/",
        params={"page_size": 20, "page_num": 1},
    )
|
|
77
|
+
|
|
78
|
+
def test_list_eval_runs_with_filters(self):
    """List with every filter set: all of them are forwarded as parameters."""
    empty_page = {"total": 0, "page_size": 10, "page_num": 1, "data": []}
    self.http_client.get.return_value = Mock(
        **{"json.return_value": {"code": 0, "msg": None, "data": empty_page}}
    )

    # Every paging value and filter supported by list().
    filters = {
        "page_size": 10,
        "page_num": 1,
        "status": "completed",
        "name": "test",
        "model_id": 1,
        "dataset_id": 2,
        "dataset_version_id": 3,
        "run_id": "test_run",
        "user_id": 1,
        "model_ids": "1,2,3",
        "dataset_ids": "2,3,4",
        "dataset_version_ids": "3,4,5",
    }
    wanted_params = dict(filters)

    listing = self.eval_service.list(**filters)

    self.assertIsInstance(listing, ListEvalResp)
    self.assertEqual(listing.total, 0)
    self.assertEqual(len(listing.data), 0)

    self.http_client.get.assert_called_once_with(
        "/eval-platform/api/v1/run/",
        params=wanted_params,
    )
|
|
132
|
+
|
|
133
|
+
def test_list_eval_runs_api_error(self):
    """List evaluation runs: a non-zero backend code raises ``APIError``."""
    # Imported locally so this test does not depend on module-level imports.
    from aihub.exceptions import APIError

    # Simulated backend failure envelope.
    mock_response = {
        "code": 1001,
        "msg": "Database connection failed",
        "data": None
    }

    mock_resp = Mock()
    mock_resp.json.return_value = mock_response
    self.http_client.get.return_value = mock_resp

    # Assert the specific exception type: a bare ``Exception`` would also
    # accept unrelated failures that happen to contain the same text.
    with self.assertRaises(APIError) as context:
        self.eval_service.list()

    self.assertIn("backend code 1001", str(context.exception))
    self.assertIn("Database connection failed", str(context.exception))
|
|
151
|
+
|
|
152
|
+
def test_list_eval_runs_only_specified_filters(self):
    """List with two filters: only those plus default paging are sent."""
    empty_page = {"total": 0, "page_size": 20, "page_num": 1, "data": []}
    self.http_client.get.return_value = Mock(
        **{"json.return_value": {"code": 0, "msg": None, "data": empty_page}}
    )

    result = self.eval_service.list(model_id=1, status="completed")

    # Filters left at None must be absent from the query parameters.
    self.http_client.get.assert_called_once_with(
        "/eval-platform/api/v1/run/",
        params={
            "page_size": 20,
            "page_num": 1,
            "status": "completed",
            "model_id": 1,
        },
    )
|
|
183
|
+
|
|
184
|
+
@patch('aihub.services.dataset_management.DatasetManagementService')
def test_create_eval_llm_default_user_id(self, mock_dataset_service_class):
    """Create an LLM evaluation without ``user_id``; the payload carries 0."""
    # Stub the dataset-version lookup: dataset 1, version 1.
    version_stub = Mock(dataset_id=1, id=1)
    mock_dataset_service_class.return_value = Mock(
        **{"get_dataset_version_by_name.return_value": version_stub}
    )

    # Successful create response returned by the mocked client.
    created_run = {
        "id": 123,
        "name": "test_eval",
        "description": "Test evaluation",
        "user_id": 0,
        "model_id": 1,
        "model_name": "test_model",
        "dataset_id": 1,
        "dataset_version_id": 1,
        "dataset_name": "test_dataset",
        "status": "created",
        "prediction_artifact_path": "/path/to/prediction.json",
        "evaled_artifact_path": "/path/to/evaled.json",
        "run_id": "test_run_123",
        "dataset_summary": {},
        "metrics_summary": {},
        "viz_summary": {},
        "eval_config": None,
        "created_at": 1640995200,
        "updated_at": 1640995200,
    }
    envelope = {"code": 0, "msg": None, "data": {"eval_run": created_run}}
    self.http_client.post.return_value = Mock(**{"json.return_value": envelope})

    # user_id is deliberately not passed so the default applies.
    new_id = self.eval_service.create(
        run_id="test_run_123",
        report_json={"accuracy": 0.95},
        evaled_artifact_path="/path/to/evaled.json",
        prediction_artifact_path="/path/to/prediction.json",
        dataset_version_name="test_dataset_v1",
    )

    self.assertEqual(new_id, 123)

    # Exactly one POST, to the run endpoint.
    self.http_client.post.assert_called_once()
    sent_args, sent_kwargs = self.http_client.post.call_args
    self.assertEqual(sent_args[0], "/eval-platform/api/v1/run/")

    # Fields an LLM request must carry.
    body = sent_kwargs["json"]
    required = {
        "run_id": "test_run_123",
        "type": "llm",
        "prediction_artifact_path": "/path/to/prediction.json",
        "user_id": 0,
        "dataset_id": 1,
        "dataset_version_id": 1,
        "evaled_artifact_path": "/path/to/evaled.json",
        "report": {"accuracy": 0.95},
    }
    for key, wanted in required.items():
        self.assertEqual(body[key], wanted)

    # CV-only fields must not leak into an LLM payload.
    for cv_only in ("metrics_artifact_path", "ground_truth_artifact_path"):
        self.assertNotIn(cv_only, body)
|
|
264
|
+
|
|
265
|
+
@patch('aihub.services.dataset_management.DatasetManagementService')
def test_create_eval_llm_custom_user_id(self, mock_dataset_service_class):
    """Create an LLM evaluation with an explicit ``user_id``."""
    # Stub the dataset-version lookup: dataset 2, version 3.
    version_stub = Mock(dataset_id=2, id=3)
    mock_dataset_service_class.return_value = Mock(
        **{"get_dataset_version_by_name.return_value": version_stub}
    )

    # Successful create response returned by the mocked client.
    created_run = {
        "id": 456,
        "name": "test_eval",
        "description": "Test evaluation",
        "user_id": 3750,
        "model_id": 1,
        "model_name": "test_model",
        "dataset_id": 2,
        "dataset_version_id": 3,
        "dataset_name": "test_dataset",
        "status": "created",
        "prediction_artifact_path": "/path/to/prediction.json",
        "evaled_artifact_path": "/path/to/evaled.json",
        "run_id": "test_run_456",
        "dataset_summary": {},
        "metrics_summary": {},
        "viz_summary": {},
        "eval_config": None,
        "created_at": 1640995200,
        "updated_at": 1640995200,
    }
    envelope = {"code": 0, "msg": None, "data": {"eval_run": created_run}}
    self.http_client.post.return_value = Mock(**{"json.return_value": envelope})

    new_id = self.eval_service.create(
        user_id=3750,
        run_id="test_run_456",
        report_json={"f1_score": 0.88},
        evaled_artifact_path="/path/to/evaled.json",
        prediction_artifact_path="/path/to/prediction.json",
        dataset_version_name="test_dataset_v2",
    )

    self.assertEqual(new_id, 456)

    # Exactly one POST; inspect the JSON body that was sent.
    self.http_client.post.assert_called_once()
    _, sent_kwargs = self.http_client.post.call_args
    body = sent_kwargs["json"]

    key_fields = {
        "type": "llm",
        "user_id": 3750,
        "dataset_id": 2,
        "dataset_version_id": 3,
    }
    for key, wanted in key_fields.items():
        self.assertEqual(body[key], wanted)
|
|
333
|
+
|
|
334
|
+
def test_create_cv_run_default_user_id(self):
    """Create a CV evaluation run without ``user_id``; the payload carries 0."""
    # Successful create response; dataset-related values are zero/empty
    # rather than None.
    created_run = {
        "id": 789,
        "name": "cv_eval",
        "description": "CV evaluation",
        "user_id": 0,
        "model_id": 2,
        "model_name": "cv_model",
        "dataset_id": 0,
        "dataset_version_id": 0,
        "dataset_name": "",
        "status": "created",
        "prediction_artifact_path": "coco_dt.json",
        "evaled_artifact_path": "",
        "run_id": "cv_run_789",
        "dataset_summary": {},
        "metrics_summary": {},
        "viz_summary": {},
        "eval_config": None,
        "created_at": 1640995200,
        "updated_at": 1640995200,
    }
    envelope = {"code": 0, "msg": None, "data": {"eval_run": created_run}}
    self.http_client.post.return_value = Mock(**{"json.return_value": envelope})

    # user_id is deliberately not passed so the default applies.
    new_id = self.eval_service.create_cv_run(
        ground_truth_artifact_path="coco_gt.json",
        metrics_artifact_path="metrics.json",
        prediction_artifact_path="coco_dt.json",
        run_id="cv_run_789",
    )

    self.assertEqual(new_id, 789)

    # Exactly one POST; inspect the JSON body that was sent.
    self.http_client.post.assert_called_once()
    _, sent_kwargs = self.http_client.post.call_args
    body = sent_kwargs["json"]

    # Fields a CV request must carry.
    required = {
        "type": "cv",
        "user_id": 0,
        "run_id": "cv_run_789",
        "prediction_artifact_path": "coco_dt.json",
        "metrics_artifact_path": "metrics.json",
        "ground_truth_artifact_path": "coco_gt.json",
    }
    for key, wanted in required.items():
        self.assertEqual(body[key], wanted)

    # LLM-only fields must be absent from a CV payload (the CV request model
    # does not declare them).
    for llm_only in ("dataset_id", "dataset_version_id", "evaled_artifact_path", "report"):
        self.assertNotIn(llm_only, body)
|
|
398
|
+
|
|
399
|
+
def test_create_cv_run_custom_user_id(self):
    """Create a CV evaluation run with an explicit ``user_id``."""
    # Successful create response; dataset-related values are zero/empty
    # rather than None.
    created_run = {
        "id": 999,
        "name": "cv_eval_custom",
        "description": "CV evaluation custom",
        "user_id": 3750,
        "model_id": 3,
        "model_name": "cv_model_v2",
        "dataset_id": 0,
        "dataset_version_id": 0,
        "dataset_name": "",
        "status": "created",
        "prediction_artifact_path": "coco_dt.json",
        "evaled_artifact_path": "",
        "run_id": "cv_run_999",
        "dataset_summary": {},
        "metrics_summary": {},
        "viz_summary": {},
        "eval_config": None,
        "created_at": 1640995200,
        "updated_at": 1640995200,
    }
    envelope = {"code": 0, "msg": None, "data": {"eval_run": created_run}}
    self.http_client.post.return_value = Mock(**{"json.return_value": envelope})

    new_id = self.eval_service.create_cv_run(
        user_id=3750,
        ground_truth_artifact_path="coco_gt.json",
        metrics_artifact_path="metrics.json",
        prediction_artifact_path="coco_dt.json",
        run_id="cv_run_999",
    )

    self.assertEqual(new_id, 999)

    # Exactly one POST; inspect the JSON body that was sent.
    self.http_client.post.assert_called_once()
    _, sent_kwargs = self.http_client.post.call_args
    body = sent_kwargs["json"]

    key_fields = {
        "type": "cv",
        "user_id": 3750,
        "run_id": "cv_run_999",
        "prediction_artifact_path": "coco_dt.json",
        "metrics_artifact_path": "metrics.json",
        "ground_truth_artifact_path": "coco_gt.json",
    }
    for key, wanted in key_fields.items():
        self.assertEqual(body[key], wanted)
|
|
458
|
+
|
|
459
|
+
def test_create_eval_api_error(self):
    """Check that ``create`` turns a non-zero backend code into an ``APIError``.

    The dataset lookup is stubbed so the only failure comes from the mocked
    HTTP response, whose envelope carries ``code`` 2001 and no data.
    """
    # Imported locally so the test does not rely on the module's import header.
    # NOTE(review): assumes the service still raises aihub.exceptions.APIError
    # for non-zero backend codes - confirm against the current service module.
    from aihub.exceptions import APIError

    # Stand-in for the dataset version the service resolves by name.
    dataset_version = Mock()
    dataset_version.dataset_id = 1
    dataset_version.id = 1

    dataset_service = Mock()
    dataset_service.get_dataset_version_by_name.return_value = dataset_version

    # Backend envelope signalling a failure: non-zero code, no payload.
    error_envelope = {
        "code": 2001,
        "msg": "Invalid dataset version",
        "data": None,
    }
    http_response = Mock()
    http_response.json.return_value = error_envelope
    self.http_client.post.return_value = http_response

    with patch('aihub.services.dataset_management.DatasetManagementService') as dataset_service_cls:
        dataset_service_cls.return_value = dataset_service

        # Assert the specific error type so unrelated failures (typos,
        # attribute errors, ...) cannot make this test pass by accident.
        with self.assertRaises(APIError) as context:
            self.eval_service.create(
                dataset_version_name="invalid_dataset",
                prediction_artifact_path="/path/to/prediction.json",
                evaled_artifact_path="/path/to/evaled.json",
                report_json={"accuracy": 0.95},
                run_id="test_run_error",
            )

    message = str(context.exception)
    self.assertIn("backend code 2001", message)
    self.assertIn("Invalid dataset version", message)
|
|
493
|
+
|
|
494
|
+
def test_create_cv_run_api_error(self):
    """Check that ``create_cv_run`` turns a non-zero backend code into an ``APIError``.

    The mocked HTTP response carries ``code`` 3001 and no data; the error
    message must expose both the code and the backend message.
    """
    # Imported locally so the test does not rely on the module's import header.
    # NOTE(review): assumes the service still raises aihub.exceptions.APIError
    # for non-zero backend codes - confirm against the current service module.
    from aihub.exceptions import APIError

    # Backend envelope signalling a failure: non-zero code, no payload.
    error_envelope = {
        "code": 3001,
        "msg": "Invalid CV run parameters",
        "data": None,
    }
    http_response = Mock()
    http_response.json.return_value = error_envelope
    self.http_client.post.return_value = http_response

    # Assert the specific error type so unrelated failures cannot make this
    # test pass by accident.
    with self.assertRaises(APIError) as context:
        self.eval_service.create_cv_run(
            run_id="invalid_cv_run",
            prediction_artifact_path="invalid.json",
            metrics_artifact_path="invalid_metrics.json",
            ground_truth_artifact_path="invalid_gt.json",
        )

    message = str(context.exception)
    self.assertIn("backend code 3001", message)
    self.assertIn("Invalid CV run parameters", message)
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.1.19"
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
# !/usr/bin/env python
|
|
2
|
-
# -*-coding:utf-8 -*-
|
|
3
|
-
from typing import Dict
|
|
4
|
-
|
|
5
|
-
from pydantic import BaseModel, Field
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class CreateEvalReq(BaseModel):
    """Request body for creating an evaluation run."""

    # Dataset and dataset-version identifiers the evaluation refers to.
    dataset_id: int = Field(description="数据集ID")
    dataset_version_id: int = Field(description="数据集版本ID")

    # Artifact locations: raw predictions and the evaluated results.
    prediction_artifact_path: str = Field(description="推理产物的路径")
    evaled_artifact_path: str = Field(description="评测结果产物的路径")

    run_id: str = Field(description="运行ID")

    # Falls back to 0 when the caller does not provide a user.
    user_id: int = Field(default=0, description="用户ID")

    # Evaluation report content; a fresh empty dict per instance by default.
    report: Dict = Field(description="评测报告", default_factory=dict)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class EvalRun(BaseModel):
    """Evaluation run entity; only its ID is modelled here."""

    # The field name mirrors the backend key, hence the builtin-shadowing `id`.
    id: int = Field(description="评测的运行ID")
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class CreateEvalResp(BaseModel):
    """Response payload returned when an evaluation run is created."""

    # Wraps the created run; the alias matches the field name.
    eval_run: EvalRun = Field(description="评测运行信息", alias="eval_run")
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
# !/usr/bin/env python
|
|
2
|
-
# -*- coding:utf-8 -*-
|
|
3
|
-
"""评测平台服务模块
|
|
4
|
-
|
|
5
|
-
本模块围绕 **“模型评测(Run → Report)”** 提供能力:
|
|
6
|
-
|
|
7
|
-
- **创建评测任务 / 评测报告**
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
import httpx
|
|
11
|
-
|
|
12
|
-
from ..exceptions import APIError
|
|
13
|
-
from ..models.common import APIWrapper
|
|
14
|
-
from ..models.eval import CreateEvalReq, CreateEvalResp
|
|
15
|
-
|
|
16
|
-
_BASE = "/eval-platform/api/v1"
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class EvalService:
    """Evaluation service: entry point for creating evaluation runs/reports."""

    def __init__(self, http: httpx.Client):
        self._http = http
        self._eval = _Eval(http)

    def create(
        self,
        dataset_version_name: str,
        prediction_artifact_path: str,
        evaled_artifact_path: str,
        report_json: dict,
        run_id,
    ) -> int:
        """Create an evaluation report.

        Args:
            dataset_version_name (str): Name used to look up the dataset version.
            prediction_artifact_path: Path of the prediction artifact.
            evaled_artifact_path: Path of the evaluation-result artifact.
            report_json (dict): Report content.
            run_id (str): Run ID.

        Returns:
            int: ID of the evaluation run reported by the backend.
        """
        # Imported at call time rather than at module import time.
        from .dataset_management import DatasetManagementService

        # Resolve the dataset version by name to obtain the numeric IDs.
        version = DatasetManagementService(self._http).get_dataset_version_by_name(
            dataset_version_name
        )

        request = CreateEvalReq(
            dataset_id=version.dataset_id,
            dataset_version_id=version.id,
            run_id=run_id,
            report=report_json,
            prediction_artifact_path=prediction_artifact_path,
            evaled_artifact_path=evaled_artifact_path,
        )
        return self._eval.create(request)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
class _Eval:
    """Thin HTTP wrapper around the evaluation-run endpoint."""

    def __init__(self, http: httpx.Client):
        self._http = http

    def create(self, payload: CreateEvalReq) -> int:
        """POST the request to ``{_BASE}/run/`` and return the new run's ID.

        Args:
            payload: Request model; sent as JSON via ``model_dump()``.

        Returns:
            int: ``id`` of the ``eval_run`` object in the response data.

        Raises:
            APIError: If the backend answers with a non-zero code, or with
                code 0 but no data to read the run ID from.
        """
        resp = self._http.post(f"{_BASE}/run/", json=payload.model_dump())
        wrapper = APIWrapper[CreateEvalResp].model_validate(resp.json())
        if wrapper.code != 0:
            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
        if wrapper.data is None:
            # A success code without a payload would otherwise surface as an
            # opaque AttributeError on the line below.
            raise APIError("backend returned code 0 without eval run data")
        return wrapper.data.eval_run.id
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/quota_schedule_management.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/model_training_platform.py
RENAMED
|
File without changes
|
{intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/quota_schedule_management.py
RENAMED
|
File without changes
|
|
File without changes
|
{intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/tag_resource_management.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|