intellif-aihub 0.1.19__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of intellif-aihub has been flagged as possibly problematic.

Files changed (66)
  1. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/PKG-INFO +1 -1
  2. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/pyproject.toml +1 -1
  3. intellif_aihub-0.1.21/src/aihub/__init__.py +1 -0
  4. intellif_aihub-0.1.21/src/aihub/models/eval.py +81 -0
  5. intellif_aihub-0.1.21/src/aihub/services/eval.py +172 -0
  6. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/PKG-INFO +1 -1
  7. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/SOURCES.txt +1 -0
  8. intellif_aihub-0.1.21/tests/test_eval.py +520 -0
  9. intellif_aihub-0.1.19/src/aihub/__init__.py +0 -1
  10. intellif_aihub-0.1.19/src/aihub/models/eval.py +0 -26
  11. intellif_aihub-0.1.19/src/aihub/services/eval.py +0 -75
  12. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/LICENSE +0 -0
  13. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/README.md +0 -0
  14. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/setup.cfg +0 -0
  15. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/client.py +0 -0
  16. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/exceptions.py +0 -0
  17. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/__init__.py +0 -0
  18. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/artifact.py +0 -0
  19. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/common.py +0 -0
  20. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/data_warehouse.py +0 -0
  21. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/dataset_management.py +0 -0
  22. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/document_center.py +0 -0
  23. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/labelfree.py +0 -0
  24. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/model_center.py +0 -0
  25. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/model_training_platform.py +0 -0
  26. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/quota_schedule_management.py +0 -0
  27. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/tag_resource_management.py +0 -0
  28. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/task_center.py +0 -0
  29. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/user_system.py +0 -0
  30. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/models/workflow_center.py +0 -0
  31. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/__init__.py +0 -0
  32. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/artifact.py +0 -0
  33. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/data_warehouse.py +0 -0
  34. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/dataset_management.py +0 -0
  35. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/document_center.py +0 -0
  36. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/labelfree.py +0 -0
  37. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/model_center.py +0 -0
  38. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/model_training_platform.py +0 -0
  39. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/quota_schedule_management.py +0 -0
  40. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/reporter.py +0 -0
  41. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/tag_resource_management.py +0 -0
  42. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/task_center.py +0 -0
  43. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/user_system.py +0 -0
  44. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/services/workflow_center.py +0 -0
  45. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/__init__.py +0 -0
  46. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/di.py +0 -0
  47. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/download.py +0 -0
  48. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/http.py +0 -0
  49. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/aihub/utils/s3.py +0 -0
  50. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/dependency_links.txt +0 -0
  51. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/requires.txt +0 -0
  52. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/src/intellif_aihub.egg-info/top_level.txt +0 -0
  53. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_artifact.py +0 -0
  54. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_data_warehouse.py +0 -0
  55. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_dataset_management.py +0 -0
  56. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_di.py +0 -0
  57. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_document_center.py +0 -0
  58. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_labelfree.py +0 -0
  59. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_model_center.py +0 -0
  60. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_model_training_platform.py +0 -0
  61. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_quota_schedule_management.py +0 -0
  62. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_s3.py +0 -0
  63. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_tag_resource_management.py +0 -0
  64. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_task_center.py +0 -0
  65. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_user_system.py +0 -0
  66. {intellif_aihub-0.1.19 → intellif_aihub-0.1.21}/tests/test_workflow_center.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: intellif-aihub
- Version: 0.1.19
+ Version: 0.1.21
  Summary: Intellif AI-hub SDK.
  Author-email: Platform Team <aihub@example.com>
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
  [project]
  name = "intellif-aihub"
- version = "0.1.19"
+ version = "0.1.21"
  description = "Intellif AI-hub SDK."
  readme = {file = "README.md", content-type = "text/markdown"}
  requires-python = ">=3.9"
@@ -0,0 +1 @@
+ __version__ = "0.1.21"
@@ -0,0 +1,81 @@
+ # !/usr/bin/env python
+ # -*-coding:utf-8 -*-
+ from typing import Dict, List, Optional
+
+ from pydantic import BaseModel, Field
+
+
+ class BaseEvalReq(BaseModel):
+     """Base request model for evaluation tasks"""
+     run_id: str = Field(description="Run ID")
+     type: str = Field(description="Evaluation type; 'llm' and 'cv' are supported")
+     prediction_artifact_path: str = Field(description="Path to the prediction artifact")
+     user_id: int = Field(0, description="User ID, defaults to 0")
+
+
+ class CreateLLMEvalReq(BaseEvalReq):
+     """Request to create an LLM evaluation task"""
+     type: str = Field(default="llm", description="Evaluation type, fixed to 'llm'")
+     dataset_id: int = Field(description="Dataset ID")
+     dataset_version_id: int = Field(description="Dataset version ID")
+     evaled_artifact_path: str = Field(description="Path to the evaluation result artifact")
+     report: Dict = Field(description="Evaluation report")
+
+
+ class CreateCVEvalReq(BaseEvalReq):
+     """Request to create a CV evaluation task"""
+     type: str = Field(default="cv", description="Evaluation type, fixed to 'cv'")
+     metrics_artifact_path: str = Field(description="Path to the metrics artifact")
+     ground_truth_artifact_path: str = Field(description="Path to the ground-truth artifact")
+
+
+ class EvalRun(BaseModel):
+     """Run entity of an evaluation task"""
+     id: int = Field(description="Evaluation run ID")
+     name: str = Field(description="Evaluation name")
+     description: str = Field(description="Evaluation description")
+     user_id: int = Field(description="User ID")
+     model_id: int = Field(description="Model ID")
+     model_name: str = Field(description="Model name")
+     dataset_id: int = Field(description="Dataset ID")
+     dataset_version_id: int = Field(description="Dataset version ID")
+     dataset_name: str = Field(description="Dataset name")
+     status: str = Field(description="Status")
+     prediction_artifact_path: str = Field(description="Path to the prediction artifact")
+     evaled_artifact_path: str = Field(description="Path to the evaluation result artifact")
+     run_id: str = Field(description="Run ID")
+     dataset_summary: Dict = Field(default_factory=dict, description="Dataset summary")
+     metrics_summary: Dict = Field(default_factory=dict, description="Metrics summary")
+     viz_summary: Dict = Field(default_factory=dict, description="Visualization summary")
+     eval_config: Optional[Dict] = Field(default=None, description="Evaluation configuration")
+     created_at: int = Field(description="Creation time")
+     updated_at: int = Field(description="Update time")
+
+
+ class CreateEvalResp(BaseModel):
+     """Response returned when an evaluation task is created"""
+     eval_run: EvalRun = Field(alias="eval_run", description="Evaluation run information")
+
+
+ class ListEvalReq(BaseModel):
+     """Request to list evaluation tasks"""
+     page_size: int = Field(20, description="Page size")
+     page_num: int = Field(1, description="Page number")
+     status: Optional[str] = Field(None, description="Filter by status")
+     name: Optional[str] = Field(None, description="Filter by name")
+     model_id: Optional[int] = Field(None, description="Filter by model ID")
+     dataset_id: Optional[int] = Field(None, description="Filter by dataset ID")
+     dataset_version_id: Optional[int] = Field(None, description="Filter by dataset version ID")
+     run_id: Optional[str] = Field(None, description="Filter by run ID")
+     user_id: Optional[int] = Field(None, description="Filter by user ID")
+     model_ids: Optional[str] = Field(None, description="Filter by a list of model IDs")
+     dataset_ids: Optional[str] = Field(None, description="Filter by a list of dataset IDs")
+     dataset_version_ids: Optional[str] = Field(None, description="Filter by a list of dataset version IDs")
+
+
+ class ListEvalResp(BaseModel):
+     """Response listing evaluation tasks"""
+     total: int = Field(description="Total count")
+     page_size: int = Field(description="Page size")
+     page_num: int = Field(description="Page number")
+     data: List[EvalRun] = Field(description="List of evaluation runs")
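
For orientation, the request models added above are ordinary Pydantic models, so a payload can be built and inspected directly. A minimal sketch with placeholder values (not taken from the package):

    from aihub.models.eval import CreateCVEvalReq

    # Placeholder run ID and artifact paths, for illustration only.
    req = CreateCVEvalReq(
        run_id="cv_run_001",
        prediction_artifact_path="coco_dt.json",
        metrics_artifact_path="metrics.json",
        ground_truth_artifact_path="coco_gt.json",
    )

    payload = req.model_dump()
    # The dump carries type="cv" and user_id=0 from the defaults plus the four
    # values set above; this is the dict the service layer posts to the backend.
    print(payload["type"], payload["user_id"])  # cv 0
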
@@ -0,0 +1,172 @@
+ # !/usr/bin/env python
+ # -*- coding:utf-8 -*-
+ """Evaluation platform service module
+
+ This module provides capabilities around **model evaluation (Run → Report)**:
+
+ - **Create evaluation tasks / evaluation reports**
+ - **List evaluation tasks**
+ """
+
+ import httpx
+
+ from ..exceptions import APIError
+ from ..models.common import APIWrapper
+ from ..models.eval import CreateLLMEvalReq, CreateCVEvalReq, CreateEvalResp, ListEvalReq, ListEvalResp
+
+ _BASE = "/eval-platform/api/v1"
+
+
+ class EvalService:
+     """Evaluation service"""
+
+     def __init__(self, http: httpx.Client):
+         self._http = http
+         self._eval = _Eval(http)
+
+     def create(
+         self,
+         dataset_version_name: str,
+         prediction_artifact_path: str,
+         evaled_artifact_path: str,
+         report_json: dict,
+         run_id,
+         user_id: int = 0,
+     ) -> int:
+         """Create an evaluation report
+
+         Args:
+             run_id (str): run ID
+             report_json (dict): report content
+             evaled_artifact_path: path to the evaluation result artifact
+             prediction_artifact_path: path to the prediction artifact
+             dataset_version_name (str): dataset version name
+             user_id (int, optional): user ID, defaults to 0
+
+         Returns:
+             id (int): evaluation report ID
+
+         """
+         from .dataset_management import DatasetManagementService
+
+         dataset_service = DatasetManagementService(self._http)
+         dataset_version = dataset_service.get_dataset_version_by_name(
+             dataset_version_name
+         )
+         payload = CreateLLMEvalReq(
+             dataset_id=dataset_version.dataset_id,
+             dataset_version_id=dataset_version.id,
+             evaled_artifact_path=evaled_artifact_path,
+             prediction_artifact_path=prediction_artifact_path,
+             report=report_json,
+             run_id=run_id,
+             user_id=user_id,
+         )
+
+         return self._eval.create(payload)
+
+     def create_cv_run(
+         self,
+         run_id: str,
+         prediction_artifact_path: str,
+         metrics_artifact_path: str,
+         ground_truth_artifact_path: str,
+         user_id: int = 0,
+     ) -> int:
+         """Create a CV evaluation run
+
+         Args:
+             run_id (str): run ID
+             prediction_artifact_path (str): path to the prediction artifact
+             metrics_artifact_path (str): path to the metrics artifact
+             ground_truth_artifact_path (str): path to the ground-truth artifact
+             user_id (int, optional): user ID, defaults to 0
+
+         Returns:
+             id (int): evaluation run ID
+         """
+         payload = CreateCVEvalReq(
+             run_id=run_id,
+             prediction_artifact_path=prediction_artifact_path,
+             metrics_artifact_path=metrics_artifact_path,
+             ground_truth_artifact_path=ground_truth_artifact_path,
+             user_id=user_id,
+         )
+
+         return self._eval.create(payload)
+
+     def list(
+         self,
+         page_size: int = 20,
+         page_num: int = 1,
+         status: str = None,
+         name: str = None,
+         model_id: int = None,
+         dataset_id: int = None,
+         dataset_version_id: int = None,
+         run_id: str = None,
+         user_id: int = None,
+         model_ids: str = None,
+         dataset_ids: str = None,
+         dataset_version_ids: str = None,
+     ) -> ListEvalResp:
+         """List evaluation results
+
+         Args:
+             page_size (int): page size, defaults to 20
+             page_num (int): page number, defaults to 1
+             status (str, optional): filter by status
+             name (str, optional): filter by name
+             model_id (int, optional): filter by model ID
+             dataset_id (int, optional): filter by dataset ID
+             dataset_version_id (int, optional): filter by dataset version ID
+             run_id (str, optional): filter by run ID
+             user_id (int, optional): filter by user ID
+             model_ids (str, optional): filter by model IDs (comma-separated)
+             dataset_ids (str, optional): filter by dataset IDs (comma-separated)
+             dataset_version_ids (str, optional): filter by dataset version IDs (comma-separated)
+
+         Returns:
+             ListEvalResp: response with the list of evaluation results
+         """
+         payload = ListEvalReq(
+             page_size=page_size,
+             page_num=page_num,
+             status=status,
+             name=name,
+             model_id=model_id,
+             dataset_id=dataset_id,
+             dataset_version_id=dataset_version_id,
+             run_id=run_id,
+             user_id=user_id,
+             model_ids=model_ids,
+             dataset_ids=dataset_ids,
+             dataset_version_ids=dataset_version_ids,
+         )
+
+         return self._eval.list(payload)
+
+
+ class _Eval:
+     def __init__(self, http: httpx.Client):
+         self._http = http
+
+     def create(self, payload) -> int:
+         resp = self._http.post(f"{_BASE}/run/", json=payload.model_dump())
+         wrapper = APIWrapper[CreateEvalResp].model_validate(resp.json())
+         if wrapper.code != 0:
+             raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+         return wrapper.data.eval_run.id
+
+     def list(self, payload: ListEvalReq) -> ListEvalResp:
+         # Build query parameters, excluding None values
+         params = {}
+         for field, value in payload.model_dump().items():
+             if value is not None:
+                 params[field] = value
+
+         resp = self._http.get(f"{_BASE}/run/", params=params)
+         wrapper = APIWrapper[ListEvalResp].model_validate(resp.json())
+         if wrapper.code != 0:
+             raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+         return wrapper.data
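
For orientation, a minimal usage sketch of the expanded service. The httpx.Client wiring (base URL, auth header) is an assumption for illustration, since client construction is not part of this diff; the method calls follow the signatures added above:

    import httpx

    from aihub.services.eval import EvalService

    # Assumed wiring: the base URL and Authorization header are placeholders.
    http = httpx.Client(
        base_url="http://aihub.example.com",
        headers={"Authorization": "Bearer <token>"},
    )
    svc = EvalService(http)

    # New in 0.1.21: create a CV evaluation run from artifact paths.
    run_pk = svc.create_cv_run(
        run_id="cv_run_001",
        prediction_artifact_path="coco_dt.json",
        metrics_artifact_path="metrics.json",
        ground_truth_artifact_path="coco_gt.json",
    )

    # New in 0.1.21: page through evaluation runs with optional filters.
    page = svc.list(page_size=10, status="completed")
    for run in page.data:
        print(run.id, run.name, run.status)
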
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: intellif-aihub
- Version: 0.1.19
+ Version: 0.1.21
  Summary: Intellif AI-hub SDK.
  Author-email: Platform Team <aihub@example.com>
  License-Expression: Apache-2.0
@@ -49,6 +49,7 @@ tests/test_data_warehouse.py
  tests/test_dataset_management.py
  tests/test_di.py
  tests/test_document_center.py
+ tests/test_eval.py
  tests/test_labelfree.py
  tests/test_model_center.py
  tests/test_model_training_platform.py
@@ -0,0 +1,520 @@
+ # !/usr/bin/env python
+ # -*-coding:utf-8 -*-
+ import sys
+ import os
+ import unittest
+ import uuid
+ from unittest.mock import Mock, patch
+
+ import httpx
+
+ from aihub.services.eval import EvalService
+ from aihub.models.eval import ListEvalResp, EvalRun, CreateEvalResp
+ from aihub.models.common import APIWrapper
+
+ BASE_URL = "http://192.168.13.160:30052"
+ TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NTI1NDQwNDksImlhdCI6MTc1MVkzOTI0OSwidWlkIjoyfQ.MfB_7LK5oR3RAhga3jtgcvJqYESeUPLbz8Bc_y3fouc"
+
+
+ class TestEvalService(unittest.TestCase):
+
+     def setUp(self):
+         self.http_client = Mock(spec=httpx.Client)
+         self.eval_service = EvalService(self.http_client)
+
+     def test_list_eval_runs_default(self):
+         mock_eval_run = {
+             "id": 1,
+             "name": "test_eval",
+             "description": "Test evaluation",
+             "user_id": 1,
+             "model_id": 1,
+             "model_name": "test_model",
+             "dataset_id": 1,
+             "dataset_version_id": 1,
+             "dataset_name": "test_dataset",
+             "status": "completed",
+             "prediction_artifact_path": "/path/to/prediction",
+             "evaled_artifact_path": "/path/to/eval",
+             "run_id": "test_run_123",
+             "dataset_summary": {},
+             "metrics_summary": {"accuracy": 0.95},
+             "viz_summary": {},
+             "eval_config": {"metric": "accuracy"},
+             "created_at": 1640995200,
+             "updated_at": 1640995200
+         }
+
+         mock_response = {
+             "code": 0,
+             "msg": None,
+             "data": {
+                 "total": 1,
+                 "page_size": 20,
+                 "page_num": 1,
+                 "data": [mock_eval_run]
+             }
+         }
+
+         mock_resp = Mock()
+         mock_resp.json.return_value = mock_response
+         self.http_client.get.return_value = mock_resp
+
+         result = self.eval_service.list()
+
+         self.assertIsInstance(result, ListEvalResp)
+         self.assertEqual(result.total, 1)
+         self.assertEqual(result.page_size, 20)
+         self.assertEqual(result.page_num, 1)
+         self.assertEqual(len(result.data), 1)
+         self.assertEqual(result.data[0].id, 1)
+         self.assertEqual(result.data[0].name, "test_eval")
+
+         self.http_client.get.assert_called_once_with(
+             "/eval-platform/api/v1/run/",
+             params={"page_size": 20, "page_num": 1}
+         )
+
+     def test_list_eval_runs_with_filters(self):
+         mock_response = {
+             "code": 0,
+             "msg": None,
+             "data": {
+                 "total": 0,
+                 "page_size": 10,
+                 "page_num": 1,
+                 "data": []
+             }
+         }
+
+         mock_resp = Mock()
+         mock_resp.json.return_value = mock_response
+         self.http_client.get.return_value = mock_resp
+
+         # Call with filter parameters
+         result = self.eval_service.list(
+             page_size=10,
+             page_num=1,
+             status="completed",
+             name="test",
+             model_id=1,
+             dataset_id=2,
+             dataset_version_id=3,
+             run_id="test_run",
+             user_id=1,
+             model_ids="1,2,3",
+             dataset_ids="2,3,4",
+             dataset_version_ids="3,4,5"
+         )
+
+         self.assertIsInstance(result, ListEvalResp)
+         self.assertEqual(result.total, 0)
+         self.assertEqual(len(result.data), 0)
+
+         expected_params = {
+             "page_size": 10,
+             "page_num": 1,
+             "status": "completed",
+             "name": "test",
+             "model_id": 1,
+             "dataset_id": 2,
+             "dataset_version_id": 3,
+             "run_id": "test_run",
+             "user_id": 1,
+             "model_ids": "1,2,3",
+             "dataset_ids": "2,3,4",
+             "dataset_version_ids": "3,4,5"
+         }
+         self.http_client.get.assert_called_once_with(
+             "/eval-platform/api/v1/run/",
+             params=expected_params
+         )
+
+     def test_list_eval_runs_api_error(self):
+         """Test listing evaluation runs - API error"""
+         # Simulate an API error
+         mock_response = {
+             "code": 1001,
+             "msg": "Database connection failed",
+             "data": None
+         }
+
+         mock_resp = Mock()
+         mock_resp.json.return_value = mock_response
+         self.http_client.get.return_value = mock_resp
+
+         with self.assertRaises(Exception) as context:
+             self.eval_service.list()
+
+         self.assertIn("backend code 1001", str(context.exception))
+         self.assertIn("Database connection failed", str(context.exception))
+
+     def test_list_eval_runs_only_specified_filters(self):
+         mock_response = {
+             "code": 0,
+             "msg": None,
+             "data": {
+                 "total": 0,
+                 "page_size": 20,
+                 "page_num": 1,
+                 "data": []
+             }
+         }
+
+         mock_resp = Mock()
+         mock_resp.json.return_value = mock_response
+         self.http_client.get.return_value = mock_resp
+
+         result = self.eval_service.list(
+             status="completed",
+             model_id=1
+         )
+
+         expected_params = {
+             "page_size": 20,
+             "page_num": 1,
+             "status": "completed",
+             "model_id": 1
+         }
+         self.http_client.get.assert_called_once_with(
+             "/eval-platform/api/v1/run/",
+             params=expected_params
+         )
+
+     @patch('aihub.services.dataset_management.DatasetManagementService')
+     def test_create_eval_llm_default_user_id(self, mock_dataset_service_class):
+         """Test creating an LLM evaluation - default user_id"""
+         # Mock the dataset version
+         mock_dataset_version = Mock()
+         mock_dataset_version.dataset_id = 1
+         mock_dataset_version.id = 1
+
+         mock_dataset_service = Mock()
+         mock_dataset_service.get_dataset_version_by_name.return_value = mock_dataset_version
+         mock_dataset_service_class.return_value = mock_dataset_service
+
+         # Mock a successful creation response
+         mock_eval_run = {
+             "id": 123,
+             "name": "test_eval",
+             "description": "Test evaluation",
+             "user_id": 0,
+             "model_id": 1,
+             "model_name": "test_model",
+             "dataset_id": 1,
+             "dataset_version_id": 1,
+             "dataset_name": "test_dataset",
+             "status": "created",
+             "prediction_artifact_path": "/path/to/prediction.json",
+             "evaled_artifact_path": "/path/to/evaled.json",
+             "run_id": "test_run_123",
+             "dataset_summary": {},
+             "metrics_summary": {},
+             "viz_summary": {},
+             "eval_config": None,
+             "created_at": 1640995200,
+             "updated_at": 1640995200
+         }
+
+         mock_response = {
+             "code": 0,
+             "msg": None,
+             "data": {"eval_run": mock_eval_run}
+         }
+
+         mock_resp = Mock()
+         mock_resp.json.return_value = mock_response
+         self.http_client.post.return_value = mock_resp
+
+         # Call the method (without user_id, so the default is used)
+         result = self.eval_service.create(
+             dataset_version_name="test_dataset_v1",
+             prediction_artifact_path="/path/to/prediction.json",
+             evaled_artifact_path="/path/to/evaled.json",
+             report_json={"accuracy": 0.95},
+             run_id="test_run_123"
+         )
+
+         # Verify the result
+         self.assertEqual(result, 123)
+
+         # Verify the HTTP call
+         self.http_client.post.assert_called_once()
+         call_args = self.http_client.post.call_args
+
+         # Verify the endpoint
+         self.assertEqual(call_args[0][0], "/eval-platform/api/v1/run/")
+
+         # Verify the payload content
+         actual_payload = call_args[1]["json"]
+
+         # Verify the required LLM fields
+         self.assertEqual(actual_payload["run_id"], "test_run_123")
+         self.assertEqual(actual_payload["type"], "llm")
+         self.assertEqual(actual_payload["prediction_artifact_path"], "/path/to/prediction.json")
+         self.assertEqual(actual_payload["user_id"], 0)
+         self.assertEqual(actual_payload["dataset_id"], 1)
+         self.assertEqual(actual_payload["dataset_version_id"], 1)
+         self.assertEqual(actual_payload["evaled_artifact_path"], "/path/to/evaled.json")
+         self.assertEqual(actual_payload["report"], {"accuracy": 0.95})
+
+         # CV-only fields must not appear in the payload of an LLM evaluation
+         self.assertNotIn("metrics_artifact_path", actual_payload)
+         self.assertNotIn("ground_truth_artifact_path", actual_payload)
+
+     @patch('aihub.services.dataset_management.DatasetManagementService')
+     def test_create_eval_llm_custom_user_id(self, mock_dataset_service_class):
+         """Test creating an LLM evaluation - custom user_id"""
+         # Mock the dataset version
+         mock_dataset_version = Mock()
+         mock_dataset_version.dataset_id = 2
+         mock_dataset_version.id = 3
+
+         mock_dataset_service = Mock()
+         mock_dataset_service.get_dataset_version_by_name.return_value = mock_dataset_version
+         mock_dataset_service_class.return_value = mock_dataset_service
+
+         # Mock a successful creation response
+         mock_eval_run = {
+             "id": 456,
+             "name": "test_eval",
+             "description": "Test evaluation",
+             "user_id": 3750,
+             "model_id": 1,
+             "model_name": "test_model",
+             "dataset_id": 2,
+             "dataset_version_id": 3,
+             "dataset_name": "test_dataset",
+             "status": "created",
+             "prediction_artifact_path": "/path/to/prediction.json",
+             "evaled_artifact_path": "/path/to/evaled.json",
+             "run_id": "test_run_456",
+             "dataset_summary": {},
+             "metrics_summary": {},
+             "viz_summary": {},
+             "eval_config": None,
+             "created_at": 1640995200,
+             "updated_at": 1640995200
+         }
+
+         mock_response = {
+             "code": 0,
+             "msg": None,
+             "data": {"eval_run": mock_eval_run}
+         }
+
+         mock_resp = Mock()
+         mock_resp.json.return_value = mock_response
+         self.http_client.post.return_value = mock_resp
+
+         # Call the method (with a custom user_id)
+         result = self.eval_service.create(
+             dataset_version_name="test_dataset_v2",
+             prediction_artifact_path="/path/to/prediction.json",
+             evaled_artifact_path="/path/to/evaled.json",
+             report_json={"f1_score": 0.88},
+             run_id="test_run_456",
+             user_id=3750
+         )
+
+         # Verify the result
+         self.assertEqual(result, 456)
+
+         # Verify the HTTP call
+         self.http_client.post.assert_called_once()
+         call_args = self.http_client.post.call_args
+         actual_payload = call_args[1]["json"]
+
+         # Verify the key fields
+         self.assertEqual(actual_payload["type"], "llm")
+         self.assertEqual(actual_payload["user_id"], 3750)
+         self.assertEqual(actual_payload["dataset_id"], 2)
+         self.assertEqual(actual_payload["dataset_version_id"], 3)
+
+     def test_create_cv_run_default_user_id(self):
+         """Test creating a CV evaluation run - default user_id"""
+         # Mock a successful creation response
+         mock_eval_run = {
+             "id": 789,
+             "name": "cv_eval",
+             "description": "CV evaluation",
+             "user_id": 0,
+             "model_id": 2,
+             "model_name": "cv_model",
+             "dataset_id": 0,  # default value instead of None
+             "dataset_version_id": 0,  # default value instead of None
+             "dataset_name": "",  # empty string instead of None
+             "status": "created",
+             "prediction_artifact_path": "coco_dt.json",
+             "evaled_artifact_path": "",  # empty string instead of None
+             "run_id": "cv_run_789",
+             "dataset_summary": {},
+             "metrics_summary": {},
+             "viz_summary": {},
+             "eval_config": None,
+             "created_at": 1640995200,
+             "updated_at": 1640995200
+         }
+
+         mock_response = {
+             "code": 0,
+             "msg": None,
+             "data": {"eval_run": mock_eval_run}
+         }
+
+         mock_resp = Mock()
+         mock_resp.json.return_value = mock_response
+         self.http_client.post.return_value = mock_resp
+
+         # Call the method (without user_id, so the default is used)
+         result = self.eval_service.create_cv_run(
+             run_id="cv_run_789",
+             prediction_artifact_path="coco_dt.json",
+             metrics_artifact_path="metrics.json",
+             ground_truth_artifact_path="coco_gt.json"
+         )
+
+         # Verify the result
+         self.assertEqual(result, 789)
+
+         # Verify the HTTP call
+         self.http_client.post.assert_called_once()
+         call_args = self.http_client.post.call_args
+         actual_payload = call_args[1]["json"]
+
+         # Verify the required CV fields
+         self.assertEqual(actual_payload["type"], "cv")
+         self.assertEqual(actual_payload["user_id"], 0)
+         self.assertEqual(actual_payload["run_id"], "cv_run_789")
+         self.assertEqual(actual_payload["prediction_artifact_path"], "coco_dt.json")
+         self.assertEqual(actual_payload["metrics_artifact_path"], "metrics.json")
+         self.assertEqual(actual_payload["ground_truth_artifact_path"], "coco_gt.json")
+
+         # LLM-only fields must not appear in the payload of a CV evaluation (Pydantic v2 automatically excludes optional fields that were not set)
+         self.assertNotIn("dataset_id", actual_payload)
+         self.assertNotIn("dataset_version_id", actual_payload)
+         self.assertNotIn("evaled_artifact_path", actual_payload)
+         self.assertNotIn("report", actual_payload)
+
+     def test_create_cv_run_custom_user_id(self):
+         """Test creating a CV evaluation run - custom user_id"""
+         # Mock a successful creation response
+         mock_eval_run = {
+             "id": 999,
+             "name": "cv_eval_custom",
+             "description": "CV evaluation custom",
+             "user_id": 3750,
+             "model_id": 3,
+             "model_name": "cv_model_v2",
+             "dataset_id": 0,  # default value instead of None
+             "dataset_version_id": 0,  # default value instead of None
+             "dataset_name": "",  # empty string instead of None
+             "status": "created",
+             "prediction_artifact_path": "coco_dt.json",
+             "evaled_artifact_path": "",  # empty string instead of None
+             "run_id": "cv_run_999",
+             "dataset_summary": {},
+             "metrics_summary": {},
+             "viz_summary": {},
+             "eval_config": None,
+             "created_at": 1640995200,
+             "updated_at": 1640995200
+         }
+
+         mock_response = {
+             "code": 0,
+             "msg": None,
+             "data": {"eval_run": mock_eval_run}
+         }
+
+         mock_resp = Mock()
+         mock_resp.json.return_value = mock_response
+         self.http_client.post.return_value = mock_resp
+
+         # Call the method (with a custom user_id)
+         result = self.eval_service.create_cv_run(
+             run_id="cv_run_999",
+             prediction_artifact_path="coco_dt.json",
+             metrics_artifact_path="metrics.json",
+             ground_truth_artifact_path="coco_gt.json",
+             user_id=3750
+         )
+
+         # Verify the result
+         self.assertEqual(result, 999)
+
+         # Verify the HTTP call
+         self.http_client.post.assert_called_once()
+         call_args = self.http_client.post.call_args
+         actual_payload = call_args[1]["json"]
+
+         # Verify the key CV fields
+         self.assertEqual(actual_payload["type"], "cv")
+         self.assertEqual(actual_payload["user_id"], 3750)
+         self.assertEqual(actual_payload["run_id"], "cv_run_999")
+         self.assertEqual(actual_payload["prediction_artifact_path"], "coco_dt.json")
+         self.assertEqual(actual_payload["metrics_artifact_path"], "metrics.json")
+         self.assertEqual(actual_payload["ground_truth_artifact_path"], "coco_gt.json")
+
+     def test_create_eval_api_error(self):
+         """Test creating an evaluation - API error"""
+         with patch('aihub.services.dataset_management.DatasetManagementService') as mock_dataset_service_class:
+             # Mock the dataset version
+             mock_dataset_version = Mock()
+             mock_dataset_version.dataset_id = 1
+             mock_dataset_version.id = 1
+
+             mock_dataset_service = Mock()
+             mock_dataset_service.get_dataset_version_by_name.return_value = mock_dataset_version
+             mock_dataset_service_class.return_value = mock_dataset_service
+
+             # Simulate an API error
+             mock_response = {
+                 "code": 2001,
+                 "msg": "Invalid dataset version",
+                 "data": None
+             }
+
+             mock_resp = Mock()
+             mock_resp.json.return_value = mock_response
+             self.http_client.post.return_value = mock_resp
+
+             with self.assertRaises(Exception) as context:
+                 self.eval_service.create(
+                     dataset_version_name="invalid_dataset",
+                     prediction_artifact_path="/path/to/prediction.json",
+                     evaled_artifact_path="/path/to/evaled.json",
+                     report_json={"accuracy": 0.95},
+                     run_id="test_run_error"
+                 )
+
+             self.assertIn("backend code 2001", str(context.exception))
+             self.assertIn("Invalid dataset version", str(context.exception))
+
+     def test_create_cv_run_api_error(self):
+         """Test creating a CV evaluation run - API error"""
+         # Simulate an API error
+         mock_response = {
+             "code": 3001,
+             "msg": "Invalid CV run parameters",
+             "data": None
+         }
+
+         mock_resp = Mock()
+         mock_resp.json.return_value = mock_response
+         self.http_client.post.return_value = mock_resp
+
+         with self.assertRaises(Exception) as context:
+             self.eval_service.create_cv_run(
+                 run_id="invalid_cv_run",
+                 prediction_artifact_path="invalid.json",
+                 metrics_artifact_path="invalid_metrics.json",
+                 ground_truth_artifact_path="invalid_gt.json"
+             )
+
+         self.assertIn("backend code 3001", str(context.exception))
+         self.assertIn("Invalid CV run parameters", str(context.exception))
+
+
+ if __name__ == "__main__":
+     unittest.main()
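
The new tests mock the httpx client, so they need no running backend. Assuming the package (or the src/ layout) is importable, a minimal sketch for running just this module through unittest discovery:

    import unittest

    # Discover and run only the evaluation tests added in this release;
    # the "tests" directory and file name mirror the layout shown above.
    suite = unittest.defaultTestLoader.discover("tests", pattern="test_eval.py")
    unittest.TextTestRunner(verbosity=2).run(suite)
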
@@ -1 +0,0 @@
- __version__ = "0.1.19"
@@ -1,26 +0,0 @@
- # !/usr/bin/env python
- # -*-coding:utf-8 -*-
- from typing import Dict
-
- from pydantic import BaseModel, Field
-
-
- class CreateEvalReq(BaseModel):
-     """Request to create an evaluation task"""
-     dataset_id: int = Field(description="Dataset ID")
-     dataset_version_id: int = Field(description="Dataset version ID")
-     prediction_artifact_path: str = Field(description="Path to the prediction artifact")
-     evaled_artifact_path: str = Field(description="Path to the evaluation result artifact")
-     run_id: str = Field(description="Run ID")
-     user_id: int = Field(0, description="User ID")
-     report: Dict = Field(default_factory=dict, description="Evaluation report")
-
-
- class EvalRun(BaseModel):
-     """Run entity of an evaluation task"""
-     id: int = Field(description="Evaluation run ID")
-
-
- class CreateEvalResp(BaseModel):
-     """Response returned when an evaluation task is created"""
-     eval_run: EvalRun = Field(alias="eval_run", description="Evaluation run information")
@@ -1,75 +0,0 @@
- # !/usr/bin/env python
- # -*- coding:utf-8 -*-
- """Evaluation platform service module
-
- This module provides capabilities around **model evaluation (Run → Report)**:
-
- - **Create evaluation tasks / evaluation reports**
- """
-
- import httpx
-
- from ..exceptions import APIError
- from ..models.common import APIWrapper
- from ..models.eval import CreateEvalReq, CreateEvalResp
-
- _BASE = "/eval-platform/api/v1"
-
-
- class EvalService:
-     """Evaluation service"""
-
-     def __init__(self, http: httpx.Client):
-         self._http = http
-         self._eval = _Eval(http)
-
-     def create(
-         self,
-         dataset_version_name: str,
-         prediction_artifact_path: str,
-         evaled_artifact_path: str,
-         report_json: dict,
-         run_id,
-     ) -> int:
-         """Create an evaluation report
-
-         Args:
-             run_id (str): run ID
-             report_json (dict): report content
-             evaled_artifact_path: path to the evaluation result artifact
-             prediction_artifact_path: path to the prediction artifact
-             dataset_version_name (str): dataset version name
-
-
-         Returns:
-             id (int): evaluation report ID
-
-         """
-         from .dataset_management import DatasetManagementService
-
-         dataset_service = DatasetManagementService(self._http)
-         dataset_version = dataset_service.get_dataset_version_by_name(
-             dataset_version_name
-         )
-         payload = CreateEvalReq(
-             dataset_id=dataset_version.dataset_id,
-             dataset_version_id=dataset_version.id,
-             evaled_artifact_path=evaled_artifact_path,
-             prediction_artifact_path=prediction_artifact_path,
-             report=report_json,
-             run_id=run_id,
-         )
-
-         return self._eval.create(payload)
-
-
- class _Eval:
-     def __init__(self, http: httpx.Client):
-         self._http = http
-
-     def create(self, payload: CreateEvalReq) -> int:
-         resp = self._http.post(f"{_BASE}/run/", json=payload.model_dump())
-         wrapper = APIWrapper[CreateEvalResp].model_validate(resp.json())
-         if wrapper.code != 0:
-             raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
-         return wrapper.data.eval_run.id