pro-craft 0.1.25__py3-none-any.whl → 0.1.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pro-craft might be problematic. Click here for more details.
- pro_craft/database.py +3 -5
- pro_craft/prompt_craft/async_.py +112 -6
- pro_craft/prompt_craft/new.py +3 -3
- pro_craft/prompt_craft/sync.py +3 -3
- pro_craft/server/router/prompt.py +3 -3
- {pro_craft-0.1.25.dist-info → pro_craft-0.1.26.dist-info}/METADATA +1 -1
- {pro_craft-0.1.25.dist-info → pro_craft-0.1.26.dist-info}/RECORD +9 -10
- pro_craft/prompt_craft/evals.py +0 -61
- {pro_craft-0.1.25.dist-info → pro_craft-0.1.26.dist-info}/WHEEL +0 -0
- {pro_craft-0.1.25.dist-info → pro_craft-0.1.26.dist-info}/top_level.txt +0 -0
pro_craft/database.py
CHANGED
|
@@ -72,7 +72,7 @@ PromptBase = declarative_base()
|
|
|
72
72
|
|
|
73
73
|
class SyncMetadata(PromptBase):
|
|
74
74
|
"""用于存储同步元数据的表模型"""
|
|
75
|
-
__tablename__ = "
|
|
75
|
+
__tablename__ = "ai_sync_metadata"
|
|
76
76
|
id = Column(Integer, primary_key=True, autoincrement=True)
|
|
77
77
|
last_sync_time = Column(DateTime, default=datetime(1970, 1, 1))
|
|
78
78
|
table_name = Column(String(255), unique=True)
|
|
@@ -80,9 +80,8 @@ class SyncMetadata(PromptBase):
|
|
|
80
80
|
def __repr__(self):
|
|
81
81
|
return f"<SyncMetadata(table_name='{self.table_name}', last_sync_time='{self.last_sync_time}')>"
|
|
82
82
|
|
|
83
|
-
|
|
84
83
|
class Prompt(PromptBase):
|
|
85
|
-
__tablename__ = '
|
|
84
|
+
__tablename__ = 'ai_prompts' # 数据库中的表名,你可以改成你希望的名字
|
|
86
85
|
# __tablename__ = 'llm_prompt' # 数据库中的表名,你可以改成你希望的名字
|
|
87
86
|
|
|
88
87
|
# 定义联合唯一约束
|
|
@@ -177,9 +176,8 @@ class Prompt(PromptBase):
|
|
|
177
176
|
f"is_deleted='{self.is_deleted}...'>"
|
|
178
177
|
)
|
|
179
178
|
|
|
180
|
-
|
|
181
179
|
class UseCase(PromptBase):
|
|
182
|
-
__tablename__ = '
|
|
180
|
+
__tablename__ = 'ai_usecase' # 数据库中的表名,你可以改成你希望的名字
|
|
183
181
|
|
|
184
182
|
# id (int, primary_key=True, autoincrement=True)
|
|
185
183
|
# 你的属性表中 id 为 int, true (not null), true (primary key), 0 (length), ASC (key order), true (auto increment)
|
pro_craft/prompt_craft/async_.py
CHANGED
|
@@ -34,6 +34,8 @@ class IntellectRemoveError(Exception):
|
|
|
34
34
|
pass
|
|
35
35
|
|
|
36
36
|
BATCH_SIZE = 100
|
|
37
|
+
MIN_SUCCESS_RATE = 00.0 # 这里定义通过阈值, 高于该比例则通过
|
|
38
|
+
|
|
37
39
|
|
|
38
40
|
def slog(s, target: str = "target",logger = None):
|
|
39
41
|
COLOR_GREEN = "\033[92m"
|
|
@@ -94,7 +96,7 @@ async def get_last_sync_time(target_session: AsyncSession) -> datetime:
|
|
|
94
96
|
"""从目标数据库获取上次同步时间"""
|
|
95
97
|
# 修正点:使用 select() 和 execute()
|
|
96
98
|
result = await target_session.execute(
|
|
97
|
-
select(SyncMetadata).filter_by(table_name="
|
|
99
|
+
select(SyncMetadata).filter_by(table_name="ai_sync_metadata")
|
|
98
100
|
)
|
|
99
101
|
metadata_entry = result.scalar_one_or_none() # 获取单个对象或 None
|
|
100
102
|
|
|
@@ -110,7 +112,7 @@ async def update_last_sync_time(target_session: AsyncSession, new_sync_time: dat
|
|
|
110
112
|
"""更新目标数据库的上次同步时间"""
|
|
111
113
|
# 修正点:使用 select() 和 execute()
|
|
112
114
|
result = await target_session.execute(
|
|
113
|
-
select(SyncMetadata).filter_by(table_name="
|
|
115
|
+
select(SyncMetadata).filter_by(table_name="ai_sync_metadata")
|
|
114
116
|
)
|
|
115
117
|
metadata_entry = result.scalar_one_or_none()
|
|
116
118
|
|
|
@@ -118,7 +120,7 @@ async def update_last_sync_time(target_session: AsyncSession, new_sync_time: dat
|
|
|
118
120
|
metadata_entry.last_sync_time = new_sync_time
|
|
119
121
|
else:
|
|
120
122
|
# 如果不存在,则创建
|
|
121
|
-
new_metadata = SyncMetadata(table_name="
|
|
123
|
+
new_metadata = SyncMetadata(table_name="ai_sync_metadata", last_sync_time=new_sync_time)
|
|
122
124
|
target_session.add(new_metadata)
|
|
123
125
|
|
|
124
126
|
# 异步提交事务
|
|
@@ -128,6 +130,8 @@ async def update_last_sync_time(target_session: AsyncSession, new_sync_time: dat
|
|
|
128
130
|
|
|
129
131
|
|
|
130
132
|
|
|
133
|
+
|
|
134
|
+
|
|
131
135
|
class IntellectType(Enum):
|
|
132
136
|
train = "train"
|
|
133
137
|
inference = "inference"
|
|
@@ -364,6 +368,21 @@ class AsyncIntel():
|
|
|
364
368
|
session.add(prompt1)
|
|
365
369
|
await session.commit() # 提交事务,将数据写入数据库
|
|
366
370
|
|
|
371
|
+
async def get_use_case_by_sql(self,
|
|
372
|
+
target_prompt_id: str,
|
|
373
|
+
session = None
|
|
374
|
+
):
|
|
375
|
+
"""
|
|
376
|
+
从sql保存提示词
|
|
377
|
+
"""
|
|
378
|
+
stmt = select(UseCase).filter(UseCase.is_deleted == 0,
|
|
379
|
+
UseCase.prompt_id == target_prompt_id)
|
|
380
|
+
|
|
381
|
+
result = await session.execute(stmt)
|
|
382
|
+
# use_case = result.scalars().one_or_none()
|
|
383
|
+
use_case = result.scalars().all()
|
|
384
|
+
return use_case
|
|
385
|
+
|
|
367
386
|
async def save_use_case_by_sql(self,
|
|
368
387
|
prompt_id: str,
|
|
369
388
|
use_case:str = "",
|
|
@@ -374,6 +393,8 @@ class AsyncIntel():
|
|
|
374
393
|
"""
|
|
375
394
|
从sql保存提示词
|
|
376
395
|
"""
|
|
396
|
+
#TODO 存之前保证数据库中相同的prompt_id中没有重复的use_case
|
|
397
|
+
|
|
377
398
|
use_case = UseCase(prompt_id=prompt_id,
|
|
378
399
|
use_case = use_case,
|
|
379
400
|
output = output,
|
|
@@ -789,7 +810,6 @@ class AsyncIntel():
|
|
|
789
810
|
)
|
|
790
811
|
|
|
791
812
|
try:
|
|
792
|
-
|
|
793
813
|
json_str = extract_(ai_result,r'json')
|
|
794
814
|
# json_str = fix_broken_json_string(json_str)
|
|
795
815
|
ai_result = json.loads(json_str)
|
|
@@ -804,7 +824,7 @@ class AsyncIntel():
|
|
|
804
824
|
OutputFormat(**ai_result)
|
|
805
825
|
|
|
806
826
|
except JSONDecodeError as e:
|
|
807
|
-
raise IntellectRemoveFormatError(f"prompt_id: {prompt_id}
|
|
827
|
+
raise IntellectRemoveFormatError(f"prompt_id: {prompt_id} 生成的内容为无法被Json解析 {e}") from e
|
|
808
828
|
|
|
809
829
|
except ValidationError as e:
|
|
810
830
|
err_info = e.errors()[0]
|
|
@@ -845,7 +865,6 @@ class AsyncIntel():
|
|
|
845
865
|
)
|
|
846
866
|
results = await asyncio.gather(*tasks, return_exceptions=False)
|
|
847
867
|
return results
|
|
848
|
-
|
|
849
868
|
|
|
850
869
|
def intellect_remove_warp(self,prompt_id: str):
|
|
851
870
|
def outer_packing(func):
|
|
@@ -874,3 +893,90 @@ class AsyncIntel():
|
|
|
874
893
|
return result
|
|
875
894
|
return wrapper
|
|
876
895
|
return outer_packing
|
|
896
|
+
|
|
897
|
+
async def intellect_remove_format_eval(self,
|
|
898
|
+
OutputFormat: object,
|
|
899
|
+
prompt_id: str,
|
|
900
|
+
ExtraFormats: list[object] = [],
|
|
901
|
+
version: str = None,
|
|
902
|
+
):
|
|
903
|
+
|
|
904
|
+
async with create_async_session(self.engine) as session:
|
|
905
|
+
use_cases = await self.get_use_case_by_sql(target_prompt_id=prompt_id,session=session)
|
|
906
|
+
prompt_result = await self.get_prompts_from_sql(prompt_id=prompt_id,
|
|
907
|
+
session=session)
|
|
908
|
+
if prompt_result is None:
|
|
909
|
+
raise IntellectRemoveError("不存在的prompt_id")
|
|
910
|
+
if prompt_result.action_type != "inference":
|
|
911
|
+
raise IntellectRemoveError("请在inference模式下使用次类")
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
total_assertions = len(use_cases)
|
|
915
|
+
result_cases = []
|
|
916
|
+
|
|
917
|
+
async def evals_func(use_case,prompt_id,OutputFormat,ExtraFormats,version):
|
|
918
|
+
try:
|
|
919
|
+
# 这里将参数传入
|
|
920
|
+
await self.intellect_remove_format(
|
|
921
|
+
input_data = use_case.use_case,
|
|
922
|
+
prompt_id = prompt_id,
|
|
923
|
+
OutputFormat = OutputFormat,
|
|
924
|
+
ExtraFormats = ExtraFormats,
|
|
925
|
+
version = version,
|
|
926
|
+
inference_save_case = False,
|
|
927
|
+
)
|
|
928
|
+
# TODO base_eval
|
|
929
|
+
# TODO 人类评价 eval
|
|
930
|
+
# TODO llm 评价 eval
|
|
931
|
+
result_cases.append({"type":"Successful","case":use_case.use_case,"reply":f"pass"})
|
|
932
|
+
use_case.output = "Successful"
|
|
933
|
+
except IntellectRemoveFormatError as e:
|
|
934
|
+
result_cases.append({"type":"FAILED","case":use_case.use_case,"reply":f"{e}"})
|
|
935
|
+
use_case.output = f"{"FAILED"}-{e}"
|
|
936
|
+
except Exception as e: # 捕获其他可能的错误
|
|
937
|
+
result_cases.append({"type":"FAILED","case":use_case.use_case,"reply":f"Exp {e}"})
|
|
938
|
+
use_case.output = f"{"FAILED"}-{e}"
|
|
939
|
+
await session.commit()
|
|
940
|
+
|
|
941
|
+
tasks = []
|
|
942
|
+
for use_case in use_cases:
|
|
943
|
+
tasks.append(
|
|
944
|
+
evals_func(
|
|
945
|
+
use_case = use_case,
|
|
946
|
+
prompt_id = prompt_id,
|
|
947
|
+
OutputFormat = OutputFormat,
|
|
948
|
+
ExtraFormats = ExtraFormats,
|
|
949
|
+
version = version
|
|
950
|
+
)
|
|
951
|
+
)
|
|
952
|
+
await asyncio.gather(*tasks, return_exceptions=False)
|
|
953
|
+
|
|
954
|
+
|
|
955
|
+
successful_assertions = 0
|
|
956
|
+
bad_case = []
|
|
957
|
+
for i in result_cases:
|
|
958
|
+
if i['type'] == "Successful":
|
|
959
|
+
successful_assertions += 1
|
|
960
|
+
else:
|
|
961
|
+
bad_case.append(i)
|
|
962
|
+
|
|
963
|
+
success_rate = (successful_assertions / total_assertions) * 100
|
|
964
|
+
print(f"\n--- Aggregated Results ---")
|
|
965
|
+
print(f"Total test cases: {total_assertions}")
|
|
966
|
+
print(f"Successful cases: {successful_assertions}")
|
|
967
|
+
print(f"Success Rate: {success_rate:.2f}%")
|
|
968
|
+
|
|
969
|
+
# if success_rate >= MIN_SUCCESS_RATE:
|
|
970
|
+
# return "通过", json.dumps(result_cases,ensure_ascii=False)
|
|
971
|
+
# else:
|
|
972
|
+
# return "未通过",json.dumps(result_cases,ensure_ascii=False)
|
|
973
|
+
|
|
974
|
+
print(bad_case)
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
# return results
|
|
978
|
+
|
|
979
|
+
|
|
980
|
+
|
|
981
|
+
|
|
982
|
+
# 整体测试d, 测试未通过d, 大模型调整再测试, 依旧不通过, 大模型裂变, 仍不通过, 互换人力
|
pro_craft/prompt_craft/new.py
CHANGED
|
@@ -37,19 +37,19 @@ def slog(s, target: str = "target",logger = None):
|
|
|
37
37
|
|
|
38
38
|
def get_last_sync_time(target_session) -> datetime:
|
|
39
39
|
"""从目标数据库获取上次同步时间"""
|
|
40
|
-
metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="
|
|
40
|
+
metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="ai_sync_metadata").first()
|
|
41
41
|
if metadata_entry:
|
|
42
42
|
return metadata_entry.last_sync_time
|
|
43
43
|
return datetime(1970, 1, 1) # 默认一个很早的时间
|
|
44
44
|
|
|
45
45
|
def update_last_sync_time(target_session, new_sync_time: datetime):
|
|
46
46
|
"""更新目标数据库的上次同步时间"""
|
|
47
|
-
metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="
|
|
47
|
+
metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="ai_sync_metadata").first()
|
|
48
48
|
if metadata_entry:
|
|
49
49
|
metadata_entry.last_sync_time = new_sync_time
|
|
50
50
|
else:
|
|
51
51
|
# 如果不存在,则创建
|
|
52
|
-
new_metadata = SyncMetadata(table_name="
|
|
52
|
+
new_metadata = SyncMetadata(table_name="ai_sync_metadata", last_sync_time=new_sync_time)
|
|
53
53
|
target_session.add(new_metadata)
|
|
54
54
|
target_session.commit()
|
|
55
55
|
print(f"Updated last sync time to: {new_sync_time}")
|
pro_craft/prompt_craft/sync.py
CHANGED
|
@@ -39,19 +39,19 @@ def slog(s, target: str = "target",logger = None):
|
|
|
39
39
|
|
|
40
40
|
def get_last_sync_time(target_session) -> datetime:
|
|
41
41
|
"""从目标数据库获取上次同步时间"""
|
|
42
|
-
metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="
|
|
42
|
+
metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="ai_sync_metadata").first()
|
|
43
43
|
if metadata_entry:
|
|
44
44
|
return metadata_entry.last_sync_time
|
|
45
45
|
return datetime(1970, 1, 1) # 默认一个很早的时间
|
|
46
46
|
|
|
47
47
|
def update_last_sync_time(target_session, new_sync_time: datetime):
|
|
48
48
|
"""更新目标数据库的上次同步时间"""
|
|
49
|
-
metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="
|
|
49
|
+
metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="ai_sync_metadata").first()
|
|
50
50
|
if metadata_entry:
|
|
51
51
|
metadata_entry.last_sync_time = new_sync_time
|
|
52
52
|
else:
|
|
53
53
|
# 如果不存在,则创建
|
|
54
|
-
new_metadata = SyncMetadata(table_name="
|
|
54
|
+
new_metadata = SyncMetadata(table_name="ai_sync_metadata", last_sync_time=new_sync_time)
|
|
55
55
|
target_session.add(new_metadata)
|
|
56
56
|
target_session.commit()
|
|
57
57
|
print(f"Updated last sync time to: {new_sync_time}")
|
pro_craft/server/router/prompt.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from fastapi import APIRouter
|
|
4
4
|
from pro_craft import Intel,AsyncIntel
|
|
5
|
-
from pro_craft.utils import
|
|
5
|
+
from pro_craft.utils import create_async_session
|
|
6
6
|
|
|
7
7
|
def create_router(database_url: str,
|
|
8
8
|
slave_database_url: str,
|
|
@@ -44,7 +44,7 @@ def create_router(database_url: str,
|
|
|
44
44
|
|
|
45
45
|
@router.get("/get_latest_prompt")
|
|
46
46
|
async def get_latest_prompt(prompt_id: str):
|
|
47
|
-
with
|
|
47
|
+
async with create_async_session(intels.engine) as session:
|
|
48
48
|
result = await intels.get_prompts_from_sql(
|
|
49
49
|
prompt_id=prompt_id,
|
|
50
50
|
session=session
|
|
@@ -59,7 +59,7 @@ def create_router(database_url: str,
|
|
|
59
59
|
|
|
60
60
|
@router.get("/roll_back")
|
|
61
61
|
async def roll_back(prompt_id:str,version:str):
|
|
62
|
-
with
|
|
62
|
+
async with create_async_session(intels.engine) as session:
|
|
63
63
|
result = await intels.get_prompts_from_sql(
|
|
64
64
|
prompt_id=prompt_id,
|
|
65
65
|
version = version,
|
{pro_craft-0.1.25.dist-info → pro_craft-0.1.26.dist-info}/RECORD
CHANGED
|
@@ -1,20 +1,19 @@
|
|
|
1
1
|
pro_craft/__init__.py,sha256=KU9yexREoJVO5eyDNnOCzdYC8Bfo6p4Z45KWZTeO63U,698
|
|
2
|
-
pro_craft/database.py,sha256=
|
|
2
|
+
pro_craft/database.py,sha256=FJmknolcXpx6sYHuiS1LgXX7FV8drSpoN3RwVvRR2FI,8741
|
|
3
3
|
pro_craft/file_manager.py,sha256=abVAlJ07_egWNuTj4JiP4me8NloQrsXGNd-SP63ab94,3738
|
|
4
4
|
pro_craft/log.py,sha256=x9RS_0LITN2SE8dcVaEcUFdcfr__jCYpFkIkVmqE5f0,3061
|
|
5
5
|
pro_craft/utils.py,sha256=R1DFkS4dsm5dIhg8lLTgBBvItvIYyyojROdh-ykqiYk,5250
|
|
6
6
|
pro_craft/code_helper/coder.py,sha256=L6pRQr0pYRIHrMFZ4-pO_tZf1koxgGgF3L7Vl-GIyjM,24687
|
|
7
7
|
pro_craft/code_helper/designer.py,sha256=3gyCqrjcw61sHzDjUPKhL1LOAE8xWLLbNT8NlK2mFLc,4739
|
|
8
8
|
pro_craft/prompt_craft/__init__.py,sha256=83ruWO1Oci-DWvdVhPqcQrgdZTNfbmK72VQCkWASk7A,80
|
|
9
|
-
pro_craft/prompt_craft/async_.py,sha256=
|
|
10
|
-
pro_craft/prompt_craft/
|
|
11
|
-
pro_craft/prompt_craft/
|
|
12
|
-
pro_craft/prompt_craft/sync.py,sha256=X2BOzdqLBVZyMm5noEOaX2X5LYHl3nl2d3qAOwLoJr8,26169
|
|
9
|
+
pro_craft/prompt_craft/async_.py,sha256=w_M3d9-8BRQ3EUMimH_D1VSbQCelLHGqhcsQUG9K5tY,43085
|
|
10
|
+
pro_craft/prompt_craft/new.py,sha256=ULjGGl95vmHrOs7XECJGlaqj1NE9BypE5WnFYhGugRY,25903
|
|
11
|
+
pro_craft/prompt_craft/sync.py,sha256=w-zXC9_yVxgvnotimcs2POEXaHWD6ibspdPwWL9zqXk,26178
|
|
13
12
|
pro_craft/server/mcp/__init__.py,sha256=4dbl-lFcm0r2tkOP04OxqiZG2jR-rqF181qi2AfU6UA,123
|
|
14
13
|
pro_craft/server/mcp/prompt.py,sha256=OZrsyUfSQMOY_KX7dWthW209adz5JfELsQ0ODfuQR44,1245
|
|
15
14
|
pro_craft/server/router/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
-
pro_craft/server/router/prompt.py,sha256=
|
|
17
|
-
pro_craft-0.1.
|
|
18
|
-
pro_craft-0.1.
|
|
19
|
-
pro_craft-0.1.
|
|
20
|
-
pro_craft-0.1.
|
|
15
|
+
pro_craft/server/router/prompt.py,sha256=Wa4FfYRL6oeyA3F-79pmPeIH0Vo8wSEv7RH1lP6jXck,2907
|
|
16
|
+
pro_craft-0.1.26.dist-info/METADATA,sha256=LE_FozWikrjyahzEUicUl2xBo9XGrBeRyctJ8T-B_is,1689
|
|
17
|
+
pro_craft-0.1.26.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
+
pro_craft-0.1.26.dist-info/top_level.txt,sha256=yqYDHArnYMWpeCxkmGRwlL6sJtxiOUnYylLDx9EOgFg,10
|
|
19
|
+
pro_craft-0.1.26.dist-info/RECORD,,
|
pro_craft/prompt_craft/evals.py
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
import json
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
############evals##############
|
|
6
|
-
|
|
7
|
-
# 异步
|
|
8
|
-
class Base_Evals():
|
|
9
|
-
def __init__(self):
|
|
10
|
-
"""
|
|
11
|
-
# TODO 2 自动优化prompt 并提升稳定性, 并测试
|
|
12
|
-
通过重写继承来使用它
|
|
13
|
-
"""
|
|
14
|
-
self.MIN_SUCCESS_RATE = 00.0 # 这里定义通过阈值, 高于该比例则通过
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
async def _assert_eval_function(self,params):
|
|
18
|
-
#这里定义函数的评价体系
|
|
19
|
-
print(params,'params')
|
|
20
|
-
|
|
21
|
-
async def get_success_rate(self,test_cases:list[tuple]):
|
|
22
|
-
"""
|
|
23
|
-
# 这里定义数据
|
|
24
|
-
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
|
-
successful_assertions = 0
|
|
28
|
-
total_assertions = len(test_cases)
|
|
29
|
-
result_cases = []
|
|
30
|
-
|
|
31
|
-
for i, params in enumerate(test_cases):
|
|
32
|
-
try:
|
|
33
|
-
# 这里将参数传入
|
|
34
|
-
await self._assert_eval_function(params)
|
|
35
|
-
successful_assertions += 1
|
|
36
|
-
result_cases.append({"type":"Successful","--input--":params,"evaluate_info":f"满足要求"})
|
|
37
|
-
except AssertionError as e:
|
|
38
|
-
result_cases.append({"type":"FAILED","--input--":params,"evaluate_info":f"ERROR {e}"})
|
|
39
|
-
except Exception as e: # 捕获其他可能的错误
|
|
40
|
-
result_cases.append({"type":"FAILED","--input--":params,"evaluate_info":f"ERROR {e}"})
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
success_rate = (successful_assertions / total_assertions) * 100
|
|
44
|
-
print(f"\n--- Aggregated Results ---")
|
|
45
|
-
print(f"Total test cases: {total_assertions}")
|
|
46
|
-
print(f"Successful cases: {successful_assertions}")
|
|
47
|
-
print(f"Success Rate: {success_rate:.2f}%")
|
|
48
|
-
|
|
49
|
-
if success_rate >= self.MIN_SUCCESS_RATE:
|
|
50
|
-
return "通过", json.dumps(result_cases,ensure_ascii=False)
|
|
51
|
-
else:
|
|
52
|
-
return "未通过",json.dumps(result_cases,ensure_ascii=False)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def global_evals():
|
|
56
|
-
pass
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
{pro_craft-0.1.25.dist-info → pro_craft-0.1.26.dist-info}/WHEEL
File without changes
|
|
{pro_craft-0.1.25.dist-info → pro_craft-0.1.26.dist-info}/top_level.txt
File without changes
|