pro-craft 0.1.24__tar.gz → 0.1.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pro-craft might be problematic.
- {pro_craft-0.1.24 → pro_craft-0.1.26}/PKG-INFO +1 -1
- {pro_craft-0.1.24 → pro_craft-0.1.26}/pyproject.toml +1 -1
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/database.py +3 -5
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/prompt_craft/async_.py +112 -10
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/prompt_craft/new.py +3 -3
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/prompt_craft/sync.py +3 -3
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/server/router/prompt.py +14 -12
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft.egg-info/PKG-INFO +1 -1
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft.egg-info/SOURCES.txt +0 -1
- pro_craft-0.1.24/src/pro_craft/prompt_craft/evals.py +0 -61
- {pro_craft-0.1.24 → pro_craft-0.1.26}/README.md +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/setup.cfg +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/__init__.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/code_helper/coder.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/code_helper/designer.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/file_manager.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/log.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/prompt_craft/__init__.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/server/mcp/__init__.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/server/mcp/prompt.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/server/router/__init__.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft/utils.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft.egg-info/dependency_links.txt +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft.egg-info/requires.txt +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/src/pro_craft.egg-info/top_level.txt +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/tests/test22.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/tests/test_coder.py +0 -0
- {pro_craft-0.1.24 → pro_craft-0.1.26}/tests/test_designer.py +0 -0
src/pro_craft/database.py:

```diff
@@ -72,7 +72,7 @@ PromptBase = declarative_base()
 
 class SyncMetadata(PromptBase):
     """用于存储同步元数据的表模型"""
-    __tablename__ = "
+    __tablename__ = "ai_sync_metadata"
     id = Column(Integer, primary_key=True, autoincrement=True)
     last_sync_time = Column(DateTime, default=datetime(1970, 1, 1))
     table_name = Column(String(255), unique=True)
@@ -80,9 +80,8 @@ class SyncMetadata(PromptBase):
     def __repr__(self):
         return f"<SyncMetadata(table_name='{self.table_name}', last_sync_time='{self.last_sync_time}')>"
 
-
 class Prompt(PromptBase):
-    __tablename__ = '
+    __tablename__ = 'ai_prompts' # 数据库中的表名,你可以改成你希望的名字
     # __tablename__ = 'llm_prompt' # 数据库中的表名,你可以改成你希望的名字
 
     # 定义联合唯一约束
@@ -177,9 +176,8 @@ class Prompt(PromptBase):
             f"is_deleted='{self.is_deleted}...'>"
         )
 
-
 class UseCase(PromptBase):
-    __tablename__ = '
+    __tablename__ = 'ai_usecase' # 数据库中的表名,你可以改成你希望的名字
 
     # id (int, primary_key=True, autoincrement=True)
     # 你的属性表中 id 为 int, true (not null), true (primary key), 0 (length), ASC (key order), true (auto increment)
```
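The database.py change only renames the tables declared by the three ORM models to `ai_sync_metadata`, `ai_prompts`, and `ai_usecase`. As a rough sketch of what that implies for anyone creating the schema, the snippet below builds the renamed tables from the declarative metadata; it assumes `PromptBase` is importable from `pro_craft.database` (it appears in the hunk context above) and uses a placeholder aiosqlite URL.

```python
# Minimal sketch, not part of the package: create the renamed tables
# (ai_sync_metadata, ai_prompts, ai_usecase) from the declarative metadata.
# Assumes PromptBase is exported by pro_craft.database and that the
# aiosqlite driver is installed; the URL is a placeholder.
import asyncio
from sqlalchemy.ext.asyncio import create_async_engine
from pro_craft.database import PromptBase

async def create_tables(database_url: str) -> None:
    engine = create_async_engine(database_url)
    async with engine.begin() as conn:
        # create_all picks up the new __tablename__ values from the models
        await conn.run_sync(PromptBase.metadata.create_all)
    await engine.dispose()

if __name__ == "__main__":
    asyncio.run(create_tables("sqlite+aiosqlite:///pro_craft.db"))
```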
src/pro_craft/prompt_craft/async_.py:

```diff
@@ -34,6 +34,8 @@ class IntellectRemoveError(Exception):
     pass
 
 BATCH_SIZE = 100
+MIN_SUCCESS_RATE = 00.0 # 这里定义通过阈值, 高于该比例则通过
+
 
 def slog(s, target: str = "target",logger = None):
     COLOR_GREEN = "\033[92m"
@@ -94,7 +96,7 @@ async def get_last_sync_time(target_session: AsyncSession) -> datetime:
     """从目标数据库获取上次同步时间"""
     # 修正点:使用 select() 和 execute()
     result = await target_session.execute(
-        select(SyncMetadata).filter_by(table_name="
+        select(SyncMetadata).filter_by(table_name="ai_sync_metadata")
    )
     metadata_entry = result.scalar_one_or_none() # 获取单个对象或 None
 
@@ -110,7 +112,7 @@ async def update_last_sync_time(target_session: AsyncSession, new_sync_time: dat
     """更新目标数据库的上次同步时间"""
     # 修正点:使用 select() 和 execute()
     result = await target_session.execute(
-        select(SyncMetadata).filter_by(table_name="
+        select(SyncMetadata).filter_by(table_name="ai_sync_metadata")
    )
     metadata_entry = result.scalar_one_or_none()
 
@@ -118,7 +120,7 @@ async def update_last_sync_time(target_session: AsyncSession, new_sync_time: dat
         metadata_entry.last_sync_time = new_sync_time
     else:
         # 如果不存在,则创建
-        new_metadata = SyncMetadata(table_name="
+        new_metadata = SyncMetadata(table_name="ai_sync_metadata", last_sync_time=new_sync_time)
         target_session.add(new_metadata)
 
     # 异步提交事务
@@ -128,6 +130,8 @@ async def update_last_sync_time(target_session: AsyncSession, new_sync_time: dat
 
 
 
+
+
 class IntellectType(Enum):
     train = "train"
     inference = "inference"
@@ -364,6 +368,21 @@ class AsyncIntel():
             session.add(prompt1)
             await session.commit() # 提交事务,将数据写入数据库
 
+    async def get_use_case_by_sql(self,
+                                  target_prompt_id: str,
+                                  session = None
+                                  ):
+        """
+        从sql保存提示词
+        """
+        stmt = select(UseCase).filter(UseCase.is_deleted == 0,
+                                      UseCase.prompt_id == target_prompt_id)
+
+        result = await session.execute(stmt)
+        # use_case = result.scalars().one_or_none()
+        use_case = result.scalars().all()
+        return use_case
+
     async def save_use_case_by_sql(self,
                                    prompt_id: str,
                                    use_case:str = "",
@@ -374,6 +393,8 @@ class AsyncIntel():
         """
         从sql保存提示词
         """
+        #TODO 存之前保证数据库中相同的prompt_id中没有重复的use_case
+
         use_case = UseCase(prompt_id=prompt_id,
                            use_case = use_case,
                            output = output,
@@ -496,10 +517,6 @@ class AsyncIntel():
             return "init"
 
 
-
-
-
-
 
     async def intellect_remove(self,
                                input_data: dict | str,
@@ -793,7 +810,6 @@ class AsyncIntel():
         )
 
         try:
-
             json_str = extract_(ai_result,r'json')
             # json_str = fix_broken_json_string(json_str)
             ai_result = json.loads(json_str)
@@ -808,7 +824,7 @@ class AsyncIntel():
             OutputFormat(**ai_result)
 
         except JSONDecodeError as e:
-            raise IntellectRemoveFormatError(f"prompt_id: {prompt_id}
+            raise IntellectRemoveFormatError(f"prompt_id: {prompt_id} 生成的内容为无法被Json解析 {e}") from e
 
         except ValidationError as e:
             err_info = e.errors()[0]
@@ -849,7 +865,6 @@ class AsyncIntel():
         )
         results = await asyncio.gather(*tasks, return_exceptions=False)
         return results
-
 
     def intellect_remove_warp(self,prompt_id: str):
         def outer_packing(func):
@@ -878,3 +893,90 @@ class AsyncIntel():
                 return result
             return wrapper
         return outer_packing
+
+    async def intellect_remove_format_eval(self,
+                                           OutputFormat: object,
+                                           prompt_id: str,
+                                           ExtraFormats: list[object] = [],
+                                           version: str = None,
+                                           ):
+
+        async with create_async_session(self.engine) as session:
+            use_cases = await self.get_use_case_by_sql(target_prompt_id=prompt_id,session=session)
+            prompt_result = await self.get_prompts_from_sql(prompt_id=prompt_id,
+                                                            session=session)
+            if prompt_result is None:
+                raise IntellectRemoveError("不存在的prompt_id")
+            if prompt_result.action_type != "inference":
+                raise IntellectRemoveError("请在inference模式下使用次类")
+
+
+            total_assertions = len(use_cases)
+            result_cases = []
+
+            async def evals_func(use_case,prompt_id,OutputFormat,ExtraFormats,version):
+                try:
+                    # 这里将参数传入
+                    await self.intellect_remove_format(
+                        input_data = use_case.use_case,
+                        prompt_id = prompt_id,
+                        OutputFormat = OutputFormat,
+                        ExtraFormats = ExtraFormats,
+                        version = version,
+                        inference_save_case = False,
+                    )
+                    # TODO base_eval
+                    # TODO 人类评价 eval
+                    # TODO llm 评价 eval
+                    result_cases.append({"type":"Successful","case":use_case.use_case,"reply":f"pass"})
+                    use_case.output = "Successful"
+                except IntellectRemoveFormatError as e:
+                    result_cases.append({"type":"FAILED","case":use_case.use_case,"reply":f"{e}"})
+                    use_case.output = f"{"FAILED"}-{e}"
+                except Exception as e: # 捕获其他可能的错误
+                    result_cases.append({"type":"FAILED","case":use_case.use_case,"reply":f"Exp {e}"})
+                    use_case.output = f"{"FAILED"}-{e}"
+                await session.commit()
+
+            tasks = []
+            for use_case in use_cases:
+                tasks.append(
+                    evals_func(
+                        use_case = use_case,
+                        prompt_id = prompt_id,
+                        OutputFormat = OutputFormat,
+                        ExtraFormats = ExtraFormats,
+                        version = version
+                    )
+                )
+            await asyncio.gather(*tasks, return_exceptions=False)
+
+
+            successful_assertions = 0
+            bad_case = []
+            for i in result_cases:
+                if i['type'] == "Successful":
+                    successful_assertions += 1
+                else:
+                    bad_case.append(i)
+
+            success_rate = (successful_assertions / total_assertions) * 100
+            print(f"\n--- Aggregated Results ---")
+            print(f"Total test cases: {total_assertions}")
+            print(f"Successful cases: {successful_assertions}")
+            print(f"Success Rate: {success_rate:.2f}%")
+
+            # if success_rate >= MIN_SUCCESS_RATE:
+            #     return "通过", json.dumps(result_cases,ensure_ascii=False)
+            # else:
+            #     return "未通过",json.dumps(result_cases,ensure_ascii=False)
+
+            print(bad_case)
+
+
+            # return results
+
+
+
+
+        # 整体测试d, 测试未通过d, 大模型调整再测试, 依旧不通过, 大模型裂变, 仍不通过, 互换人力
```
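The main additions to async_.py are the `get_use_case_by_sql` helper and the `intellect_remove_format_eval` batch evaluator. Below is a minimal usage sketch, assuming the import paths visible in the router diff further down (`AsyncIntel` from `pro_craft`, `create_async_session` from `pro_craft.utils`); `ReplyFormat`, the database URL, the model name, and the prompt id are hypothetical placeholders, and the target prompt must already exist with `action_type == "inference"` for the evaluator to run.

```python
# Sketch only: exercises the new get_use_case_by_sql and
# intellect_remove_format_eval methods visible in this diff.
# ReplyFormat, the URL, model name, and prompt id are placeholders.
import asyncio
from pydantic import BaseModel
from pro_craft import AsyncIntel
from pro_craft.utils import create_async_session

class ReplyFormat(BaseModel):  # hypothetical OutputFormat model
    answer: str

async def main() -> None:
    intels = AsyncIntel(database_url="sqlite+aiosqlite:///pro_craft.db",
                        model_name="gpt-4o-mini",
                        logger=None)

    # Fetch the stored, non-deleted use cases for one prompt.
    async with create_async_session(intels.engine) as session:
        use_cases = await intels.get_use_case_by_sql(target_prompt_id="demo_prompt",
                                                     session=session)
        print(f"{len(use_cases)} stored use cases")

    # Re-run every stored use case through intellect_remove_format and
    # print the aggregated pass rate (the method prints its own summary).
    await intels.intellect_remove_format_eval(OutputFormat=ReplyFormat,
                                              prompt_id="demo_prompt")

asyncio.run(main())
```

Note that although the module-level `MIN_SUCCESS_RATE` constant is added, the threshold check inside `intellect_remove_format_eval` is still commented out in 0.1.26, so the method only prints the aggregated results and returns nothing.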
src/pro_craft/prompt_craft/new.py:

```diff
@@ -37,19 +37,19 @@ def slog(s, target: str = "target",logger = None):
 
 def get_last_sync_time(target_session) -> datetime:
     """从目标数据库获取上次同步时间"""
-    metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="
+    metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="ai_sync_metadata").first()
     if metadata_entry:
         return metadata_entry.last_sync_time
     return datetime(1970, 1, 1) # 默认一个很早的时间
 
 def update_last_sync_time(target_session, new_sync_time: datetime):
     """更新目标数据库的上次同步时间"""
-    metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="
+    metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="ai_sync_metadata").first()
     if metadata_entry:
         metadata_entry.last_sync_time = new_sync_time
     else:
         # 如果不存在,则创建
-        new_metadata = SyncMetadata(table_name="
+        new_metadata = SyncMetadata(table_name="ai_sync_metadata", last_sync_time=new_sync_time)
         target_session.add(new_metadata)
     target_session.commit()
     print(f"Updated last sync time to: {new_sync_time}")
```
src/pro_craft/prompt_craft/sync.py:

```diff
@@ -39,19 +39,19 @@ def slog(s, target: str = "target",logger = None):
 
 def get_last_sync_time(target_session) -> datetime:
     """从目标数据库获取上次同步时间"""
-    metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="
+    metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="ai_sync_metadata").first()
     if metadata_entry:
         return metadata_entry.last_sync_time
     return datetime(1970, 1, 1) # 默认一个很早的时间
 
 def update_last_sync_time(target_session, new_sync_time: datetime):
     """更新目标数据库的上次同步时间"""
-    metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="
+    metadata_entry = target_session.query(SyncMetadata).filter_by(table_name="ai_sync_metadata").first()
     if metadata_entry:
         metadata_entry.last_sync_time = new_sync_time
     else:
         # 如果不存在,则创建
-        new_metadata = SyncMetadata(table_name="
+        new_metadata = SyncMetadata(table_name="ai_sync_metadata", last_sync_time=new_sync_time)
         target_session.add(new_metadata)
     target_session.commit()
     print(f"Updated last sync time to: {new_sync_time}")
```
src/pro_craft/server/router/prompt.py:

```diff
@@ -1,12 +1,13 @@
 
 
 from fastapi import APIRouter
-from pro_craft import Intel
-from pro_craft.utils import
+from pro_craft import Intel,AsyncIntel
+from pro_craft.utils import create_async_session
 
 def create_router(database_url: str,
                   slave_database_url: str,
-                  model_name: str
+                  model_name: str,
+                  logger = None):
     """
     # TODO 整理改为异步
     创建一个包含 ProCraft 路由的 FastAPI APIRouter 实例。
@@ -21,9 +22,10 @@ def create_router(database_url: str,
     APIRouter: 配置好的 FastAPI APIRouter 实例。
     """
 
-    intels =
+    intels = AsyncIntel(
         database_url=database_url,
-        model_name=model_name
+        model_name=model_name,
+        logger=logger
     )
 
     router = APIRouter(
@@ -33,7 +35,7 @@ def create_router(database_url: str,
     @router.get("/push_order",
                 description="可选 train,inference,summary,finetune,patch",)
     async def push_order(demand: str, prompt_id: str, action_type: str = "train"):
-        result = intels.push_action_order(
+        result = await intels.push_action_order(
            demand=demand,
            prompt_id=prompt_id,
            action_type=action_type
@@ -42,8 +44,8 @@ def create_router(database_url: str,
 
     @router.get("/get_latest_prompt")
     async def get_latest_prompt(prompt_id: str):
-        with
-        result = intels.get_prompts_from_sql(
+        async with create_async_session(intels.engine) as session:
+            result = await intels.get_prompts_from_sql(
                 prompt_id=prompt_id,
                 session=session
             )
@@ -51,20 +53,20 @@ def create_router(database_url: str,
 
     @router.get("/sync_database")
     async def sync_database():
-        result = intels.sync_prompt_data_to_database(slave_database_url)
+        result = await intels.sync_prompt_data_to_database(slave_database_url)
         return {"message": "success","result":result}
 
 
     @router.get("/roll_back")
     async def roll_back(prompt_id:str,version:str):
-        with
-        result = intels.get_prompts_from_sql(
+        async with create_async_session(intels.engine) as session:
+            result = await intels.get_prompts_from_sql(
                 prompt_id=prompt_id,
                 version = version,
                 session=session
             )
         assert result.version == version
-        intels.save_prompt_increment_version(
+        await intels.save_prompt_increment_version(
             prompt_id = prompt_id,
             new_prompt = result.prompt,
             use_case = result.use_case,
```
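The router now builds an `AsyncIntel` instance and awaits its coroutines. A minimal mounting sketch follows; it assumes `create_router` is importable from `pro_craft.server.router.prompt` (the file's path in this package), the URLs and model name are placeholders, and since the `APIRouter(...)` arguments are not visible in this hunk, the final paths may carry a prefix not shown here.

```python
# Sketch: wire the pro_craft prompt router into a FastAPI app.
# database_url / slave_database_url / model_name are placeholders.
from fastapi import FastAPI
from pro_craft.server.router.prompt import create_router

app = FastAPI()
app.include_router(
    create_router(
        database_url="sqlite+aiosqlite:///master.db",
        slave_database_url="sqlite+aiosqlite:///slave.db",
        model_name="gpt-4o-mini",
    )
)
# Endpoints added by this router, per the diff above:
#   GET /push_order, /get_latest_prompt, /sync_database, /roll_back
```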
src/pro_craft.egg-info/SOURCES.txt:

```diff
@@ -14,7 +14,6 @@ src/pro_craft/code_helper/coder.py
 src/pro_craft/code_helper/designer.py
 src/pro_craft/prompt_craft/__init__.py
 src/pro_craft/prompt_craft/async_.py
-src/pro_craft/prompt_craft/evals.py
 src/pro_craft/prompt_craft/new.py
 src/pro_craft/prompt_craft/sync.py
 src/pro_craft/server/mcp/__init__.py
```
pro_craft-0.1.24/src/pro_craft/prompt_craft/evals.py (file removed in 0.1.26):

```diff
@@ -1,61 +0,0 @@
-
-import json
-
-
-############evals##############
-
-# 异步
-class Base_Evals():
-    def __init__(self):
-        """
-        # TODO 2 自动优化prompt 并提升稳定性, 并测试
-        通过重写继承来使用它
-        """
-        self.MIN_SUCCESS_RATE = 00.0 # 这里定义通过阈值, 高于该比例则通过
-
-
-    async def _assert_eval_function(self,params):
-        #这里定义函数的评价体系
-        print(params,'params')
-
-    async def get_success_rate(self,test_cases:list[tuple]):
-        """
-        # 这里定义数据
-
-        """
-
-        successful_assertions = 0
-        total_assertions = len(test_cases)
-        result_cases = []
-
-        for i, params in enumerate(test_cases):
-            try:
-                # 这里将参数传入
-                await self._assert_eval_function(params)
-                successful_assertions += 1
-                result_cases.append({"type":"Successful","--input--":params,"evaluate_info":f"满足要求"})
-            except AssertionError as e:
-                result_cases.append({"type":"FAILED","--input--":params,"evaluate_info":f"ERROR {e}"})
-            except Exception as e: # 捕获其他可能的错误
-                result_cases.append({"type":"FAILED","--input--":params,"evaluate_info":f"ERROR {e}"})
-
-
-        success_rate = (successful_assertions / total_assertions) * 100
-        print(f"\n--- Aggregated Results ---")
-        print(f"Total test cases: {total_assertions}")
-        print(f"Successful cases: {successful_assertions}")
-        print(f"Success Rate: {success_rate:.2f}%")
-
-        if success_rate >= self.MIN_SUCCESS_RATE:
-            return "通过", json.dumps(result_cases,ensure_ascii=False)
-        else:
-            return "未通过",json.dumps(result_cases,ensure_ascii=False)
-
-
-def global_evals():
-    pass
-
-
-
-
-
```