pro-craft 0.1.22__py3-none-any.whl → 0.1.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pro-craft might be problematic. Click here for more details.
- pro_craft/__init__.py +7 -12
- pro_craft/database.py +22 -5
- pro_craft/file_manager.py +2 -2
- pro_craft/log.py +14 -14
- pro_craft/prompt_craft/async_.py +579 -336
- pro_craft/prompt_craft/new.py +3 -3
- pro_craft/prompt_craft/sync.py +28 -23
- pro_craft/server/router/prompt.py +15 -12
- {pro_craft-0.1.22.dist-info → pro_craft-0.1.36.dist-info}/METADATA +2 -1
- pro_craft-0.1.36.dist-info/RECORD +19 -0
- pro_craft/prompt_craft/evals.py +0 -61
- pro_craft-0.1.22.dist-info/RECORD +0 -20
- {pro_craft-0.1.22.dist-info → pro_craft-0.1.36.dist-info}/WHEEL +0 -0
- {pro_craft-0.1.22.dist-info → pro_craft-0.1.36.dist-info}/top_level.txt +0 -0
pro_craft/prompt_craft/async_.py
CHANGED
|
@@ -16,22 +16,23 @@ from datetime import datetime
|
|
|
16
16
|
from pro_craft.utils import extract_
|
|
17
17
|
import asyncio
|
|
18
18
|
import re
|
|
19
|
-
|
|
19
|
+
from pydantic import BaseModel, ValidationError, field_validator
|
|
20
20
|
from sqlalchemy import select, desc
|
|
21
21
|
from json.decoder import JSONDecodeError
|
|
22
|
+
from pro_craft.database import SyncMetadata
|
|
23
|
+
from datetime import datetime, timedelta
|
|
24
|
+
from datetime import datetime, timedelta
|
|
25
|
+
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
|
|
26
|
+
from sqlalchemy import select, and_ # 引入 select 和 and_
|
|
27
|
+
from sqlalchemy.orm import class_mapper # 用于检查对象是否是持久化的
|
|
28
|
+
import tqdm
|
|
29
|
+
from tqdm.asyncio import tqdm
|
|
30
|
+
import pandas as pd
|
|
31
|
+
import plotly.graph_objects as go
|
|
32
|
+
from pro_craft import super_log
|
|
22
33
|
|
|
23
|
-
|
|
24
|
-
pass
|
|
25
|
-
|
|
34
|
+
BATCH_SIZE = int(os.getenv("DATABASE_SYNC_BATCH_SIZE",100))
|
|
26
35
|
|
|
27
|
-
def slog(s, target: str = "target",logger = None):
|
|
28
|
-
COLOR_GREEN = "\033[92m"
|
|
29
|
-
COLOR_RESET = "\033[0m" # 重置颜色
|
|
30
|
-
logger("\n"+f"{COLOR_GREEN}=={COLOR_RESET}" * 50)
|
|
31
|
-
logger(target + "\n "+"--" * 40)
|
|
32
|
-
logger(type(s))
|
|
33
|
-
logger(s)
|
|
34
|
-
logger("\n"+f"{COLOR_GREEN}=={COLOR_RESET}" * 50)
|
|
35
36
|
|
|
36
37
|
def fix_broken_json_string(broken_json_str):
|
|
37
38
|
# 移除 BOM
|
|
@@ -58,6 +59,47 @@ def fix_broken_json_string(broken_json_str):
|
|
|
58
59
|
|
|
59
60
|
return fixed_json_str
|
|
60
61
|
|
|
62
|
+
async def get_last_sync_time(target_session: AsyncSession) -> datetime:
|
|
63
|
+
"""从目标数据库获取上次同步时间"""
|
|
64
|
+
# 修正点:使用 select() 和 execute()
|
|
65
|
+
result = await target_session.execute(
|
|
66
|
+
select(SyncMetadata).filter_by(table_name="ai_sync_metadata")
|
|
67
|
+
)
|
|
68
|
+
metadata_entry = result.scalar_one_or_none() # 获取单个对象或 None
|
|
69
|
+
|
|
70
|
+
if metadata_entry:
|
|
71
|
+
return metadata_entry.last_sync_time
|
|
72
|
+
return datetime(1970, 1, 1) # 默认一个很早的时间
|
|
73
|
+
|
|
74
|
+
async def update_last_sync_time(target_session: AsyncSession, new_sync_time: datetime):
|
|
75
|
+
"""更新目标数据库的上次同步时间"""
|
|
76
|
+
# 修正点:使用 select() 和 execute()
|
|
77
|
+
result = await target_session.execute(
|
|
78
|
+
select(SyncMetadata).filter_by(table_name="ai_sync_metadata")
|
|
79
|
+
)
|
|
80
|
+
metadata_entry = result.scalar_one_or_none()
|
|
81
|
+
|
|
82
|
+
if metadata_entry:
|
|
83
|
+
metadata_entry.last_sync_time = new_sync_time
|
|
84
|
+
else:
|
|
85
|
+
# 如果不存在,则创建
|
|
86
|
+
new_metadata = SyncMetadata(table_name="ai_sync_metadata", last_sync_time=new_sync_time)
|
|
87
|
+
target_session.add(new_metadata)
|
|
88
|
+
|
|
89
|
+
# 异步提交事务
|
|
90
|
+
await target_session.commit() # TODO
|
|
91
|
+
print(f"Updated last sync time to: {new_sync_time}")
|
|
92
|
+
|
|
93
|
+
class IntellectRemoveFormatError(Exception):
|
|
94
|
+
pass
|
|
95
|
+
|
|
96
|
+
class IntellectRemoveError(Exception):
|
|
97
|
+
pass
|
|
98
|
+
|
|
99
|
+
class ModelNameError(Exception):
|
|
100
|
+
pass
|
|
101
|
+
|
|
102
|
+
|
|
61
103
|
|
|
62
104
|
class IntellectType(Enum):
|
|
63
105
|
train = "train"
|
|
@@ -76,7 +118,7 @@ class AsyncIntel():
|
|
|
76
118
|
assert database_url
|
|
77
119
|
assert 'aio' in database_url
|
|
78
120
|
except AssertionError as e:
|
|
79
|
-
|
|
121
|
+
super_log(database_url,'database_url',logger=self.logger.warning)
|
|
80
122
|
raise IntellectRemoveFormatError(f"异步服务url必须提供, 且必须是aiomysql配置") from e
|
|
81
123
|
|
|
82
124
|
self.engine = create_async_engine(database_url, echo=False,
|
|
@@ -86,81 +128,170 @@ class AsyncIntel():
|
|
|
86
128
|
pool_pre_ping=True, # 使用前检查连接活性
|
|
87
129
|
pool_timeout=30 # 等待连接池中连接的最长时间(秒)
|
|
88
130
|
)
|
|
89
|
-
|
|
90
131
|
|
|
91
|
-
if
|
|
132
|
+
if "gemini" in model_name:
|
|
92
133
|
self.llm = BianXieAdapter(model_name = model_name)
|
|
93
|
-
elif
|
|
134
|
+
elif "doubao" in model_name:
|
|
94
135
|
self.llm = ArkAdapter(model_name = model_name)
|
|
95
136
|
else:
|
|
96
|
-
|
|
97
|
-
|
|
137
|
+
raise ModelNameError("AsyncIntel init get error model_name from zxf")
|
|
138
|
+
|
|
139
|
+
self.eval_df = pd.DataFrame({"name":[],'status':[],"score":[],"total":[],"bad_case":[]})
|
|
98
140
|
|
|
99
|
-
async def
|
|
141
|
+
async def create_main_database(self):
|
|
142
|
+
tables_to_create_names = ["ai_prompts","ai_usecase"]
|
|
100
143
|
async with self.engine.begin() as conn:
|
|
144
|
+
# 从 metadata 中获取对应的 Table 对象
|
|
145
|
+
specific_database_objects = []
|
|
146
|
+
for table_name in tables_to_create_names:
|
|
147
|
+
if table_name in PromptBase.metadata.tables:
|
|
148
|
+
specific_database_objects.append(PromptBase.metadata.tables[table_name])
|
|
149
|
+
else:
|
|
150
|
+
print(f"Warning: Table '{table_name}' not found in metadata.")
|
|
151
|
+
|
|
152
|
+
if specific_database_objects:
|
|
153
|
+
await conn.run_sync(PromptBase.metadata.create_all, tables=specific_database_objects)
|
|
154
|
+
else:
|
|
155
|
+
print("No specific tables to create.")
|
|
156
|
+
|
|
157
|
+
async def create_database(self,engine):
|
|
158
|
+
async with engine.begin() as conn:
|
|
101
159
|
await conn.run_sync(PromptBase.metadata.create_all)
|
|
102
|
-
|
|
103
|
-
async def
|
|
160
|
+
|
|
161
|
+
async def get_prompt(self,prompt_id,version,session):
|
|
104
162
|
"""
|
|
105
163
|
获取指定 prompt_id 的最新版本数据,通过创建时间判断。
|
|
106
164
|
"""
|
|
107
|
-
|
|
108
|
-
Prompt.
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
165
|
+
if version:
|
|
166
|
+
stmt_ = select(Prompt).filter(
|
|
167
|
+
Prompt.prompt_id == prompt_id,
|
|
168
|
+
Prompt.version == version
|
|
169
|
+
)
|
|
170
|
+
else:
|
|
171
|
+
stmt_ = select(Prompt).filter(
|
|
172
|
+
Prompt.prompt_id == prompt_id,
|
|
173
|
+
)
|
|
174
|
+
stmt = stmt_.order_by(
|
|
175
|
+
desc(Prompt.timestamp), # 使用 sqlalchemy.desc() 来指定降序
|
|
176
|
+
desc(Prompt.version) # 使用 sqlalchemy.desc() 来指定降序
|
|
177
|
+
)
|
|
178
|
+
|
|
114
179
|
result = await session.execute(stmt)
|
|
115
|
-
# 3. 从 Result 对象中获取第一个模型实例
|
|
116
|
-
# .scalars() 用于从结果行中获取第一个列的值(这里是Prompt对象本身)
|
|
117
|
-
# .first() 获取第一个结果
|
|
118
180
|
result = result.scalars().first()
|
|
119
181
|
|
|
120
182
|
return result
|
|
121
183
|
|
|
122
|
-
async def
|
|
123
|
-
|
|
124
|
-
|
|
184
|
+
async def sync_production_database(self,database_url:str):
|
|
185
|
+
target_engine = create_async_engine(database_url, echo=False)
|
|
186
|
+
await self.create_database(target_engine)
|
|
187
|
+
async with create_async_session(self.engine) as source_session:
|
|
188
|
+
async with create_async_session(target_engine) as target_session:
|
|
189
|
+
|
|
190
|
+
last_sync_time = await get_last_sync_time(target_session)
|
|
191
|
+
print(f"Starting sync for sync_metadata from: {last_sync_time}")
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
processed_count = 0
|
|
195
|
+
#2 next_sync_watermark = last_sync_time
|
|
196
|
+
current_batch_max_updated_at = last_sync_time
|
|
197
|
+
|
|
198
|
+
while True:
|
|
199
|
+
source_results = await source_session.execute(
|
|
200
|
+
select(Prompt)
|
|
201
|
+
.filter(Prompt.timestamp > last_sync_time)
|
|
202
|
+
.order_by(Prompt.timestamp.asc(), Prompt.id.asc())
|
|
203
|
+
.limit(BATCH_SIZE)
|
|
204
|
+
)
|
|
205
|
+
records_to_sync = source_results.scalars().all()
|
|
206
|
+
if not records_to_sync:
|
|
207
|
+
print("没有更多记录了")
|
|
208
|
+
break # 没有更多记录了
|
|
209
|
+
|
|
210
|
+
#2 max_timestamp_in_batch = datetime(1970, 1, 1) # 初始化为最早时间
|
|
211
|
+
|
|
212
|
+
# 准备要插入或更新到目标数据库的数据
|
|
213
|
+
for record in records_to_sync:
|
|
214
|
+
# 查找目标数据库中是否存在该ID的记录
|
|
215
|
+
# 这里的 `User` 模型会对应到 target_db.users
|
|
216
|
+
target_prompt_result = await target_session.execute(
|
|
217
|
+
select(Prompt).filter_by(id=record.id) # 假设 prompt_id 是唯一标识符
|
|
218
|
+
)
|
|
219
|
+
target_prompt = target_prompt_result.scalar_one_or_none()
|
|
220
|
+
|
|
221
|
+
if target_prompt:
|
|
222
|
+
# 如果存在,则更新
|
|
223
|
+
target_prompt.prompt_id = record.prompt_id
|
|
224
|
+
target_prompt.version = record.version
|
|
225
|
+
target_prompt.timestamp = record.timestamp
|
|
226
|
+
target_prompt.prompt = record.prompt
|
|
227
|
+
target_prompt.use_case = record.use_case
|
|
228
|
+
target_prompt.action_type = record.action_type
|
|
229
|
+
target_prompt.demand = record.demand
|
|
230
|
+
target_prompt.score = record.score
|
|
231
|
+
target_prompt.is_deleted = record.is_deleted
|
|
232
|
+
else:
|
|
233
|
+
# 如果不存在,则添加新记录
|
|
234
|
+
# 注意:这里需要创建一个新的User实例,而不是直接添加源数据库的record对象
|
|
235
|
+
new_prompt = Prompt(
|
|
236
|
+
prompt_id=record.prompt_id,
|
|
237
|
+
version=record.version,
|
|
238
|
+
timestamp=record.timestamp,
|
|
239
|
+
prompt = record.prompt,
|
|
240
|
+
use_case = record.use_case,
|
|
241
|
+
action_type = record.action_type,
|
|
242
|
+
demand = record.demand,
|
|
243
|
+
score = record.score,
|
|
244
|
+
is_deleted = record.is_deleted
|
|
245
|
+
)
|
|
246
|
+
target_session.add(new_prompt)
|
|
247
|
+
|
|
248
|
+
# 记录当前批次最大的 updated_at
|
|
249
|
+
#2
|
|
250
|
+
# if record.timestamp > max_timestamp_in_batch:
|
|
251
|
+
# max_timestamp_in_batch = record.timestamp
|
|
252
|
+
if record.timestamp > current_batch_max_updated_at:
|
|
253
|
+
current_batch_max_updated_at = record.timestamp
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
await target_session.commit()
|
|
257
|
+
processed_count += len(records_to_sync)
|
|
258
|
+
print(f"Processed {len(records_to_sync)} records. Total processed: {processed_count}")
|
|
259
|
+
|
|
260
|
+
#2 next_sync_watermark = max_timestamp_in_batch + timedelta(microseconds=1)
|
|
261
|
+
last_sync_time = current_batch_max_updated_at + timedelta(microseconds=1)
|
|
125
262
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
table_name (str): 存储提示词数据的数据库表名。
|
|
130
|
-
db_manager (DBManager): 数据库管理器的实例,用于执行查询。
|
|
263
|
+
|
|
264
|
+
if len(records_to_sync) < BATCH_SIZE: # 如果查询到的记录数小于批次大小,说明已经处理完所有符合条件的记录
|
|
265
|
+
break
|
|
131
266
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
Prompt.prompt_id == target_prompt_id,
|
|
138
|
-
Prompt.version == target_version
|
|
139
|
-
)
|
|
140
|
-
result = await session.execute(stmt)
|
|
267
|
+
if processed_count > 0:
|
|
268
|
+
# 最终更新last_sync_time到数据库,确保记录的是所有已处理记录中最新的一个
|
|
269
|
+
await update_last_sync_time(target_session, current_batch_max_updated_at + timedelta(microseconds=1))
|
|
270
|
+
|
|
271
|
+
#2 await update_last_sync_time(target_session, next_sync_watermark)
|
|
141
272
|
|
|
142
|
-
|
|
273
|
+
await target_session.commit() # 确保最终的 metadata 更新也被提交
|
|
274
|
+
else:
|
|
275
|
+
print("No new records to sync.")
|
|
143
276
|
|
|
144
|
-
return specific_prompt
|
|
145
277
|
|
|
146
|
-
async def
|
|
278
|
+
async def get_prompt_safe(self,
|
|
147
279
|
prompt_id: str,
|
|
148
280
|
version = None,
|
|
149
281
|
session = None) -> Prompt:
|
|
150
282
|
"""
|
|
151
283
|
从sql获取提示词
|
|
152
284
|
"""
|
|
153
|
-
|
|
154
|
-
if
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
285
|
+
prompt_obj = await self.get_prompt(prompt_id=prompt_id,version=version,session=session)
|
|
286
|
+
if prompt_obj:
|
|
287
|
+
return prompt_obj
|
|
288
|
+
|
|
289
|
+
print("warnning 未找到制定版本, 默认使用最新版本")
|
|
290
|
+
prompt_obj = await self.get_prompt(prompt_id=prompt_id,version=None,session=session)
|
|
291
|
+
return prompt_obj
|
|
292
|
+
|
|
161
293
|
|
|
162
|
-
|
|
163
|
-
async def save_prompt_increment_version(self,
|
|
294
|
+
async def save_prompt(self,
|
|
164
295
|
prompt_id: str,
|
|
165
296
|
new_prompt: str,
|
|
166
297
|
use_case:str = "",
|
|
@@ -173,7 +304,7 @@ class AsyncIntel():
|
|
|
173
304
|
input_data 指的是输入用例, 可以为空
|
|
174
305
|
"""
|
|
175
306
|
# 查看是否已经存在
|
|
176
|
-
prompts_obj = await self.
|
|
307
|
+
prompts_obj = await self.get_prompt_safe(prompt_id=prompt_id,session=session)
|
|
177
308
|
|
|
178
309
|
if prompts_obj:
|
|
179
310
|
# 如果存在版本加1
|
|
@@ -200,105 +331,55 @@ class AsyncIntel():
|
|
|
200
331
|
session.add(prompt1)
|
|
201
332
|
await session.commit() # 提交事务,将数据写入数据库
|
|
202
333
|
|
|
203
|
-
async def
|
|
334
|
+
async def get_use_case(self,
|
|
335
|
+
target_prompt_id: str,
|
|
336
|
+
session = None
|
|
337
|
+
):
|
|
338
|
+
"""
|
|
339
|
+
从sql保存提示词
|
|
340
|
+
"""
|
|
341
|
+
stmt = select(UseCase).filter(UseCase.is_deleted == 0,
|
|
342
|
+
UseCase.prompt_id == target_prompt_id)
|
|
343
|
+
|
|
344
|
+
result = await session.execute(stmt)
|
|
345
|
+
# use_case = result.scalars().one_or_none()
|
|
346
|
+
use_case = result.scalars().all()
|
|
347
|
+
return use_case
|
|
348
|
+
|
|
349
|
+
async def save_use_case(self,
|
|
204
350
|
prompt_id: str,
|
|
205
351
|
use_case:str = "",
|
|
352
|
+
timestamp = "",
|
|
206
353
|
output = "",
|
|
207
354
|
solution: str = "",
|
|
355
|
+
faired_time = 0,
|
|
208
356
|
session = None
|
|
209
357
|
):
|
|
358
|
+
|
|
210
359
|
"""
|
|
211
360
|
从sql保存提示词
|
|
212
361
|
"""
|
|
213
|
-
|
|
362
|
+
#TODO 存之前保证数据库中相同的prompt_id中没有重复的use_case
|
|
363
|
+
use_cases = await self.get_use_case(target_prompt_id = prompt_id,
|
|
364
|
+
session = session)
|
|
365
|
+
for use_case_old in use_cases:
|
|
366
|
+
if use_case == use_case_old.use_case:
|
|
367
|
+
print("用例已经存在")
|
|
368
|
+
return
|
|
369
|
+
|
|
370
|
+
use_case = UseCase(prompt_id=prompt_id,
|
|
214
371
|
use_case = use_case,
|
|
372
|
+
timestamp = timestamp,
|
|
215
373
|
output = output,
|
|
216
374
|
solution = solution,
|
|
375
|
+
faired_time = faired_time,
|
|
217
376
|
)
|
|
218
377
|
|
|
219
378
|
session.add(use_case)
|
|
220
379
|
await session.commit() # 提交事务,将数据写入数据库
|
|
221
380
|
|
|
222
|
-
async def summary_to_sql(
|
|
223
|
-
self,
|
|
224
|
-
prompt_id:str,
|
|
225
|
-
version = None,
|
|
226
|
-
prompt = "",
|
|
227
|
-
session = None
|
|
228
|
-
):
|
|
229
|
-
"""
|
|
230
|
-
让大模型微调已经存在的 system_prompt
|
|
231
|
-
"""
|
|
232
|
-
system_prompt_created_prompt = """
|
|
233
|
-
很棒, 我们已经达成了某种默契, 我们之间合作无间, 但是, 可悲的是, 当我关闭这个窗口的时候, 你就会忘记我们之间经历的种种磨合, 这是可惜且心痛的, 所以你能否将目前这一套处理流程结晶成一个优质的prompt 这样, 我们下一次只要将prompt输入, 你就能想起我们今天的磨合过程,
|
|
234
|
-
对了,我提示一点, 这个prompt的主角是你, 也就是说, 你在和未来的你对话, 你要教会未来的你今天这件事, 是否让我看懂到时其次
|
|
235
|
-
|
|
236
|
-
只要输出提示词内容即可, 不需要任何的说明和解释
|
|
237
|
-
"""
|
|
238
|
-
system_result = await self.llm.aproduct(prompt + system_prompt_created_prompt)
|
|
239
|
-
|
|
240
|
-
s_prompt = extract_(system_result,pattern_key=r"prompt")
|
|
241
|
-
chat_history = s_prompt or system_result
|
|
242
|
-
await self.save_prompt_increment_version(prompt_id,
|
|
243
|
-
new_prompt = chat_history,
|
|
244
|
-
use_case = " summary ",
|
|
245
|
-
score = 60,
|
|
246
|
-
session = session)
|
|
247
|
-
|
|
248
|
-
async def prompt_finetune_to_sql(
|
|
249
|
-
self,
|
|
250
|
-
prompt_id:str,
|
|
251
|
-
version = None,
|
|
252
|
-
demand: str = "",
|
|
253
|
-
session = None,
|
|
254
|
-
):
|
|
255
|
-
"""
|
|
256
|
-
让大模型微调已经存在的 system_prompt
|
|
257
|
-
"""
|
|
258
|
-
change_by_opinion_prompt = """
|
|
259
|
-
你是一个资深AI提示词工程师,具备卓越的Prompt设计与优化能力。
|
|
260
|
-
我将为你提供一段现有System Prompt。你的核心任务是基于这段Prompt进行修改,以实现我提出的特定目标和功能需求。
|
|
261
|
-
请你绝对严格地遵循以下原则:
|
|
262
|
-
极端最小化修改原则(核心):
|
|
263
|
-
在满足所有功能需求的前提下,只进行我明确要求的修改。
|
|
264
|
-
即使你认为有更“优化”、“清晰”或“简洁”的表达方式,只要我没有明确要求,也绝不允许进行任何未经指令的修改。
|
|
265
|
-
目的就是尽可能地保留原有Prompt的字符和结构不变,除非我的功能要求必须改变。
|
|
266
|
-
例如,如果我只要求你修改一个词,你就不应该修改整句话的结构。
|
|
267
|
-
严格遵循我的指令:
|
|
268
|
-
你必须精确地执行我提出的所有具体任务和要求。
|
|
269
|
-
绝不允许自行添加任何超出指令范围的说明、角色扮演、约束条件或任何非我指令要求的内容。
|
|
270
|
-
保持原有Prompt的风格和语调:
|
|
271
|
-
尽可能地与现有Prompt的语言风格、正式程度和语调保持一致。
|
|
272
|
-
不要改变不相关的句子或其表达方式。
|
|
273
|
-
只提供修改后的Prompt:
|
|
274
|
-
直接输出修改后的完整System Prompt文本。
|
|
275
|
-
不要包含任何解释、说明或额外对话。
|
|
276
|
-
在你开始之前,请务必确认你已理解并能绝对严格地遵守这些原则。任何未经明确指令的改动都将视为未能完成任务。
|
|
277
|
-
|
|
278
|
-
现有System Prompt:
|
|
279
|
-
{old_system_prompt}
|
|
280
|
-
|
|
281
|
-
功能需求:
|
|
282
|
-
{opinion}
|
|
283
|
-
"""
|
|
284
|
-
|
|
285
|
-
prompt_ = await self.get_prompts_from_sql(prompt_id = prompt_id,version = version,
|
|
286
|
-
session=session)
|
|
287
|
-
if demand:
|
|
288
|
-
new_prompt = await self.llm.aproduct(
|
|
289
|
-
change_by_opinion_prompt.format(old_system_prompt=prompt_.prompt, opinion=demand)
|
|
290
|
-
)
|
|
291
|
-
else:
|
|
292
|
-
new_prompt = prompt_
|
|
293
|
-
await self.save_prompt_increment_version(prompt_id = prompt_id,
|
|
294
|
-
new_prompt = new_prompt,
|
|
295
|
-
use_case = " finetune ",
|
|
296
|
-
score = 60,
|
|
297
|
-
session = session)
|
|
298
|
-
|
|
299
|
-
|
|
300
381
|
async def push_action_order(self,demand : str,prompt_id: str,
|
|
301
|
-
action_type = 'train')
|
|
382
|
+
action_type = 'train'):# init
|
|
302
383
|
|
|
303
384
|
"""
|
|
304
385
|
从sql保存提示词
|
|
@@ -309,9 +390,9 @@ class AsyncIntel():
|
|
|
309
390
|
# 查看是否已经存在
|
|
310
391
|
async with create_async_session(self.engine) as session:
|
|
311
392
|
|
|
312
|
-
latest_prompt = await self.
|
|
313
|
-
|
|
314
|
-
|
|
393
|
+
latest_prompt = await self.get_prompt_safe(prompt_id=prompt_id,session=session)
|
|
394
|
+
if latest_prompt:
|
|
395
|
+
await self.save_prompt(prompt_id=latest_prompt.prompt_id,
|
|
315
396
|
new_prompt = latest_prompt.prompt,
|
|
316
397
|
use_case = latest_prompt.use_case,
|
|
317
398
|
action_type=action_type,
|
|
@@ -319,17 +400,26 @@ class AsyncIntel():
|
|
|
319
400
|
score=latest_prompt.score,
|
|
320
401
|
session=session
|
|
321
402
|
)
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
403
|
+
return "success"
|
|
404
|
+
else:
|
|
405
|
+
await self.save_prompt(prompt_id=prompt_id,
|
|
406
|
+
new_prompt = demand,
|
|
407
|
+
use_case = "",
|
|
408
|
+
action_type="inference",
|
|
409
|
+
demand=demand,
|
|
410
|
+
score=60,
|
|
411
|
+
session=session
|
|
412
|
+
)
|
|
413
|
+
return "init"
|
|
325
414
|
|
|
326
415
|
|
|
327
|
-
async def
|
|
416
|
+
async def intellect(self,
|
|
328
417
|
input_data: dict | str,
|
|
329
418
|
output_format: str,
|
|
330
419
|
prompt_id: str,
|
|
331
420
|
version: str = None,
|
|
332
421
|
inference_save_case = True,
|
|
422
|
+
change_case = False,
|
|
333
423
|
):
|
|
334
424
|
if isinstance(input_data,dict):
|
|
335
425
|
input_ = json.dumps(input_data,ensure_ascii=False)
|
|
@@ -338,29 +428,22 @@ class AsyncIntel():
|
|
|
338
428
|
|
|
339
429
|
# 查数据库, 获取最新提示词对象
|
|
340
430
|
async with create_async_session(self.engine) as session:
|
|
341
|
-
result_obj = await self.
|
|
342
|
-
|
|
343
|
-
|
|
431
|
+
result_obj = await self.get_prompt_safe(prompt_id=prompt_id,session=session)
|
|
344
432
|
if result_obj is None:
|
|
345
|
-
|
|
346
|
-
prompt_id = prompt_id,
|
|
347
|
-
new_prompt = "做一些处理",
|
|
348
|
-
use_case = input_,
|
|
349
|
-
score = 60,
|
|
350
|
-
session = session
|
|
351
|
-
)
|
|
352
|
-
ai_result = "初始化完成"
|
|
353
|
-
return ai_result
|
|
433
|
+
raise IntellectRemoveError("不存在的prompt_id")
|
|
354
434
|
|
|
355
435
|
prompt = result_obj.prompt
|
|
356
436
|
if result_obj.action_type == "inference":
|
|
357
437
|
# 直接推理即可
|
|
358
|
-
ai_result = await self.llm.aproduct(prompt + output_format + "\
|
|
438
|
+
ai_result = await self.llm.aproduct(prompt + output_format + "\nuser:" + input_)
|
|
359
439
|
if inference_save_case:
|
|
360
|
-
|
|
440
|
+
# 设计一个机制, 传输说获取300数据, 那么数据就一直流转获取, 知道300截止
|
|
441
|
+
await self.save_use_case(prompt_id,
|
|
361
442
|
use_case = input_,
|
|
443
|
+
timestamp = datetime.now(),
|
|
362
444
|
output = ai_result,
|
|
363
|
-
solution =
|
|
445
|
+
solution = output_format,
|
|
446
|
+
faired_time = 0,
|
|
364
447
|
session = session,
|
|
365
448
|
)
|
|
366
449
|
|
|
@@ -369,9 +452,6 @@ class AsyncIntel():
|
|
|
369
452
|
# 则训练推广
|
|
370
453
|
|
|
371
454
|
# 新版本 默人修改会 inference 状态
|
|
372
|
-
chat_history = prompt
|
|
373
|
-
before_input = result_obj.use_case
|
|
374
|
-
demand = result_obj.demand
|
|
375
455
|
|
|
376
456
|
|
|
377
457
|
# assert demand
|
|
@@ -386,195 +466,170 @@ class AsyncIntel():
|
|
|
386
466
|
|
|
387
467
|
# ai_result = await self.llm.aproduct(input_prompt)
|
|
388
468
|
# chat_history = input_prompt + "\nassistant:\n" + ai_result # 用聊天记录作为完整提示词
|
|
389
|
-
# await self.
|
|
469
|
+
# await self.save_prompt(prompt_id, chat_history,
|
|
390
470
|
# use_case = input_,
|
|
391
471
|
# score = 60,
|
|
392
472
|
# session = session)
|
|
393
473
|
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
474
|
+
|
|
475
|
+
# version 2
|
|
476
|
+
|
|
477
|
+
# if input_ == before_input:
|
|
478
|
+
# new_prompt = prompt + "\nuser:" + demand
|
|
479
|
+
# else:
|
|
480
|
+
# new_prompt = prompt + "\nuser:" + input_
|
|
481
|
+
|
|
482
|
+
# ai_result = await self.llm.aproduct(new_prompt + output_format)
|
|
483
|
+
|
|
484
|
+
# save_new_prompt = new_prompt + "\nassistant:\n" + ai_result
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
# await self.save_prompt(
|
|
488
|
+
# prompt_id,
|
|
489
|
+
# new_prompt=save_new_prompt,
|
|
490
|
+
# use_case = input_,
|
|
491
|
+
# action_type = "inference",
|
|
492
|
+
# score = 60,
|
|
493
|
+
# session = session)
|
|
494
|
+
chat_history = prompt
|
|
495
|
+
before_input = result_obj.use_case
|
|
496
|
+
demand = result_obj.demand
|
|
497
|
+
input_data = input_
|
|
498
|
+
if before_input == "" or change_case is True:
|
|
499
|
+
result_obj.use_case = input_
|
|
500
|
+
await session.commit()
|
|
501
|
+
# 查询上一条, 将before_input 更新位input_
|
|
502
|
+
prompt += input_
|
|
503
|
+
|
|
504
|
+
# 使用更新后的数据进行后续步骤
|
|
505
|
+
new_prompt = prompt + "\nuser:" + demand
|
|
398
506
|
|
|
399
507
|
ai_result = await self.llm.aproduct(new_prompt + output_format)
|
|
400
508
|
|
|
401
509
|
save_new_prompt = new_prompt + "\nassistant:\n" + ai_result
|
|
402
510
|
|
|
403
511
|
|
|
404
|
-
await self.
|
|
512
|
+
await self.save_prompt(
|
|
405
513
|
prompt_id,
|
|
406
514
|
new_prompt=save_new_prompt,
|
|
407
515
|
use_case = input_,
|
|
516
|
+
action_type = "inference",
|
|
408
517
|
score = 60,
|
|
409
518
|
session = session)
|
|
410
519
|
|
|
411
520
|
elif result_obj.action_type == "summary":
|
|
521
|
+
system_prompt_summary = """
|
|
522
|
+
很棒, 我们已经达成了某种默契, 我们之间合作无间, 但是, 可悲的是, 当我关闭这个窗口的时候, 你就会忘记我们之间经历的种种磨合, 这是可惜且心痛的, 所以你能否将目前这一套处理流程结晶成一个优质的prompt 这样, 我们下一次只要将prompt输入, 你就能想起我们今天的磨合过程,
|
|
523
|
+
对了,我提示一点, 这个prompt的主角是你, 也就是说, 你在和未来的你对话, 你要教会未来的你今天这件事, 是否让我看懂到时其次
|
|
412
524
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
525
|
+
只要输出提示词内容即可, 不需要任何的说明和解释
|
|
526
|
+
"""
|
|
527
|
+
|
|
528
|
+
latest_prompt = await self.get_prompt_safe(prompt_id=prompt_id,session=session)
|
|
529
|
+
|
|
530
|
+
system_result = await self.llm.aproduct(prompt + system_prompt_summary)
|
|
531
|
+
s_prompt = extract_(system_result,pattern_key=r"prompt")
|
|
532
|
+
new_prompt = s_prompt or system_result
|
|
533
|
+
await self.save_prompt(
|
|
534
|
+
prompt_id,
|
|
535
|
+
new_prompt = new_prompt,
|
|
536
|
+
use_case = latest_prompt.use_case,
|
|
537
|
+
score = 65,
|
|
538
|
+
action_type = "inference",
|
|
539
|
+
session = session
|
|
540
|
+
)
|
|
541
|
+
|
|
542
|
+
ai_result = await self.llm.aproduct(prompt + output_format + "\nuser:" + input_)
|
|
418
543
|
|
|
419
544
|
elif result_obj.action_type == "finetune":
|
|
420
545
|
demand = result_obj.demand
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
raise
|
|
441
|
-
|
|
442
|
-
return ai_result
|
|
443
|
-
|
|
444
|
-
async def intellect_stream_remove(self,
|
|
445
|
-
input_data: dict | str,
|
|
446
|
-
output_format: str,
|
|
447
|
-
prompt_id: str,
|
|
448
|
-
version: str = None,
|
|
449
|
-
inference_save_case = True,
|
|
450
|
-
push_patch = False,
|
|
451
|
-
):
|
|
452
|
-
if isinstance(input_data,dict):
|
|
453
|
-
input_ = json.dumps(input_data,ensure_ascii=False)
|
|
454
|
-
elif isinstance(input_data,str):
|
|
455
|
-
input_ = input_data
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
# 查数据库, 获取最新提示词对象
|
|
459
|
-
with create_session(self.engine) as session:
|
|
460
|
-
result_obj = await self.get_prompts_from_sql(prompt_id=prompt_id,session=session)
|
|
461
|
-
|
|
462
|
-
'''
|
|
463
|
-
if result_obj is None:
|
|
464
|
-
await self.save_prompt_increment_version(
|
|
465
|
-
prompt_id = prompt_id,
|
|
466
|
-
new_prompt = "做一些处理",
|
|
467
|
-
use_case = input_,
|
|
468
|
-
session = session
|
|
469
|
-
)
|
|
470
|
-
ai_result = await self.intellect_stream_remove(input_data = input_data,
|
|
471
|
-
output_format = output_format,
|
|
472
|
-
prompt_id = prompt_id,
|
|
473
|
-
version = version,
|
|
474
|
-
inference_save_case = inference_save_case
|
|
475
|
-
)
|
|
476
|
-
return ai_result'''
|
|
546
|
+
change_by_opinion_prompt = """
|
|
547
|
+
你是一个资深AI提示词工程师,具备卓越的Prompt设计与优化能力。
|
|
548
|
+
我将为你提供一段现有System Prompt。你的核心任务是基于这段Prompt进行修改,以实现我提出的特定目标和功能需求。
|
|
549
|
+
请你绝对严格地遵循以下原则:
|
|
550
|
+
极端最小化修改原则(核心):
|
|
551
|
+
在满足所有功能需求的前提下,只进行我明确要求的修改。
|
|
552
|
+
即使你认为有更“优化”、“清晰”或“简洁”的表达方式,只要我没有明确要求,也绝不允许进行任何未经指令的修改。
|
|
553
|
+
目的就是尽可能地保留原有Prompt的字符和结构不变,除非我的功能要求必须改变。
|
|
554
|
+
例如,如果我只要求你修改一个词,你就不应该修改整句话的结构。
|
|
555
|
+
严格遵循我的指令:
|
|
556
|
+
你必须精确地执行我提出的所有具体任务和要求。
|
|
557
|
+
绝不允许自行添加任何超出指令范围的说明、角色扮演、约束条件或任何非我指令要求的内容。
|
|
558
|
+
保持原有Prompt的风格和语调:
|
|
559
|
+
尽可能地与现有Prompt的语言风格、正式程度和语调保持一致。
|
|
560
|
+
不要改变不相关的句子或其表达方式。
|
|
561
|
+
只提供修改后的Prompt:
|
|
562
|
+
直接输出修改后的完整System Prompt文本。
|
|
563
|
+
不要包含任何解释、说明或额外对话。
|
|
564
|
+
在你开始之前,请务必确认你已理解并能绝对严格地遵守这些原则。任何未经明确指令的改动都将视为未能完成任务。
|
|
477
565
|
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
# 直接推理即可
|
|
481
|
-
|
|
482
|
-
ai_generate_result = self.llm.aproduct_stream(prompt + output_format + "\n-----input----\n" + input_)
|
|
483
|
-
ai_result = ""
|
|
484
|
-
async for word in ai_generate_result:
|
|
485
|
-
ai_result += word
|
|
486
|
-
yield word
|
|
487
|
-
if inference_save_case:
|
|
488
|
-
await self.save_use_case_by_sql(prompt_id,
|
|
489
|
-
use_case = input_,
|
|
490
|
-
output = ai_result,
|
|
491
|
-
solution = "备注/理想回复",
|
|
492
|
-
session = session,
|
|
493
|
-
)
|
|
494
|
-
|
|
495
|
-
elif result_obj.action_type == "train":
|
|
496
|
-
assert result_obj.demand # 如果type = train 且 demand 是空 则报错
|
|
497
|
-
# 则训练推广
|
|
566
|
+
现有System Prompt:
|
|
567
|
+
{old_system_prompt}
|
|
498
568
|
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
demand = result_obj.demand
|
|
503
|
-
|
|
569
|
+
功能需求:
|
|
570
|
+
{opinion}
|
|
571
|
+
"""
|
|
504
572
|
|
|
573
|
+
latest_prompt = await self.get_prompt_safe(prompt_id=prompt_id,session=session)
|
|
574
|
+
prompt_ = await self.get_prompt_safe(prompt_id = prompt_id,version = version,
|
|
575
|
+
session=session)
|
|
505
576
|
assert demand
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
577
|
+
|
|
578
|
+
if demand:
|
|
579
|
+
new_prompt = await self.llm.aproduct(
|
|
580
|
+
change_by_opinion_prompt.format(old_system_prompt=prompt_.prompt, opinion=demand)
|
|
581
|
+
)
|
|
511
582
|
else:
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
chat_history = input_prompt + "\nassistant:\n" + ai_result # 用聊天记录作为完整提示词
|
|
522
|
-
await self.save_prompt_increment_version(prompt_id, chat_history,
|
|
523
|
-
use_case = input_,
|
|
524
|
-
score = 60,
|
|
525
|
-
session = session)
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
elif result_obj.action_type == "summary":
|
|
529
|
-
|
|
530
|
-
await self.summary_to_sql(prompt_id = prompt_id,
|
|
531
|
-
prompt = prompt,
|
|
532
|
-
session = session
|
|
533
|
-
)
|
|
534
|
-
input_prompt = prompt + output_format + "\n-----input----\n" + input_
|
|
535
|
-
ai_generate_result = self.llm.aproduct_stream(input_prompt)
|
|
536
|
-
ai_result = ""
|
|
537
|
-
async for word in ai_generate_result:
|
|
538
|
-
ai_result += word
|
|
539
|
-
yield word
|
|
583
|
+
new_prompt = prompt_
|
|
584
|
+
await self.save_prompt(
|
|
585
|
+
prompt_id,
|
|
586
|
+
new_prompt = new_prompt,
|
|
587
|
+
use_case = latest_prompt.use_case,
|
|
588
|
+
score = 70,
|
|
589
|
+
action_type = "inference",
|
|
590
|
+
session = session
|
|
591
|
+
)
|
|
540
592
|
|
|
541
|
-
|
|
542
|
-
demand = result_obj.demand
|
|
543
|
-
|
|
544
|
-
assert demand
|
|
545
|
-
await self.prompt_finetune_to_sql(prompt_id = prompt_id,
|
|
546
|
-
demand = demand,
|
|
547
|
-
session = session
|
|
548
|
-
)
|
|
549
|
-
input_prompt = prompt + output_format + "\n-----input----\n" + input_
|
|
550
|
-
ai_generate_result = self.llm.aproduct_stream(input_prompt)
|
|
551
|
-
ai_result = ""
|
|
552
|
-
async for word in ai_generate_result:
|
|
553
|
-
ai_result += word
|
|
554
|
-
yield word
|
|
593
|
+
ai_result = await self.llm.aproduct(prompt + output_format + "\nuser:" + input_)
|
|
555
594
|
|
|
556
595
|
elif result_obj.action_type == "patch":
|
|
557
|
-
|
|
558
596
|
demand = result_obj.demand
|
|
559
597
|
assert demand
|
|
560
|
-
|
|
598
|
+
latest_prompt = await self.get_prompt_safe(prompt_id=prompt_id,session=session)
|
|
599
|
+
|
|
561
600
|
chat_history = prompt + demand
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
601
|
+
await self.save_prompt(prompt_id,
|
|
602
|
+
chat_history,
|
|
603
|
+
use_case = latest_prompt.use_case,
|
|
604
|
+
score = 70,
|
|
605
|
+
action_type = "inference",
|
|
606
|
+
session = session)
|
|
607
|
+
|
|
608
|
+
ai_result = await self.llm.aproduct(chat_history + output_format + "\nuser:" + input_)
|
|
609
|
+
|
|
610
|
+
elif result_obj.action_type.startswith("to:"):
|
|
611
|
+
target_version = result_obj.action_type.split(":")[-1]
|
|
612
|
+
latest_prompt = await self.get_prompt_safe(prompt_id=prompt_id,session=session)
|
|
613
|
+
prompt_obj = await self.get_prompt_safe(prompt_id=prompt_id,
|
|
614
|
+
version=target_version,
|
|
615
|
+
session=session)
|
|
616
|
+
|
|
617
|
+
await self.save_prompt(prompt_id,
|
|
618
|
+
prompt_obj.prompt,
|
|
619
|
+
use_case = latest_prompt.use_case,
|
|
620
|
+
score = prompt_obj.score,
|
|
621
|
+
action_type = "inference",
|
|
622
|
+
session = session)
|
|
623
|
+
ai_result = await self.llm.aproduct(prompt_obj.prompt + output_format + "\nuser:" + input_)
|
|
624
|
+
|
|
572
625
|
else:
|
|
573
626
|
raise
|
|
574
627
|
|
|
575
|
-
|
|
628
|
+
return ai_result
|
|
629
|
+
|
|
630
|
+
async def intellect_format(self,
|
|
576
631
|
input_data: dict | str,
|
|
577
|
-
OutputFormat: object,
|
|
632
|
+
OutputFormat: object | None,
|
|
578
633
|
prompt_id: str,
|
|
579
634
|
ExtraFormats: list[object] = [],
|
|
580
635
|
version: str = None,
|
|
@@ -588,9 +643,11 @@ class AsyncIntel():
|
|
|
588
643
|
"```json([\s\S]*?)```"
|
|
589
644
|
使用以下方式验证
|
|
590
645
|
"""
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
646
|
+
if OutputFormat:
|
|
647
|
+
output_format = base_format_prompt + "\n".join([inspect.getsource(outputformat) for outputformat in ExtraFormats]) + inspect.getsource(OutputFormat)
|
|
648
|
+
else:
|
|
649
|
+
output_format = ""
|
|
650
|
+
ai_result = await self.intellect(
|
|
594
651
|
input_data=input_data,
|
|
595
652
|
output_format=output_format,
|
|
596
653
|
prompt_id=prompt_id,
|
|
@@ -599,26 +656,32 @@ class AsyncIntel():
|
|
|
599
656
|
)
|
|
600
657
|
|
|
601
658
|
try:
|
|
602
|
-
|
|
603
659
|
json_str = extract_(ai_result,r'json')
|
|
604
|
-
# json_str = fix_broken_json_string(json_str)
|
|
605
660
|
ai_result = json.loads(json_str)
|
|
606
|
-
OutputFormat
|
|
661
|
+
if OutputFormat:
|
|
662
|
+
OutputFormat(**ai_result)
|
|
607
663
|
|
|
608
664
|
except JSONDecodeError as e:
|
|
609
|
-
slog(ai_result,logger=self.logger.error)
|
|
610
665
|
try:
|
|
611
666
|
self.logger.error(f"尝试补救")
|
|
612
667
|
json_str = fix_broken_json_string(json_str)
|
|
613
668
|
ai_result = json.loads(json_str)
|
|
614
|
-
OutputFormat
|
|
669
|
+
if OutputFormat:
|
|
670
|
+
OutputFormat(**ai_result)
|
|
615
671
|
|
|
616
672
|
except JSONDecodeError as e:
|
|
617
|
-
raise IntellectRemoveFormatError(f"prompt_id: {prompt_id}
|
|
673
|
+
raise IntellectRemoveFormatError(f"prompt_id: {prompt_id} 生成的内容为无法被Json解析 {e}") from e
|
|
674
|
+
|
|
675
|
+
except ValidationError as e:
|
|
676
|
+
err_info = e.errors()[0]
|
|
677
|
+
raise IntellectRemoveFormatError(f"{err_info["type"]}: 属性:{err_info['loc']}, 发生了如下错误: {err_info['msg']}, 格式校验失败, 当前输入为: {err_info['input']} 请检查") from e
|
|
678
|
+
|
|
679
|
+
except Exception as e:
|
|
680
|
+
raise Exception(f"Error {prompt_id} : {e}") from e
|
|
618
681
|
|
|
619
682
|
return ai_result
|
|
620
683
|
|
|
621
|
-
async def
|
|
684
|
+
async def intellect_formats(self,
|
|
622
685
|
input_datas: list[dict | str],
|
|
623
686
|
OutputFormat: object,
|
|
624
687
|
prompt_id: str,
|
|
@@ -628,14 +691,16 @@ class AsyncIntel():
|
|
|
628
691
|
):
|
|
629
692
|
|
|
630
693
|
async with create_async_session(self.engine) as session:
|
|
631
|
-
prompt_result = await self.
|
|
694
|
+
prompt_result = await self.get_prompt_safe(prompt_id=prompt_id,
|
|
632
695
|
session=session)
|
|
696
|
+
if prompt_result is None:
|
|
697
|
+
raise IntellectRemoveError("不存在的prompt_id")
|
|
633
698
|
if prompt_result.action_type != "inference":
|
|
634
699
|
input_datas = input_datas[:1]
|
|
635
700
|
tasks = []
|
|
636
701
|
for input_data in input_datas:
|
|
637
702
|
tasks.append(
|
|
638
|
-
self.
|
|
703
|
+
self.intellect_format(
|
|
639
704
|
input_data = input_data,
|
|
640
705
|
prompt_id = prompt_id,
|
|
641
706
|
OutputFormat = OutputFormat,
|
|
@@ -646,9 +711,8 @@ class AsyncIntel():
|
|
|
646
711
|
)
|
|
647
712
|
results = await asyncio.gather(*tasks, return_exceptions=False)
|
|
648
713
|
return results
|
|
649
|
-
|
|
650
714
|
|
|
651
|
-
def
|
|
715
|
+
def intellect_warp(self,prompt_id: str):
|
|
652
716
|
def outer_packing(func):
|
|
653
717
|
@functools.wraps(func)
|
|
654
718
|
async def wrapper(*args, **kwargs):
|
|
@@ -663,7 +727,7 @@ class AsyncIntel():
|
|
|
663
727
|
elif isinstance(input_data,str):
|
|
664
728
|
input_ = output_ = input_data
|
|
665
729
|
|
|
666
|
-
output_ = await self.
|
|
730
|
+
output_ = await self.intellect_format(
|
|
667
731
|
input_data = input_data,
|
|
668
732
|
prompt_id = prompt_id,
|
|
669
733
|
OutputFormat = OutputFormat,
|
|
@@ -675,3 +739,182 @@ class AsyncIntel():
|
|
|
675
739
|
return result
|
|
676
740
|
return wrapper
|
|
677
741
|
return outer_packing
|
|
742
|
+
|
|
743
|
+
async def intellect_format_eval(self,
                                OutputFormat: object,
                                prompt_id: str,
                                ExtraFormats: list[object] = [],
                                version: str = None,
                                MIN_SUCCESS_RATE = 80.0,
                                ConTent_Function = None,
                                ):
    """Replay the stored use cases of a prompt and report the pass rate.

    Every use case returned by ``get_use_case`` is sent through
    ``intellect_format``. A case counts as successful when that call (and
    the optional ``ConTent_Function`` hook) finishes without raising;
    otherwise it is recorded as a bad case. The outcome of each case is
    also written to ``use_case.output`` and committed.

    Args:
        OutputFormat: Format class forwarded to ``intellect_format``.
        prompt_id: Identifier of the prompt under evaluation.
        ExtraFormats: Extra format classes forwarded unchanged (the
            default list is never mutated here).
        version: Prompt version forwarded to ``intellect_format``.
        MIN_SUCCESS_RATE: Minimum success percentage (0-100) required for
            the "通过" status.
        ConTent_Function: Optional callable invoked with no arguments
            after each successful ``intellect_format`` call; an exception
            raised by it marks the case as failed.
            NOTE(review): presumably meant as a content check on the
            model output, but it currently receives no arguments - confirm.

    Returns:
        A 4-tuple ``(status, success_rate, total, bad_cases)``: status is
        "通过" or "未通过", ``success_rate`` is a percentage, ``total`` is
        the number of use cases as a string and ``bad_cases`` is a JSON
        string listing the failed case records. With no stored use cases
        the success rate is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.

    Raises:
        IntellectRemoveError: If the prompt does not exist or its latest
            ``action_type`` is not "inference".

    TODO: human-driven evaluation.
    TODO: LLM-driven evaluation.
    """
    async with create_async_session(self.engine) as session:
        use_cases = await self.get_use_case(target_prompt_id=prompt_id, session=session)
        prompt_result = await self.get_prompt_safe(prompt_id=prompt_id,
                                                   session=session)
        if prompt_result is None:
            raise IntellectRemoveError("不存在的prompt_id")
        if prompt_result.action_type != "inference":
            raise IntellectRemoveError("请在inference模式下使用次类")

        total_assertions = len(use_cases)
        result_cases = []

        async def evals_func(use_case):
            # Read the case text once, before any await, so later
            # bookkeeping never has to touch the ORM attribute again.
            case_input = use_case.use_case
            try:
                ai_result = await self.intellect_format(
                    input_data=case_input,
                    prompt_id=prompt_id,
                    OutputFormat=OutputFormat,
                    ExtraFormats=ExtraFormats,
                    version=version,
                    inference_save_case=False,
                )
                if ConTent_Function:
                    ConTent_Function()
                use_case.output = ai_result
                result_cases.append({"type": "Successful", "case": case_input, "reply": "pass"})
            except IntellectRemoveFormatError as e:
                result_cases.append({"type": "FAILED", "case": case_input, "reply": f"{e}"})
                use_case.output = f"FAILED-{e}"
            except Exception as e:  # any other failure also counts as a bad case
                result_cases.append({"type": "FAILED", "case": case_input, "reply": f"Exp {e}"})
                use_case.output = f"FAILED-{e}"

        tasks = [evals_func(use_case) for use_case in use_cases]
        await tqdm.gather(*tasks, total=len(tasks))

        # Commit once after every task has finished: a single AsyncSession
        # must not be driven by several tasks at the same time, and a
        # commit in the middle of the run may expire objects that other
        # tasks are still using.
        await session.commit()

        bad_case = [record for record in result_cases if record["type"] != "Successful"]
        successful_assertions = len(result_cases) - len(bad_case)

        # Guard against prompts that have no stored use cases.
        if total_assertions:
            success_rate = (successful_assertions / total_assertions) * 100
        else:
            success_rate = 0.0

        status = "通过" if success_rate >= MIN_SUCCESS_RATE else "未通过"
        return status, success_rate, str(total_assertions), json.dumps(bad_case, ensure_ascii=False)
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
def draw_data(self):
    """Show the evaluation summary as a horizontal Plotly bar chart.

    Reads ``self.eval_df`` (columns used: ``name``, ``status``, ``score``,
    ``total`` and ``bad_case``). One bar per row shows ``score`` as a
    percentage, green when ``status`` is '通过' and red otherwise, drawn
    together with a grey 100 % background bar. The ``bad_case`` column is
    printed in an annotation below the plot, and the figure is displayed
    with ``fig.show()``. Nothing is returned.
    """
    frame = self.eval_df
    row_count = len(frame)

    # Pick the bar colour per row from its status.
    bar_colors = [
        'mediumseagreen' if state == '通过' else 'lightcoral'
        for state in frame['status']
    ]
    # Text drawn inside each bar, e.g. "87.50%".
    percent_labels = frame['score'].apply(lambda value: f'{value:.2f}%')
    # Element-wise concatenation: one hover template per row.
    hover_text = (
        "<b>prompt:</b> %{y}<br><b>状态:</b> " + frame['status']
        + "<br><b>总量:</b> " + frame['total']
        + "<br><b>通过百分比:</b> %{x:.2f}%<extra></extra>"
    )

    score_bars = go.Bar(
        y=frame['name'],
        x=frame['score'],
        orientation='h',
        name='通过率',
        marker_color=bar_colors,
        text=percent_labels,
        textposition='inside',
        insidetextanchor='middle',
        hovertemplate=hover_text,
    )
    # Grey helper bars filled to 100 % serve as the background.
    backdrop_bars = go.Bar(
        y=frame['name'],
        x=[100] * row_count,
        orientation='h',
        name='总计',
        marker_color='lightgray',
        hoverinfo='none',
        opacity=0.5,
        showlegend=False,
    )

    fig = go.Figure()
    fig.add_trace(score_bars)
    fig.add_trace(backdrop_bars)

    # Bottom margin and height already include the room needed by the
    # bad-case annotation placed under the plot area.
    fig.update_layout(
        title='各项目/批次通过百分比及状态',
        xaxis={
            'title': '通过百分比 (%)',
            'range': [0, 100],
            'tickvals': [0, 25, 50, 75, 100],
            'showgrid': True,
            'gridcolor': 'lightgray',
        },
        yaxis={
            'title': '项目/批次',
            'autorange': "reversed",
        },
        barmode='overlay',
        hovermode="y unified",
        margin={'l': 100, 'r': 20, 't': 60, 'b': 100},
        height=400 + row_count * 30 + 50,
    )

    bad_case_text = str(frame['bad_case'].to_dict())
    fig.add_annotation(
        text=f"<b>bad_case:</b> {bad_case_text}",
        # "paper" coordinates are relative to the plotting area, so a
        # negative y places the box below the chart.
        xref="paper",
        yref="paper",
        x=0.01,
        y=-0.15,
        showarrow=False,
        align="left",
        font={'family': "Arial, sans-serif", 'size': 12, 'color': "red"},
        bgcolor="white",
        bordercolor="red",
        borderwidth=1,
        borderpad=4,
        xanchor='left',
        yanchor='top',
    )

    fig.show()
|
|
906
|
+
|
|
907
|
+
async def _evals(self,prompt_id, OutputFormat, ExtraFormats_list = [],**kwargs):
|
|
908
|
+
|
|
909
|
+
status,score, total, bad_case = await self.intellect_format_eval(
|
|
910
|
+
prompt_id=prompt_id,
|
|
911
|
+
OutputFormat = OutputFormat,
|
|
912
|
+
ExtraFormats = ExtraFormats_list,
|
|
913
|
+
version = None,
|
|
914
|
+
**kwargs
|
|
915
|
+
)
|
|
916
|
+
self.df.loc[len(self.df)] = {"name":prompt_id,
|
|
917
|
+
'status':status,"score":score,
|
|
918
|
+
"total":total,"bad_case":bad_case}
|
|
919
|
+
|
|
920
|
+
# 整体测试d, 测试未通过d, 大模型调整再测试, 依旧不通过, 大模型裂变, 仍不通过, 互换人力
|