codegnipy 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegnipy/__init__.py +190 -0
- codegnipy/cli.py +153 -0
- codegnipy/decorator.py +151 -0
- codegnipy/determinism.py +631 -0
- codegnipy/memory.py +276 -0
- codegnipy/providers.py +1160 -0
- codegnipy/reflection.py +244 -0
- codegnipy/runtime.py +197 -0
- codegnipy/scheduler.py +498 -0
- codegnipy/streaming.py +387 -0
- codegnipy/tools.py +481 -0
- codegnipy/transformer.py +155 -0
- codegnipy/validation.py +961 -0
- codegnipy-0.0.1.dist-info/METADATA +417 -0
- codegnipy-0.0.1.dist-info/RECORD +19 -0
- codegnipy-0.0.1.dist-info/WHEEL +5 -0
- codegnipy-0.0.1.dist-info/entry_points.txt +2 -0
- codegnipy-0.0.1.dist-info/licenses/LICENSE +21 -0
- codegnipy-0.0.1.dist-info/top_level.txt +1 -0
codegnipy/determinism.py
ADDED
|
@@ -0,0 +1,631 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Codegnipy 确定性保证模块
|
|
3
|
+
|
|
4
|
+
提供类型约束、模拟执行和幻觉检测功能,确保 LLM 输出的可靠性。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
import json
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import (
|
|
13
|
+
Any, List, Dict, Type, TypeVar, Optional, TYPE_CHECKING
|
|
14
|
+
)
|
|
15
|
+
from pydantic import BaseModel, ValidationError as PydanticValidationError
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from .runtime import CognitiveContext
|
|
19
|
+
from .validation import BaseValidator
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
T = TypeVar('T')
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ValidationStatus(Enum):
    """Possible verdicts of a constraint validation."""

    # The value satisfied every check of the constraint.
    VALID = "valid"
    # At least one check failed.
    INVALID = "invalid"
    # No definite verdict could be reached.
    UNCERTAIN = "uncertain"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ValidationResult:
    """Outcome of validating a single value against a constraint."""

    # Overall verdict of the validation.
    status: ValidationStatus
    # The value handed back by the validator; validators in this module may
    # return a coerced or normalized form of the input.
    value: Any
    # Human-readable descriptions of every failed check.
    errors: List[str] = field(default_factory=list)
    # Non-fatal remarks that do not affect the verdict.
    warnings: List[str] = field(default_factory=list)
    # Confidence attached to the verdict; defaults to full confidence.
    confidence: float = 1.0
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class TypeConstraint(ABC):
    """Abstract interface shared by every type constraint.

    A concrete constraint must be able to validate a value and to describe
    itself as text that can be embedded in an LLM prompt.
    """

    @abstractmethod
    def validate(self, value: Any) -> ValidationResult:
        """Check ``value`` against this constraint and report the outcome."""

    @abstractmethod
    def to_prompt(self) -> str:
        """Return a textual description of the constraint for an LLM prompt."""
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class PrimitiveConstraint(TypeConstraint):
    """Constraint on a single primitive value.

    Validation checks, in order: the value's type (coercing to ``bool``,
    ``int`` or ``float`` where possible), optional numeric bounds, optional
    length bounds, and an optional regular expression for strings.
    """

    # Display names (Chinese) used in error messages and prompt descriptions.
    TYPE_MAP = {
        str: "字符串",
        int: "整数",
        float: "浮点数",
        bool: "布尔值",
        list: "列表",
        dict: "字典"
    }

    # Textual spellings accepted for booleans, compared case-insensitively.
    _TRUE_WORDS = ('true', 'yes', '1')
    _FALSE_WORDS = ('false', 'no', '0')

    def __init__(self, expected_type: Type, min_value=None, max_value=None,
                 min_length=None, max_length=None, pattern=None):
        """Store the expected type and the optional bounds.

        Args:
            expected_type: Type the value must have (or be coercible to).
            min_value: Inclusive lower bound, applied to int/float constraints.
            max_value: Inclusive upper bound, applied to int/float constraints.
            min_length: Minimum ``len()`` for values that have a length.
            max_length: Maximum ``len()`` for values that have a length.
            pattern: Regular expression a string value must match from its
                start (``re.match`` semantics).
        """
        self.expected_type = expected_type
        self.min_value = min_value
        self.max_value = max_value
        self.min_length = min_length
        self.max_length = max_length
        self.pattern = pattern

    def _type_label(self) -> str:
        """Return the display name of the expected type."""
        return self.TYPE_MAP.get(self.expected_type, self.expected_type.__name__)

    def _coerce(self, value: Any) -> Any:
        """Convert ``value`` to the expected type.

        Raises:
            ValueError, TypeError: If the value cannot be converted, or the
                expected type is not one this constraint knows how to coerce to.
        """
        if self.expected_type is bool:
            if not isinstance(value, str):
                return bool(value)
            # Strip surrounding whitespace so that "true\n" or " yes " are
            # accepted, in line with how int()/float() already treat whitespace.
            token = value.strip().lower()
            if token in self._TRUE_WORDS:
                return True
            if token in self._FALSE_WORDS:
                return False
            raise ValueError(f"not a boolean literal: {value!r}")
        if self.expected_type in (int, float):
            return self.expected_type(value)
        raise TypeError("no coercion available for the expected type")

    def validate(self, value: Any) -> ValidationResult:
        """Validate ``value`` and return the (possibly coerced) result.

        Args:
            value: Value to check.

        Returns:
            A ``ValidationResult`` whose ``value`` is the coerced value when
            coercion succeeded, otherwise the value as passed in. ``status``
            is ``VALID`` only when no check produced an error.
        """
        errors: List[str] = []
        warnings: List[str] = []

        # Type check, with a coercion attempt when the type does not match.
        if not isinstance(value, self.expected_type):
            try:
                value = self._coerce(value)
            except (ValueError, TypeError):
                errors.append(f"类型错误: 期望 {self._type_label()}, 实际 {type(value).__name__}")

        # Numeric range check.
        if self.expected_type in (int, float) and isinstance(value, (int, float)):
            has_bounds = self.min_value is not None or self.max_value is not None
            # NaN is the only float that differs from itself. Every ordering
            # comparison with NaN is False, so without this guard NaN would
            # slip through both bounds unnoticed.
            if has_bounds and isinstance(value, float) and value != value:
                errors.append(f"值 {value} 不是有效数值 (NaN), 无法进行范围检查")
            else:
                if self.min_value is not None and value < self.min_value:
                    errors.append(f"值 {value} 小于最小值 {self.min_value}")
                if self.max_value is not None and value > self.max_value:
                    errors.append(f"值 {value} 大于最大值 {self.max_value}")

        # Length check for anything that has a length.
        if hasattr(value, '__len__'):
            length = len(value)
            if self.min_length is not None and length < self.min_length:
                errors.append(f"长度 {length} 小于最小长度 {self.min_length}")
            if self.max_length is not None and length > self.max_length:
                errors.append(f"长度 {length} 大于最大长度 {self.max_length}")

        # Regular-expression check (strings only).
        if self.pattern and isinstance(value, str):
            if not re.match(self.pattern, value):
                errors.append(f"字符串不匹配模式: {self.pattern}")

        status = ValidationStatus.VALID if not errors else ValidationStatus.INVALID
        return ValidationResult(status=status, value=value, errors=errors, warnings=warnings)

    def to_prompt(self) -> str:
        """Describe the constraint as text for inclusion in an LLM prompt."""
        desc = f"类型: {self._type_label()}"

        if self.min_value is not None or self.max_value is not None:
            range_desc = []
            if self.min_value is not None:
                range_desc.append(f"最小值 {self.min_value}")
            if self.max_value is not None:
                range_desc.append(f"最大值 {self.max_value}")
            desc += f", {' '.join(range_desc)}"

        if self.min_length is not None or self.max_length is not None:
            len_desc = []
            if self.min_length is not None:
                len_desc.append(f"最小长度 {self.min_length}")
            if self.max_length is not None:
                len_desc.append(f"最大长度 {self.max_length}")
            desc += f", {' '.join(len_desc)}"

        if self.pattern:
            desc += f", 必须匹配模式: {self.pattern}"

        return desc
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class EnumConstraint(TypeConstraint):
    """Constraint that restricts a value to a fixed set of allowed values."""

    def __init__(self, allowed_values: List[Any], case_sensitive: bool = True):
        """Remember the allowed values and whether matching is case-sensitive."""
        self.allowed_values = allowed_values
        self.case_sensitive = case_sensitive

    def validate(self, value: Any) -> ValidationResult:
        """Check membership of ``value`` in the allowed values.

        In case-insensitive mode both sides are compared as lower-cased
        strings, and a string input is replaced by the allowed value it
        matched so the caller gets the canonical spelling.
        """
        if self.case_sensitive:
            candidate = value
            pool = self.allowed_values
        else:
            candidate = str(value).lower()
            pool = [str(v).lower() for v in self.allowed_values]

        errors = []
        if candidate not in pool:
            errors.append(f"值 '{value}' 不在允许的值中: {self.allowed_values}")

        # Hand back the allowed value itself (with its original casing).
        if not self.case_sensitive and isinstance(value, str):
            lowered = value.lower()
            value = next(
                (av for av in self.allowed_values if str(av).lower() == lowered),
                value,
            )

        if errors:
            status = ValidationStatus.INVALID
        else:
            status = ValidationStatus.VALID
        return ValidationResult(status=status, value=value, errors=errors)

    def to_prompt(self) -> str:
        """List the allowed values as text for an LLM prompt."""
        listed = ', '.join(map(str, self.allowed_values))
        return f"必须是以下值之一: {listed}"
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class SchemaConstraint(TypeConstraint):
    """Constraint that validates a value against a Pydantic model."""

    def __init__(self, model_class: Type[BaseModel]):
        """Remember the Pydantic model class used for validation."""
        self.model_class = model_class

    def validate(self, value: Any) -> ValidationResult:
        """Validate ``value`` with the model, decoding JSON text first.

        A string input is parsed as JSON before validation. On success the
        result carries the model dumped back to plain data; on failure it
        carries one error line per Pydantic error.
        """
        # Decode JSON text up front; undecodable text is rejected immediately.
        if isinstance(value, str):
            try:
                value = json.loads(value)
            except json.JSONDecodeError:
                return ValidationResult(
                    status=ValidationStatus.INVALID,
                    value=value,
                    errors=["无法解析 JSON 字符串"]
                )

        try:
            model = self.model_class.model_validate(value)
        except PydanticValidationError as exc:
            errors = []
            for issue in exc.errors():
                location = '.'.join(str(part) for part in issue['loc'])
                errors.append(f"{location}: {issue['msg']}")
            return ValidationResult(
                status=ValidationStatus.INVALID,
                value=value,
                errors=errors
            )

        return ValidationResult(
            status=ValidationStatus.VALID,
            value=model.model_dump()
        )

    def to_prompt(self) -> str:
        """Render the model's JSON Schema as text for an LLM prompt."""
        rendered = json.dumps(
            self.model_class.model_json_schema(), ensure_ascii=False, indent=2
        )
        return f"必须符合以下 JSON Schema:\n{rendered}"
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class ListConstraint(TypeConstraint):
    """Constraint on a list: optional length bounds and per-item constraint."""

    def __init__(self, item_constraint: Optional[TypeConstraint] = None, min_length=None, max_length=None):
        """Store the optional item constraint and length bounds.

        Args:
            item_constraint: Constraint applied to every element, if given.
            min_length: Minimum number of elements, if given.
            max_length: Maximum number of elements, if given.
        """
        self.item_constraint = item_constraint
        self.min_length = min_length
        self.max_length = max_length

    def validate(self, value: Any) -> ValidationResult:
        """Validate a list, or a JSON string that encodes one.

        Args:
            value: A list, or a string containing JSON.

        Returns:
            A ``ValidationResult``. When an item constraint is set, the
            returned list has the same length and order as the input: items
            that passed are replaced by their validated value, items that
            failed are kept as they were, so the ``索引 i`` entries in
            ``errors`` refer to positions in the returned list.
        """
        errors: List[str] = []
        warnings: List[str] = []

        # Decode JSON text.
        if isinstance(value, str):
            try:
                value = json.loads(value)
            except json.JSONDecodeError:
                errors.append("无法解析 JSON 字符串")
                return ValidationResult(status=ValidationStatus.INVALID, value=value, errors=errors)

        if not isinstance(value, list):
            errors.append(f"类型错误: 期望列表, 实际 {type(value).__name__}")
            return ValidationResult(status=ValidationStatus.INVALID, value=value, errors=errors)

        # Length check.
        if self.min_length is not None and len(value) < self.min_length:
            errors.append(f"列表长度 {len(value)} 小于最小长度 {self.min_length}")
        if self.max_length is not None and len(value) > self.max_length:
            errors.append(f"列表长度 {len(value)} 大于最大长度 {self.max_length}")

        # Per-item validation.
        if self.item_constraint:
            checked_items = []
            for i, item in enumerate(value):
                result = self.item_constraint.validate(item)
                if result.status == ValidationStatus.INVALID:
                    errors.append(f"索引 {i}: {'; '.join(result.errors)}")
                    # Keep the failed item in place; dropping it would shift
                    # later items and make the reported indices meaningless.
                    checked_items.append(item)
                else:
                    checked_items.append(result.value)
                    # Surface item-level warnings instead of discarding them.
                    warnings.extend(f"索引 {i}: {w}" for w in result.warnings)
            value = checked_items

        status = ValidationStatus.VALID if not errors else ValidationStatus.INVALID
        return ValidationResult(status=status, value=value, errors=errors, warnings=warnings)

    def to_prompt(self) -> str:
        """Describe the list constraint as text for an LLM prompt."""
        desc = "类型: 列表"
        if self.min_length is not None:
            desc += f", 最小长度 {self.min_length}"
        if self.max_length is not None:
            desc += f", 最大长度 {self.max_length}"
        if self.item_constraint:
            desc += f"\n元素约束: {self.item_constraint.to_prompt()}"
        return desc
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
# ============ 模拟执行模式 ============
|
|
280
|
+
|
|
281
|
+
class SimulationMode(Enum):
    """Operating modes of the simulator."""

    # No simulation: real calls are made.
    OFF = "off"
    # Answer from configured mock responses.
    MOCK = "mock"
    # Record real responses.
    RECORD = "record"
    # Play back previously recorded responses.
    REPLAY = "replay"
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
@dataclass
class MockResponse:
    """A prompt paired with the response stored for it."""

    # Prompt text the response belongs to.
    prompt: str
    # Response text stored for that prompt.
    response: str
    # Free-form extra data; every instance gets its own empty dict by default.
    metadata: Optional[Dict[str, Any]] = field(default_factory=dict)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
class Simulator:
    """Stand-in for the LLM, for tests and development.

    Serves responses from configured mock patterns or from recorded
    prompt/response pairs, so no real LLM call is needed.
    """

    def __init__(self, mode: SimulationMode = SimulationMode.MOCK):
        """Create a simulator in ``mode`` with no mocks and no recordings."""
        self.mode = mode
        self._recordings: List[MockResponse] = []
        self._mock_responses: Dict[str, str] = {}
        self._default_response = "This is a mock response."

    def set_mock_response(self, prompt_pattern: str, response: str):
        """Register ``response`` for prompts matching the regex ``prompt_pattern``."""
        self._mock_responses[prompt_pattern] = response

    def set_default_response(self, response: str):
        """Set the response returned when no mock pattern matches."""
        self._default_response = response

    def get_response(self, prompt: str) -> str:
        """Return the response for ``prompt`` according to the current mode.

        Raises:
            RuntimeError: If the mode is ``OFF``, or any mode other than
                ``MOCK`` and ``REPLAY``.
            ValueError: In ``REPLAY`` mode when no recording matches.
        """
        current = self.mode
        if current == SimulationMode.OFF:
            raise RuntimeError("模拟器处于关闭状态,不应调用此方法")
        if current == SimulationMode.MOCK:
            return self._get_mock_response(prompt)
        if current == SimulationMode.REPLAY:
            return self._get_replay_response(prompt)
        raise RuntimeError(f"未知模式: {self.mode}")

    def _get_mock_response(self, prompt: str) -> str:
        """Return the first mock whose pattern is found in ``prompt``.

        Patterns are tried in registration order, ignoring case; the default
        response is returned when none matches.
        """
        hits = (
            reply
            for regex, reply in self._mock_responses.items()
            if re.search(regex, prompt, re.IGNORECASE)
        )
        return next(hits, self._default_response)

    def _get_replay_response(self, prompt: str) -> str:
        """Return the first recorded response whose prompt equals ``prompt``."""
        found = next(
            (entry for entry in self._recordings if entry.prompt == prompt),
            None,
        )
        if found is None:
            raise ValueError(f"未找到匹配的回放响应: {prompt[:50]}...")
        return found.response

    def record(self, prompt: str, response: str, metadata: Optional[Dict[str, Any]] = None):
        """Append a prompt/response pair to the recordings."""
        entry = MockResponse(
            prompt=prompt,
            response=response,
            metadata=metadata if metadata else {}
        )
        self._recordings.append(entry)

    def load_recordings(self, filepath: str):
        """Replace the recordings with the entries of a JSON file."""
        with open(filepath, 'r', encoding='utf-8') as handle:
            raw_entries = json.load(handle)
        loaded = []
        for raw in raw_entries:
            loaded.append(MockResponse(**raw))
        self._recordings = loaded

    def save_recordings(self, filepath: str):
        """Write the recordings to ``filepath`` as a JSON array."""
        payload = []
        for entry in self._recordings:
            payload.append({
                "prompt": entry.prompt,
                "response": entry.response,
                "metadata": entry.metadata,
            })
        with open(filepath, 'w', encoding='utf-8') as handle:
            json.dump(payload, handle, ensure_ascii=False, indent=2)

    def clear_recordings(self):
        """Remove every recording."""
        self._recordings.clear()
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
# ============ 幻觉检测 ============
|
|
378
|
+
|
|
379
|
+
@dataclass
class HallucinationCheck:
    """Result of a hallucination check on one response."""

    # Whether the response was flagged as a likely hallucination.
    is_hallucination: bool
    # Score attached to the verdict.
    confidence: float
    # Findings that contributed to the verdict.
    reasons: List[str] = field(default_factory=list)
    # Follow-up actions suggested to the caller.
    suggestions: List[str] = field(default_factory=list)
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class HallucinationDetector:
    """Heuristic detector for likely hallucinations in LLM output.

    A response is scored with regex-based heuristics (suspicious dates, URLs,
    e-mail addresses, unsourced claims, many numbers, citations without a
    citation format). An optional external validator can raise or lower the
    score. The synchronous and asynchronous entry points share one
    implementation of the heuristics.
    """

    # Score above which a response is flagged as a hallucination.
    _FLAG_THRESHOLD = 0.3

    def __init__(self, external_validator: Optional["BaseValidator"] = None):
        """Set up the default patterns.

        Args:
            external_validator: Optional external validator instance.
        """
        self._patterns = [
            # Common hallucination patterns.
            # No \b around the date: CJK ideographs count as word characters
            # in Unicode mode, so \b never matches between a digit and an
            # adjacent ideographic character (or after 日 followed by more
            # text), and dates inside running Chinese text were missed.
            (r'(?<!\d)\d{4}年\d{1,2}月\d{1,2}日', "可能是虚构的日期"),
            (r'\bhttps?://[^\s]+\b', "可能是虚构的 URL"),
            (r'\b[\w\.-]+@[\w\.-]+\.\w+\b', "可能是虚构的邮箱"),
            (r'研究表明|研究显示|据统计', "未引用具体来源的声明"),
            (r'众所周知|显然|毫无疑问', "可能缺乏证据支持的断言"),
        ]
        self._external_validator = external_validator

    def _score_heuristics(self, response: str, reasons: List[str]) -> float:
        """Run the regex heuristics, append findings to ``reasons``, return the score."""
        score = 0.0

        # Pattern checks: every match adds to the score.
        for pattern, description in self._patterns:
            matches = re.findall(pattern, response)
            if matches:
                reasons.append(f"{description}: 发现 {len(matches)} 处")
                score += 0.2 * len(matches)

        # Many numbers in one response are worth double-checking.
        # re.ASCII restricts \b to ASCII word characters, so numbers that sit
        # directly next to CJK text (e.g. "有3个") are counted as well.
        numbers = re.findall(r'\b\d+(?:\.\d+)?\b', response, re.ASCII)
        if len(numbers) > 5:
            reasons.append("包含大量数字,请验证准确性")
            score += 0.1

        # Mentions of citations without anything that looks like one.
        if '引用' in response or '参考' in response:
            if not re.search(r'\[\d+\]|\(\d{4}\)|"([^"]+)"', response):
                reasons.append("提到引用但未提供具体引用格式")
                score += 0.15

        return score

    @staticmethod
    def _apply_external(external_result: Any, status_enum: Any, score: float,
                        reasons: List[str], suggestions: List[str]) -> float:
        """Adjust ``score`` according to the external validator's verdict."""
        if external_result.status == status_enum.REFUTED:
            reasons.append(f"外部验证器反驳: {external_result.summary}")
            return score + 0.3
        if external_result.status == status_enum.VERIFIED:
            suggestions.append(f"外部验证器确认: {external_result.summary}")
            return max(0.0, score - 0.2)
        return score

    def _build_check(self, score: float, reasons: List[str],
                     suggestions: List[str]) -> HallucinationCheck:
        """Cap the score, derive the verdict and assemble the result object."""
        confidence = min(score, 1.0)
        is_hallucination = confidence > self._FLAG_THRESHOLD

        if is_hallucination:
            suggestions.append("建议验证响应中的具体细节")
            suggestions.append("考虑使用反思循环进行二次确认")

        return HallucinationCheck(
            is_hallucination=is_hallucination,
            confidence=confidence,
            reasons=reasons,
            suggestions=suggestions
        )

    def check(self, response: str, context: Optional[Dict[str, Any]] = None,
              use_external: bool = False) -> HallucinationCheck:
        """Check a response for hallucinations.

        Args:
            response: LLM response text.
            context: Optional context information (currently not used).
            use_external: Whether to consult the external validator.

        Returns:
            A ``HallucinationCheck``. A failure of the external validator is
            reported as a reason instead of being raised.
        """
        reasons: List[str] = []
        suggestions: List[str] = []
        score = self._score_heuristics(response, reasons)

        # Optional external validation.
        if use_external and self._external_validator and self._external_validator.is_available():
            try:
                from .validation import ExternalValidationStatus
                external_result = self._external_validator.validate(response)
                score = self._apply_external(
                    external_result, ExternalValidationStatus, score, reasons, suggestions
                )
            except Exception as e:
                # Best effort: the heuristic verdict is still returned.
                reasons.append(f"外部验证失败: {str(e)}")

        return self._build_check(score, reasons, suggestions)

    async def check_async(self, response: str, context: Optional[Dict[str, Any]] = None,
                          use_external: bool = True) -> HallucinationCheck:
        """Asynchronously check a response (awaits the external validator).

        Args:
            response: LLM response text.
            context: Optional context information (currently not used).
            use_external: Whether to consult the external validator.

        Returns:
            A ``HallucinationCheck``. A failure of the external validator is
            reported as a reason instead of being raised.
        """
        reasons: List[str] = []
        suggestions: List[str] = []
        score = self._score_heuristics(response, reasons)

        # Optional asynchronous external validation.
        if use_external and self._external_validator and self._external_validator.is_available():
            try:
                from .validation import ExternalValidationStatus
                external_result = await self._external_validator.validate_async(response)
                score = self._apply_external(
                    external_result, ExternalValidationStatus, score, reasons, suggestions
                )
            except Exception as e:
                # Best effort: the heuristic verdict is still returned.
                reasons.append(f"外部验证失败: {str(e)}")

        return self._build_check(score, reasons, suggestions)

    def add_pattern(self, pattern: str, description: str):
        """Add a custom detection pattern (a regex and its description)."""
        self._patterns.append((pattern, description))

    def set_external_validator(self, validator: "BaseValidator") -> None:
        """Set the external validator.

        Args:
            validator: External validator instance.
        """
        self._external_validator = validator
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
# ============ 确定性认知调用 ============
|
|
559
|
+
|
|
560
|
+
def deterministic_call(
    prompt: str,
    constraint: TypeConstraint,
    context: Optional["CognitiveContext"] = None,
    *,
    max_attempts: int = 3,
    use_reflection: bool = False,
    simulator: Optional[Simulator] = None
) -> ValidationResult:
    """Make a cognitive call whose answer must satisfy a type constraint.

    The constraint description is appended to the prompt. If the answer does
    not validate, the validation errors are fed back and the call is retried,
    up to ``max_attempts`` times.

    Args:
        prompt: Prompt text.
        constraint: Constraint the answer must satisfy.
        context: Cognitive context passed through to the underlying call.
        max_attempts: Maximum number of attempts; must be at least 1.
        use_reflection: Whether to obtain the answer via the reflection loop.
        simulator: Simulator for tests. In ``MOCK``/``REPLAY`` mode it
            supplies the response for the bare ``prompt``; in ``RECORD`` mode
            the real call is made and the returned response is recorded
            under ``prompt``; in ``OFF`` mode it is ignored.

    Returns:
        The ``ValidationResult`` of the first valid answer, or of the last
        attempt when none validated.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
    """
    if max_attempts < 1:
        # Without this guard the loop would not run and there would be no
        # result to return.
        raise ValueError("max_attempts 必须至少为 1")

    simulated = simulator is not None and simulator.mode in (
        SimulationMode.MOCK, SimulationMode.REPLAY
    )
    # RECORD means "call for real and keep the answer"; Simulator.get_response
    # cannot serve it, so it must not be routed there.
    recording = simulator is not None and simulator.mode == SimulationMode.RECORD

    # Build the prompt that carries the constraint.
    constrained_prompt = f"{prompt}\n\n约束: {constraint.to_prompt()}\n\n请严格按照约束要求回答。"

    for attempt in range(max_attempts):
        # Obtain a response.
        if simulated:
            response = simulator.get_response(prompt)
        elif use_reflection:
            # Imported lazily so that simulated runs do not need this module.
            from .reflection import with_reflection
            result = with_reflection(constrained_prompt, context)
            response = result.corrected_response or result.original_response
        else:
            # Imported lazily so that simulated runs do not need this module.
            from .runtime import cognitive_call
            response = cognitive_call(constrained_prompt, context)

        # Validate the response.
        validation = constraint.validate(response)

        is_last_attempt = attempt == max_attempts - 1
        if validation.status == ValidationStatus.VALID or is_last_attempt:
            if recording:
                # Record only the response that decides the outcome, keyed by
                # the bare prompt, which is what REPLAY looks up.
                simulator.record(prompt, response)
            return validation

        # Validation failed: feed the errors back and retry.
        error_feedback = "; ".join(validation.errors)
        constrained_prompt = f"{prompt}\n\n约束: {constraint.to_prompt()}\n\n上次的回答不符合要求,错误: {error_feedback}\n\n请修正后重新回答。"

    return validation
|