codegnipy 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,631 @@
1
+ """
2
+ Codegnipy 确定性保证模块
3
+
4
+ 提供类型约束、模拟执行和幻觉检测功能,确保 LLM 输出的可靠性。
5
+ """
6
+
7
+ import re
8
+ import json
9
+ from abc import ABC, abstractmethod
10
+ from dataclasses import dataclass, field
11
+ from enum import Enum
12
+ from typing import (
13
+ Any, List, Dict, Type, TypeVar, Optional, TYPE_CHECKING
14
+ )
15
+ from pydantic import BaseModel, ValidationError as PydanticValidationError
16
+
17
+ if TYPE_CHECKING:
18
+ from .runtime import CognitiveContext
19
+ from .validation import BaseValidator
20
+
21
+
22
+ T = TypeVar('T')
23
+
24
+
25
class ValidationStatus(Enum):
    """Possible outcomes of validating a value against a constraint."""

    # The string values are the serialised form of each state.
    VALID = "valid"
    INVALID = "invalid"
    UNCERTAIN = "uncertain"
30
+
31
+
32
@dataclass
class ValidationResult:
    """Outcome of a single ``validate`` call.

    Fields:
        status: Overall verdict of the validation.
        value: The value that was validated (constraints may return a
            converted form of the input here).
        errors: Human-readable reasons the value was rejected; a fresh
            empty list per instance by default.
        warnings: Non-fatal remarks; a fresh empty list per instance by
            default.
        confidence: Defaults to 1.0.
    """

    status: ValidationStatus
    value: Any
    # default_factory keeps instances from sharing one mutable list.
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)
    confidence: float = 1.0
40
+
41
+
42
class TypeConstraint(ABC):
    """Abstract base class that every type constraint implements."""

    @abstractmethod
    def validate(self, value: Any) -> ValidationResult:
        """Check whether ``value`` satisfies this constraint."""
        ...

    @abstractmethod
    def to_prompt(self) -> str:
        """Return a description of this constraint for use in an LLM prompt."""
        ...
54
+
55
+
56
class PrimitiveConstraint(TypeConstraint):
    """Constraint on a single primitive value.

    ``validate`` checks the value's type (coercing to ``bool``, ``int`` or
    ``float`` when the input is of another type), then applies the optional
    numeric bounds, length bounds and regular expression.
    """

    # Display names used in error messages and in the prompt description.
    TYPE_MAP = {
        str: "字符串",
        int: "整数",
        float: "浮点数",
        bool: "布尔值",
        list: "列表",
        dict: "字典"
    }

    def __init__(self, expected_type: Type, min_value=None, max_value=None,
                 min_length=None, max_length=None, pattern=None):
        """Store the constraint parameters.

        Args:
            expected_type: Type the value must have (or be coercible to).
            min_value: Inclusive lower bound, applied for int/float types.
            max_value: Inclusive upper bound, applied for int/float types.
            min_length: Minimum ``len()`` for values that have a length.
            max_length: Maximum ``len()`` for values that have a length.
            pattern: Regular expression a string value must match from its
                start (``re.match`` semantics).
        """
        self.expected_type = expected_type
        self.min_value = min_value
        self.max_value = max_value
        self.min_length = min_length
        self.max_length = max_length
        self.pattern = pattern

    def _type_label(self) -> str:
        """Return the display name of the expected type."""
        return self.TYPE_MAP.get(self.expected_type, self.expected_type.__name__)

    def _coerce(self, value: Any) -> Any:
        """Convert ``value`` to the expected type.

        Raises:
            ValueError, TypeError, OverflowError: when the value cannot be
                converted without guessing or losing information.
        """
        if self.expected_type is bool:
            if isinstance(value, str):
                # Tolerate surrounding whitespace, as int()/float() already do.
                token = value.strip().lower()
                if token in ('true', 'yes', '1'):
                    return True
                if token in ('false', 'no', '0'):
                    return False
                raise ValueError(f"not a boolean literal: {value!r}")
            return bool(value)

        if self.expected_type in (int, float):
            if (self.expected_type is int and isinstance(value, float)
                    and not value.is_integer()):
                # int(3.7) would silently truncate; the string "3.7" is
                # already rejected by int(), so reject the float as well.
                # is_integer() is also False for inf and nan.
                raise ValueError(f"non-integral float: {value!r}")
            return self.expected_type(value)

        # No implicit conversion exists for the remaining types.
        raise TypeError(f"cannot convert to {self.expected_type!r}")

    def validate(self, value: Any) -> ValidationResult:
        """Validate ``value`` and return the (possibly converted) result.

        All checks run even after a type error, so the result can carry
        several error messages at once.

        Returns:
            ValidationResult with status VALID when no error was recorded,
            otherwise INVALID. ``value`` holds the converted value when a
            conversion succeeded, else the original input.
        """
        errors: List[str] = []
        warnings: List[str] = []

        # Type check, falling back to conversion.
        if not isinstance(value, self.expected_type):
            try:
                value = self._coerce(value)
            except (ValueError, TypeError, OverflowError):
                # OverflowError covers int() of infinite values.
                errors.append(
                    f"类型错误: 期望 {self._type_label()}, 实际 {type(value).__name__}"
                )

        # Numeric range check (only meaningful once the value is a number).
        if self.expected_type in (int, float) and isinstance(value, (int, float)):
            if self.min_value is not None and value < self.min_value:
                errors.append(f"值 {value} 小于最小值 {self.min_value}")
            if self.max_value is not None and value > self.max_value:
                errors.append(f"值 {value} 大于最大值 {self.max_value}")

        # Length check for anything that supports len().
        if hasattr(value, '__len__'):
            length = len(value)
            if self.min_length is not None and length < self.min_length:
                errors.append(f"长度 {length} 小于最小长度 {self.min_length}")
            if self.max_length is not None and length > self.max_length:
                errors.append(f"长度 {length} 大于最大长度 {self.max_length}")

        # Regular-expression check for strings.
        if self.pattern and isinstance(value, str):
            if not re.match(self.pattern, value):
                errors.append(f"字符串不匹配模式: {self.pattern}")

        status = ValidationStatus.VALID if not errors else ValidationStatus.INVALID
        return ValidationResult(status=status, value=value, errors=errors, warnings=warnings)

    def to_prompt(self) -> str:
        """Return a one-line description of the constraint for an LLM prompt."""
        desc = f"类型: {self._type_label()}"

        if self.min_value is not None or self.max_value is not None:
            range_desc = []
            if self.min_value is not None:
                range_desc.append(f"最小值 {self.min_value}")
            if self.max_value is not None:
                range_desc.append(f"最大值 {self.max_value}")
            desc += f", {' '.join(range_desc)}"

        if self.min_length is not None or self.max_length is not None:
            len_desc = []
            if self.min_length is not None:
                len_desc.append(f"最小长度 {self.min_length}")
            if self.max_length is not None:
                len_desc.append(f"最大长度 {self.max_length}")
            desc += f", {' '.join(len_desc)}"

        if self.pattern:
            desc += f", 必须匹配模式: {self.pattern}"

        return desc
148
+
149
+
150
class EnumConstraint(TypeConstraint):
    """Constraint that restricts a value to a fixed list of allowed values."""

    def __init__(self, allowed_values: List[Any], case_sensitive: bool = True):
        """Store the allowed values.

        Args:
            allowed_values: Values the input may take.
            case_sensitive: When False, values are compared by the
                lower-cased form of ``str(value)``.
        """
        self.allowed_values = allowed_values
        self.case_sensitive = case_sensitive

    def validate(self, value: Any) -> ValidationResult:
        """Check that ``value`` is one of the allowed values.

        In case-insensitive mode the returned ``value`` is the matching
        entry of ``allowed_values`` (its canonical spelling), whatever the
        type of the input was.

        Returns:
            ValidationResult with status VALID on a match, otherwise INVALID
            with one error message and the input value unchanged.
        """
        errors: List[str] = []

        if self.case_sensitive:
            matched = value in self.allowed_values
        else:
            matched = False
            folded = str(value).lower()
            for allowed in self.allowed_values:
                if str(allowed).lower() == folded:
                    # Return the canonical entry, also for non-string input.
                    value = allowed
                    matched = True
                    break

        if not matched:
            errors.append(f"值 '{value}' 不在允许的值中: {self.allowed_values}")

        status = ValidationStatus.VALID if not errors else ValidationStatus.INVALID
        return ValidationResult(status=status, value=value, errors=errors)

    def to_prompt(self) -> str:
        """Return a description listing the allowed values for an LLM prompt."""
        return f"必须是以下值之一: {', '.join(str(v) for v in self.allowed_values)}"
178
+
179
+
180
class SchemaConstraint(TypeConstraint):
    """Constraint that validates a value against a Pydantic model."""

    def __init__(self, model_class: Type[BaseModel]):
        """Remember the Pydantic model class used for validation."""
        self.model_class = model_class

    def validate(self, value: Any) -> ValidationResult:
        """Validate ``value`` (a JSON string or parsed data) against the model.

        Returns:
            VALID with ``value`` set to the model's ``model_dump()`` output,
            or INVALID with one error per Pydantic error (or a single
            JSON-parse error).
        """
        payload = value

        # A string is treated as JSON text and decoded first.
        if isinstance(payload, str):
            try:
                payload = json.loads(payload)
            except json.JSONDecodeError:
                return ValidationResult(
                    status=ValidationStatus.INVALID,
                    value=payload,
                    errors=["无法解析 JSON 字符串"]
                )

        try:
            model = self.model_class.model_validate(payload)
            return ValidationResult(
                status=ValidationStatus.VALID,
                value=model.model_dump()
            )
        except PydanticValidationError as exc:
            # One message per error: dotted field location, then the message.
            problems = [
                f"{'.'.join(str(loc) for loc in error['loc'])}: {error['msg']}"
                for error in exc.errors()
            ]

        return ValidationResult(
            status=ValidationStatus.INVALID,
            value=payload,
            errors=problems
        )

    def to_prompt(self) -> str:
        """Return the model's JSON Schema as prompt text."""
        schema_text = json.dumps(
            self.model_class.model_json_schema(), ensure_ascii=False, indent=2
        )
        return f"必须符合以下 JSON Schema:\n{schema_text}"
222
+
223
+
224
class ListConstraint(TypeConstraint):
    """Constraint on a list: length bounds plus an optional per-item constraint."""

    def __init__(self, item_constraint: Optional[TypeConstraint] = None, min_length=None, max_length=None):
        """Store the constraint parameters.

        Args:
            item_constraint: Constraint applied to every element, if given.
            min_length: Minimum number of elements, if given.
            max_length: Maximum number of elements, if given.
        """
        self.item_constraint = item_constraint
        self.min_length = min_length
        self.max_length = max_length

    def validate(self, value: Any) -> ValidationResult:
        """Validate a list, or a JSON string that decodes to a list.

        Returns:
            ValidationResult whose ``value`` is the list of validated items
            when an item constraint is set. Items the item constraint
            rejects are reported in ``errors`` by index and are omitted
            from the returned list. Warnings raised by the item constraint
            are forwarded in ``warnings``, prefixed by index.
        """
        errors: List[str] = []
        warnings: List[str] = []

        # A string is treated as JSON text and decoded first.
        if isinstance(value, str):
            try:
                value = json.loads(value)
            except json.JSONDecodeError:
                errors.append("无法解析 JSON 字符串")
                return ValidationResult(status=ValidationStatus.INVALID, value=value, errors=errors)

        if not isinstance(value, list):
            errors.append(f"类型错误: 期望列表, 实际 {type(value).__name__}")
            return ValidationResult(status=ValidationStatus.INVALID, value=value, errors=errors)

        # Length check.
        size = len(value)
        if self.min_length is not None and size < self.min_length:
            errors.append(f"列表长度 {size} 小于最小长度 {self.min_length}")
        if self.max_length is not None and size > self.max_length:
            errors.append(f"列表长度 {size} 大于最大长度 {self.max_length}")

        # Per-item validation.
        if self.item_constraint:
            validated_items = []
            for i, item in enumerate(value):
                result = self.item_constraint.validate(item)
                if result.warnings:
                    # Forward item warnings instead of dropping them.
                    warnings.append(f"索引 {i}: {'; '.join(result.warnings)}")
                if result.status == ValidationStatus.INVALID:
                    errors.append(f"索引 {i}: {'; '.join(result.errors)}")
                else:
                    validated_items.append(result.value)
            value = validated_items

        status = ValidationStatus.VALID if not errors else ValidationStatus.INVALID
        return ValidationResult(status=status, value=value, errors=errors, warnings=warnings)

    def to_prompt(self) -> str:
        """Return a description of the list constraint for an LLM prompt."""
        desc = "类型: 列表"
        if self.min_length is not None:
            desc += f", 最小长度 {self.min_length}"
        if self.max_length is not None:
            desc += f", 最大长度 {self.max_length}"
        if self.item_constraint:
            desc += f"\n元素约束: {self.item_constraint.to_prompt()}"
        return desc
277
+
278
+
279
+ # ============ 模拟执行模式 ============
280
+
281
class SimulationMode(Enum):
    """Operating modes of the simulator."""

    # No simulation: real calls are made.
    OFF = "off"
    # Answer with mock responses.
    MOCK = "mock"
    # Record real responses.
    RECORD = "record"
    # Replay previously recorded responses.
    REPLAY = "replay"
287
+
288
+
289
@dataclass
class MockResponse:
    """One prompt/response pair with optional metadata.

    Fields:
        prompt: The prompt text.
        response: The response text paired with the prompt.
        metadata: Extra information about the pair; a fresh empty dict per
            instance by default.
    """

    prompt: str
    response: str
    # default_factory keeps instances from sharing one mutable dict.
    metadata: Optional[Dict[str, Any]] = field(default_factory=dict)
295
+
296
+
297
class Simulator:
    """Simulated executor for tests and development.

    Serves canned or previously recorded responses so that no real LLM
    call is needed.
    """

    def __init__(self, mode: SimulationMode = SimulationMode.MOCK):
        """Create a simulator in the given mode with no recordings or mocks."""
        self.mode = mode
        self._recordings: List[MockResponse] = []
        # Maps a regular-expression pattern to the response it triggers.
        self._mock_responses: Dict[str, str] = {}
        self._default_response = "This is a mock response."

    def set_mock_response(self, prompt_pattern: str, response: str):
        """Register ``response`` for prompts matching the regex ``prompt_pattern``."""
        self._mock_responses[prompt_pattern] = response

    def set_default_response(self, response: str):
        """Set the response returned in MOCK mode when no pattern matches."""
        self._default_response = response

    def get_response(self, prompt: str) -> str:
        """Return the simulated response for ``prompt`` according to the mode.

        Raises:
            RuntimeError: in OFF mode, in RECORD mode (the simulator has no
                response of its own to give), or for an unrecognised mode.
            ValueError: in REPLAY mode when no recording matches ``prompt``.
        """
        if self.mode == SimulationMode.OFF:
            raise RuntimeError("模拟器处于关闭状态,不应调用此方法")

        if self.mode == SimulationMode.MOCK:
            return self._get_mock_response(prompt)

        if self.mode == SimulationMode.REPLAY:
            return self._get_replay_response(prompt)

        if self.mode == SimulationMode.RECORD:
            # RECORD is a valid mode; report it accurately rather than as
            # an unknown mode.
            raise RuntimeError("RECORD 模式不提供响应,请在真实调用后使用 record() 记录")

        raise RuntimeError(f"未知模式: {self.mode}")

    def _get_mock_response(self, prompt: str) -> str:
        """Return the first registered response whose pattern matches ``prompt``.

        Patterns are tried in registration order with a case-insensitive
        ``re.search``; the default response is used when none matches.
        """
        for pattern, response in self._mock_responses.items():
            if re.search(pattern, prompt, re.IGNORECASE):
                return response

        return self._default_response

    def _get_replay_response(self, prompt: str) -> str:
        """Return the first recorded response whose prompt equals ``prompt``.

        Raises:
            ValueError: when no recording has exactly this prompt.
        """
        for recording in self._recordings:
            if recording.prompt == prompt:
                return recording.response

        raise ValueError(f"未找到匹配的回放响应: {prompt[:50]}...")

    def record(self, prompt: str, response: str, metadata: Optional[Dict[str, Any]] = None):
        """Append a prompt/response pair to the in-memory recordings."""
        self._recordings.append(MockResponse(
            prompt=prompt,
            response=response,
            metadata=metadata or {}
        ))

    def load_recordings(self, filepath: str):
        """Replace the recordings with those read from a JSON file.

        The file is expected to hold a list of objects whose keys match the
        ``MockResponse`` fields.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        self._recordings = [MockResponse(**r) for r in data]

    def save_recordings(self, filepath: str):
        """Write all recordings to ``filepath`` as a JSON list."""
        data = [
            {"prompt": r.prompt, "response": r.response, "metadata": r.metadata}
            for r in self._recordings
        ]
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def clear_recordings(self):
        """Remove all recordings."""
        self._recordings.clear()
375
+
376
+
377
+ # ============ 幻觉检测 ============
378
+
379
@dataclass
class HallucinationCheck:
    """Result of a hallucination check.

    Fields:
        is_hallucination: Whether the response was flagged.
        confidence: Score associated with the verdict.
        reasons: Findings that contributed to the verdict; a fresh empty
            list per instance by default.
        suggestions: Follow-up advice; a fresh empty list per instance by
            default.
    """

    is_hallucination: bool
    confidence: float
    # default_factory keeps instances from sharing one mutable list.
    reasons: List[str] = field(default_factory=list)
    suggestions: List[str] = field(default_factory=list)
386
+
387
+
388
class HallucinationDetector:
    """Heuristic detector for possibly hallucinated content in LLM output.

    Scores a response with regular-expression heuristics and, optionally,
    with an external validator.
    """

    # Integers/decimals that are not glued to ASCII letters, digits or "_".
    # Explicit lookarounds replace \b because CJK characters count as word
    # characters, so \b never fires between a Chinese character and a digit.
    _NUMBER_RE = re.compile(r'(?<![A-Za-z0-9_])\d+(?:\.\d+)?(?![A-Za-z0-9_])')
    # Accepted citation forms: [12], (2020) or a double-quoted title.
    _CITATION_RE = re.compile(r'\[\d+\]|\(\d{4}\)|"([^"]+)"')

    def __init__(self, external_validator: Optional["BaseValidator"] = None):
        """Create a detector with the built-in patterns.

        Args:
            external_validator: Optional external validator instance.
        """
        # (regex, description) pairs; each match adds to the score.
        self._patterns = [
            # No \b around the date/URL: see the note on _NUMBER_RE.
            (r'(?<!\d)\d{4}年\d{1,2}月\d{1,2}日', "可能是虚构的日期"),
            (r'(?<![A-Za-z0-9_])https?://[^\s]+\b', "可能是虚构的 URL"),
            (r'\b[\w\.-]+@[\w\.-]+\.\w+\b', "可能是虚构的邮箱"),
            (r'研究表明|研究显示|据统计', "未引用具体来源的声明"),
            (r'众所周知|显然|毫无疑问', "可能缺乏证据支持的断言"),
        ]
        self._external_validator = external_validator

    def _score_heuristics(self, response: str, reasons: List[str]) -> float:
        """Run the regex heuristics; append findings to ``reasons``, return the score."""
        score = 0.0

        # Pattern check: 0.2 per match.
        for pattern, description in self._patterns:
            matches = re.findall(pattern, response)
            if matches:
                reasons.append(f"{description}: 发现 {len(matches)} 处")
                score += 0.2 * len(matches)

        # Many numbers in one response are worth a manual check.
        if len(self._NUMBER_RE.findall(response)) > 5:
            reasons.append("包含大量数字,请验证准确性")
            score += 0.1

        # Mentions citations/references without any recognisable citation.
        if '引用' in response or '参考' in response:
            if not self._CITATION_RE.search(response):
                reasons.append("提到引用但未提供具体引用格式")
                score += 0.15

        return score

    @staticmethod
    def _apply_external_result(external_result, status_enum, score: float,
                               reasons: List[str], suggestions: List[str]) -> float:
        """Adjust ``score`` by the external verdict and return the new score."""
        if external_result.status == status_enum.REFUTED:
            reasons.append(f"外部验证器反驳: {external_result.summary}")
            return score + 0.3
        if external_result.status == status_enum.VERIFIED:
            suggestions.append(f"外部验证器确认: {external_result.summary}")
            return max(0.0, score - 0.2)
        return score

    @staticmethod
    def _build_check(score: float, reasons: List[str],
                     suggestions: List[str]) -> HallucinationCheck:
        """Turn the accumulated score into a ``HallucinationCheck``."""
        confidence = min(score, 1.0)
        is_hallucination = confidence > 0.3

        if is_hallucination:
            suggestions.append("建议验证响应中的具体细节")
            suggestions.append("考虑使用反思循环进行二次确认")

        return HallucinationCheck(
            is_hallucination=is_hallucination,
            confidence=confidence,
            reasons=reasons,
            suggestions=suggestions
        )

    def check(self, response: str, context: Optional[Dict[str, Any]] = None,
              use_external: bool = False) -> HallucinationCheck:
        """Check a response for hallucinations.

        Args:
            response: LLM response text.
            context: Optional context information (currently not used by
                the heuristics).
            use_external: Whether to consult the external validator.

        Returns:
            HallucinationCheck; the response is flagged when the capped
            score exceeds 0.3.
        """
        reasons: List[str] = []
        suggestions: List[str] = []
        score = self._score_heuristics(response, reasons)

        # Optional external validation.
        if use_external and self._external_validator and self._external_validator.is_available():
            try:
                from .validation import ExternalValidationStatus
                external_result = self._external_validator.validate(response)
                score = self._apply_external_result(
                    external_result, ExternalValidationStatus, score, reasons, suggestions
                )
            except Exception as e:
                # Best effort: a failing validator is reported, not raised.
                reasons.append(f"外部验证失败: {str(e)}")

        return self._build_check(score, reasons, suggestions)

    async def check_async(self, response: str, context: Optional[Dict[str, Any]] = None,
                          use_external: bool = True) -> HallucinationCheck:
        """Asynchronous variant of ``check`` using ``validate_async``.

        Args:
            response: LLM response text.
            context: Optional context information (currently not used by
                the heuristics).
            use_external: Whether to consult the external validator
                (defaults to True here).

        Returns:
            HallucinationCheck; the response is flagged when the capped
            score exceeds 0.3.
        """
        reasons: List[str] = []
        suggestions: List[str] = []
        score = self._score_heuristics(response, reasons)

        # Optional asynchronous external validation.
        if use_external and self._external_validator and self._external_validator.is_available():
            try:
                from .validation import ExternalValidationStatus
                external_result = await self._external_validator.validate_async(response)
                score = self._apply_external_result(
                    external_result, ExternalValidationStatus, score, reasons, suggestions
                )
            except Exception as e:
                # Best effort: a failing validator is reported, not raised.
                reasons.append(f"外部验证失败: {str(e)}")

        return self._build_check(score, reasons, suggestions)

    def add_pattern(self, pattern: str, description: str):
        """Add a custom detection regex with its description."""
        self._patterns.append((pattern, description))

    def set_external_validator(self, validator: "BaseValidator") -> None:
        """Set the external validator.

        Args:
            validator: External validator instance.
        """
        self._external_validator = validator
556
+
557
+
558
+ # ============ 确定性认知调用 ============
559
+
560
def deterministic_call(
    prompt: str,
    constraint: TypeConstraint,
    context: Optional["CognitiveContext"] = None,
    *,
    max_attempts: int = 3,
    use_reflection: bool = False,
    simulator: Optional[Simulator] = None
) -> ValidationResult:
    """Make a cognitive call whose answer must satisfy a type constraint.

    The constraint description is appended to the prompt. An answer that
    fails validation triggers a retry with the validation errors fed back,
    up to ``max_attempts`` attempts.

    Args:
        prompt: Prompt text.
        constraint: Constraint the response must satisfy.
        context: Cognitive context passed to the underlying call.
        max_attempts: Maximum number of attempts; must be at least 1.
        use_reflection: Whether to obtain the response via reflection.
        simulator: Simulator used instead of a real call (for tests). In
            RECORD mode the real call is made and its response is recorded
            under ``prompt``.

    Returns:
        The first VALID ValidationResult, or the result of the last attempt.

    Raises:
        ValueError: if ``max_attempts`` is less than 1.
    """
    from .runtime import cognitive_call
    from .reflection import with_reflection

    if max_attempts < 1:
        # Without this the loop never runs and no result exists to return.
        raise ValueError(f"max_attempts 必须 >= 1, 实际 {max_attempts}")

    simulating = simulator is not None and simulator.mode != SimulationMode.OFF
    recording = simulating and simulator.mode == SimulationMode.RECORD

    # Prompt with the constraint description attached.
    constrained_prompt = f"{prompt}\n\n约束: {constraint.to_prompt()}\n\n请严格按照约束要求回答。"

    for attempt in range(max_attempts):
        # Obtain a response.
        if simulating and not recording:
            # The simulator is keyed by the original prompt.
            response = simulator.get_response(prompt)
        else:
            if use_reflection:
                result = with_reflection(constrained_prompt, context)
                response = result.corrected_response or result.original_response
            else:
                response = cognitive_call(constrained_prompt, context)

            if recording:
                # Record under the original prompt, the key REPLAY looks up.
                simulator.record(prompt, response)

        # Validate the response.
        validation = constraint.validate(response)

        if validation.status == ValidationStatus.VALID:
            return validation

        # On failure, feed the errors back and retry.
        if attempt < max_attempts - 1:
            error_feedback = "; ".join(validation.errors)
            constrained_prompt = f"{prompt}\n\n约束: {constraint.to_prompt()}\n\n上次的回答不符合要求,错误: {error_feedback}\n\n请修正后重新回答。"

    return validation