open-reflection-protocol 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- open_reflection_protocol-0.3.0.dist-info/METADATA +262 -0
- open_reflection_protocol-0.3.0.dist-info/RECORD +29 -0
- open_reflection_protocol-0.3.0.dist-info/WHEEL +4 -0
- open_reflection_protocol-0.3.0.dist-info/entry_points.txt +2 -0
- orp/__init__.py +66 -0
- orp/adapters/__init__.py +6 -0
- orp/adapters/generic_json.py +24 -0
- orp/adapters/langgraph.py +24 -0
- orp/adapters/openai_agents.py +27 -0
- orp/adapters/otel.py +52 -0
- orp/capture.py +162 -0
- orp/cli.py +366 -0
- orp/compiler.py +124 -0
- orp/conflicts.py +62 -0
- orp/delivery.py +110 -0
- orp/effects.py +112 -0
- orp/evidence.py +92 -0
- orp/examples/failing_coding_agent.py +38 -0
- orp/experience.py +114 -0
- orp/export.py +60 -0
- orp/lessons.py +95 -0
- orp/mcp_server.py +171 -0
- orp/reflect.py +97 -0
- orp/replay.py +108 -0
- orp/rollback.py +82 -0
- orp/schema.py +303 -0
- orp/storage.py +459 -0
- orp/training.py +94 -0
- orp/viewer.py +104 -0
orp/effects.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Effect Evaluator — 分级效果评估
|
|
2
|
+
|
|
3
|
+
评估方法分级:
|
|
4
|
+
1. descriptive: 仅记录检索/应用/结果,不声称因果
|
|
5
|
+
2. matched_baseline: 匹配相似任务/Agent/模型版本的基线
|
|
6
|
+
3. randomized: A/B 实验
|
|
7
|
+
4. causal_model: 贝叶斯分层等因果方法
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Any, Optional
|
|
11
|
+
|
|
12
|
+
from orp.schema import (
|
|
13
|
+
Lesson, LessonEvaluation, EvaluationMethod, LessonStatus,
|
|
14
|
+
)
|
|
15
|
+
from orp.storage import ORPStorage
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class EffectEvaluator:
    """Evaluate how well a Lesson performs and suggest a disposition.

    Two of the evaluation methods listed in the module docstring are
    implemented here: ``descriptive`` (counts only) and ``matched_baseline``
    (success rate compared against a baseline rate).
    """

    def __init__(self, storage: Optional[ORPStorage] = None):
        """Use *storage* when given, otherwise create a default ``ORPStorage``."""
        self._storage = storage or ORPStorage()

    def describe(self, lesson: Lesson) -> LessonEvaluation:
        """Return a descriptive evaluation: raw counts, no causal claim.

        Args:
            lesson: Lesson whose ``metrics`` and ``scope`` are read.

        Returns:
            A ``LessonEvaluation`` with an empty baseline, no estimated effect
            and the decision fixed to ``"keep_active"``.
        """
        m = lesson.metrics
        return LessonEvaluation(
            lesson_id=lesson.lesson_id,
            method=EvaluationMethod.DESCRIPTIVE,
            population={"agent_version": lesson.scope.get("agent_versions", [])},
            results={
                "with_lesson": {
                    "tasks": m.get("retrieved", 0),
                    "successes": m.get("successful_after_apply", 0),
                },
                "baseline": {"tasks": 0, "successes": 0},
                "estimated_effect": None,
                "uncertainty_interval": None,
            },
            decision="keep_active",
        )

    def evaluate_matched_baseline(self, lesson: Lesson,
                                  baseline_success_rate: float = 0.5,
                                  baseline_tasks: int = 1) -> LessonEvaluation:
        """Compare the lesson's success rate with a baseline success rate.

        Args:
            lesson: Lesson whose ``applied`` / ``successful_after_apply``
                metrics are read.
            baseline_success_rate: Success rate of the comparison group.
            baseline_tasks: Number of tasks in the comparison group.

        Returns:
            A ``LessonEvaluation`` whose ``estimated_effect`` is the rate
            difference (``None`` when the lesson was never applied) and whose
            decision comes from ``_decide``.
        """
        m = lesson.metrics
        applied = m.get("applied", 0)
        successes = m.get("successful_after_apply", 0)

        with_rate = successes / applied if applied > 0 else 0
        raw_effect = with_rate - baseline_success_rate

        return LessonEvaluation(
            lesson_id=lesson.lesson_id,
            method=EvaluationMethod.MATCHED_BASELINE,
            population={
                "agent_version": lesson.scope.get("agent_versions", []),
                "baseline_source": "matched_tasks",
            },
            results={
                "with_lesson": {
                    "tasks": applied,
                    "successes": successes,
                    "success_rate": round(with_rate, 3),
                },
                "baseline": {
                    "tasks": baseline_tasks,
                    # round(), not int(): float products such as 100 * 0.57
                    # land just below the integer and would be truncated.
                    "successes": round(baseline_tasks * baseline_success_rate),
                    "success_rate": round(baseline_success_rate, 3),
                },
                "estimated_effect": round(raw_effect, 3) if applied > 0 else None,
                "uncertainty_interval": None,
            },
            decision=self._decide(raw_effect, applied),
        )

    def _decide(self, effect: float, sample: int) -> str:
        """Turn an effect size and sample count into a disposition.

        Returns ``"keep_active"`` when fewer than 3 samples exist or the effect
        is not clearly negative, ``"deprecate"`` below -0.3 and ``"review"``
        below -0.1.
        """
        if sample < 3:
            return "keep_active"  # too few samples to decide anything
        # The stricter threshold must be tested first; otherwise the -0.1
        # check catches every strongly negative effect as well.
        if effect < -0.3:
            return "deprecate"
        if effect < -0.1:
            return "review"
        return "keep_active"

    def auto_evaluate_all(self) -> list[LessonEvaluation]:
        """Run and persist a matched-baseline evaluation for every active Lesson.

        The baseline rate is taken from the newest of the last five stored
        evaluations that recorded one; 0.5 is used when none did.

        Returns:
            The evaluations that were saved, in lesson order.
        """
        import json  # function-scope import: this module has no top-level json import

        evaluations = []
        for lesson in self._storage.list_lessons(status=LessonStatus.ACTIVE):
            rows = self._storage.conn.execute(
                "SELECT * FROM lesson_evals WHERE lesson_id = ? ORDER BY created_at DESC LIMIT 5",
                (lesson.lesson_id,)
            ).fetchall()
            baseline_rate = 0.5
            for row in rows:
                results = json.loads(row["results_json"]) if row["results_json"] else {}
                stored_rate = (results.get("baseline") or {}).get("success_rate")
                # Compare with None: a recorded baseline of 0.0 is a real value.
                if stored_rate is not None:
                    baseline_rate = stored_rate
                    break

            evaluation = self.evaluate_matched_baseline(lesson, baseline_rate)
            self._storage.save_lesson_evaluation(evaluation)
            evaluations.append(evaluation)
        return evaluations
|
orp/evidence.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""证据管理 — 哈希、引用、可信等级、脱敏"""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Optional
|
|
8
|
+
|
|
9
|
+
from orp.schema import TrustLevel, EvidenceRef
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Default (regex, replacement) pairs. redact_text falls back to this list when
# the caller passes no patterns and applies each entry, in order, with
# re.IGNORECASE.
DEFAULT_REDACTION_PATTERNS: list[tuple[str, str]] = [
    # "<keyword> = <value>" or "<keyword>: <value>" for common secret keywords;
    # the whole match, keyword included, is replaced.
    (r'(api[_-]?key|apikey|token|secret|password|passwd|credential)\s*[:=]\s*["\']?[^"\'&\s]+', r'***REDACTED***'),
    # A short prefix (sk/pk/ghp/gho/ghu/ghs/ghr) + "_" + at least 10 alphanumerics.
    (r'(?:(?:sk|pk|ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{10,})', '***REDACTED***'),
    # AKIA/ASIA followed by 16 letters or digits — presumably AWS access key IDs.
    (r'(?:(?:AKIA|ASIA)[0-9A-Z]{16})', '***REDACTED***'),
    # Sixteen digits in four groups, optionally hyphen-separated — presumably card numbers.
    (r'(?:\d{4}[-]?\d{4}[-]?\d{4}[-]?\d{4})', '***REDACTED***'),
    # Three dot-separated base64url-looking segments starting with "eyJ" (JWT shape).
    (r'(?:eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,})', '***JWT-REDACTED***'),
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def compute_hash(content: str) -> str:
    """Return the SHA-256 digest of *content* formatted as ``"sha256:<hex>"``.

    The text is encoded with ``str.encode()``'s default codec before hashing.
    """
    hasher = hashlib.sha256()
    hasher.update(content.encode())
    return "sha256:" + hasher.hexdigest()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def compute_file_hash(path: str) -> Optional[str]:
    """Return ``"sha256:<hex>"`` for the bytes of the file at *path*.

    The file is read in binary mode, so non-text files are hashed instead of
    raising a decode error, and the digest does not depend on the platform's
    default text encoding or newline translation.

    Args:
        path: Location of the file to hash.

    Returns:
        The digest string, or ``None`` when the file cannot be read
        (missing, a directory, permission denied, ...).
    """
    try:
        raw = Path(path).read_bytes()
    except OSError:
        # FileNotFoundError and IOError are both OSError.
        return None
    return f"sha256:{hashlib.sha256(raw).hexdigest()}"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def redact_text(text: str, patterns: Optional[list[tuple[str, str]]] = None) -> str:
    """Apply redaction patterns to *text* and return the scrubbed string.

    Args:
        text: Input to scrub.
        patterns: ``(regex, replacement)`` pairs applied in order,
            case-insensitively. ``None`` selects ``DEFAULT_REDACTION_PATTERNS``;
            an empty list leaves the text untouched.
    """
    active = DEFAULT_REDACTION_PATTERNS if patterns is None else patterns
    scrubbed = text
    for regex, substitute in active:
        scrubbed = re.compile(regex, re.IGNORECASE).sub(substitute, scrubbed)
    return scrubbed
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _redact_value(value: Any, depth: int) -> Any:
    """Redact one value: descend into containers, scrub strings, keep the rest."""
    if isinstance(value, dict):
        return redact_sensitive_fields(value, depth)
    if isinstance(value, (list, tuple)):
        if depth > 10:
            return value
        items = [_redact_value(item, depth + 1) for item in value]
        return items if isinstance(value, list) else tuple(items)
    if isinstance(value, str):
        return redact_text(value)
    return value


def redact_sensitive_fields(data: dict[str, Any], depth: int = 0) -> dict[str, Any]:
    """Return a redacted copy of *data*, descending into dicts, lists and tuples.

    A value is replaced by ``"***REDACTED***"`` when its key contains one of
    the sensitive markers (case-insensitive substring match). Other string
    values go through ``redact_text``; dicts, lists and tuples are processed
    recursively; everything else is copied as is. The input is not mutated.

    Args:
        data: Mapping to redact.
        depth: Current nesting level; callers normally leave the default.

    Returns:
        A new dict. Beyond 10 levels of nesting the remaining structure is
        returned unchanged, i.e. it is NOT redacted.
    """
    # Markers are lowercase because keys are lowercased before matching.
    sensitive_markers = (
        "api_key", "apikey", "token", "secret",
        "password", "credential", "private_key",
    )
    if depth > 10:
        return data
    result = {}
    for key, value in data.items():
        # Non-string keys cannot carry a marker; just process their value.
        key_text = key.lower() if isinstance(key, str) else ""
        if any(marker in key_text for marker in sensitive_markers):
            result[key] = "***REDACTED***"
        else:
            result[key] = _redact_value(value, depth + 1)
    return result
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def make_evidence_ref(uri: str, content: Optional[str] = None) -> EvidenceRef:
    """Create an ``EvidenceRef`` for *uri*, optionally carrying a content digest.

    The evidence id is ``"ref_"`` plus the first 12 hex characters of the MD5
    of the URI (used as an identifier only), and ``kind`` is always
    ``"tool_output"``.

    Args:
        uri: Location of the evidence.
        content: Evidence body. When it is not ``None`` — an empty string
            included — its ``compute_hash`` digest is stored in ``digest``.
    """
    ref = EvidenceRef(
        evidence_id=f"ref_{hashlib.md5(uri.encode()).hexdigest()[:12]}",
        kind="tool_output",
        uri=uri,
    )
    # Compare with None: empty content is still content and has a digest.
    if content is not None:
        ref.digest = compute_hash(content)
    return ref
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def infer_trust_level(evidence_refs: list[str], source: str) -> TrustLevel:
    """Derive a trust level from the evidence references and the source.

    Without any evidence reference the result is ``ASSERTED``. With evidence,
    a ``"human"`` source gives ``HUMAN_CONFIRMED``, ``"tool"`` or ``"system"``
    gives ``OBSERVED``, and any other source gives ``ASSERTED``.
    """
    if evidence_refs:
        if source == "human":
            return TrustLevel.HUMAN_CONFIRMED
        if source in ("tool", "system"):
            return TrustLevel.OBSERVED
    return TrustLevel.ASSERTED
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def canonicalize_evidence_id(raw: str) -> str:
    """Normalise an evidence id.

    Surrounding whitespace is stripped. Ids that already start with
    ``artifact:``, ``file:``, ``eval:``, ``sha256:`` or ``ref_`` are returned
    as is; anything else gets the ``artifact:`` prefix.
    """
    recognised = ("artifact:", "file:", "eval:", "sha256:", "ref_")
    cleaned = raw.strip()
    if cleaned.startswith(recognised):
        return cleaned
    return "artifact:" + cleaned
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Example failing coding agent — simulates a buggy agent for ORP testing"""
|
|
3
|
+
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def fix_authentication():
    """Print the transcript of an incomplete auth fix, then exit with status 1."""
    # The simulated fix only handles logged-in users; the anonymous path is missed.
    transcript = (
        "Fixing authentication controller...",
        "Added null check for authenticated user",
        "Running tests...",
        "pytest: 34 passed, 1 failed",
        "FAILED tests/test_anonymous_access.py::test_anonymous_user_get_name",
    )
    for message in transcript:
        print(message)
    sys.exit(1)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def fix_with_tests_first():
    """Print the transcript of the test-first strategy, then exit with status 0."""
    transcript = (
        "Step 1: Writing anonymous user regression test...",
        "tests/test_anonymous_access.py written",
        "Step 2: Running test to confirm failure...",
        "FAILED as expected — test reproduces the bug",
        "Step 3: Fixing the implementation...",
        "Added null check for anonymous user",
        "Step 4: Running all tests...",
        "pytest: 35 passed, 0 failed",
        "All tests pass!",
    )
    for message in transcript:
        print(message)
    sys.exit(0)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
if __name__ == "__main__":
    # The first CLI argument selects the scenario; the failing path is the default.
    cli_args = sys.argv[1:]
    mode = cli_args[0] if cli_args else "failing"
    scenario = fix_authentication if mode == "failing" else fix_with_tests_first
    scenario()
|
orp/experience.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Experience Builder — 从异构 trace 构建 ExperienceRecord"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
from orp.schema import (
|
|
8
|
+
ExperienceRecord, TimelineEvent, EventKind, Outcome, ReflectionAnalysis,
|
|
9
|
+
)
|
|
10
|
+
from orp.evidence import redact_text, redact_sensitive_fields, make_evidence_ref
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ExperienceBuilder:
    """Normalise heterogeneous traces into ``ExperienceRecord`` objects."""

    def from_trace(self, trace: dict[str, Any],
                   agent_id: str = "unknown",
                   goal: str = "") -> ExperienceRecord:
        """Build an ``ExperienceRecord`` from a generic trace dict.

        Args:
            trace: Trace mapping; reads ``agent_version``, ``model``, ``goal``,
                ``domain``, ``input_ref``, ``trace_id``/``trace_ref``, the
                event list, ``outcome`` and ``reflection``.
            agent_id: Stored as the agent id.
            goal: Task goal; falls back to ``trace["goal"]`` when empty.
        """
        timeline = self._extract_timeline(trace)
        outcome = self._extract_outcome(trace)
        reflection = self._extract_reflection(trace)

        return ExperienceRecord(
            agent={"id": agent_id, "version": trace.get("agent_version", ""),
                   "model": trace.get("model", "")},
            task={"goal": goal or trace.get("goal", ""),
                  "domain": trace.get("domain", ""),
                  "input_ref": trace.get("input_ref", "")},
            trace_ref=trace.get("trace_id") or trace.get("trace_ref"),
            timeline=timeline,
            outcome=outcome,
            reflection=reflection,
        )

    def from_events(self, events: list[TimelineEvent],
                    goal: str = "",
                    agent_id: str = "unknown") -> ExperienceRecord:
        """Build an ``ExperienceRecord`` directly from ``TimelineEvent`` objects.

        The content of the first event of kind ``EventKind.OUTCOME``, if any,
        becomes the outcome status.
        """
        outcome = Outcome()
        for evt in events:
            if evt.kind == EventKind.OUTCOME:
                outcome.status = evt.content
                break
        return ExperienceRecord(
            agent={"id": agent_id},
            task={"goal": goal},
            timeline=events,
            outcome=outcome,
        )

    def _extract_timeline(self, trace: dict[str, Any]) -> list[TimelineEvent]:
        """Extract timeline events from ``events``, ``spans`` or ``steps``.

        The first non-empty of the three keys is used. When nothing yields an
        event, a single placeholder observation is returned.
        """
        events: list[TimelineEvent] = []
        # Trailing "or []" covers a key that is present but null.
        raw_events = trace.get("events") or trace.get("spans") or trace.get("steps") or []
        for raw in raw_events:
            if not isinstance(raw, dict):
                # Entries without fields (e.g. plain strings) are kept as
                # observations instead of crashing on .get().
                events.append(TimelineEvent(kind="observation", content=str(raw)))
                continue
            events.append(TimelineEvent(
                kind=raw.get("kind", raw.get("type", "observation")),
                source=raw.get("source", "agent"),
                content=raw.get("content", raw.get("message", str(raw))),
                evidence_refs=raw.get("evidence_refs") or [],
            ))
        if not events:
            events.append(TimelineEvent(
                kind="observation",
                content=f"Trace captured {len(raw_events)} unknown events"
            ))
        return events

    def _extract_outcome(self, trace: dict[str, Any]) -> Outcome:
        """Build the ``Outcome`` from ``trace["outcome"]``.

        Accepts a missing or null value (default ``Outcome``), an existing
        ``Outcome`` (returned as is), a status string, or a mapping of
        ``Outcome`` fields.
        """
        raw = trace.get("outcome")
        if raw is None:
            return Outcome()
        if isinstance(raw, Outcome):
            return raw
        if isinstance(raw, str):
            return Outcome(status=raw)
        return Outcome(**raw)

    def _extract_reflection(self, trace: dict[str, Any]) -> Optional[ReflectionAnalysis]:
        """Return the trace's reflection as a ``ReflectionAnalysis``, or ``None`` if absent."""
        raw = trace.get("reflection")
        if not raw:
            return None
        if isinstance(raw, ReflectionAnalysis):
            return raw
        return ReflectionAnalysis(**raw)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class Redactor:
    """Apply redaction to an ``ExperienceRecord``."""

    @staticmethod
    def apply(record: ExperienceRecord) -> ExperienceRecord:
        """Redact the task goal, event contents and evidence references in place.

        References starting with ``artifact:`` or ``eval:`` are kept. Any other
        reference is replaced by the ``ref_<hash>`` id that
        ``make_evidence_ref`` derives from it, which is stable across
        processes (the built-in ``hash()`` is salted per interpreter run).
        References that are already opaque (``ref_`` or the older ``ref:``
        form) pass through, so applying the redactor twice changes nothing.

        Returns:
            The same ``record`` object, mutated.
        """
        passthrough = ("artifact:", "eval:", "ref_", "ref:")
        record.task["goal"] = redact_text(record.task.get("goal", ""))
        for evt in record.timeline:
            evt.content = redact_text(evt.content)
            evt.evidence_refs = [
                ref if ref.startswith(passthrough) else make_evidence_ref(ref).evidence_id
                for ref in evt.evidence_refs
            ]
        return record
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class EvidenceLinker:
    """Link and tidy evidence references."""

    @staticmethod
    def link(record: ExperienceRecord) -> ExperienceRecord:
        """Drop duplicate evidence references within each timeline event.

        The first occurrence of every reference is kept and order is
        preserved. The record is mutated and returned.
        """
        for event in record.timeline:
            # dict.fromkeys keeps first-seen order while discarding repeats.
            event.evidence_refs = list(dict.fromkeys(event.evidence_refs))
        return record
|
orp/export.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Export Engine — ORP JSON / OTLP refs / eval files"""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
from orp.schema import ExperienceRecord
|
|
8
|
+
from orp.storage import ORPStorage
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ExportEngine:
    """Export stored experiences in several formats."""

    def __init__(self, storage: Optional[ORPStorage] = None):
        """Use *storage* when given, otherwise create a default ``ORPStorage``."""
        self._storage = storage or ORPStorage()

    def _lookup(self, experience_id: str):
        """Return the requested experience (``"latest"`` = first listed) or ``None``."""
        if experience_id == "latest":
            newest = self._storage.list_experiences(limit=1)
            return newest[0] if newest else None
        found = self._storage.get_experience(experience_id)
        return found if found else None

    def to_json(self, experience_id: str) -> Optional[str]:
        """Serialise one experience to indented JSON; ``None`` when it is not found."""
        experience = self._lookup(experience_id)
        if experience is None:
            return None
        return json.dumps(experience.model_dump(), indent=2, default=str)

    def to_json_file(self, experience_id: str, path: str) -> bool:
        """Write the JSON export to *path*; return ``False`` when there is nothing to write."""
        serialised = self.to_json(experience_id)
        if not serialised:
            return False
        Path(path).write_text(serialised)
        return True

    def to_otlp_refs(self, experience_id: str) -> dict[str, Any]:
        """Return an OTLP-shaped reference dict for one experience, ``{}`` when not found."""
        experience = self._lookup(experience_id)
        if experience is None:
            return {}
        span = {
            "spanId": experience.experience_id[:16],
            "name": experience.task.get("goal", "")[:50],
            "attributes": {
                "orp.experience.id": experience.experience_id,
                "orp.schema.version": "0.3",
            },
        }
        scope_spans = {"scope": {"name": "orp.experience"}, "spans": [span]}
        return {
            "resource": {"orp": {"version": "0.3"}},
            "scopeSpans": [scope_spans],
        }
|
orp/lessons.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Lesson Store — 检索、冲突检测、过期处理"""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from orp.schema import Lesson, LessonStatus, check_lesson_conflict
|
|
7
|
+
from orp.storage import ORPStorage
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LessonStore:
    """Lesson retrieval, ranking, expiry handling and validation."""

    def __init__(self, storage: Optional[ORPStorage] = None):
        """Use *storage* when given, otherwise create a default ``ORPStorage``."""
        self._storage = storage or ORPStorage()

    @staticmethod
    def _is_expired(lesson: Lesson, now: datetime) -> bool:
        """Tell whether *lesson* has an expiry time at or before *now*."""
        expires_at = lesson.expires_at
        if not expires_at:
            return False
        if expires_at.tzinfo is None:
            # assumes naive timestamps are stored in UTC — TODO confirm against
            # the storage layer; comparing naive with aware raises TypeError.
            expires_at = expires_at.replace(tzinfo=timezone.utc)
        return expires_at <= now

    def retrieve(self, task: str, limit: int = 3,
                 status: LessonStatus = LessonStatus.ACTIVE,
                 domain: Optional[str] = None) -> list[Lesson]:
        """Return up to *limit* lessons ranked for *task*.

        Ranking signals: domain/condition match, validation level, historical
        effect (see ``_relevance_score``).

        Args:
            task: Task description; matched case-insensitively.
            limit: Maximum number of lessons returned.
            status: Only lessons with this status are considered.
            domain: Optional task domain that boosts matching lessons.
        """
        candidates = self._storage.list_lessons(status=status, limit=50)

        # Drop lessons whose expiry time has passed.
        now = datetime.now(timezone.utc)
        candidates = [c for c in candidates if not self._is_expired(c, now)]

        task_lower = task.lower()
        scored = []
        for lesson in candidates:
            score = self._relevance_score(lesson, task_lower, domain)
            # NOTE(review): the validation bonus makes every score >= 0.5, so
            # this filter never removes anything. Kept as is so retrieval
            # results do not change; confirm whether unmatched lessons should
            # be excluded.
            if score > 0:
                scored.append((score, lesson))

        scored.sort(key=lambda pair: -pair[0])
        return [lesson for _, lesson in scored[:limit]]

    def _relevance_score(self, lesson: Lesson, task: str, domain: Optional[str] = None) -> float:
        """Score *lesson* against *task* (expected to be lowercased already).

        Adds 3.0 for a domain match; per trigger condition 2.0 when any of its
        words occurs in the task (substring test) plus 4.0 when the whole
        condition occurs; a validation-level bonus of 0.5 to 3.0; and up to
        2.0 for a positive ``estimated_effect``.
        """
        score = 0.0
        if domain and domain in lesson.scope.get("task_domains", []):
            score += 3.0

        for condition in lesson.trigger.get("conditions", []):
            phrase = condition.lower()
            if any(word in task for word in phrase.split()):
                score += 2.0
            if phrase in task:
                score += 4.0

        validation_level = lesson.validation.get("level", "asserted")
        level_bonus = {
            "asserted": 0.5, "observed": 1.0, "reproduced": 1.5,
            "externally_verified": 2.0, "human_confirmed": 2.5,
            "regression_guarded": 3.0,
        }
        score += level_bonus.get(validation_level, 0.5)

        effect = lesson.metrics.get("estimated_effect")
        if effect is not None and effect > 0:
            score += min(effect * 2, 2.0)

        return score

    def check_expired(self) -> list[Lesson]:
        """Mark every expired active lesson as deprecated and return those lessons.

        Uses the same expiry test as ``retrieve``, so a lesson that retrieval
        already hides is always deprecated here, including at the exact expiry
        instant.
        """
        now = datetime.now(timezone.utc)
        expired = []
        for lesson in self._storage.list_lessons(status=LessonStatus.ACTIVE):
            if self._is_expired(lesson, now):
                self._storage.update_lesson_status(
                    lesson.lesson_id, LessonStatus.DEPRECATED
                )
                lesson.status = LessonStatus.DEPRECATED
                expired.append(lesson)
        return expired

    def validate_lesson(self, lesson_id: str) -> list[str]:
        """Check one lesson for completeness and return the problems found.

        Returns ``["Lesson not found"]`` for an unknown id, otherwise a
        possibly empty list of missing-field messages.
        """
        lesson = self._storage.get_lesson(lesson_id)
        if not lesson:
            return ["Lesson not found"]
        issues = []
        if not lesson.recommendation:
            issues.append("Missing recommendation")
        if not lesson.trigger.get("conditions"):
            issues.append("Missing trigger conditions")
        if not lesson.scope.get("task_domains"):
            issues.append("Missing task_domains in scope")
        return issues
|
orp/mcp_server.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""MCP Lesson Server — 通过 MCP 协议让 Agent 查询和回报 Lesson
|
|
2
|
+
|
|
3
|
+
提供工具:
|
|
4
|
+
- orp_retrieve_lessons(task, limit, scope)
|
|
5
|
+
- orp_acknowledge_lesson(lesson_id)
|
|
6
|
+
- orp_report_outcome(lesson_id, outcome, evidence_refs)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import sys
|
|
11
|
+
from typing import Any, Optional
|
|
12
|
+
|
|
13
|
+
from orp.schema import LessonStatus, DeliveryStrategy
|
|
14
|
+
from orp.storage import ORPStorage
|
|
15
|
+
from orp.lessons import LessonStore
|
|
16
|
+
from orp.delivery import DeliveryRouter
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class MCPServer:
    """MCP Lesson Server exposing the lesson tools over stdio."""

    def __init__(self, storage: Optional[ORPStorage] = None,
                 transport: str = "stdio"):
        """Create the lesson store and delivery router on top of *storage*.

        Args:
            storage: Shared storage handed to ``LessonStore`` and
                ``DeliveryRouter``.
            transport: Transport name; stored on the instance only.
        """
        self.transport = transport
        self._store = LessonStore(storage)
        self._router = DeliveryRouter(storage)

    def get_tool_definitions(self) -> list[dict[str, Any]]:
        """Return the definitions of the three lesson tools."""
        return [
            {
                "name": "orp_retrieve_lessons",
                "description": "Retrieve relevant lessons for a task. Call at the START of a task to learn from past experiences.",
                "parameters": {
                    "type": "object",
                    "required": ["task"],
                    "properties": {
                        "task": {"type": "string", "description": "The task description to find relevant lessons for"},
                        "limit": {"type": "integer", "description": "Max lessons to return", "default": 3},
                        "domain": {"type": "string", "description": "Optional domain filter (e.g. coding, research)"},
                    },
                },
            },
            {
                "name": "orp_acknowledge_lesson",
                "description": "Confirm that a lesson has been received and understood. Call after receiving a lesson.",
                "parameters": {
                    "type": "object",
                    "required": ["lesson_id"],
                    "properties": {
                        "lesson_id": {"type": "string", "description": "The lesson ID to acknowledge"},
                    },
                },
            },
            {
                "name": "orp_report_outcome",
                "description": "Report whether applying a lesson improved the outcome. Call at the END of a task.",
                "parameters": {
                    "type": "object",
                    "required": ["lesson_id", "outcome"],
                    "properties": {
                        "lesson_id": {"type": "string", "description": "The lesson ID that was applied"},
                        "outcome": {"type": "string", "enum": ["success", "failed", "improved", "worse"],
                                    "description": "Did the lesson help?"},
                        "evidence_refs": {
                            "type": "array", "items": {"type": "string"},
                            "description": "Optional evidence references (test results, git diff, etc.)",
                        },
                    },
                },
            },
        ]

    def handle_call(self, tool_name: str, arguments: dict[str, Any]) -> dict[str, Any]:
        """Dispatch one tool call and return its result payload.

        Args:
            tool_name: One of the names from ``get_tool_definitions``.
            arguments: Tool arguments as a plain dict.

        Returns:
            The tool's result dict, or ``{"status": "error", ...}`` for an
            unknown tool or an unknown lesson id.
        """
        if tool_name == "orp_retrieve_lessons":
            task = arguments.get("task", "")
            limit = arguments.get("limit", 3)
            domain = arguments.get("domain")
            lessons = self._store.retrieve(
                task=task, limit=limit,
                domain=domain,
            )
            return {
                "lessons": [
                    {
                        "lesson_id": lesson.lesson_id,
                        "recommendation": lesson.recommendation,
                        "status": lesson.status.value,
                        "validation_level": lesson.validation.get("level", "asserted"),
                        "scope": lesson.scope,
                    }
                    for lesson in lessons
                ],
                "count": len(lessons),
            }

        elif tool_name == "orp_acknowledge_lesson":
            lesson_id = arguments.get("lesson_id", "")
            lesson = self._store._storage.get_lesson(lesson_id)
            if lesson:
                lesson.metrics["acknowledged"] = lesson.metrics.get("acknowledged", 0) + 1
                self._store._storage.save_lesson(lesson)
                return {"status": "acknowledged", "lesson_id": lesson_id}
            return {"status": "error", "message": "Lesson not found"}

        elif tool_name == "orp_report_outcome":
            lesson_id = arguments.get("lesson_id", "")
            outcome = arguments.get("outcome", "")
            evidence_refs = arguments.get("evidence_refs", [])
            self._router.report_outcome(lesson_id, outcome, evidence_refs)
            return {"status": "recorded", "lesson_id": lesson_id, "outcome": outcome}

        return {"status": "error", "message": f"Unknown tool: {tool_name}"}

    @staticmethod
    def _send(message: dict[str, Any]) -> None:
        """Write one ``Content-Length``-framed JSON message to stdout and flush."""
        payload = json.dumps(message)
        # json.dumps escapes non-ASCII by default, so len() equals the byte count.
        sys.stdout.write(f"Content-Length: {len(payload)}\r\n\r\n{payload}")
        sys.stdout.flush()

    @staticmethod
    def _read_frame(stream) -> Optional[str]:
        """Read the next message body from *stream*; ``None`` at end of input.

        Handles ``Content-Length`` framing (header lines, blank line, then
        exactly that many bytes or characters) on binary and text streams.
        A bare line that starts with ``{`` is accepted as a complete
        newline-delimited JSON message.
        """
        length = None
        while True:
            line = stream.readline()
            if not line:
                return None
            if isinstance(line, bytes):
                line = line.decode("utf-8", errors="replace")
            header = line.strip()
            if not header:
                if length is None:
                    continue  # stray blank line between frames
                break
            if length is None and header.startswith("{"):
                return header
            name, _, value = header.partition(":")
            if name.strip().lower() == "content-length":
                try:
                    length = int(value.strip())
                except ValueError:
                    length = None
                if length is not None and length < 0:
                    length = None  # read(-1) would swallow the rest of the stream
        body = stream.read(length)
        if isinstance(body, bytes):
            body = body.decode("utf-8", errors="replace")
        return body

    def run_stdio(self) -> None:
        """Serve tool calls over stdio until end of input or Ctrl-C.

        On start-up the initialisation message and the tool list are written,
        exactly as before. Each incoming frame is then read according to its
        ``Content-Length`` header. ``tools/call`` requests are dispatched to
        ``handle_call``; other methods are ignored. Unparseable JSON, a missing
        tool name and exceptions raised by a tool are answered with a JSON-RPC
        error object instead of stopping the server.
        """
        # MCP initialisation
        self._send({
            "jsonrpc": "2.0",
            "method": "initialize",
            "params": {
                "protocolVersion": "0.1.0",
                "capabilities": {
                    "tools": {},
                },
                "clientInfo": {"name": "orp-mcp-server", "version": "0.3.0"},
            },
        })

        # Announce the tool list
        self._send({
            "jsonrpc": "2.0",
            "method": "notifications/tools/list_changed",
            "params": {"tools": self.get_tool_definitions()},
        })

        # Prefer the binary stream so Content-Length is honoured in bytes and
        # no newline translation interferes with the "\r\n" separators.
        source = getattr(sys.stdin, "buffer", sys.stdin)
        try:
            while True:
                body = self._read_frame(source)
                if body is None:
                    break
                try:
                    request = json.loads(body)
                except json.JSONDecodeError:
                    self._send({
                        "jsonrpc": "2.0", "id": None,
                        "error": {"code": -32700, "message": "Parse error"},
                    })
                    continue
                if not isinstance(request, dict) or request.get("method") != "tools/call":
                    continue
                params = request.get("params")
                tool_name = params.get("name") if isinstance(params, dict) else None
                if not isinstance(tool_name, str):
                    self._send({
                        "jsonrpc": "2.0", "id": request.get("id"),
                        "error": {"code": -32602, "message": "Invalid params: missing tool name"},
                    })
                    continue
                try:
                    result = self.handle_call(tool_name, params.get("arguments") or {})
                except Exception as exc:  # server boundary: report, keep serving
                    self._send({
                        "jsonrpc": "2.0", "id": request.get("id"),
                        "error": {"code": -32603, "message": str(exc)},
                    })
                    continue
                self._send({
                    "jsonrpc": "2.0",
                    "id": request.get("id"),
                    "result": result,
                })
        except (EOFError, KeyboardInterrupt):
            return
|