open-reflection-protocol 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- open_reflection_protocol-0.3.0.dist-info/METADATA +262 -0
- open_reflection_protocol-0.3.0.dist-info/RECORD +29 -0
- open_reflection_protocol-0.3.0.dist-info/WHEEL +4 -0
- open_reflection_protocol-0.3.0.dist-info/entry_points.txt +2 -0
- orp/__init__.py +66 -0
- orp/adapters/__init__.py +6 -0
- orp/adapters/generic_json.py +24 -0
- orp/adapters/langgraph.py +24 -0
- orp/adapters/openai_agents.py +27 -0
- orp/adapters/otel.py +52 -0
- orp/capture.py +162 -0
- orp/cli.py +366 -0
- orp/compiler.py +124 -0
- orp/conflicts.py +62 -0
- orp/delivery.py +110 -0
- orp/effects.py +112 -0
- orp/evidence.py +92 -0
- orp/examples/failing_coding_agent.py +38 -0
- orp/experience.py +114 -0
- orp/export.py +60 -0
- orp/lessons.py +95 -0
- orp/mcp_server.py +171 -0
- orp/reflect.py +97 -0
- orp/replay.py +108 -0
- orp/rollback.py +82 -0
- orp/schema.py +303 -0
- orp/storage.py +459 -0
- orp/training.py +94 -0
- orp/viewer.py +104 -0
orp/cli.py
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
"""ORP CLI — 命令行界面"""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Optional
|
|
9
|
+
|
|
10
|
+
from orp.schema import (
|
|
11
|
+
ExperienceRecord, TimelineEvent, EventKind, LessonStatus,
|
|
12
|
+
CounterfactualReplay, EvalArtifact, DeliveryStrategy,
|
|
13
|
+
)
|
|
14
|
+
from orp.storage import ORPStorage
|
|
15
|
+
from orp.experience import ExperienceBuilder, Redactor, EvidenceLinker
|
|
16
|
+
from orp.capture import capture_command, capture_git_diff, capture_trace_context
|
|
17
|
+
from orp.reflect import ReflectionAnalyzer, Challenger
|
|
18
|
+
from orp.replay import CounterfactualReplayer
|
|
19
|
+
from orp.compiler import ExperienceCompiler
|
|
20
|
+
from orp.lessons import LessonStore
|
|
21
|
+
from orp.conflicts import ConflictDefender
|
|
22
|
+
from orp.delivery import DeliveryRouter
|
|
23
|
+
from orp.effects import EffectEvaluator
|
|
24
|
+
from orp.rollback import RollbackManager
|
|
25
|
+
from orp.training import TrainingPipeline
|
|
26
|
+
from orp.mcp_server import MCPServer
|
|
27
|
+
from orp.export import ExportEngine
|
|
28
|
+
from orp.viewer import HTMLReporter
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def cmd_wrap(args):
    """Run a command under ORP tracing and store the resulting experience.

    CLI form: ``orp wrap -- python agent.py``.

    Reads ``args.command`` (argv list of the wrapped program), ``args.goal``
    (optional; defaults to the first three command tokens joined by spaces)
    and ``args.timeout`` (passed to ``capture_command``).
    """
    command = args.command
    goal = args.goal if args.goal else " ".join(command[:3])

    with capture_trace_context(goal) as trace:
        run = capture_command(command, timeout=args.timeout)
        if run["success"]:
            trace.set_outcome("success", run)
        else:
            trace.set_outcome("failed", run)

    # Append a synthetic outcome event carrying the exit code to the trace.
    timeline = trace.get_events()
    exit_event = TimelineEvent(
        kind="outcome",
        content="Exit code: " + str(run.get("exit_code", -1)),
        source="system",
    )
    timeline.append(exit_event)

    experience = ExperienceBuilder().from_events(timeline, goal=goal)
    if run.get("success", False):
        experience.outcome.status = "success"
    else:
        experience.outcome.status = "failed"

    ORPStorage().save_experience(experience)
    print("Experience recorded: " + experience.experience_id)
    print(" Outcome: " + experience.outcome.status)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def cmd_inspect(args):
    """Print a summary and the event timeline of one stored experience.

    CLI form: ``orp inspect [id]``. When ``args.id`` is ``"latest"`` the
    first record returned by ``list_experiences(limit=1)`` is shown;
    otherwise the record is looked up by id. A message is printed and the
    function returns early when nothing is found.
    """
    storage = ORPStorage()
    if args.id == "latest":
        exps = storage.list_experiences(limit=1)
        if not exps:
            print("No experiences found")
            return
        exp = exps[0]
    else:
        exp = storage.get_experience(args.id)
        if not exp:
            print("Experience not found: " + args.id)
            return

    # Glyph per event kind; built once instead of on every loop iteration.
    markers = {
        "observation": "\u25cf",
        "claim": "\u25b3",
        "action": "\u25b6",
        "feedback": "\u25a0",
        "outcome": "\u25c6",
        "decision": "\u25c8",
    }

    print()
    print("Experience: " + exp.experience_id)
    print(" Agent: " + exp.agent.get("id", "?"))
    print(" Model: " + exp.agent.get("model", "?"))
    print(" Goal: " + exp.task.get("goal", "?"))
    print(" Outcome: " + exp.outcome.status)
    print(" Timeline: " + str(len(exp.timeline)) + " events")
    print()
    print("Events:")
    for evt in exp.timeline:
        kind = evt.kind.value
        # Unknown kinds fall back to a hollow circle.
        marker = markers.get(kind, "\u25cb")
        # Content is clipped to 120 characters to keep one event per line.
        print(" " + marker + " [" + kind + "] " + evt.content[:120])
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def cmd_learn(args):
    """Reflect on one experience and persist the lessons and evals compiled from it.

    CLI form: ``orp learn [id]``. ``args.id`` is either ``"latest"`` or an
    experience id. The reflection is attached to the experience, challenged
    claims are listed, compiled lessons and evals are saved, and finally the
    updated experience itself is saved.
    """
    storage = ORPStorage()

    wanted = args.id
    if wanted != "latest":
        exp = storage.get_experience(wanted)
        if not exp:
            print("Experience not found: " + wanted)
            return
    else:
        recent = storage.list_experiences(limit=1)
        if not recent:
            print("No experiences to learn from")
            return
        exp = recent[0]

    print("Learning from: " + exp.experience_id)

    # Step 1: diagnose the run and attach the reflection to the record.
    reflection = ReflectionAnalyzer().analyze(exp)
    exp.reflection = reflection
    print(" Diagnosis: " + (reflection.diagnosis or "none"))

    # Step 2: list the claims the challenger flags.
    flagged = Challenger().challenge(exp)
    if flagged:
        print(" Claims challenged: " + str(len(flagged)))
        for claim in flagged:
            print(" " + claim["issue"] + ": " + claim["content"][:80])

    # Step 3: compile artifacts and store each one.
    compiled = ExperienceCompiler().compile(exp)

    for lesson in compiled.get("lessons", []):
        storage.save_lesson(lesson)
        print(" Lesson: " + lesson.lesson_id[:12] + "... " + lesson.recommendation[:80])

    for artifact in compiled.get("evals", []):
        storage.save_eval(artifact)
        print(" Eval: " + artifact.eval_id[:12] + "... " + artifact.command)

    storage.save_experience(exp)
    print(" Done.")
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def cmd_replay(args):
    """Run a counterfactual replay for a stored experience and save it.

    CLI form: ``orp replay <id>``. ``args.strategy`` supplies the alternative
    approach; a built-in default text is used when it is not given.
    """
    storage = ORPStorage()
    experience = storage.get_experience(args.id)
    if not experience:
        print("Experience not found: " + args.id)
        return

    alternative = args.strategy or "Review trace and write tests first"
    outcome = CounterfactualReplayer().replay(
        experience_id=experience.experience_id,
        original="original",
        alternative=alternative,
    )
    storage.save_replay(outcome)

    print("Replay: " + outcome.replay_id[:12] + "...")
    print(" Mode: " + outcome.verification_mode)
    print(" Result: " + outcome.result.get("status", "unknown"))
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def cmd_lessons(args):
    """Dispatch the ``orp lessons <subcommand>`` family of commands.

    Subcommands:
        list       Print lessons, optionally filtered by ``--status``.
        validate   Print validation issues for the lesson ``args.id``.
        conflicts  Run the automatic conflict review and print moved lessons.
        rollback   Roll back the lesson ``args.id`` (``--reason`` optional).
        deliver    Deliver the lesson ``args.id`` (``--strategy``, ``--context``).

    Bad input (missing lesson id, unknown status or strategy value) is
    reported with a printed message and an early return, matching how the
    other handlers report "not found".
    """
    storage = ORPStorage()
    store = LessonStore(storage)
    action = args.subcommand

    # The parser gives ``id`` an empty default, so these subcommands would
    # otherwise run against the lesson id "".
    if action in ("validate", "rollback", "deliver") and not args.id:
        print("Lesson id is required for: lessons " + action)
        return

    if action == "list":
        status = None
        if args.status:
            try:
                status = LessonStatus(args.status)
            except ValueError:
                valid = ", ".join(s.value for s in LessonStatus)
                print("Unknown lesson status: " + args.status + " (expected one of: " + valid + ")")
                return
        for lesson in storage.list_lessons(status=status):
            rid = lesson.lesson_id[:16] + "..."
            print(rid + " " + lesson.status.value + " " + lesson.recommendation[:60])

    elif action == "validate":
        issues = store.validate_lesson(args.id)
        if not issues:
            print("Lesson " + args.id + ": valid")
        else:
            print("Lesson " + args.id + ": issues")
            for issue in issues:
                print(" - " + issue)

    elif action == "conflicts":
        defender = ConflictDefender(storage)
        reviewed = defender.auto_review_conflicts()
        if reviewed:
            print("Lessons moved to under_review: " + str(reviewed))
        else:
            print("No conflicts found")

    elif action == "rollback":
        manager = RollbackManager(storage)
        rollback = manager.rollback(args.id, args.reason or "Manual rollback")
        if rollback:
            print("Lesson " + args.id + ": " + rollback.previous_status.value + " -> " + rollback.new_status.value)
        else:
            print("Lesson not found: " + args.id)

    elif action == "deliver":
        lesson = storage.get_lesson(args.id)
        if not lesson:
            print("Lesson not found: " + args.id)
            return
        if args.strategy:
            try:
                strategy = DeliveryStrategy(args.strategy)
            except ValueError:
                valid = ", ".join(s.value for s in DeliveryStrategy)
                print("Unknown delivery strategy: " + args.strategy + " (expected one of: " + valid + ")")
                return
        else:
            strategy = DeliveryStrategy.PROMPT_CONTEXT
        router = DeliveryRouter(storage)
        # "cli" is recorded as the experience id for deliveries made by hand.
        delivery = router.deliver(lesson, "cli", strategy=strategy, context=args.context)
        print("Delivered: " + delivery.delivery_id[:12] + "... via " + delivery.strategy.value)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def cmd_effects(args):
    """Evaluate the effect of one lesson, or of every lesson.

    CLI form: ``orp effects evaluate <id>``. Passing ``all`` as the id runs
    ``auto_evaluate_all``; any other id evaluates that single lesson against
    a matched baseline and saves the evaluation.
    """
    storage = ORPStorage()
    evaluator = EffectEvaluator(storage)
    target = args.id

    if target != "all":
        lesson = storage.get_lesson(target)
        if not lesson:
            print("Lesson not found: " + target)
            return
        result = evaluator.evaluate_matched_baseline(lesson)
        storage.save_lesson_evaluation(result)
        print("Evaluation: " + result.evaluation_id[:12] + "...")
        print(" Method: " + result.method.value)
        print(" Decision: " + result.decision)
        return

    results = evaluator.auto_evaluate_all()
    print("Evaluated " + str(len(results)) + " lessons")
    for item in results:
        print(" " + item.lesson_id[:12] + "... -> " + item.decision)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def cmd_training(args):
    """List training candidates or export the approved ones.

    CLI form: ``orp training <subcommand>`` where the subcommand is
    ``candidates`` or ``export``.
    """
    storage = ORPStorage()
    pipeline = TrainingPipeline(storage)
    action = args.subcommand

    if action == "candidates":
        found = storage.list_training_candidates()
        if not found:
            print("No training candidates")
            return
        for candidate in found:
            print(candidate.candidate_id[:16] + "... " + candidate.format.value + " " + candidate.status.value)
        return

    if action == "export":
        approved = pipeline.export_approved()
        if not approved:
            print("No approved candidates to export")
            return
        print("Exporting " + str(len(approved)) + " approved candidates")
        for entry in approved:
            print(" " + entry["candidate_id"][:12] + "...")
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def cmd_mcp(args):
    """Start the ORP MCP server on the requested transport.

    CLI form: ``orp mcp-server``. Only the ``stdio`` transport is served;
    for any other value a "not yet implemented" message is printed.
    """
    transport = args.transport
    server = MCPServer(transport=transport)

    if transport != "stdio":
        print("Transport " + transport + " not yet implemented")
        return

    # The banner goes to stderr; stdout is not written to here.
    print("Starting ORP MCP Server (stdio)...", file=sys.stderr)
    server.run_stdio()
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def cmd_report(args):
    """Write the HTML report and optionally open it in a browser.

    CLI form: ``orp report``. ``args.output`` is the destination path
    (``orp_report.html`` when empty). With ``--open`` the report is opened
    through the standard-library ``webbrowser`` module, which works on all
    platforms, rather than the Windows-only ``start`` shell command.
    """
    storage = ORPStorage()
    reporter = HTMLReporter(storage)
    path = reporter.write_report(args.output or "orp_report.html")
    print("Report written to " + path)

    if args.open:
        import webbrowser

        try:
            # A file:// URI built from the absolute path is accepted by
            # every browser backend.
            opened = webbrowser.open(Path(path).resolve().as_uri())
        except (OSError, webbrowser.Error):
            opened = False
        if not opened:
            # Opening is best-effort: report the failure but do not fail
            # the command, since the report itself was written.
            print("Could not open report in a browser: " + path, file=sys.stderr)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def cmd_diff(args):
    """Print a side-by-side metric comparison of two experiences.

    CLI form: ``orp diff <id1> <id2>``. The first id is shown in the
    "Before" column and the second in the "After" column.
    """
    storage = ORPStorage()
    before = storage.get_experience(args.id1)
    after = storage.get_experience(args.id2)
    if not (before and after):
        print("One or both experiences not found")
        return

    def count_kind(exp, kind):
        # Number of timeline events of the given kind.
        return sum(1 for event in exp.timeline if event.kind == kind)

    def count_evidence(exp):
        # Total evidence references across all timeline events.
        return sum(len(event.evidence_refs) for event in exp.timeline)

    rows = [
        ("Task success ", before.outcome.status, after.outcome.status),
        ("Tool calls ", str(count_kind(before, EventKind.ACTION)), str(count_kind(after, EventKind.ACTION))),
        ("Claims ", str(count_kind(before, EventKind.CLAIM)), str(count_kind(after, EventKind.CLAIM))),
        ("Evidence refs ", str(count_evidence(before)), str(count_evidence(after))),
    ]

    print("Metric Before After")
    print("-" * 60)
    for label, left, right in rows:
        # The "before" value is padded to 20 columns so the "after" values line up.
        print(label + left.ljust(20) + right)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def cmd_export(args):
    """Print the JSON export of an experience to stdout.

    CLI form: ``orp export [id]``. The complete document is printed;
    cutting it to a fixed length would produce invalid JSON for any record
    larger than the cut-off.
    """
    storage = ORPStorage()
    engine = ExportEngine(storage)
    # NOTE(review): the parser defaults id to "latest"; whether
    # ExportEngine.to_json resolves that alias is not visible here — confirm.
    content = engine.to_json(args.id)
    if content:
        print(content)
    else:
        print("Experience not found: " + args.id)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def main():
    """Build the ``orp`` argument parser and dispatch to the chosen handler.

    Each subcommand registers its handler through ``set_defaults(func=...)``.
    When no subcommand is given the top-level help is printed.
    """
    parser = argparse.ArgumentParser(description="ORP — Open Reflection Protocol CLI")
    commands = parser.add_subparsers(dest="command")

    # orp wrap
    wrap = commands.add_parser("wrap", help="Wrap an agent with ORP")
    wrap.add_argument("command", nargs="+")
    wrap.add_argument("--goal")
    wrap.add_argument("--timeout", type=int, default=300)
    wrap.set_defaults(func=cmd_wrap)

    # orp inspect
    inspect_cmd = commands.add_parser("inspect", help="Inspect an experience")
    inspect_cmd.add_argument("id", default="latest", nargs="?")
    inspect_cmd.set_defaults(func=cmd_inspect)

    # orp learn
    learn = commands.add_parser("learn", help="Generate lessons from an experience")
    learn.add_argument("id", default="latest", nargs="?")
    learn.set_defaults(func=cmd_learn)

    # orp replay
    replay = commands.add_parser("replay", help="Counterfactual replay")
    replay.add_argument("id")
    replay.add_argument("--strategy")
    replay.set_defaults(func=cmd_replay)

    # orp lessons
    lessons = commands.add_parser("lessons", help="Manage lessons")
    lessons.add_argument("subcommand", choices=["list", "validate", "conflicts", "rollback", "deliver"])
    lessons.add_argument("id", nargs="?", default="")
    lessons.add_argument("--status")
    lessons.add_argument("--strategy")
    lessons.add_argument("--reason")
    lessons.add_argument("--context")
    lessons.set_defaults(func=cmd_lessons)

    # orp effects
    effects = commands.add_parser("effects", help="Evaluate lesson effects")
    effects.add_argument("subcommand", choices=["evaluate"])
    effects.add_argument("id")
    effects.set_defaults(func=cmd_effects)

    # orp training
    training = commands.add_parser("training", help="Training candidates")
    training.add_argument("subcommand", choices=["candidates", "export"])
    training.set_defaults(func=cmd_training)

    # orp mcp-server
    mcp = commands.add_parser("mcp-server", help="Start MCP lesson server")
    mcp.add_argument("--transport", default="stdio", choices=["stdio", "http"])
    mcp.set_defaults(func=cmd_mcp)

    # orp report
    report = commands.add_parser("report", help="Generate HTML report")
    report.add_argument("--output", default="orp_report.html")
    report.add_argument("--open", action="store_true")
    report.set_defaults(func=cmd_report)

    # orp diff
    diff = commands.add_parser("diff", help="Compare two experiences")
    diff.add_argument("id1")
    diff.add_argument("id2")
    diff.set_defaults(func=cmd_diff)

    # orp export
    export = commands.add_parser("export", help="Export an experience")
    export.add_argument("id", default="latest", nargs="?")
    export.set_defaults(func=cmd_export)

    args = parser.parse_args()
    if not hasattr(args, "func"):
        # No subcommand was selected.
        parser.print_help()
        return
    args.func(args)


if __name__ == "__main__":
    main()
|
orp/compiler.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Experience Compiler — 将候选经验编译为可执行资产"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional
|
|
4
|
+
|
|
5
|
+
from orp.schema import (
|
|
6
|
+
ExperienceRecord, Lesson, EvalArtifact,
|
|
7
|
+
EventKind, LessonStatus,
|
|
8
|
+
)
|
|
9
|
+
from orp.reflect import Challenger, ReflectionAnalyzer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ExperienceCompiler:
    """Turn a diagnosed experience into candidate lessons, evals and guardrails."""

    def compile(self, record: ExperienceRecord) -> dict[str, list[Any]]:
        """Compile ``record`` into executable assets.

        Side effect: ``record.artifacts`` is replaced with the ids of the
        generated assets.

        Returns:
            A dict with the keys ``"lessons"``, ``"evals"`` and
            ``"guardrails"``, each mapping to a (possibly empty) list.
        """
        lessons: list[Lesson] = []
        evals: list[EvalArtifact] = []
        guardrails: list[dict[str, Any]] = []

        # Lesson candidates are generated only for runs that did not fully succeed.
        if record.outcome.status in ("failed", "partial"):
            lesson = self._generate_lesson(record)
            if lesson:
                lessons.append(lesson)

        # Regression eval.
        eval_artifact = self._generate_eval(record)
        if eval_artifact:
            evals.append(eval_artifact)

        # Guardrail for repeated-action patterns.
        guardrail = self._check_repeated_patterns(record)
        if guardrail:
            guardrails.append(guardrail)

        # Record the generated asset ids on the experience itself.
        record.artifacts = {
            "lessons": [item.lesson_id for item in lessons],
            "evals": [item.eval_id for item in evals],
            "guardrails": [item.get("id", "") for item in guardrails],
        }

        return {"lessons": lessons, "evals": evals, "guardrails": guardrails}

    def _generate_lesson(self, record: ExperienceRecord) -> Optional[Lesson]:
        """Build a candidate lesson from a failed or partial run.

        Recommendations are derived from keywords in the challenged claims.
        When none match, a generic recommendation is used for ``"failed"``
        runs and ``None`` is returned for anything else.
        """
        challenged = Challenger().challenge(record)

        recommendations = set()
        for claim in challenged:
            text = claim.get("content", "").lower()
            if "fix" in text or "complete" in text:
                recommendations.add("Verify fixes with before/after tests")
            if "test" in text:
                recommendations.add("Run all tests after changes, not just affected ones")

        if not recommendations:
            if record.outcome.status != "failed":
                return None
            recommendations.add("Review full timeline before drawing conclusions")

        task_goal = record.task.get("goal", "")
        task_domain = record.task.get("domain", "coding")
        agent_version = record.agent.get("version")

        return Lesson(
            trigger={
                "domain": task_domain,
                # The goal text is clipped to 200 characters.
                "conditions": [task_goal[:200]] if task_goal else [],
            },
            # Sorted so the joined text is deterministic.
            recommendation="; ".join(sorted(recommendations)),
            provenance={"experience_ids": [record.experience_id]},
            scope={
                "task_domains": [task_domain],
                "frameworks": [],
                "agent_versions": [agent_version] if agent_version else [],
            },
            status=LessonStatus.CANDIDATE,
        )

    def _generate_eval(self, record: ExperienceRecord) -> Optional[EvalArtifact]:
        """Build a regression eval for a run that failed or showed errors.

        Returns ``None`` when the run is not ``"failed"`` and no observation
        event mentions an error keyword.
        """
        error_words = ("error", "fail", "exception", "exit code", "traceback")
        has_error_observation = any(
            event.kind == EventKind.OBSERVATION
            and any(word in event.content.lower() for word in error_words)
            for event in record.timeline
        )
        if not has_error_observation and record.outcome.status != "failed":
            return None

        # NOTE(review): _make_pytest_eval() output used to be computed here
        # and discarded. EvalArtifact's fields are not visible from this
        # module, so the text is not attached — confirm where it belongs.
        return EvalArtifact(
            origin_experience=record.experience_id,
            runner="pytest",
            command="pytest -q",
            expected={"exit_code": 0},
            generated_by="orp-compiler",
        )

    def _check_repeated_patterns(self, record: ExperienceRecord) -> Optional[dict[str, Any]]:
        """Return a guardrail dict when one action repeats more than twice.

        The most frequent action content is reported; ties go to the content
        seen first. Returns ``None`` when no action occurs three or more times.
        """
        from collections import Counter

        counts = Counter(
            event.content for event in record.timeline if event.kind == EventKind.ACTION
        )
        if not counts:
            return None

        content, times = counts.most_common(1)[0]
        if times <= 2:
            return None

        return {
            "id": "guard_" + record.experience_id[:8],
            "type": "repeated_action",
            "pattern": "Repeated action " + str(content) + " " + str(times) + " times",
            "source_experience": record.experience_id,
        }

    def _make_pytest_eval(self, record: ExperienceRecord) -> str:
        """Return the header text for a generated pytest regression test."""
        task_goal = record.task.get("goal", "unknown")
        return (
            "ORP-generated regression test\n"
            f"Source: {record.experience_id}\n"
            f"Goal: {task_goal}\n"
        )
|
orp/conflicts.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Conflict Defender — Lesson 作用域与冲突检测"""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from orp.schema import Lesson, LessonStatus, check_lesson_conflict
|
|
6
|
+
from orp.storage import ORPStorage
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ConflictDefender:
    """Detect contradictory lessons so they can be reviewed before use."""

    def __init__(self, storage: Optional[ORPStorage] = None):
        """Use ``storage`` when given, otherwise create a default ``ORPStorage``."""
        self._storage = storage or ORPStorage()

    def check_new_lesson(self, new_lesson: Lesson) -> list[dict[str, str]]:
        """Return the contradictions between ``new_lesson`` and active lessons.

        Each entry is a dict with the keys ``type``, ``existing_id``,
        ``existing`` and ``new``; the two recommendation texts are clipped
        to 100 characters.
        """
        conflicts = []
        for existing in self._storage.list_lessons(status=LessonStatus.ACTIVE):
            if existing.lesson_id == new_lesson.lesson_id:
                continue
            if not check_lesson_conflict(new_lesson, existing):
                continue
            if self._are_contradictory(new_lesson.recommendation, existing.recommendation):
                conflicts.append({
                    "type": "contradiction",
                    "existing_id": existing.lesson_id,
                    "existing": existing.recommendation[:100],
                    "new": new_lesson.recommendation[:100],
                })
        return conflicts

    def _are_contradictory(self, a: str, b: str) -> bool:
        """Heuristically decide whether two recommendations oppose each other.

        Returns True when one text contains the first word of an opposite
        pair and the other text contains the second. The check is symmetric
        (argument order does not matter) and matches whole words only, so
        "do" does not match inside "done" and "must not" is not counted as
        a plain "must".
        """
        import re

        opposites = [
            ("always", "never"),
            ("must", "must not"),
            ("do", "don't"),
            ("before", "after"),
            ("first", "last"),
        ]

        def mentions(text: str, phrase: str, excluding: Optional[str] = None) -> bool:
            # Drop the negated phrase first so it cannot satisfy its own
            # positive counterpart ("must not" contains "must").
            if excluding:
                text = text.replace(excluding, " ")
            pattern = r"(?<!\w)" + re.escape(phrase) + r"(?!\w)"
            return re.search(pattern, text) is not None

        a_lower = a.lower()
        b_lower = b.lower()
        for positive, negative in opposites:
            a_then_b = mentions(a_lower, positive, negative) and mentions(b_lower, negative)
            b_then_a = mentions(b_lower, positive, negative) and mentions(a_lower, negative)
            if a_then_b or b_then_a:
                return True
        return False

    def auto_review_conflicts(self) -> list[str]:
        """Move one lesson of every contradictory active pair to ``UNDER_REVIEW``.

        Of each conflicting pair, the lesson with the greater ``lesson_id``
        is moved. Each lesson is updated and reported at most once.

        Returns:
            The ids of the lessons moved, in the order they were found.
        """
        reviewed: list[str] = []
        already_moved: set[str] = set()
        active = self._storage.list_lessons(status=LessonStatus.ACTIVE)
        for a in active:
            for b in active:
                # Visit each unordered pair once, with ``a`` the smaller id.
                if a.lesson_id >= b.lesson_id:
                    continue
                if b.lesson_id in already_moved:
                    continue
                if check_lesson_conflict(a, b) and self._are_contradictory(a.recommendation, b.recommendation):
                    self._storage.update_lesson_status(b.lesson_id, LessonStatus.UNDER_REVIEW)
                    already_moved.add(b.lesson_id)
                    reviewed.append(b.lesson_id)
        return reviewed
|
orp/delivery.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Delivery Router — 将 Lesson 交付给 Agent"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional
|
|
4
|
+
|
|
5
|
+
from orp.schema import (
|
|
6
|
+
Lesson, LessonDelivery, DeliveryStrategy,
|
|
7
|
+
)
|
|
8
|
+
from orp.storage import ORPStorage
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DeliveryRouter:
    """Deliver lessons to agents through one of several strategies and record it."""

    def __init__(self, storage: Optional[ORPStorage] = None):
        """Use ``storage`` when given, otherwise create a default ``ORPStorage``."""
        self._storage = storage or ORPStorage()

    def deliver(self, lesson: Lesson, experience_id: str,
                strategy: DeliveryStrategy = DeliveryStrategy.MCP_TOOL,
                context: Optional[str] = None) -> LessonDelivery:
        """Deliver ``lesson`` and persist a record of the delivery.

        For ``POLICY_FILE`` the lesson is appended to ``AGENTS.md``; for
        ``PROMPT_CONTEXT`` the delivery is marked acknowledged immediately.
        The lesson's ``delivered`` metric is incremented and the lesson saved.

        Returns:
            The saved ``LessonDelivery``.
        """
        delivery = LessonDelivery(
            lesson_id=lesson.lesson_id,
            experience_id=experience_id,
            strategy=strategy,
            delivery_context=context,
        )
        # File-based strategies are carried out synchronously.
        if strategy == DeliveryStrategy.POLICY_FILE:
            self._write_policy_file(lesson)
        elif strategy == DeliveryStrategy.PROMPT_CONTEXT:
            # Prompt injection is assumed to succeed.
            delivery.acknowledged = True

        self._storage.save_delivery(delivery)

        # Update lesson metrics.
        lesson.metrics["delivered"] = lesson.metrics.get("delivered", 0) + 1
        self._storage.save_lesson(lesson)

        return delivery

    def acknowledge(self, lesson_id: str, delivery_id: str) -> None:
        """Record that an agent acknowledged receiving a lesson.

        Only the lesson's ``acknowledged`` metric is updated; ``delivery_id``
        is currently unused and the delivery record itself is not touched.
        Unknown lesson ids are ignored.
        """
        lesson = self._storage.get_lesson(lesson_id)
        if lesson:
            lesson.metrics["acknowledged"] = lesson.metrics.get("acknowledged", 0) + 1
            self._storage.save_lesson(lesson)

    def report_outcome(self, lesson_id: str, outcome: str,
                       evidence_refs: Optional[list[str]] = None) -> None:
        """Record the result observed after a lesson was applied.

        Increments the ``applied`` metric, and ``successful_after_apply``
        when ``outcome`` is ``"success"``, ``"improved"`` or ``"passed"``.
        ``evidence_refs`` is accepted but currently unused. Unknown lesson
        ids are ignored.
        """
        lesson = self._storage.get_lesson(lesson_id)
        if not lesson:
            return
        lesson.metrics["applied"] = lesson.metrics.get("applied", 0) + 1
        if outcome in ("success", "improved", "passed"):
            lesson.metrics["successful_after_apply"] = lesson.metrics.get("successful_after_apply", 0) + 1
        self._storage.save_lesson(lesson)

    def _write_policy_file(self, lesson: Lesson) -> None:
        """Append the lesson to ``AGENTS.md`` in the current working directory.

        Best-effort: nothing is written when the file does not already
        exist, and I/O failures are logged as warnings instead of raised.
        """
        import logging
        import os

        comment = f"\n<!-- ORP Lesson: {lesson.lesson_id} -->\n- {lesson.recommendation}\n<!-- END ORP Lesson -->\n"
        try:
            agents_path = os.path.join(os.getcwd(), "AGENTS.md")
            if not os.path.exists(agents_path):
                return
            # Explicit UTF-8 so non-ASCII recommendation text does not depend
            # on the platform's default encoding.
            with open(agents_path, "a", encoding="utf-8") as f:
                f.write(comment)
        except OSError as exc:
            logging.getLogger(__name__).warning(
                "Could not append lesson %s to AGENTS.md: %s", lesson.lesson_id, exc
            )

    def get_mcp_tools(self) -> list[dict[str, Any]]:
        """Return the tool definitions exposed by the MCP server."""
        return [
            {
                "name": "orp_retrieve_lessons",
                "description": "Retrieve relevant lessons for a task",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "task": {"type": "string", "description": "Task description"},
                        "limit": {"type": "integer", "default": 3},
                        "domain": {"type": "string", "optional": True},
                    },
                },
            },
            {
                "name": "orp_acknowledge_lesson",
                "description": "Acknowledge a delivered lesson",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "lesson_id": {"type": "string"},
                    },
                },
            },
            {
                "name": "orp_report_outcome",
                "description": "Report the outcome of applying a lesson",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "lesson_id": {"type": "string"},
                        "outcome": {"type": "string", "enum": ["success", "failed", "improved", "worse"]},
                        "evidence_refs": {"type": "array", "items": {"type": "string"}},
                    },
                },
            },
        ]
|