@memtensor/memos-local-openclaw-plugin 1.0.9-beta.2 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/skill/ALGORITHMS.md +141 -0
- package/src/skill/CHANGELOG-DESIGN.md +24 -0
- package/src/skill/DESIGN.md +72 -0
- package/src/skill/experience-extractor.ts +191 -0
- package/src/skill/feedback-signals.ts +181 -0
- package/src/viewer/html-v2.ts +1631 -0
package/package.json
CHANGED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Skill 反馈驱动重构 — 算法原理
|
|
2
|
+
|
|
3
|
+
## 1. 纯语义反馈识别
|
|
4
|
+
|
|
5
|
+
### 模块
|
|
6
|
+
`feedback-signals.ts` → `FeedbackSignalExtractor`
|
|
7
|
+
|
|
8
|
+
### 输入
|
|
9
|
+
多轮对话 chunks(user + assistant + tool),含 turnId
|
|
10
|
+
|
|
11
|
+
### 输出
|
|
12
|
+
`FeedbackSignal[]`,每条包含 type、source、evidence、turnId、confidence
|
|
13
|
+
|
|
14
|
+
### 信号类型
|
|
15
|
+
| Type | 含义 | 示例 |
|
|
16
|
+
|------|------|------|
|
|
17
|
+
| reject | 用户否定当前方案 | "这不是我要的效果" |
|
|
18
|
+
| correction | 用户修正 agent 输出 | "不是这个文件,应该是 config.yaml" |
|
|
19
|
+
| constraint | 用户追加约束 | "而且不能用 root 用户" |
|
|
20
|
+
| success | 用户确认成功 | "可以了,这次对了" |
|
|
21
|
+
| confusion | 用户表达困惑 | "为什么会这样?" |
|
|
22
|
+
| preference | 用户表达偏好 | "我更喜欢用 pnpm" |
|
|
23
|
+
|
|
24
|
+
### 隐式反馈识别
|
|
25
|
+
不依赖关键词匹配。通过 LLM 理解上下文语义:
|
|
26
|
+
- 用户重述目标 → 可能是隐式 reject
|
|
27
|
+
- 用户频繁追问细节 → 可能是 confusion
|
|
28
|
+
- 用户切换话题但不明确否定 → 可能是 preference
|
|
29
|
+
|
|
30
|
+
## 2. TaskExperience 提取
|
|
31
|
+
|
|
32
|
+
### 模块
|
|
33
|
+
`experience-extractor.ts` → `ExperienceExtractor`
|
|
34
|
+
|
|
35
|
+
### 三级策略
|
|
36
|
+
| Mode | 调用次数 | 输出 |
|
|
37
|
+
|------|----------|------|
|
|
38
|
+
| minimal | 0 次额外 LLM | 不生成经验 |
|
|
39
|
+
| balanced | 1 次 LLM | TaskExperience(含内联 feedbackSignals) |
|
|
40
|
+
| full | 2 次 LLM | 独立 feedbackSignals + TaskExperience |
|
|
41
|
+
|
|
42
|
+
### 输出 schema
|
|
43
|
+
```json
|
|
44
|
+
{
|
|
45
|
+
"goal": "用户要做什么",
|
|
46
|
+
"successCriteria": ["标准1", "标准2"],
|
|
47
|
+
"failedAttempts": [{
|
|
48
|
+
"attemptLabel": "短标签",
|
|
49
|
+
"approachSummary": "尝试了什么",
|
|
50
|
+
"failureReason": "为什么失败",
|
|
51
|
+
"replacedBy": "被什么替代"
|
|
52
|
+
}],
|
|
53
|
+
"workingApproach": "最终成功的方案",
|
|
54
|
+
"transferableHeuristics": [{
|
|
55
|
+
"kind": "avoid|prefer|verify|clarify",
|
|
56
|
+
"trigger": "触发场景",
|
|
57
|
+
"rule": "应该/不应该做什么",
|
|
58
|
+
"why": "原因",
|
|
59
|
+
"evidence": "证据"
|
|
60
|
+
}]
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### 缓存策略
|
|
65
|
+
- 结果缓存在 `task_experience_cache` 表
|
|
66
|
+
- 同一 task 不重复提取
|
|
67
|
+
- worker 重试安全
|
|
68
|
+
|
|
69
|
+
## 3. Heuristic 生命周期
|
|
70
|
+
|
|
71
|
+
### 状态机
|
|
72
|
+
```
|
|
73
|
+
[新提取] → candidate → active → refined → stale → [归档]
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 晋升条件 (candidate → active)
|
|
77
|
+
- `supportTaskCount >= 2`
|
|
78
|
+
- `evidenceCount >= 3`
|
|
79
|
+
- 被 skill 升级流程采纳
|
|
80
|
+
|
|
81
|
+
### 降级条件 (active → stale)
|
|
82
|
+
- 连续多次召回后被判定为 misleading
|
|
83
|
+
- 被更新版 heuristic 替代
|
|
84
|
+
- 依赖的工具/环境过时
|
|
85
|
+
|
|
86
|
+
### 去重/合并算法
|
|
87
|
+
对每条新 candidate:
|
|
88
|
+
1. 计算 embedding
|
|
89
|
+
2. 在已有 heuristic 库中做余弦相似度搜索
|
|
90
|
+
3. 相似度 >= 0.85 → 更新已有 heuristic(增加 evidence 和 support count)
|
|
91
|
+
4. 0.60 <= 相似度 < 0.85 → 追加证据到已有 heuristic
|
|
92
|
+
5. 相似度 < 0.60 → 新建 candidate
|
|
93
|
+
|
|
94
|
+
## 4. Heuristic 召回打分
|
|
95
|
+
|
|
96
|
+
### 三因子公式
|
|
97
|
+
```
|
|
98
|
+
score = 0.45 × topic_similarity + 0.35 × trigger_signal_similarity + 0.20 × quality_weight
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### topic_similarity
|
|
102
|
+
query embedding 与 heuristic embedding 的余弦相似度
|
|
103
|
+
|
|
104
|
+
### trigger_signal_similarity
|
|
105
|
+
当前对话的 LiveInteractionSignals 与 heuristic trigger 的匹配度。
|
|
106
|
+
LiveInteractionSignals 包括:
|
|
107
|
+
- userDissatisfaction
|
|
108
|
+
- constraintRestatement
|
|
109
|
+
- repeatedFailure
|
|
110
|
+
- acceptanceUnclear
|
|
111
|
+
- sameErrorRepeated
|
|
112
|
+
|
|
113
|
+
### quality_weight 归一化
|
|
114
|
+
```
|
|
115
|
+
base = 0.5
|
|
116
|
+
+ 0.2 if status in (active, refined)
|
|
117
|
+
+ 0.15 if supportTaskCount >= 3
|
|
118
|
+
+ 0.10 if supportTaskCount >= 5
|
|
119
|
+
+ 0.05 if lastValidatedAt within 7 days
|
|
120
|
+
= min(1.0, total)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## 5. 统一 LLM 过滤
|
|
124
|
+
|
|
125
|
+
memory 和 heuristic 候选合并为统一候选池:
|
|
126
|
+
- 每条带 `candidateType: "memory" | "heuristic"`
|
|
127
|
+
- 告诉 LLM 两类职责不同:memory = 事实证据,heuristic = 行为约束
|
|
128
|
+
- 限制总输出:最多 3 memory + 2 heuristic,或总量 5
|
|
129
|
+
|
|
130
|
+
### Prompt 注入顺序
|
|
131
|
+
1. Heuristic(行为约束 / 避坑提醒)
|
|
132
|
+
2. Skill summary(方法入口)
|
|
133
|
+
3. Memory evidence(事实证据)
|
|
134
|
+
|
|
135
|
+
## 6. Context Budget
|
|
136
|
+
|
|
137
|
+
| 类型 | 上限 | 格式 |
|
|
138
|
+
|------|------|------|
|
|
139
|
+
| heuristic | 2-3 条 | trigger + rule + why(1-2 句) |
|
|
140
|
+
| skill | 1-2 条 | 名称 + 适用场景 + 调用提示 |
|
|
141
|
+
| memory | 2-3 条 | 相关 evidence span |
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Skill 反馈驱动重构 — 设计变更记录
|
|
2
|
+
|
|
3
|
+
## 2026-04-13: 初始实现
|
|
4
|
+
|
|
5
|
+
### 变更内容
|
|
6
|
+
- 新增 types: FeedbackSignal, FailedAttempt, TaskExperience, SkillHeuristic, HeuristicRecallEvent
|
|
7
|
+
- 新增 SQLite 表: task_feedback_signals, task_failed_attempts, task_experience_cache, skill_heuristics, skill_heuristic_embeddings, heuristic_recall_events
|
|
8
|
+
- 新增模块: feedback-signals.ts, experience-extractor.ts
|
|
9
|
+
- 修改 evaluator.ts: 支持 TaskExperience 输入
|
|
10
|
+
- 修改 generator.ts: 双产物输出 (SKILL.md + heuristics)
|
|
11
|
+
- 修改 upgrader.ts: 携带完整原始证据升级
|
|
12
|
+
- 修改 evolver.ts: 整合 experience → eval → generate/upgrade → heuristic 管线
|
|
13
|
+
- 修改 recall/engine.ts: 双通道召回 (memory + heuristic) + 三因子打分
|
|
14
|
+
- 修改 index.ts: heuristic 召回 + prompt 注入
|
|
15
|
+
- 新增 viewer/html-v2.ts: 新版 viewer 前端
|
|
16
|
+
- 修改 viewer/server.ts: heuristic API 路由 + viewer v2 切换
|
|
17
|
+
- 修改 sharing/types.ts: SkillBundle 增加 heuristics 字段
|
|
18
|
+
|
|
19
|
+
### 设计决策
|
|
20
|
+
1. **balanced 模式为默认**: 一次 LLM 调用同时产出 feedbackSignals + failedAttempts + heuristics,平衡成本和质量
|
|
21
|
+
2. **heuristic 独立存储**: 不混入 chunks 表,有独立 embedding 和生命周期
|
|
22
|
+
3. **统一 LLM 过滤**: memory 和 heuristic 合并过滤,避免两次 LLM 调用
|
|
23
|
+
4. **注入顺序 heuristic-first**: 让 agent 先看到避坑规则,再看方法和证据
|
|
24
|
+
5. **向后兼容**: 所有新功能默认启用但可关闭,老数据不强制回填
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# Skill 反馈驱动重构 — 设计说明
|
|
2
|
+
|
|
3
|
+
## 方案背景
|
|
4
|
+
|
|
5
|
+
将 skill 系统从"只总结成功步骤"升级为一套同时沉淀三类资产的系统:
|
|
6
|
+
- **Verified Skill**: 最终验证可复用的操作路径 (SKILL.md)
|
|
7
|
+
- **Failure Heuristics**: 从失败和用户纠偏中提炼出的避坑规则 (SkillHeuristic)
|
|
8
|
+
- **Feedback Signals**: 用户否定、修正、确认成功等高价值监督信号 (FeedbackSignal)
|
|
9
|
+
|
|
10
|
+
## 三层资产模型
|
|
11
|
+
|
|
12
|
+
### Skill (技能)
|
|
13
|
+
- 存储:`skills` 表 + 文件系统 (`skills-store/<name>/SKILL.md`)
|
|
14
|
+
- 职责:记录"怎么做" — 可复用的操作步骤、配置、命令
|
|
15
|
+
- 触发:通过 description embedding 在 recall 时被检索
|
|
16
|
+
- 生命周期:`draft → active → archived`
|
|
17
|
+
|
|
18
|
+
### SkillHeuristic (经验规则)
|
|
19
|
+
- 存储:`skill_heuristics` 表 + `skill_heuristic_embeddings`
|
|
20
|
+
- 职责:记录"别怎么做/什么时候要警惕" — 避坑、验证、偏好规则
|
|
21
|
+
- 触发:通过 embedding 在 recall 时被主动注入 prompt
|
|
22
|
+
- 生命周期:`candidate → active → refined → stale`
|
|
23
|
+
- 与 Skill 关系:多对多(通过 `relatedSkillIds`),可跨 skill 或无关联
|
|
24
|
+
|
|
25
|
+
### FeedbackSignal (反馈信号)
|
|
26
|
+
- 存储:`task_feedback_signals` 表
|
|
27
|
+
- 职责:原始监督信号,是 heuristic 的证据来源
|
|
28
|
+
- 来源:用户否定、执行错误、助手自检
|
|
29
|
+
- 生命周期:与任务绑定,不独立演化
|
|
30
|
+
|
|
31
|
+
## 数据流
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
CaptureMessages → Chunks & Tasks
        ↓
FeedbackSignalExtractor (feedback-signals.ts)
        ↓
ExperienceExtractor (experience-extractor.ts)
        ↓
  TaskExperience
   ↙            ↘
SkillEvaluator      HeuristicWriter (evolver.ts)
   ↓                    ↓
SkillGenerator/     skill_heuristics 表
SkillUpgrader           ↓
   ↓                RecallEngine.searchHeuristics()
SKILL.md                ↓
                    Prompt 注入 (heuristic → skill → memory)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## 与现有系统兼容
|
|
52
|
+
|
|
53
|
+
- 所有新功能通过 `SkillEvolutionConfig` 的新字段控制开关
|
|
54
|
+
- 老数据不回填:`read-time compatible, write-time upgraded`
|
|
55
|
+
- 新表缺失时不影响旧系统启动
|
|
56
|
+
- heuristic 不混入 chunks 表,独立存储和检索
|
|
57
|
+
|
|
58
|
+
## 配置
|
|
59
|
+
|
|
60
|
+
新增配置项(均挂在 `skillEvolution` 下):
|
|
61
|
+
- `experienceExtractionEnabled`: 是否启用经验提炼 (默认 true)
|
|
62
|
+
- `heuristicEnabled`: 是否启用 heuristic 生成 (默认 true)
|
|
63
|
+
- `heuristicRecallEnabled`: 是否启用 heuristic 召回 (默认 true)
|
|
64
|
+
- `experienceMode`: `"minimal" | "balanced" | "full"` (默认 balanced)
|
|
65
|
+
- `heuristicRecallTopK`: 最多召回几条 heuristic (默认 3)
|
|
66
|
+
- `heuristicScoringWeights`: 三因子打分权重
|
|
67
|
+
|
|
68
|
+
## 关键参考
|
|
69
|
+
|
|
70
|
+
- Reflexion (Shinn et al., 2023): LLM 自我反思和经验回放
|
|
71
|
+
- Voyager (Wang et al., 2023): 技能库自动扩展
|
|
72
|
+
- 本项目文档: `skill-feedback-redesign.md`
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { v4 as uuid } from "uuid";
|
|
2
|
+
import type { SqliteStore } from "../storage/sqlite";
|
|
3
|
+
import type { Task, Chunk, TaskExperience, FeedbackSignal, FailedAttempt, TransferableHeuristic, ExperienceMode, PluginContext } from "../types";
|
|
4
|
+
import { DEFAULTS } from "../types";
|
|
5
|
+
import { FeedbackSignalExtractor } from "./feedback-signals";
|
|
6
|
+
import { buildSkillConfigChain, callLLMWithFallback } from "../shared/llm-call";
|
|
7
|
+
|
|
8
|
+
/**
 * Prompt template for the single experience-extraction LLM call.
 *
 * Placeholders (each appears exactly once and is filled by the caller):
 * `{TITLE}`, `{SUMMARY}`, `{CONVERSATION}`, `{FEEDBACK_SECTION}`.
 *
 * The reply schema includes `feedbackSignals` because the parser reads that
 * field from the reply when no pre-extracted signals are available; without it
 * in the schema the model has no reason to ever emit inline signals.
 */
const EXPERIENCE_PROMPT = `You are a task experience extraction expert. Analyze the completed task record and extract structured experience data.

Focus on:
1. What was the goal and how was success defined?
2. What approaches were tried and failed? WHY did they fail?
3. What was the final working approach?
4. What user feedback drove direction changes?
5. What transferable lessons/heuristics can be extracted for future similar tasks?

For transferable heuristics, each should be:
- "avoid": Something to NOT do (backed by real failure evidence)
- "prefer": A better approach discovered through trial-and-error
- "verify": A check that should be performed to prevent known failure modes
- "clarify": A question that should be asked early to avoid going down wrong paths

Task title: {TITLE}
Task summary:
{SUMMARY}

Conversation highlights:
{CONVERSATION}

{FEEDBACK_SECTION}

For "feedbackSignals": if pre-extracted feedback signals are listed above, return an empty array; otherwise list the feedback signals you identify in the conversation.

Reply with JSON only, strictly following this schema:
{
"goal": "What the user wanted to achieve",
"successCriteria": ["criterion1", "criterion2"],
"failedAttempts": [
{
"attemptLabel": "short label",
"approachSummary": "what was tried",
"failureReason": "why it failed",
"replacedBy": "what replaced it or null"
}
],
"workingApproach": "The final approach that worked",
"feedbackSignals": [
{
"type": "reject|correction|constraint|success|confusion|preference",
"source": "user|execution|assistant_self_check",
"evidence": "exact quote or close paraphrase from conversation",
"confidence": 0.0-1.0
}
],
"transferableHeuristics": [
{
"kind": "avoid|prefer|verify|clarify",
"trigger": "when/what situation triggers this heuristic",
"rule": "what to do or not do",
"why": "reason based on real evidence",
"evidence": "brief quote from conversation"
}
]
}`;
|
|
55
|
+
|
|
56
|
+
/**
 * Turns a finished task's conversation into a structured `TaskExperience`
 * (goal, success criteria, failed attempts, working approach, transferable
 * heuristics) with one LLM call.
 *
 * Behaviour by `skillEvolution.experienceMode`:
 * - `"minimal"`: nothing is extracted; `extract` returns `null`.
 * - `"full"`: `FeedbackSignalExtractor` runs first and its signals are embedded
 *   in the experience prompt.
 * - `"balanced"`: a single LLM call; feedback signals are taken inline from the
 *   model's reply.
 */
export class ExperienceExtractor {
  private feedbackExtractor: FeedbackSignalExtractor;

  constructor(
    private store: SqliteStore,
    private ctx: PluginContext,
  ) {
    this.feedbackExtractor = new FeedbackSignalExtractor(ctx);
  }

  /**
   * Extracts (or loads from cache) the experience for `task`.
   *
   * @param task   The completed task; `id`, `title` and `summary` are read.
   * @param chunks Conversation chunks belonging to the task.
   * @returns The experience, or `null` when the mode is `"minimal"`, no LLM
   *          config is available, the reply cannot be parsed, or the call fails.
   */
  async extract(task: Task, chunks: Chunk[]): Promise<TaskExperience | null> {
    const mode = this.ctx.config.skillEvolution?.experienceMode ?? DEFAULTS.experienceMode;
    if (mode === "minimal") return null;

    const cached = this.store.getCachedTaskExperience(task.id);
    if (cached) {
      this.ctx.log.debug(`ExperienceExtractor: using cached experience for task ${task.id}`);
      return cached;
    }

    const chain = buildSkillConfigChain(this.ctx);
    if (chain.length === 0) {
      this.ctx.log.warn("ExperienceExtractor: no LLM config, skipping");
      return null;
    }

    // "full" mode spends a dedicated LLM call on feedback signals up front.
    const preSignals: FeedbackSignal[] =
      mode === "full" ? await this.feedbackExtractor.extract(task.id, chunks) : [];

    const prompt = this.buildPrompt(task, chunks, preSignals);

    try {
      const raw = await callLLMWithFallback(chain, prompt, this.ctx.log, "ExperienceExtractor", {
        maxTokens: 3000, temperature: 0.1, timeoutMs: 90_000, openclawAPI: this.ctx.openclawAPI,
      });

      const experience = this.parseExperience(raw, task.id, preSignals);
      if (!experience) return null;

      // Signals are persisted only once the experience has been parsed. Writing
      // the pre-extracted signals before the LLM call meant that a failed call
      // followed by a retry stored the same signals again under fresh ids,
      // because nothing had been cached yet.
      const signalsToPersist =
        mode === "full" ? preSignals :
        mode === "balanced" ? experience.feedbackSignals :
        [];
      for (const signal of signalsToPersist) {
        this.store.insertFeedbackSignal(signal);
      }

      for (const attempt of experience.failedAttempts) {
        this.store.insertFailedAttempt(attempt);
      }

      this.store.cacheTaskExperience(task.id, experience);
      this.ctx.log.info(`ExperienceExtractor: extracted experience for task "${task.title}" — ${experience.failedAttempts.length} failed attempts, ${experience.transferableHeuristics.length} heuristics`);
      return experience;
    } catch (err) {
      this.ctx.log.warn(`ExperienceExtractor failed: ${err}`);
      return null;
    }
  }

  /** Fills the prompt template with the (truncated) task data and optional pre-extracted signals. */
  private buildPrompt(task: Task, chunks: Chunk[], preSignals: FeedbackSignal[]): string {
    const feedbackSection = preSignals.length > 0
      ? `\nPre-extracted feedback signals:\n${JSON.stringify(preSignals.map(s => ({ type: s.type, source: s.source, evidence: s.evidence, confidence: s.confidence })), null, 2)}`
      : "";

    const fields: Record<string, string> = {
      TITLE: task.title,
      SUMMARY: task.summary.slice(0, 4000),
      CONVERSATION: this.buildConversation(chunks).slice(0, 12000),
      FEEDBACK_SECTION: feedbackSection,
    };

    // One pass with a replacer function instead of chained string replaces:
    //  - a function result is inserted literally, so "$&", "$'" or "$1" inside
    //    user text is not interpreted as a replacement pattern;
    //  - inserted text is never rescanned, so a title containing "{SUMMARY}"
    //    cannot hijack a later substitution.
    return EXPERIENCE_PROMPT.replace(
      /\{(TITLE|SUMMARY|CONVERSATION|FEEDBACK_SECTION)\}/g,
      (_whole: string, key: string) => fields[key] ?? "",
    );
  }

  /** Renders non-system chunks as "[Role]: content" paragraphs; tool output is capped at 800 chars. */
  private buildConversation(chunks: Chunk[]): string {
    return chunks
      .filter(c => c.role !== "system")
      .map(c => {
        const role = c.role === "user" ? "User" : c.role === "assistant" ? "Assistant" : c.role === "tool" ? "Tool" : c.role;
        const content = c.role === "tool" ? c.content.slice(0, 800) : c.content;
        return `[${role}]: ${content}`;
      })
      .join("\n\n");
  }

  /**
   * Parses the model reply into a `TaskExperience`.
   *
   * The outermost `{ ... }` span of `raw` is parsed as JSON. Malformed pieces
   * (a list field that is not an array, null entries, non-string text) are
   * replaced by defaults instead of aborting the whole parse.
   *
   * @param raw        Raw LLM reply text.
   * @param taskId     Id stamped onto every produced record.
   * @param preSignals Signals extracted beforehand; when non-empty they take
   *                   precedence over signals found inline in the reply.
   * @returns The experience, or `null` if no JSON object could be parsed.
   */
  private parseExperience(raw: string, taskId: string, preSignals: FeedbackSignal[]): TaskExperience | null {
    const match = raw.match(/\{[\s\S]*\}/);
    if (!match) return null;

    let parsed: unknown;
    try {
      parsed = JSON.parse(match[0]);
    } catch {
      return null;
    }
    if (parsed === null || typeof parsed !== "object") return null;

    const obj = parsed as Record<string, unknown>;
    const now = Date.now();
    const { toRecords, text } = ExperienceExtractor;

    const failedAttempts: FailedAttempt[] = toRecords(obj.failedAttempts).map((a) => {
      const replacedBy = text(a.replacedBy, "");
      return {
        id: uuid(),
        taskId,
        attemptLabel: text(a.attemptLabel, "unknown"),
        approachSummary: text(a.approachSummary, ""),
        failureReason: text(a.failureReason, ""),
        failureSignalIds: [],
        replacedBy: replacedBy === "" ? null : replacedBy,
        createdAt: now,
      };
    });

    // NOTE(review): `type`, `source` and `kind` are defaulted but not checked
    // against the allowed value sets here — confirm whether downstream storage
    // needs stricter validation.
    const inlineSignals: FeedbackSignal[] = toRecords(obj.feedbackSignals).map((s) => ({
      id: uuid(),
      taskId,
      type: text(s.type, "reject") as FeedbackSignal["type"],
      source: text(s.source, "user") as FeedbackSignal["source"],
      evidence: text(s.evidence, ""),
      turnId: text(s.turnId, ""),
      // Keep an explicit 0; only a missing/non-numeric confidence gets the default.
      confidence: typeof s.confidence === "number" ? s.confidence : 0.5,
      createdAt: now,
    }));

    const heuristics: TransferableHeuristic[] = toRecords(obj.transferableHeuristics).map((h) => ({
      kind: text(h.kind, "avoid") as TransferableHeuristic["kind"],
      trigger: text(h.trigger, ""),
      rule: text(h.rule, ""),
      why: text(h.why, ""),
      evidence: text(h.evidence, ""),
    }));

    const successCriteria = Array.isArray(obj.successCriteria)
      ? obj.successCriteria.filter((c): c is string => typeof c === "string")
      : [];

    return {
      id: uuid(),
      taskId,
      goal: text(obj.goal, ""),
      successCriteria,
      failedAttempts,
      workingApproach: text(obj.workingApproach, ""),
      feedbackSignals: preSignals.length > 0 ? preSignals : inlineSignals,
      transferableHeuristics: heuristics,
      createdAt: now,
    };
  }

  /** Returns the object entries of `value` if it is an array, otherwise an empty list. */
  private static toRecords(value: unknown): Record<string, unknown>[] {
    if (!Array.isArray(value)) return [];
    return value.filter(
      (item): item is Record<string, unknown> => item !== null && typeof item === "object",
    );
  }

  /** Coerces an untrusted JSON value to text: non-empty strings and finite numbers are kept, anything else yields `fallback`. */
  private static text(value: unknown, fallback: string): string {
    if (typeof value === "string" && value.length > 0) return value;
    if (typeof value === "number" && Number.isFinite(value)) return String(value);
    return fallback;
  }
}
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import { v4 as uuid } from "uuid";
|
|
2
|
+
import type { Chunk, FeedbackSignal, FeedbackType, FeedbackSource, PluginContext } from "../types";
|
|
3
|
+
import { buildSkillConfigChain, callLLMWithFallback } from "../shared/llm-call";
|
|
4
|
+
|
|
5
|
+
/**
 * Prompt template for the dedicated feedback-signal extraction call.
 *
 * Contains a single `{CONVERSATION}` placeholder that the caller replaces with
 * the rendered conversation. The model is asked to reply with a bare JSON array
 * of `{ type, source, evidence, turnId, confidence }` objects.
 */
const FEEDBACK_EXTRACTION_PROMPT = `You are a feedback signal extraction expert. Analyze the following multi-turn conversation and identify feedback signals.

A feedback signal is any indication from the user, execution results, or the assistant's own checks about the quality/direction of the current approach.

Signal types:
- "reject": User explicitly or implicitly rejects the current approach/output
- "correction": User provides a correction to the agent's work
- "constraint": User adds new constraints or requirements not previously mentioned
- "success": User confirms the solution works or expresses satisfaction
- "confusion": User expresses confusion or asks for clarification about agent's approach
- "preference": User states a preference (tool choice, style, methodology)

Signal sources:
- "user": Signal comes from user messages
- "execution": Signal comes from error outputs, test failures, runtime errors
- "assistant_self_check": Assistant self-identifies issues in its own output

Rules:
- Look at semantic context, not just keywords. A user restating their goal may be an implicit "reject" of the current direction.
- Each signal must include the exact evidence text (verbatim quote or close paraphrase).
- Prioritize implicit signals: user NOT saying "wrong" but demonstrating dissatisfaction through rephrasing goals.
- One message can produce multiple signals.
- Only extract signals with confidence >= 0.5.

Conversation:
{CONVERSATION}

Reply with a JSON array only, no extra text:
[
{
"type": "reject|correction|constraint|success|confusion|preference",
"source": "user|execution|assistant_self_check",
"evidence": "exact quote or close paraphrase from conversation",
"turnId": "the turn ID where this signal appears",
"confidence": 0.0-1.0
}
]

If no meaningful signals found, return: []`;
|
|
44
|
+
|
|
45
|
+
/** Signal types accepted from the model; anything else is discarded by the parser. */
const VALID_FEEDBACK_TYPES = new Set<FeedbackType>([
  "reject",
  "correction",
  "constraint",
  "success",
  "confusion",
  "preference",
]);

/** Signal sources accepted from the model; the parser substitutes "user" for anything else. */
const VALID_FEEDBACK_SOURCES = new Set<FeedbackSource>([
  "user",
  "execution",
  "assistant_self_check",
]);

/**
 * Shape the model is asked to produce for each signal.
 *
 * This describes the expected reply only — the data comes from `JSON.parse`
 * of LLM output and is not guaranteed to match until checked at runtime.
 */
interface RawSignal {
  type: string;
  source: string;
  evidence: string;
  turnId: string;
  confidence: number;
}
|
|
60
|
+
|
|
61
|
+
/**
 * LLM-backed extractor for user/execution feedback signals.
 *
 * - `extract` runs the full extraction prompt over a task's conversation and
 *   returns validated `FeedbackSignal` records.
 * - `extractLiveSignals` is a cheaper call over the most recent turns that
 *   returns only the names of interaction signals that appear to be present.
 *
 * Both methods are best-effort: on any failure they log a warning and return
 * an empty list.
 */
export class FeedbackSignalExtractor {
  /** The only names `extractLiveSignals` may return; also rendered into its prompt. */
  private static readonly LIVE_SIGNAL_NAMES: readonly string[] = [
    "userDissatisfaction",
    "constraintRestatement",
    "repeatedFailure",
    "acceptanceUnclear",
    "sameErrorRepeated",
  ];

  constructor(private ctx: PluginContext) {}

  /**
   * Extracts feedback signals from a task conversation.
   *
   * @param taskId Id stamped onto every returned signal.
   * @param chunks Conversation chunks; system chunks are ignored.
   * @returns Validated signals, or `[]` when no LLM config exists, the rendered
   *          conversation is shorter than 100 characters, or the call fails.
   */
  async extract(taskId: string, chunks: Chunk[]): Promise<FeedbackSignal[]> {
    const chain = buildSkillConfigChain(this.ctx);
    if (chain.length === 0) {
      this.ctx.log.warn("FeedbackSignalExtractor: no LLM config, skipping");
      return [];
    }

    const conversationText = this.buildConversation(chunks);
    if (conversationText.length < 100) return [];

    // Replacer function: the conversation is inserted literally, so "$&" / "$1"
    // sequences in user text are not treated as replacement patterns.
    const prompt = FEEDBACK_EXTRACTION_PROMPT.replace(
      "{CONVERSATION}",
      () => conversationText.slice(0, 15_000),
    );

    try {
      const raw = await callLLMWithFallback(
        chain,
        prompt,
        this.ctx.log,
        "FeedbackSignalExtractor",
        { maxTokens: 2000, temperature: 0.1, timeoutMs: 60_000, openclawAPI: this.ctx.openclawAPI },
      );
      return this.parseSignals(raw, taskId);
    } catch (err) {
      this.ctx.log.warn(`FeedbackSignalExtractor failed: ${err}`);
      return [];
    }
  }

  /**
   * Lightweight extraction for live interaction signals (used during recall, not full extraction).
   *
   * Looks at the last 8 user/assistant chunks (each capped at 500 characters).
   *
   * @returns The subset of the known live-signal names reported by the model;
   *          `[]` when there is no LLM config, too little text, or on failure.
   */
  async extractLiveSignals(recentChunks: Chunk[]): Promise<string[]> {
    const chain = buildSkillConfigChain(this.ctx);
    if (chain.length === 0) return [];

    const text = recentChunks
      .filter((c) => c.role === "user" || c.role === "assistant")
      .slice(-8)
      .map((c) => `[${c.role}]: ${c.content.slice(0, 500)}`)
      .join("\n");

    if (text.length < 50) return [];

    const knownNames = FeedbackSignalExtractor.LIVE_SIGNAL_NAMES;
    const prompt = [
      "Analyze the recent conversation turns below and identify which interaction signals are present. Output ONLY the signal names that are present from this list:",
      ...knownNames.map((name) => `- ${name}`),
      "",
      "Conversation:",
      text.slice(0, 4000),
      "",
      'Reply with a JSON array of signal names only: ["signal1", "signal2"]',
      "If none present, reply: []",
    ].join("\n");

    try {
      const raw = await callLLMWithFallback(
        chain,
        prompt,
        this.ctx.log,
        "LiveSignalExtractor",
        { maxTokens: 200, temperature: 0, timeoutMs: 15_000, openclawAPI: this.ctx.openclawAPI },
      );
      const match = raw.match(/\[[\s\S]*\]/);
      if (!match) return [];

      const parsed: unknown = JSON.parse(match[0]);
      if (!Array.isArray(parsed)) return [];

      // Drop anything the model invented: only names from the offered list count.
      return parsed.filter(
        (s): s is string => typeof s === "string" && knownNames.includes(s),
      );
    } catch (err) {
      // Same best-effort contract as `extract`, but leave a trace for debugging.
      this.ctx.log.warn(`LiveSignalExtractor failed: ${err}`);
      return [];
    }
  }

  /** Renders non-system chunks as "[Role] (turn=<turnId>): content"; tool output is capped at 800 chars. */
  private buildConversation(chunks: Chunk[]): string {
    return chunks
      .filter((c) => c.role !== "system")
      .map((c) => {
        const role =
          c.role === "user" ? "User" :
          c.role === "assistant" ? "Assistant" :
          c.role === "tool" ? "Tool" :
          c.role;
        const content = c.role === "tool" ? c.content.slice(0, 800) : c.content;
        return `[${role}] (turn=${c.turnId}): ${content}`;
      })
      .join("\n\n");
  }

  /**
   * Parses the model reply into validated signals.
   *
   * Each array element is checked on its own, so one malformed entry (null, a
   * string confidence, non-string evidence, unknown type) is skipped instead of
   * throwing and discarding every other signal in the reply.
   *
   * @param raw    Raw LLM reply; the outermost `[ ... ]` span is parsed as JSON.
   * @param taskId Id stamped onto every returned signal.
   */
  private parseSignals(raw: string, taskId: string): FeedbackSignal[] {
    const match = raw.match(/\[[\s\S]*\]/);
    if (!match) return [];

    let parsed: unknown;
    try {
      parsed = JSON.parse(match[0]);
    } catch {
      return [];
    }
    if (!Array.isArray(parsed)) return [];

    const now = Date.now();
    const signals: FeedbackSignal[] = [];

    for (const item of parsed as unknown[]) {
      if (item === null || typeof item !== "object") continue;
      const { type, source, evidence, turnId, confidence } =
        item as Partial<Record<keyof RawSignal, unknown>>;

      if (typeof confidence !== "number" || !(confidence >= 0.5)) continue;
      if (typeof type !== "string" || !VALID_FEEDBACK_TYPES.has(type as FeedbackType)) continue;
      if (typeof evidence !== "string" || evidence.length === 0) continue;

      const knownSource =
        typeof source === "string" && VALID_FEEDBACK_SOURCES.has(source as FeedbackSource);

      signals.push({
        id: uuid(),
        taskId,
        type: type as FeedbackType,
        source: knownSource ? (source as FeedbackSource) : "user",
        evidence,
        // Models sometimes emit the turn id as a number; store it as text either way.
        turnId:
          typeof turnId === "string" ? turnId :
          typeof turnId === "number" ? String(turnId) :
          "",
        confidence,
        createdAt: now,
      });
    }

    return signals;
  }
}
|