openclaw-node-harness 2.0.4 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +646 -3
- package/bin/hyperagent.mjs +419 -0
- package/bin/mesh-agent.js +401 -12
- package/bin/mesh-bridge.js +66 -1
- package/bin/mesh-task-daemon.js +816 -26
- package/bin/mesh.js +403 -1
- package/config/claude-settings.json +95 -0
- package/config/daemon.json.template +2 -1
- package/config/git-hooks/pre-commit +13 -0
- package/config/git-hooks/pre-push +12 -0
- package/config/harness-rules.json +174 -0
- package/config/plan-templates/team-bugfix.yaml +52 -0
- package/config/plan-templates/team-deploy.yaml +50 -0
- package/config/plan-templates/team-feature.yaml +71 -0
- package/config/roles/qa-engineer.yaml +36 -0
- package/config/roles/solidity-dev.yaml +51 -0
- package/config/roles/tech-architect.yaml +36 -0
- package/config/rules/framework/solidity.md +22 -0
- package/config/rules/framework/typescript.md +21 -0
- package/config/rules/framework/unity.md +21 -0
- package/config/rules/universal/design-docs.md +18 -0
- package/config/rules/universal/git-hygiene.md +18 -0
- package/config/rules/universal/security.md +19 -0
- package/config/rules/universal/test-standards.md +19 -0
- package/identity/DELEGATION.md +6 -6
- package/install.sh +293 -8
- package/lib/circling-parser.js +119 -0
- package/lib/hyperagent-store.mjs +652 -0
- package/lib/kanban-io.js +9 -0
- package/lib/mcp-knowledge/bench.mjs +118 -0
- package/lib/mcp-knowledge/core.mjs +528 -0
- package/lib/mcp-knowledge/package.json +25 -0
- package/lib/mcp-knowledge/server.mjs +245 -0
- package/lib/mcp-knowledge/test.mjs +802 -0
- package/lib/memory-budget.mjs +261 -0
- package/lib/mesh-collab.js +301 -1
- package/lib/mesh-harness.js +427 -0
- package/lib/mesh-plans.js +13 -5
- package/lib/mesh-tasks.js +67 -0
- package/lib/plan-templates.js +226 -0
- package/lib/pre-compression-flush.mjs +320 -0
- package/lib/role-loader.js +292 -0
- package/lib/rule-loader.js +358 -0
- package/lib/session-store.mjs +458 -0
- package/lib/transcript-parser.mjs +292 -0
- package/mission-control/drizzle/soul_schema_update.sql +29 -0
- package/mission-control/drizzle.config.ts +1 -4
- package/mission-control/package-lock.json +1571 -83
- package/mission-control/package.json +6 -2
- package/mission-control/scripts/gen-chronology.js +3 -3
- package/mission-control/scripts/import-pipeline-v2.js +0 -16
- package/mission-control/scripts/import-pipeline.js +0 -15
- package/mission-control/src/app/api/cowork/clusters/[id]/members/route.ts +117 -0
- package/mission-control/src/app/api/cowork/clusters/[id]/route.ts +84 -0
- package/mission-control/src/app/api/cowork/clusters/route.ts +141 -0
- package/mission-control/src/app/api/cowork/dispatch/route.ts +128 -0
- package/mission-control/src/app/api/cowork/events/route.ts +65 -0
- package/mission-control/src/app/api/cowork/intervene/route.ts +259 -0
- package/mission-control/src/app/api/cowork/sessions/[id]/route.ts +37 -0
- package/mission-control/src/app/api/cowork/sessions/route.ts +64 -0
- package/mission-control/src/app/api/diagnostics/route.ts +97 -0
- package/mission-control/src/app/api/diagnostics/test-runner/route.ts +990 -0
- package/mission-control/src/app/api/mesh/events/route.ts +95 -19
- package/mission-control/src/app/api/mesh/identity/route.ts +11 -0
- package/mission-control/src/app/api/mesh/tasks/[id]/route.ts +92 -0
- package/mission-control/src/app/api/mesh/tasks/route.ts +91 -0
- package/mission-control/src/app/api/tasks/[id]/handoff/route.ts +1 -1
- package/mission-control/src/app/api/tasks/[id]/route.ts +90 -4
- package/mission-control/src/app/api/tasks/route.ts +21 -30
- package/mission-control/src/app/cowork/page.tsx +261 -0
- package/mission-control/src/app/diagnostics/page.tsx +385 -0
- package/mission-control/src/app/graph/page.tsx +26 -0
- package/mission-control/src/app/memory/page.tsx +1 -1
- package/mission-control/src/app/obsidian/page.tsx +36 -6
- package/mission-control/src/app/roadmap/page.tsx +24 -0
- package/mission-control/src/app/souls/page.tsx +2 -2
- package/mission-control/src/components/board/execution-config.tsx +431 -0
- package/mission-control/src/components/board/kanban-board.tsx +75 -9
- package/mission-control/src/components/board/kanban-column.tsx +135 -19
- package/mission-control/src/components/board/task-card.tsx +55 -2
- package/mission-control/src/components/board/unified-task-dialog.tsx +82 -4
- package/mission-control/src/components/cowork/cluster-card.tsx +176 -0
- package/mission-control/src/components/cowork/create-cluster-dialog.tsx +251 -0
- package/mission-control/src/components/cowork/dispatch-form.tsx +423 -0
- package/mission-control/src/components/cowork/role-picker.tsx +102 -0
- package/mission-control/src/components/cowork/session-card.tsx +284 -0
- package/mission-control/src/components/layout/sidebar.tsx +39 -2
- package/mission-control/src/lib/__tests__/daily-log.test.ts +82 -0
- package/mission-control/src/lib/__tests__/memory-md.test.ts +87 -0
- package/mission-control/src/lib/__tests__/mesh-kv-sync.test.ts +465 -0
- package/mission-control/src/lib/__tests__/mocks/mock-kv.ts +131 -0
- package/mission-control/src/lib/__tests__/status-kanban.test.ts +46 -0
- package/mission-control/src/lib/__tests__/task-markdown.test.ts +188 -0
- package/mission-control/src/lib/__tests__/wikilinks.test.ts +175 -0
- package/mission-control/src/lib/config.ts +58 -0
- package/mission-control/src/lib/db/index.ts +69 -0
- package/mission-control/src/lib/db/schema.ts +61 -3
- package/mission-control/src/lib/hooks.ts +309 -0
- package/mission-control/src/lib/memory/entities.ts +3 -2
- package/mission-control/src/lib/nats.ts +66 -1
- package/mission-control/src/lib/parsers/task-markdown.ts +52 -2
- package/mission-control/src/lib/parsers/transcript.ts +4 -4
- package/mission-control/src/lib/scheduler.ts +12 -11
- package/mission-control/src/lib/sync/mesh-kv.ts +279 -0
- package/mission-control/src/lib/sync/tasks.ts +23 -1
- package/mission-control/src/lib/task-id.ts +32 -0
- package/mission-control/src/lib/tts/index.ts +33 -9
- package/mission-control/tsconfig.json +2 -1
- package/mission-control/vitest.config.ts +14 -0
- package/package.json +15 -2
- package/services/service-manifest.json +1 -1
- package/skills/cc-godmode/references/agents.md +8 -8
- package/workspace-bin/memory-daemon.mjs +199 -5
- package/workspace-bin/session-search.mjs +204 -0
- package/workspace-bin/web-fetch.mjs +65 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* plan-templates.js — Load, validate, and instantiate plan templates.
|
|
3
|
+
*
|
|
4
|
+
* Templates are YAML files in .openclaw/plan-templates/ that define
|
|
5
|
+
* reusable multi-phase pipelines. Instantiation substitutes context
|
|
6
|
+
* variables and produces a plan ready for mesh.plans.create.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
const fs = require('fs');
|
|
10
|
+
const path = require('path');
|
|
11
|
+
const yaml = require('js-yaml');
|
|
12
|
+
const { autoRoutePlan, createPlan } = require('./mesh-plans');
|
|
13
|
+
|
|
14
|
+
// ── Template Loading ──────────────────────────────
|
|
15
|
+
|
|
16
|
+
/**
 * Load a single template from a YAML file.
 *
 * @param {string} templatePath — path to a `.yaml` / `.yml` template file
 * @returns {object} parsed template; `id` is defaulted from the filename when absent
 * @throws if the file is unreadable, is not valid YAML, or does not parse to a mapping
 */
function loadTemplate(templatePath) {
  const content = fs.readFileSync(templatePath, 'utf-8');
  const template = yaml.load(content);

  // An empty or scalar YAML document is not a usable template — fail loudly
  // here rather than letting callers hit confusing property errors later.
  if (!template || typeof template !== 'object') {
    throw new Error(`Template is not a YAML mapping: ${templatePath}`);
  }

  if (!template.id) {
    // Strip the file's actual extension. The previous hard-coded '.yaml'
    // suffix left `.yml` files (which listTemplates accepts) with ids
    // like "foo.yml".
    template.id = path.basename(templatePath, path.extname(templatePath));
  }

  return template;
}
|
|
29
|
+
|
|
30
|
+
/**
 * List all available templates in a directory.
 *
 * @param {string} templatesDir — directory containing `.yaml` / `.yml` templates
 * @returns {Array<{ id: string, name: string, description: string, file: string }>}
 *   one summary per loadable template; unparseable files are skipped (best-effort)
 */
function listTemplates(templatesDir) {
  if (!fs.existsSync(templatesDir)) return [];

  const summaries = [];
  for (const file of fs.readdirSync(templatesDir)) {
    if (!file.endsWith('.yaml') && !file.endsWith('.yml')) continue;
    try {
      const tpl = loadTemplate(path.join(templatesDir, file));
      summaries.push({
        id: tpl.id,
        name: tpl.name || tpl.id,
        description: tpl.description || '',
        file,
      });
    } catch {
      // Broken/unparseable template files are silently skipped so one bad
      // file doesn't hide the rest of the catalog.
    }
  }
  return summaries;
}
|
|
54
|
+
|
|
55
|
+
// ── Template Validation ───────────────────────────
|
|
56
|
+
|
|
57
|
+
/**
 * Validate a template for structural correctness.
 *
 * Checks: template id, phases array, per-phase subtasks arrays, unique
 * subtask ids, that every depends_on reference resolves, absence of
 * dependency cycles, and valid delegation modes.
 *
 * @param {object} template — loaded template
 * @returns {{ valid: boolean, errors: string[] }}
 */
function validateTemplate(template) {
  const errors = [];

  // Guard: a null/scalar template would otherwise throw on property access.
  if (!template || typeof template !== 'object') {
    return { valid: false, errors: ['Template is not an object'] };
  }

  if (!template.id) errors.push('Missing template id');
  if (!template.phases || !Array.isArray(template.phases)) {
    errors.push('Missing or invalid phases array');
    return { valid: false, errors };
  }

  const allIds = new Set();
  const allSubtasks = [];

  for (let i = 0; i < template.phases.length; i++) {
    const phase = template.phases[i];
    if (!phase.subtasks || !Array.isArray(phase.subtasks)) {
      errors.push(`Phase ${i}: missing subtasks array`);
      continue;
    }

    for (const st of phase.subtasks) {
      if (!st.id) {
        errors.push(`Phase ${i}: subtask missing id`);
        continue;
      }
      if (allIds.has(st.id)) {
        errors.push(`Duplicate subtask id: ${st.id}`);
      }
      allIds.add(st.id);
      allSubtasks.push(st);
    }
  }

  // Check dependency references resolve to known subtask ids.
  for (const st of allSubtasks) {
    if (st.depends_on) {
      // A scalar depends_on previously crashed the for...of with a
      // TypeError; report it as a validation error instead.
      if (!Array.isArray(st.depends_on)) {
        errors.push(`Subtask ${st.id}: depends_on must be an array`);
        continue;
      }
      for (const dep of st.depends_on) {
        if (!allIds.has(dep)) {
          errors.push(`Subtask ${st.id}: depends on unknown subtask '${dep}'`);
        }
      }
    }
  }

  // Check for circular dependencies (simple DFS with a visiting/visited pair).
  const visiting = new Set();
  const visited = new Set();
  const stMap = new Map(allSubtasks.map(st => [st.id, st]));

  function hasCycle(id) {
    if (visiting.has(id)) return true;   // back-edge → cycle
    if (visited.has(id)) return false;   // already proven acyclic
    visiting.add(id);
    const st = stMap.get(id);
    if (st && Array.isArray(st.depends_on)) {
      for (const dep of st.depends_on) {
        if (hasCycle(dep)) return true;
      }
    }
    visiting.delete(id);
    visited.add(id);
    return false;
  }

  for (const id of allIds) {
    if (hasCycle(id)) {
      errors.push(`Circular dependency detected involving subtask '${id}'`);
      break; // one cycle report is enough
    }
  }

  // Validate delegation modes against the known set.
  const validModes = ['solo_mesh', 'collab_mesh', 'local', 'soul', 'human', 'auto'];
  for (const st of allSubtasks) {
    if (st.delegation && st.delegation.mode && !validModes.includes(st.delegation.mode)) {
      errors.push(`Subtask ${st.id}: invalid delegation mode '${st.delegation.mode}'`);
    }
  }

  return { valid: errors.length === 0, errors };
}
|
|
141
|
+
|
|
142
|
+
// ── Template Instantiation ────────────────────────
|
|
143
|
+
|
|
144
|
+
/**
 * Substitute {{context}} and {{vars.key}} placeholders in all string fields.
 * Recurses through arrays and plain objects; other leaves pass through
 * untouched. The input is never mutated — a substituted copy is returned.
 *
 * @param {*} obj — string / array / object to substitute into
 * @param {string} context — value for {{context}}
 * @param {object} [vars] — values for {{vars.<key>}} placeholders
 * @returns {*} copy of obj with substitutions applied
 */
function substituteVars(obj, context, vars = {}) {
  if (typeof obj === 'string') {
    // Use function replacements so '$' sequences in substituted values
    // (e.g. "$&", "$1") are inserted literally instead of being treated
    // as RegExp replacement patterns.
    let result = obj.replace(/\{\{context\}\}/g, () => context);
    for (const [key, val] of Object.entries(vars)) {
      // Escape the key before embedding it in a RegExp — keys containing
      // metacharacters (e.g. "a.b") previously produced a wrong pattern.
      const escaped = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      result = result.replace(new RegExp(`\\{\\{vars\\.${escaped}\\}\\}`, 'g'), () => String(val));
    }
    return result;
  }
  if (Array.isArray(obj)) {
    return obj.map(item => substituteVars(item, context, vars));
  }
  if (obj && typeof obj === 'object') {
    const result = {};
    for (const [key, val] of Object.entries(obj)) {
      result[key] = substituteVars(val, context, vars);
    }
    return result;
  }
  return obj;
}
|
|
167
|
+
|
|
168
|
+
/**
 * Instantiate a template into a plan-ready object.
 *
 * Flattens the template's phases into a single subtask list (ordering/wiring
 * comes from each subtask's depends_on), substitutes {{context}} and
 * {{vars.*}} placeholders, creates the plan, then auto-routes any subtasks
 * left in 'auto' delegation mode.
 *
 * @param {object} template — loaded template
 * @param {string} context — main context string (substituted into {{context}})
 * @param {object} opts — { parent_task_id, vars, planner }
 * @returns {object} — plan object ready for mesh.plans.create
 */
function instantiateTemplate(template, context, opts = {}) {
  const { parent_task_id, vars = {}, planner = 'daedalus' } = opts;

  // Flatten phases into a subtask array with dependency wiring.
  const subtasks = [];

  for (const phase of template.phases) {
    for (const stSpec of phase.subtasks) {
      const substituted = substituteVars(stSpec, context, vars);

      subtasks.push({
        subtask_id: substituted.id,
        title: substituted.title || substituted.id,
        description: substituted.description || '',
        delegation: substituted.delegation || { mode: 'auto' },
        // parseInt with explicit radix (never rely on the implicit one);
        // a missing/zero/unparseable budget falls back to 15 minutes.
        budget_minutes: parseInt(substituted.budget_minutes, 10) || 15,
        metric: substituted.metric || null,
        scope: substituted.scope || [],
        success_criteria: substituted.success_criteria || [],
        depends_on: substituted.depends_on || [],
        critical: substituted.critical || false,
      });
    }
  }

  // Create the plan. Approval is required unless explicitly disabled.
  const plan = createPlan({
    parent_task_id: parent_task_id || `TEMPLATE-${template.id}-${Date.now()}`,
    title: substituteVars(template.name || template.id, context, vars),
    description: substituteVars(template.description || '', context, vars),
    planner,
    failure_policy: template.failure_policy || 'continue_best_effort',
    requires_approval: template.requires_approval !== false, // default true
    subtasks,
  });

  // Auto-route any subtasks with mode: 'auto'.
  autoRoutePlan(plan);

  return plan;
}
|
|
219
|
+
|
|
220
|
+
// Public API: loading, listing, validating, and instantiating plan
// templates, plus the placeholder-substitution helper used by all of them.
module.exports = {
  loadTemplate,
  listTemplates,
  validateTemplate,
  instantiateTemplate,
  substituteVars,
};
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pre-compression-flush.mjs — Pre-compression memory extraction
|
|
3
|
+
*
|
|
4
|
+
* Detects when a session is approaching context compression
|
|
5
|
+
* (by JSONL size / estimated token count) and extracts durable facts from
|
|
6
|
+
* the conversation tail before they're lost.
|
|
7
|
+
*
|
|
8
|
+
* LLM-agnostic: uses transcript-parser.mjs to handle any JSONL format
|
|
9
|
+
* (Claude Code, OpenClaw Gateway, or future backends).
|
|
10
|
+
*
|
|
11
|
+
* Zero token cost — pure JSONL parsing + heuristic extraction.
|
|
12
|
+
* Writes to MEMORY.md with bigram-similarity dedup to prevent bloat.
|
|
13
|
+
*
|
|
14
|
+
* Adapted from Hermes's pre-compression flush pattern, fitted to
|
|
15
|
+
* OpenClaw's daemon architecture.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import fs from 'fs';
|
|
19
|
+
import path from 'path';
|
|
20
|
+
import { parseJsonlFile, estimateFileTokens } from './transcript-parser.mjs';
|
|
21
|
+
|
|
22
|
+
// ── Token Estimation ────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
const CHARS_PER_TOKEN = 4; // rough approximation across common LLM tokenizers

/**
 * Estimate the token count of a string from its character length.
 * Good enough for flush-threshold decisions — avoids pulling in a
 * tokenizer dependency.
 */
export function estimateTokens(text) {
  const approxTokens = text.length / CHARS_PER_TOKEN;
  return Math.ceil(approxTokens);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Estimate total conversation tokens for a JSONL session file.
 * Format-agnostic — parsing is delegated to transcript-parser.
 *
 * @param {string} jsonlPath — session transcript path
 * @param {number} [tailCount=40] — how many trailing messages to return
 * @param {Object} [opts]
 * @param {string} [opts.format] — transcript format (auto-detected if omitted)
 * @returns {Promise<{ totalTokens: number, messageCount: number, tailMessages: Array }>}
 *   zeros/empty when the file does not exist
 */
export async function estimateSessionTokens(jsonlPath, tailCount = 40, opts = {}) {
  if (!fs.existsSync(jsonlPath)) {
    return { totalTokens: 0, messageCount: 0, tailMessages: [] };
  }

  const messages = await parseJsonlFile(jsonlPath, { format: opts.format });

  // Sum character counts once, divide once for the token estimate.
  const totalChars = messages.reduce((sum, msg) => sum + msg.content.length, 0);

  return {
    totalTokens: Math.ceil(totalChars / CHARS_PER_TOKEN),
    messageCount: messages.length,
    tailMessages: messages.slice(-tailCount),
  };
}
|
|
61
|
+
|
|
62
|
+
// ── Flush Threshold ────────────────────────────────────
|
|
63
|
+
|
|
64
|
+
/**
 * Check if a session should trigger a pre-compression flush, based on a
 * quick file-size-derived token estimate.
 *
 * @param {string} jsonlPath - Path to the session's JSONL file
 * @param {Object} opts
 * @param {number} opts.contextWindowTokens - Model's context window size in tokens (default: 200000)
 * @param {number} opts.flushPct - Flush at this % of context window (default: 0.75)
 * @returns {Promise<{ shouldFlush: boolean, estimatedTokens: number, pctUsed: number, threshold: number }>}
 */
export async function shouldFlush(jsonlPath, opts = {}) {
  const { contextWindowTokens = 200000, flushPct = 0.75 } = opts;
  const threshold = Math.floor(contextWindowTokens * flushPct);

  // Guard against a missing file: statSync would throw here, while every
  // other entry point in this module treats a missing session as empty.
  if (!fs.existsSync(jsonlPath)) {
    return { shouldFlush: false, estimatedTokens: 0, pctUsed: 0, threshold };
  }

  const stat = fs.statSync(jsonlPath);
  // Quick estimate from file size — ~4 chars/token, and JSONL wrapping
  // roughly doubles the byte count (~2x overhead).
  const quickEstimate = Math.ceil(stat.size / (CHARS_PER_TOKEN * 2));

  return {
    shouldFlush: quickEstimate >= threshold,
    estimatedTokens: quickEstimate,
    pctUsed: Math.round((quickEstimate / contextWindowTokens) * 100),
    threshold,
  };
}
|
|
88
|
+
|
|
89
|
+
// ── Bigram Similarity ────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
/**
 * Compute word-bigram similarity between two strings (0.0 – 1.0).
 * Jaccard index over bigram sets; unigrams are included as well so very
 * short strings still compare meaningfully. Used for dedup when merging
 * new facts into MEMORY.md.
 */
export function bigramSimilarity(a, b) {
  if (!a || !b) return 0;

  const tokenize = (text) =>
    text.toLowerCase().replace(/[^a-z0-9\s]/g, '').trim().split(/\s+/);

  const grams = (text) => {
    const words = tokenize(text);
    const out = new Set(words); // unigrams
    words.forEach((word, i) => {
      if (i + 1 < words.length) out.add(`${word} ${words[i + 1]}`);
    });
    return out;
  };

  const left = grams(a);
  const right = grams(b);
  if (left.size === 0 || right.size === 0) return 0;

  let shared = 0;
  for (const g of left) {
    if (right.has(g)) shared++;
  }

  // |A ∪ B| via inclusion–exclusion.
  const unionSize = left.size + right.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
123
|
+
|
|
124
|
+
// ── Fact Extraction ────────────────────────────────────
|
|
125
|
+
|
|
126
|
+
/**
 * Extract durable facts from conversation tail messages.
 * Heuristic approach — looks for:
 * - User corrections / preferences ("don't...", "always...", "I prefer...")
 * - Decisions ("we decided...", "let's go with...")
 * - Environment discoveries ("the API is at...", "config is in...")
 * - Named entities + context (URLs, file paths)
 *
 * Only user messages are scanned; results are deduped within the pass.
 *
 * @param {Array<{ role: string, content: string }>} tailMessages
 * @returns {Array<{ fact: string, category: string, confidence: number }>}
 */
export function extractFacts(tailMessages) {
  const facts = [];
  const seen = new Set();

  const patterns = [
    // User corrections / preferences
    { re: /(?:don'?t|never|always|stop|prefer|please)\s+(.{10,80})/i, category: 'preference', confidence: 85 },
    // Decisions
    { re: /(?:decided|let'?s go with|we'?ll use|switching to|going with)\s+(.{10,80})/i, category: 'decision', confidence: 80 },
    // Environment / config
    { re: /(?:api|endpoint|url|port|config|database|db)\s+(?:is|at|on|in)\s+(.{5,80})/i, category: 'environment', confidence: 75 },
    // File paths
    { re: /((?:\/[\w.-]+){3,})/g, category: 'reference', confidence: 60 },
    // URLs
    { re: /(https?:\/\/\S{10,80})/g, category: 'reference', confidence: 65 },
  ];

  // Compile each pattern once with global + case-insensitive flags, instead
  // of rebuilding a RegExp per message per pattern inside the scan loop.
  // (matchAll clones the regex internally, so reuse across messages is safe.)
  const compiled = patterns.map(({ re, category, confidence }) => {
    let flags = re.flags.includes('g') ? re.flags : re.flags + 'g';
    if (!flags.includes('i')) flags += 'i';
    return { re: new RegExp(re.source, flags), category, confidence };
  });

  for (const msg of tailMessages) {
    if (msg.role !== 'user') continue; // focus on user statements

    for (const { re, category, confidence } of compiled) {
      for (const match of msg.content.matchAll(re)) {
        const factText = match[0].trim().slice(0, 120);

        // Dedup within this extraction pass (whitespace-normalized).
        const key = factText.toLowerCase().replace(/\s+/g, ' ');
        if (seen.has(key)) continue;
        seen.add(key);

        facts.push({ fact: factText, category, confidence });
      }
    }
  }

  return facts;
}
|
|
175
|
+
|
|
176
|
+
// ── MEMORY.md Merge ────────────────────────────────────
|
|
177
|
+
|
|
178
|
+
/**
 * Parse MEMORY.md content into structured entries.
 * An entry is a "- " or "* " bullet line, tagged with the most recent
 * "##"-level section heading seen above it.
 *
 * @param {string} content — raw MEMORY.md text
 * @returns {Array<{ section: string, text: string, raw: string }>}
 */
export function parseMemoryMd(content) {
  const entries = [];
  let section = '';

  for (const line of content.split('\n')) {
    if (line.startsWith('##')) {
      section = line.replace(/^#+\s*/, '').trim();
      continue;
    }
    const isBullet = line.startsWith('- ') || line.startsWith('* ');
    if (isBullet) {
      entries.push({
        section,
        text: line.replace(/^[-*]\s*/, '').trim(),
        raw: line,
      });
    }
  }

  return entries;
}
|
|
201
|
+
|
|
202
|
+
/**
 * Merge new facts into MEMORY.md content with dedup.
 *
 * Strategy:
 * - >90% similarity to an existing entry → skip (already known)
 * - >70% similarity → merge (append new info to the existing entry)
 * - otherwise → append as a new entry under "## Recent"
 *
 * Appending stops once the character budget would be exceeded.
 *
 * @param {string} memoryContent - Current MEMORY.md content
 * @param {Array} facts - Array of { fact, category, confidence }
 * @param {number} charBudget - Max character budget (default 2200)
 * @returns {{ content: string, added: number, merged: number, skipped: number }}
 */
export function mergeFacts(memoryContent, facts, charBudget = 2200) {
  const entries = parseMemoryMd(memoryContent);
  let content = memoryContent;
  let added = 0, merged = 0, skipped = 0;

  for (const { fact } of facts) {
    // Find the most similar existing entry.
    let bestSim = 0;
    let bestEntry = null;

    for (const entry of entries) {
      const sim = bigramSimilarity(fact, entry.text);
      if (sim > bestSim) {
        bestSim = sim;
        bestEntry = entry;
      }
    }

    if (bestSim > 0.9) {
      skipped++; // effectively already recorded
      continue;
    }

    if (bestSim > 0.7 && bestEntry) {
      // Merge: rewrite the existing line with a combined version.
      const combined = `${bestEntry.text} (updated: ${fact.slice(0, 60)})`;
      const newRaw = `- ${combined}`;
      content = content.replace(bestEntry.raw, newRaw);
      // Keep the in-memory entry in sync with the rewritten line — without
      // this, a later fact matching the same entry would try to replace the
      // old raw text (no longer present in content) and silently no-op.
      bestEntry.text = combined;
      bestEntry.raw = newRaw;
      merged++;
      continue;
    }

    // Budget check before appending.
    if (content.length + fact.length + 10 > charBudget) {
      break; // respect the character budget; remaining facts are dropped
    }

    // Append under "## Recent" section (created on first use).
    if (!content.includes('## Recent')) {
      content = content.trimEnd() + '\n\n## Recent\n';
    }
    content = content.trimEnd() + `\n- ${fact}`;
    added++;
    entries.push({ section: 'Recent', text: fact, raw: `- ${fact}` });
  }

  return { content: content.trimEnd() + '\n', added, merged, skipped };
}
|
|
262
|
+
|
|
263
|
+
// ── Main Flush Pipeline ────────────────────────────────────
|
|
264
|
+
|
|
265
|
+
/**
 * Run the pre-compression flush pipeline.
 *
 * 1. Read the tail of the JSONL conversation
 * 2. Extract durable facts
 * 3. Merge into MEMORY.md with dedup
 * 4. Write MEMORY.md back and return stats
 *
 * @param {string} jsonlPath - Path to current session JSONL
 * @param {string} memoryMdPath - Path to MEMORY.md
 * @param {Object} opts
 * @param {number} opts.tailCount - Number of tail messages to scan (default 40)
 * @param {number} opts.charBudget - MEMORY.md character budget (default 2200)
 * @param {string} opts.format - Transcript format (auto-detected if omitted)
 * @returns {Promise<{ flushed: boolean, facts: number, added: number, merged: number, skipped: number }>}
 */
export async function runFlush(jsonlPath, memoryMdPath, opts = {}) {
  const { tailCount = 40, charBudget = 2200, format } = opts;

  // Missing session or empty conversation → nothing to flush.
  if (!fs.existsSync(jsonlPath)) {
    return { flushed: false, facts: 0, added: 0, merged: 0, skipped: 0 };
  }

  // 1. Tail messages (format-agnostic via transcript-parser).
  const { tailMessages } = await estimateSessionTokens(jsonlPath, tailCount, { format });
  if (tailMessages.length === 0) {
    return { flushed: false, facts: 0, added: 0, merged: 0, skipped: 0 };
  }

  // 2. Heuristic fact extraction.
  const facts = extractFacts(tailMessages);
  if (facts.length === 0) {
    return { flushed: true, facts: 0, added: 0, merged: 0, skipped: 0 };
  }

  // 3. Merge into MEMORY.md (a missing file is treated as empty).
  const memoryContent = fs.existsSync(memoryMdPath)
    ? fs.readFileSync(memoryMdPath, 'utf-8')
    : '';
  const { content, added, merged, skipped } = mergeFacts(memoryContent, facts, charBudget);

  // 4. Persist the merged memory.
  fs.writeFileSync(memoryMdPath, content);

  return { flushed: true, facts: facts.length, added, merged, skipped };
}
|