@totalreclaw/totalreclaw 1.6.0 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAWHUB.md +134 -0
- package/README.md +407 -64
- package/SKILL.md +1032 -0
- package/api-client.ts +5 -5
- package/claims-helper.ts +686 -0
- package/config.ts +211 -0
- package/consolidation.ts +141 -33
- package/contradiction-sync.ts +1389 -0
- package/crypto.ts +63 -261
- package/digest-sync.ts +516 -0
- package/embedding.ts +69 -46
- package/extractor.ts +1307 -84
- package/hot-cache-wrapper.ts +1 -1
- package/import-adapters/gemini-adapter.ts +243 -0
- package/import-adapters/index.ts +3 -0
- package/import-adapters/types.ts +1 -1
- package/index.ts +1887 -323
- package/llm-client.ts +106 -53
- package/lsh.ts +21 -210
- package/package.json +20 -7
- package/pin.ts +502 -0
- package/reranker.ts +96 -124
- package/skill.json +213 -0
- package/subgraph-search.ts +112 -5
- package/subgraph-store.ts +559 -275
- package/consolidation.test.ts +0 -356
- package/extractor-dedup.test.ts +0 -168
- package/import-adapters/import-adapters.test.ts +0 -1123
- package/lsh.test.ts +0 -463
- package/pocv2-e2e-test.ts +0 -917
- package/porter-stemmer.d.ts +0 -4
- package/reranker.test.ts +0 -594
- package/semantic-dedup.test.ts +0 -392
- package/setup.sh +0 -19
- package/store-dedup-wiring.test.ts +0 -186
package/claims-helper.ts
ADDED
|
@@ -0,0 +1,686 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TotalReclaw Plugin — Knowledge Graph helpers for the write path.
|
|
3
|
+
*
|
|
4
|
+
* Builds canonical Claim JSON from an ExtractedFact, generates entity
|
|
5
|
+
* trapdoors for blind search, and resolves the claim-format feature flag.
|
|
6
|
+
*
|
|
7
|
+
* The canonical Claim schema uses compact short keys (t, c, cf, i, sa, ea, e, ...)
|
|
8
|
+
* and is produced byte-identically across Rust, WASM, and Python via
|
|
9
|
+
* `canonicalizeClaim()` in @totalreclaw/core.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import crypto from 'node:crypto';
|
|
13
|
+
import { createRequire } from 'node:module';
|
|
14
|
+
import type {
|
|
15
|
+
ExtractedEntity,
|
|
16
|
+
ExtractedFact,
|
|
17
|
+
MemoryType,
|
|
18
|
+
MemoryTypeV0,
|
|
19
|
+
MemoryTypeV1,
|
|
20
|
+
MemoryScope,
|
|
21
|
+
MemorySource,
|
|
22
|
+
MemoryVolatility,
|
|
23
|
+
} from './extractor.js';
|
|
24
|
+
import {
|
|
25
|
+
isValidMemoryType,
|
|
26
|
+
isValidMemoryTypeV1,
|
|
27
|
+
V0_TO_V1_TYPE,
|
|
28
|
+
VALID_MEMORY_SCOPES,
|
|
29
|
+
VALID_MEMORY_SOURCES,
|
|
30
|
+
VALID_MEMORY_VOLATILITIES,
|
|
31
|
+
VALID_MEMORY_TYPES_V1,
|
|
32
|
+
} from './extractor.js';
|
|
33
|
+
|
|
34
|
+
// The WASM core is resolved lazily through a `require` function built with
// createRequire, so that importing this module works under both the OpenClaw
// runtime (CJS-ish tsx) and bare Node ESM (used by tests).
const requireWasm = createRequire(import.meta.url);
let _wasm: typeof import('@totalreclaw/core') | null = null;

/**
 * Return the `@totalreclaw/core` module, loading it on the first call and
 * serving the cached instance afterwards.
 */
function getWasm() {
  if (_wasm) return _wasm;
  _wasm = requireWasm('@totalreclaw/core');
  return _wasm!;
}
|
|
42
|
+
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Category mapping (ExtractedFact.type → compact Claim category short key)
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
// Legacy v0 type → compact category mapping. Kept for reading pre-v1 vault
// entries that stored the short-form category as the decrypted `c` key.
const TYPE_TO_CATEGORY_V0: Record<MemoryTypeV0, string> = {
  fact: 'fact',
  preference: 'pref',
  decision: 'dec',
  episodic: 'epi',
  goal: 'goal',
  context: 'ctx',
  summary: 'sum',
  rule: 'rule',
};

// v1 type → compact category mapping for recall display. These short keys
// remain the display-layer category tags (e.g. `[rule]`, `[fact]`) that the
// recall tool surfaces, so the v1 types map onto the v0 category keys.
const TYPE_TO_CATEGORY_V1: Record<MemoryType, string> = {
  claim: 'claim',
  preference: 'pref',
  directive: 'rule', // v1 directive → v0 category "rule" for display
  commitment: 'goal', // v1 commitment → v0 category "goal" for display
  episode: 'epi',
  summary: 'sum',
};

/**
 * Map any memory type (v1 or legacy v0) to the compact category short key.
 *
 * v1 types take priority; tokens not in the v1 table fall through to the v0
 * table for pre-v1 vault entries; anything else returns `'fact'`.
 *
 * Lookups are restricted to the tables' OWN keys. The value can originate
 * from decrypted, unvalidated JSON (see `readClaimFromBlob`), and a plain
 * `in` / index lookup would also match inherited `Object.prototype` members
 * such as `constructor` or `toString`, returning a function instead of a
 * category string.
 *
 * @param type - Memory type token (v1 or v0).
 * @returns The compact category key, or `'fact'` for unknown tokens.
 */
export function mapTypeToCategory(type: MemoryType | MemoryTypeV0): string {
  const hasOwn = Object.prototype.hasOwnProperty;
  if (hasOwn.call(TYPE_TO_CATEGORY_V1, type)) {
    return TYPE_TO_CATEGORY_V1[type as MemoryType];
  }
  if (hasOwn.call(TYPE_TO_CATEGORY_V0, type)) {
    return TYPE_TO_CATEGORY_V0[type as MemoryTypeV0];
  }
  return 'fact';
}
|
|
82
|
+
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
// Canonical Claim builder
|
|
85
|
+
// ---------------------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
/** Input accepted by `buildCanonicalClaim` (and its deprecated alias). */
export interface BuildClaimInput {
  /** The extracted fact to serialize. */
  fact: ExtractedFact;
  /** 1-10, may differ from fact.importance after store-time dedup supersede. */
  importance: number;
  /**
   * Source-agent metadata string. Carried through as legacy context only:
   * plugin v3.0.0 emits v1 JSON blobs where provenance lives in `fact.source`,
   * and this field is ignored. It stays on the interface so existing
   * call-site signatures continue to type-check.
   */
  sourceAgent: string;
  /** Creation timestamp. Defaults to now. */
  extractedAt?: string;
}

/**
 * Serialize an ExtractedFact as a canonical Claim JSON string.
 *
 * Since plugin v3.0.0 this always produces a Memory Taxonomy v1 JSON blob
 * (schema_version "1.0") by delegating to `buildCanonicalClaimV1`; the legacy
 * v0 short-key {t, c, i, sa, ea} format is no longer written.
 *
 * A fact without a `source` is given `'user-inferred'` so that a
 * misconfigured extraction hook does not drop the write. The outer protobuf
 * wrapper's `version` field MUST be set to 4 when storing the returned
 * payload (see `subgraph-store.ts::encodeFactProtobuf`).
 *
 * @param input - Fact, final importance and optional creation timestamp.
 * @returns The v1 claim JSON string.
 */
export function buildCanonicalClaim(input: BuildClaimInput): string {
  // `applyProvenanceFilterLax` should already have populated the source
  // upstream; this fallback covers explicit tool paths and legacy callers.
  let sourcedFact: ExtractedFact = input.fact;
  if (!sourcedFact.source) {
    sourcedFact = { ...input.fact, source: 'user-inferred' };
  }

  return buildCanonicalClaimV1({
    fact: sourcedFact,
    importance: input.importance,
    createdAt: input.extractedAt,
  });
}
|
|
130
|
+
|
|
131
|
+
// ---------------------------------------------------------------------------
|
|
132
|
+
// v1 Claim payload builder (Phase 3 — plugin v3.0.0)
|
|
133
|
+
//
|
|
134
|
+
// Produces a MemoryClaimV1-shaped JSON payload matching
|
|
135
|
+
// `docs/specs/totalreclaw/memory-taxonomy-v1.md`.
|
|
136
|
+
//
|
|
137
|
+
// The v1 payload uses long field names + a schema_version marker so that
|
|
138
|
+
// decrypt logic can discriminate between v0 short-key claims and v1 claims
|
|
139
|
+
// without any external hint. The protobuf outer wrapper sets `version = 4`
|
|
140
|
+
// when writing v1 payloads — see `subgraph-store.ts`.
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
|
|
143
|
+
/** Schema version marker written into every v1 claim blob. */
export const V1_SCHEMA_VERSION = '1.0' as const;

/** Maximum number of UTF-16 code units of `reasoning` kept on a claim. */
const MAX_REASONING_LENGTH = 256;

/** Maximum number of entities kept on a claim. */
const MAX_CLAIM_ENTITIES = 8;

export interface BuildClaimV1Input {
  /** The extracted fact in v1 shape. Must have `type` as a MemoryTypeV1 token. */
  fact: ExtractedFact;
  /** Final importance after any store-time dedup adjustment. 1-10. */
  importance: number;
  /** Creation timestamp. Defaults to now. */
  createdAt?: string;
  /** Optional superseded-by chain pointer (for pin / retype / forget). */
  supersededBy?: string;
  /** Optional explicit expiration timestamp. */
  expiresAt?: string;
  /**
   * Stable claim ID. When omitted, a fresh `crypto.randomUUID()` is generated.
   * Pass the same ID that is used for the on-chain fact id so both agree.
   */
  id?: string;
}

/**
 * Truncate `value` to at most `maxUnits` UTF-16 code units without leaving a
 * dangling high surrogate at the cut point.
 */
function truncateAtCodeUnitBoundary(value: string, maxUnits: number): string {
  if (value.length <= maxUnits) return value;
  const cut = value.slice(0, maxUnits);
  const lastUnit = cut.charCodeAt(cut.length - 1);
  const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsWithHighSurrogate ? cut.slice(0, -1) : cut;
}

/**
 * Build a v1 MemoryClaimV1 JSON blob.
 *
 * The build pipeline:
 *   1. Assemble the core-canonical subset of the payload.
 *   2. Send it through the core's `validateMemoryClaimV1` for schema
 *      enforcement.
 *   3. Re-attach the plugin-only extras (`schema_version`, `volatility`) to
 *      the validated object and serialize that as the stored JSON.
 *
 * Plugin-only extras (not round-tripped by core's validator as of v2.0.0):
 *   - `schema_version` — version marker the decrypt path reads
 *   - `volatility` — stable | updatable | ephemeral
 *
 * The outer protobuf wrapper's `version` field must be set to 4 when storing
 * the returned payload (see subgraph-store.ts).
 *
 * @param input - Fact plus write-time overrides (importance, timestamps, id).
 * @returns The serialized v1 claim JSON.
 * @throws Error if `fact.source` is missing or not one of
 *   `VALID_MEMORY_SOURCES`; also propagates whatever the core validator throws.
 */
export function buildCanonicalClaimV1(input: BuildClaimV1Input): string {
  const { fact, importance, createdAt, supersededBy, expiresAt } = input;
  const id = input.id ?? crypto.randomUUID();

  if (!fact.source) {
    throw new Error(
      'buildCanonicalClaimV1: fact.source is required (v1 taxonomy mandates provenance)',
    );
  }
  if (!(VALID_MEMORY_SOURCES as readonly string[]).includes(fact.source)) {
    throw new Error(`buildCanonicalClaimV1: invalid source "${fact.source}"`);
  }

  const type = normalizeToV1Type(fact.type);
  const resolvedCreatedAt = createdAt ?? new Date().toISOString();
  const resolvedImportance = Math.max(1, Math.min(10, Math.round(importance)));

  // Core-canonical subset sent through validateMemoryClaimV1. Core strips
  // fields it doesn't understand, so we send it the subset it accepts and
  // re-attach client-side extras to the final payload.
  const corePayload: Record<string, unknown> = {
    id,
    text: fact.text,
    type,
    source: fact.source,
    created_at: resolvedCreatedAt,
    importance: resolvedImportance,
  };

  if (fact.scope && (VALID_MEMORY_SCOPES as readonly string[]).includes(fact.scope)) {
    corePayload.scope = fact.scope;
  }
  if (fact.reasoning && fact.reasoning.length > 0) {
    // A plain slice() can cut a surrogate pair in half; JSON.stringify would
    // then emit a lone `\udXXX` escape. NOTE(review): a strict JSON parser in
    // the core presumably rejects that, failing the whole write — confirm.
    corePayload.reasoning = truncateAtCodeUnitBoundary(fact.reasoning, MAX_REASONING_LENGTH);
  }
  if (fact.entities && fact.entities.length > 0) {
    corePayload.entities = fact.entities.slice(0, MAX_CLAIM_ENTITIES).map((e) => {
      const entity: Record<string, unknown> = { name: e.name, type: e.type };
      if (e.role) entity.role = e.role;
      return entity;
    });
  }
  if (typeof fact.confidence === 'number') {
    corePayload.confidence = Math.max(0, Math.min(1, fact.confidence));
  }
  if (expiresAt) corePayload.expires_at = expiresAt;
  if (supersededBy) corePayload.superseded_by = supersededBy;

  // Validate through core and continue from the object it hands back.
  const validated = getWasm().validateMemoryClaimV1(JSON.stringify(corePayload)) as string;
  const canonical = JSON.parse(validated) as Record<string, unknown>;

  // Re-attach plugin-only extras not round-tripped by core's validator.
  canonical.schema_version = V1_SCHEMA_VERSION;
  if (fact.volatility && (VALID_MEMORY_VOLATILITIES as readonly string[]).includes(fact.volatility)) {
    canonical.volatility = fact.volatility;
  }

  return JSON.stringify(canonical);
}
|
|
242
|
+
|
|
243
|
+
/**
 * Normalize any type token (v0 or v1) to a v1 type.
 *
 * Tokens accepted by `isValidMemoryType` pass through unchanged. Legacy
 * tokens are translated via `V0_TO_V1_TYPE`; anything else becomes `'claim'`.
 *
 * The legacy lookup only consults the table's OWN keys. Indexing the object
 * directly would also resolve inherited `Object.prototype` members (e.g.
 * `constructor`, `__proto__`) and return a non-string value for such tokens.
 *
 * @param type - Raw type token.
 * @returns A v1 memory type.
 */
export function normalizeToV1Type(type: string): MemoryType {
  if (isValidMemoryType(type)) return type;
  if (Object.prototype.hasOwnProperty.call(V0_TO_V1_TYPE, type)) {
    return V0_TO_V1_TYPE[type as MemoryTypeV0] ?? 'claim';
  }
  return 'claim';
}
|
|
251
|
+
|
|
252
|
+
/**
 * Heuristic check for a v1 JSON payload.
 *
 * A blob qualifies when it parses to a non-null object carrying string
 * `text` and `type` fields plus a string `schema_version` beginning with
 * `"1."`. Unparseable input yields `false`.
 *
 * @param decrypted - Decrypted blob contents.
 * @returns `true` when the blob looks like a v1 payload.
 */
export function isV1Blob(decrypted: string): boolean {
  let parsed: unknown;
  try {
    parsed = JSON.parse(decrypted);
  } catch {
    return false;
  }

  if (typeof parsed !== 'object' || parsed === null) return false;

  const { text, type, schema_version: schemaVersion } = parsed as Record<string, unknown>;
  if (typeof text !== 'string') return false;
  if (typeof type !== 'string') return false;
  if (typeof schemaVersion !== 'string') return false;
  return schemaVersion.startsWith('1.');
}
|
|
273
|
+
|
|
274
|
+
/** Structured view of a decrypted v1 claim blob, as returned by `readV1Blob`. */
export interface V1BlobReadResult {
  text: string;
  type: MemoryTypeV1;
  source: MemorySource;
  scope: MemoryScope;
  volatility: MemoryVolatility;
  reasoning?: string;
  entities?: Array<{ name: string; type: string; role?: string }>;
  importance: number; // integer 1-10
  confidence: number; // 0-1
  createdAt: string;
  expiresAt?: string;
  supersededBy?: string;
  id?: string;
}

/**
 * Return `value` when it is a string contained in `allowed`, else `fallback`.
 */
function coerceV1Token<T extends string>(
  value: unknown,
  allowed: readonly string[],
  fallback: T,
): T {
  return typeof value === 'string' && allowed.includes(value) ? (value as T) : fallback;
}

/**
 * Parse a decrypted v1 blob into a structured object.
 *
 * Returns null when the input is not valid JSON, is not an object, or lacks a
 * string `schema_version` starting with `"1."`. Every other field is read
 * leniently and replaced by a default when missing or invalid:
 * `text` → `''`, `type` → `'claim'`, `source` → `'user-inferred'`,
 * `scope` → `'unspecified'`, `volatility` → `'updatable'`, `importance` → 5
 * (rounded and clamped to 1-10), `confidence` → 0.85 (clamped to 0-1),
 * `createdAt` → `''`.
 *
 * Entities are kept only when both `name` and `type` are strings. A `role`
 * is copied only when it is a string, so the returned entities honour the
 * declared `role?: string` shape even for malformed blobs.
 *
 * @param decrypted - Decrypted blob contents.
 * @returns The parsed claim, or null if the blob is not a v1 payload.
 */
export function readV1Blob(decrypted: string): V1BlobReadResult | null {
  try {
    const parsed: unknown = JSON.parse(decrypted);
    if (typeof parsed !== 'object' || parsed === null) return null;
    const obj = parsed as Record<string, unknown>;

    if (typeof obj.schema_version !== 'string' || !obj.schema_version.startsWith('1.')) {
      return null;
    }

    const text = typeof obj.text === 'string' ? obj.text : '';
    const rawType = typeof obj.type === 'string' ? obj.type : 'claim';
    const type: MemoryTypeV1 = isValidMemoryTypeV1(rawType) ? rawType : 'claim';

    const source = coerceV1Token<MemorySource>(
      obj.source,
      VALID_MEMORY_SOURCES as readonly string[],
      'user-inferred',
    );
    const scope = coerceV1Token<MemoryScope>(
      obj.scope,
      VALID_MEMORY_SCOPES as readonly string[],
      'unspecified',
    );
    const volatility = coerceV1Token<MemoryVolatility>(
      obj.volatility,
      VALID_MEMORY_VOLATILITIES as readonly string[],
      'updatable',
    );

    const impRaw = typeof obj.importance === 'number' ? obj.importance : 5;
    const importance = Math.max(1, Math.min(10, Math.round(impRaw)));

    const confRaw = typeof obj.confidence === 'number' ? obj.confidence : 0.85;
    const confidence = Math.max(0, Math.min(1, confRaw));

    const result: V1BlobReadResult = {
      text,
      type,
      source,
      scope,
      volatility,
      importance,
      confidence,
      createdAt: typeof obj.created_at === 'string' ? obj.created_at : '',
    };

    if (typeof obj.reasoning === 'string' && obj.reasoning.length > 0) {
      result.reasoning = obj.reasoning;
    }
    if (Array.isArray(obj.entities)) {
      const entities: Array<{ name: string; type: string; role?: string }> = [];
      for (const candidate of obj.entities as unknown[]) {
        if (!candidate || typeof candidate !== 'object') continue;
        const { name, type: entityType, role } = candidate as Record<string, unknown>;
        if (typeof name !== 'string' || typeof entityType !== 'string') continue;
        // Drop a malformed role rather than the whole entity.
        entities.push(
          typeof role === 'string'
            ? { name, type: entityType, role }
            : { name, type: entityType },
        );
      }
      result.entities = entities;
    }
    if (typeof obj.expires_at === 'string') result.expiresAt = obj.expires_at;
    if (typeof obj.superseded_by === 'string') result.supersededBy = obj.superseded_by;
    if (typeof obj.id === 'string') result.id = obj.id;

    return result;
  } catch {
    return null;
  }
}
|
|
358
|
+
|
|
359
|
+
// VALID_MEMORY_TYPES_V1 is imported from extractor.ts but not otherwise used
// in this module. Evaluating it in a `void` expression silences unused-import
// lint warnings while keeping the name in scope, so future v1 helpers can
// reuse it without re-importing.
void VALID_MEMORY_TYPES_V1;
|
|
363
|
+
|
|
364
|
+
// ---------------------------------------------------------------------------
// Back-compat alias: buildCanonicalClaimRouted
// ---------------------------------------------------------------------------

/**
 * Back-compat alias kept from the Phase-3 rollout.
 *
 * Plugin v3.0.0 removed the v0/v1 taxonomy toggle
 * (`TOTALRECLAW_TAXONOMY_VERSION` env var); all extraction + write paths emit
 * v1 unconditionally. The alias remains so external callers importing the
 * rollout name keep compiling. It only forwards to `buildCanonicalClaim`.
 *
 * @deprecated Use `buildCanonicalClaim` directly.
 */
export function buildCanonicalClaimRouted(input: BuildClaimInput): string {
  const claimJson = buildCanonicalClaim(input);
  return claimJson;
}
|
|
378
|
+
|
|
379
|
+
// ---------------------------------------------------------------------------
|
|
380
|
+
// Digest helpers (Stage 3b read path)
|
|
381
|
+
// ---------------------------------------------------------------------------
|
|
382
|
+
|
|
383
|
+
/**
 * Well-known blind-index marker used to locate digest claims on the subgraph.
 *
 * It is the hex-encoded plain SHA-256 of the string "type:digest", the same
 * primitive as word trapdoors, so it can live in the existing `blindIndices`
 * array. The `type:` namespace prefix keeps it distinct from any user word
 * trapdoor.
 */
export const DIGEST_TRAPDOOR: string = crypto.createHash('sha256').update('type:digest').digest('hex');

/** Compact category short key for digest claims (ClaimCategory::Digest). */
export const DIGEST_CATEGORY = 'dig';

/** Distinctive source marker so operators can grep for digest writes. */
export const DIGEST_SOURCE_AGENT = 'openclaw-plugin-digest';

/**
 * Hard ceiling on claim count for LLM-assisted digest compilation.
 * Above this, the LLM is skipped entirely and the template path is used to
 * keep token cost bounded. See plan §9 and Stage 3b design question #3.
 */
export const DIGEST_CLAIM_CAP = 200;
|
|
406
|
+
|
|
407
|
+
/** Digest injection modes understood by legacy call-sites. */
export type DigestMode = 'on' | 'off' | 'template';

/**
 * Report the digest injection mode, which is always `'on'` in v1.
 *
 * The TOTALRECLAW_DIGEST_MODE env var was removed: the G-pipeline ships a
 * digest on every recall, with an LLM template fallback baked into the digest
 * compiler. The function survives only so legacy call-sites keep compiling.
 *
 * @deprecated v1 always returns `'on'`.
 */
export function resolveDigestMode(): DigestMode {
  const mode: DigestMode = 'on';
  return mode;
}
|
|
420
|
+
|
|
421
|
+
// ---------------------------------------------------------------------------
|
|
422
|
+
// Auto-resolution mode — INTERNAL DEBUG KILL-SWITCH
|
|
423
|
+
//
|
|
424
|
+
// Not a user-facing env var. This is kept as an emergency off-switch for
|
|
425
|
+
// auto-contradiction-resolution if we have to disable it in production
|
|
426
|
+
// without a redeploy. It is NOT documented in the env var reference and
|
|
427
|
+
// MUST NOT be surfaced in any client README / SKILL.md.
|
|
428
|
+
//
|
|
429
|
+
// See `contradiction-sync.ts` for the read site.
|
|
430
|
+
// ---------------------------------------------------------------------------
|
|
431
|
+
|
|
432
|
+
/** Modes of the auto-resolution kill-switch. */
export type AutoResolveMode = 'active' | 'off' | 'shadow';

/**
 * Internal kill-switch for the auto-resolution loop, read from the
 * `TOTALRECLAW_AUTO_RESOLVE_MODE` environment variable (trimmed and
 * lower-cased before comparison).
 *
 *   - `active` (default, unset, unknown): full detection + auto-resolution.
 *   - `off`: skip contradiction detection entirely; Phase 1 behaviour.
 *   - `shadow`: detect + log decisions, but do not apply them (debug only).
 *
 * @internal Not public config — emergency kill-switch only.
 */
export function resolveAutoResolveMode(): AutoResolveMode {
  const configured = process.env.TOTALRECLAW_AUTO_RESOLVE_MODE ?? '';
  switch (configured.trim().toLowerCase()) {
    case 'off':
      return 'off';
    case 'shadow':
      return 'shadow';
    default:
      return 'active';
  }
}
|
|
449
|
+
|
|
450
|
+
// ---------------------------------------------------------------------------
|
|
451
|
+
// Decrypted blob reader — handles both new Claim ({t,c,i,...}) and
|
|
452
|
+
// legacy {text, metadata: {importance: 0-1}} formats transparently.
|
|
453
|
+
// Any decrypt site should use this instead of parsing doc.text directly.
|
|
454
|
+
// ---------------------------------------------------------------------------
|
|
455
|
+
|
|
456
|
+
/** Format-independent view of a decrypted claim blob. */
export interface BlobReadResult {
  text: string;
  importance: number; // integer 1-10
  category: string;
  metadata: Record<string, unknown>;
}

/**
 * Read a decrypted blob regardless of which stored format it uses.
 *
 * Formats are tried in this order:
 *   1. v1 payload — string `text` + `type` and a `schema_version` starting
 *      with `"1."`; the category comes from `mapTypeToCategory`.
 *   2. Short-key canonical Claim — string `t` and `c`.
 *   3. Legacy plugin doc — `{text, metadata: {importance: 0-1}}`.
 * Anything else (including unparseable input) is returned as raw text with
 * importance 5, category `'fact'` and empty metadata.
 *
 * `importance` on the result is always an integer 1-10; the v1 and short-key
 * branches additionally expose it as a 0-1 float in `metadata.importance`.
 *
 * In the legacy branch, `metadata` is only passed through when it is a
 * non-null, non-array object. Any other value is replaced by `{}` so the
 * result always honours the declared `Record<string, unknown>` type.
 *
 * @param decryptedJson - Decrypted blob contents.
 * @returns The normalized claim view; never throws.
 */
export function readClaimFromBlob(decryptedJson: string): BlobReadResult {
  try {
    const obj = JSON.parse(decryptedJson) as Record<string, unknown>;

    // v1 payload: long-form fields + schema_version "1.x"
    if (
      typeof obj.text === 'string' &&
      typeof obj.type === 'string' &&
      typeof obj.schema_version === 'string' &&
      obj.schema_version.startsWith('1.')
    ) {
      const importance = typeof obj.importance === 'number'
        ? Math.max(1, Math.min(10, Math.round(obj.importance)))
        : 5;
      return {
        text: obj.text,
        importance,
        category: mapTypeToCategory(obj.type as MemoryTypeV1),
        metadata: {
          type: obj.type,
          source: typeof obj.source === 'string' ? obj.source : 'user-inferred',
          scope: typeof obj.scope === 'string' ? obj.scope : 'unspecified',
          volatility: typeof obj.volatility === 'string' ? obj.volatility : 'updatable',
          reasoning: typeof obj.reasoning === 'string' ? obj.reasoning : undefined,
          importance: importance / 10,
          created_at: typeof obj.created_at === 'string' ? obj.created_at : '',
          schema_version: obj.schema_version,
        },
      };
    }

    // Canonical Claim format: short keys
    if (typeof obj.t === 'string' && typeof obj.c === 'string') {
      const importance = typeof obj.i === 'number' ? Math.max(1, Math.min(10, Math.round(obj.i))) : 5;
      return {
        text: obj.t,
        importance,
        category: obj.c,
        metadata: {
          type: obj.c,
          importance: importance / 10,
          source: typeof obj.sa === 'string' ? obj.sa : 'auto-extraction',
          created_at: typeof obj.ea === 'string' ? obj.ea : '',
        },
      };
    }

    // Legacy plugin {text, metadata: {importance: 0-1}} format
    if (typeof obj.text === 'string') {
      const rawMeta = obj.metadata;
      const meta: Record<string, unknown> =
        typeof rawMeta === 'object' && rawMeta !== null && !Array.isArray(rawMeta)
          ? (rawMeta as Record<string, unknown>)
          : {};
      const impFloat = typeof meta.importance === 'number' ? meta.importance : 0.5;
      const importance = Math.max(1, Math.min(10, Math.round(impFloat * 10)));
      return {
        text: obj.text,
        importance,
        category: typeof meta.type === 'string' ? meta.type : 'fact',
        metadata: meta,
      };
    }
  } catch {
    // Unparseable JSON, or a JSON `null` whose property access threw:
    // fall through to the raw-text result below.
  }
  return { text: decryptedJson, importance: 5, category: 'fact', metadata: {} };
}
|
|
526
|
+
|
|
527
|
+
/** Input accepted by `buildDigestClaim`. */
export interface BuildDigestClaimInput {
  /** The full Digest JSON produced by buildTemplateDigest / assembleDigestFromLlm. */
  digestJson: string;
  /** ISO 8601 timestamp the digest was compiled at. Becomes the `ea` field. */
  compiledAt: string;
}

/**
 * Wrap a serialized Digest JSON in a canonical short-key Claim so it can be
 * encrypted and stored on-chain through the same pipeline as regular facts.
 *
 * The raw Digest JSON becomes the claim's `t` (text) field; the claim is
 * tagged with `DIGEST_CATEGORY`, confidence 1.0, importance 10 and
 * `DIGEST_SOURCE_AGENT`, then canonicalized by the core's
 * `canonicalizeClaim`. Reader path is
 * `parseClaimOrLegacy(decrypted) → extractDigestFromClaim`.
 *
 * Digest claims deliberately carry no entity refs — otherwise entity
 * trapdoors would surface the digest blob in normal recall queries.
 *
 * @param input - Digest JSON and its compilation timestamp.
 * @returns The canonical claim JSON string.
 */
export function buildDigestClaim(input: BuildDigestClaimInput): string {
  const core = getWasm();
  const serialized = JSON.stringify({
    t: input.digestJson,
    c: DIGEST_CATEGORY,
    cf: 1.0,
    i: 10,
    sa: DIGEST_SOURCE_AGENT,
    ea: input.compiledAt,
  });
  return core.canonicalizeClaim(serialized);
}
|
|
556
|
+
|
|
557
|
+
/**
 * Parse a canonical Claim JSON (produced by parseClaimOrLegacy) and, if it is
 * a digest claim, return the wrapped Digest object.
 *
 * Returns null when the outer JSON does not parse, is not an object (JSON
 * `null`, numbers, strings, ...), is not of category `dig`, has no string `t`
 * field, or when the inner JSON fails to parse or lacks a string
 * `prompt_text`. The function never throws.
 *
 * @param canonicalClaimJson - Canonical claim JSON string.
 * @returns The parsed Digest object, or null.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export function extractDigestFromClaim(canonicalClaimJson: string): any | null {
  let claim: unknown;
  try {
    claim = JSON.parse(canonicalClaimJson);
  } catch {
    return null;
  }
  // JSON.parse('null') succeeds; guard before touching properties so a null
  // claim yields null instead of a TypeError.
  if (typeof claim !== 'object' || claim === null) return null;

  const { c: category, t: wrapped } = claim as { c?: unknown; t?: unknown };
  if (category !== DIGEST_CATEGORY || typeof wrapped !== 'string') return null;

  try {
    const digest = JSON.parse(wrapped);
    // Minimal shape check: a Digest must at least have prompt_text.
    if (typeof digest !== 'object' || digest === null) return null;
    if (typeof digest.prompt_text !== 'string') return null;
    return digest;
  } catch {
    return null;
  }
}
|
|
581
|
+
|
|
582
|
+
/**
 * Lightweight check: does this decrypted blob look like a digest claim?
 * Used to filter digest blobs out of user-facing recall results.
 *
 * The input must be a JSON string. It matches when it parses to an object
 * whose `c` field equals `DIGEST_CATEGORY` (`{c:"dig",...}`). Legacy
 * `{text, metadata}` docs, non-object JSON values and parse errors all yield
 * a strict `false`.
 *
 * @param decrypted - Decrypted blob contents.
 * @returns `true` only for digest claims.
 */
export function isDigestBlob(decrypted: string): boolean {
  try {
    const obj: unknown = JSON.parse(decrypted);
    // Explicit checks instead of `obj && ...`, which would leak the falsy
    // parsed value itself (null, 0, '') out of a function declared boolean.
    return (
      typeof obj === 'object' &&
      obj !== null &&
      (obj as { c?: unknown }).c === DIGEST_CATEGORY
    );
  } catch {
    return false;
  }
}
|
|
597
|
+
|
|
598
|
+
/**
 * Elapsed hours from `compiledAtIso` to `nowMs`.
 *
 * An unparseable `compiledAtIso` yields `Infinity`, which forces a recompile:
 * the safe default when the stored timestamp cannot be trusted. A timestamp
 * at or after `nowMs` yields 0 (defensive against clock skew).
 *
 * @param compiledAtIso - Timestamp string understood by `Date.parse`.
 * @param nowMs - Current time in milliseconds since the Unix epoch.
 * @returns Fractional hours elapsed, `0`, or `Infinity`.
 */
export function hoursSince(compiledAtIso: string, nowMs: number): number {
  const MS_PER_HOUR = 1000 * 60 * 60;
  const compiledMs = Date.parse(compiledAtIso);
  if (Number.isNaN(compiledMs)) {
    return Infinity;
  }
  const elapsedMs = nowMs - compiledMs;
  return elapsedMs <= 0 ? 0 : elapsedMs / MS_PER_HOUR;
}
|
|
612
|
+
|
|
613
|
+
/**
 * Decide whether a digest is stale, i.e. whether claims have been written
 * since it was compiled. Both arguments are Unix seconds.
 *
 * Equal or regressing values (clock skew, empty vault) are NOT stale: a
 * recompile is only warranted on strictly newer evidence.
 *
 * @param digestVersion - Unix-seconds version stamp of the digest.
 * @param currentMaxCreatedAtUnix - Newest claim creation time, Unix seconds.
 * @returns `true` when a strictly newer claim exists.
 */
export function isDigestStale(
  digestVersion: number,
  currentMaxCreatedAtUnix: number,
): boolean {
  const hasNewerClaim = digestVersion < currentMaxCreatedAtUnix;
  return hasNewerClaim;
}
|
|
626
|
+
|
|
627
|
+
/** Inputs to the digest recompile guard. */
export interface RecompileGuardInput {
  /** Number of claims written since the last compilation. */
  countNewClaims: number;
  /** Hours elapsed since the last compilation. */
  hoursSinceCompilation: number;
}

/**
 * Recompile guard (plan §15.10): a recompile is due once at least 10 new
 * claims have accumulated OR at least 24 hours have passed since the last
 * compilation.
 *
 * This is a pure predicate. The caller still owns the in-memory
 * "in progress" flag (see digest-sync.ts).
 *
 * @param input - New-claim count and hours since compilation.
 * @returns `true` when either threshold is reached.
 */
export function shouldRecompile(input: RecompileGuardInput): boolean {
  const { countNewClaims, hoursSinceCompilation } = input;
  if (countNewClaims >= 10) {
    return true;
  }
  return hoursSinceCompilation >= 24;
}
|
|
643
|
+
|
|
644
|
+
// ---------------------------------------------------------------------------
|
|
645
|
+
// Entity trapdoors
|
|
646
|
+
// ---------------------------------------------------------------------------
|
|
647
|
+
|
|
648
|
+
/**
 * Compute a single entity trapdoor: sha256("entity:" + normalized_name),
 * hex-encoded. The name is normalized by the core's `normalizeEntityName`.
 *
 * This is the same primitive (plain SHA-256, not HMAC) as the word / stem
 * trapdoors from `generateBlindIndices()`. The `entity:` prefix namespaces
 * the result, so an entity called "postgresql" never collides with the word
 * trapdoor for the token "postgresql". The search path must build its
 * queries with the same prefix.
 *
 * Why plain SHA-256 rather than HMAC: the existing word trapdoor
 * implementation in `rust/totalreclaw-core/src/blind.rs` hashes the
 * normalized token with plain SHA-256 (no dedup_key). Entity trapdoors share
 * the same blindIndices array and must be findable by the current search
 * pipeline, so they use the same primitive; switching only entities to HMAC
 * would break search consistency.
 *
 * @param name - Raw entity name.
 * @returns 64-character lowercase hex digest.
 */
export function computeEntityTrapdoor(name: string): string {
  const canonicalName = getWasm().normalizeEntityName(name);
  const hasher = crypto.createHash('sha256');
  hasher.update('entity:' + canonicalName);
  return hasher.digest('hex');
}
|
|
669
|
+
|
|
670
|
+
/**
 * Compute the entity trapdoor of every entity on a fact.
 *
 * Duplicates are removed while keeping first-occurrence order. A missing or
 * empty entity list produces an empty array.
 *
 * @param entities - Entities attached to the fact, if any.
 * @returns Unique trapdoor hex strings.
 */
export function computeEntityTrapdoors(entities: readonly ExtractedEntity[] | undefined): string[] {
  if (!entities?.length) return [];

  // A Set iterates in insertion order, so first occurrences keep their place.
  const unique = new Set<string>();
  for (const entity of entities) {
    unique.add(computeEntityTrapdoor(entity.name));
  }
  return Array.from(unique);
}
|