clawmem 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +13 -1
- package/CLAUDE.md +13 -1
- package/README.md +12 -0
- package/package.json +1 -1
- package/src/amem.ts +13 -2
- package/src/clawmem.ts +14 -0
- package/src/consolidation.ts +352 -34
- package/src/entity.ts +497 -0
- package/src/graph-traversal.ts +232 -0
- package/src/hooks/context-surfacing.ts +39 -6
- package/src/hooks/decision-extractor.ts +24 -2
- package/src/hooks/staleness-check.ts +27 -0
- package/src/hooks.ts +1 -0
- package/src/intent.ts +87 -13
- package/src/mcp.ts +170 -16
- package/src/openclaw/index.ts +33 -1
- package/src/store.ts +120 -9
package/src/graph-traversal.ts
CHANGED
|
@@ -201,6 +201,238 @@ export function adaptiveTraversal(
|
|
|
201
201
|
return Array.from(visited.values()).sort((a, b) => b.score - a.score);
|
|
202
202
|
}
|
|
203
203
|
|
|
204
|
+
// =============================================================================
|
|
205
|
+
// MPFP: Multi-Path Fact Propagation (Pattern E)
|
|
206
|
+
// =============================================================================
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
 * A single meta-path pattern for graph traversal.
|
|
210
|
+
* Each pattern is a sequence of edge types to follow at each hop.
|
|
211
|
+
*/
|
|
212
|
+
export type MetaPath = string[];
|
|
213
|
+
|
|
214
|
+
/**
 * Select the MPFP meta-path patterns to traverse for a query intent.
 *
 * Each pattern is an ordered list of edge types, one per hop. A fresh array is
 * built on every call, so callers may mutate the result.
 *
 * @param intent - Classified query intent.
 * @returns The patterns for that intent. A value not handled below (possible at
 *   runtime when the intent comes from an unchecked cast or external input)
 *   falls back to plain topic expansion instead of returning `undefined`.
 */
export function getMetaPathsForIntent(intent: IntentType): MetaPath[] {
  switch (intent) {
    case 'WHY':
      return [
        ['semantic', 'causal'],    // forward causal reasoning
        ['causal', 'semantic'],    // backward reasoning → context
        ['semantic', 'semantic'],  // topic expansion
      ];
    case 'ENTITY':
      return [
        ['entity', 'semantic'],    // entity → related topics
        ['entity', 'entity'],      // entity co-occurrence chains
        ['semantic', 'entity'],    // topic → entity discovery
      ];
    case 'WHEN':
      return [
        ['temporal', 'semantic'],  // timeline → context
        ['semantic', 'temporal'],  // context → timeline
      ];
    case 'WHAT':
      return [
        ['semantic', 'semantic'],   // topic expansion
        ['semantic', 'supporting'], // evidence chains
      ];
    default:
      // Unknown intent at runtime: keep traversal working with the most
      // generic pattern rather than handing `undefined` to the caller.
      return [['semantic', 'semantic']];
  }
}
|
|
243
|
+
|
|
244
|
+
/**
|
|
245
|
+
* Shared edge cache for hop-synchronized loading.
|
|
246
|
+
* All patterns share this cache to avoid redundant DB queries.
|
|
247
|
+
*/
|
|
248
|
+
type EdgeCache = Map<number, Map<string, { docId: number; weight: number }[]>>;
|
|
249
|
+
|
|
250
|
+
/**
 * Batch-load the edges of one relation type for a set of nodes into `edgeCache`.
 *
 * Nodes that already have a cache entry for `edgeType` are skipped, so the
 * cache can be shared across patterns without re-querying. Every remaining node
 * receives an entry (possibly an empty list) after the call.
 *
 * Edges are read from `memory_relations`. For the symmetric types `semantic`
 * and `entity`, edges pointing *at* a node are loaded too and stored as if they
 * pointed away from it.
 *
 * @param db - Database instance
 * @param nodeIds - Nodes whose edges are needed
 * @param edgeType - `relation_type` value to load
 * @param edgeCache - Cache to populate (mutated in place)
 * @param topK - Maximum number of edges kept per node, heaviest first
 */
function batchLoadEdges(
  db: Database,
  nodeIds: number[],
  edgeType: string,
  edgeCache: EdgeCache,
  topK: number = 10
): void {
  type EdgeRow = { source_id: number; docId: number; weight: number };

  // Only load nodes not already cached for this edge type
  const uncached = [...new Set(nodeIds)].filter(id => !edgeCache.get(id)?.has(edgeType));
  if (uncached.length === 0) return;

  const placeholders = uncached.map(() => '?').join(',');

  // Outbound edges (source → target)
  const outbound = db.prepare(`
    SELECT source_id, target_id as docId, weight
    FROM memory_relations
    WHERE source_id IN (${placeholders}) AND relation_type = ?
    ORDER BY weight DESC
  `).all(...uncached, edgeType) as EdgeRow[];

  // Inbound edges for symmetric types (semantic, entity), aliased so that
  // `source_id` is always the node we are loading edges for.
  let inbound: EdgeRow[] = [];
  if (edgeType === 'semantic' || edgeType === 'entity') {
    inbound = db.prepare(`
      SELECT target_id as source_id, source_id as docId, weight
      FROM memory_relations
      WHERE target_id IN (${placeholders}) AND relation_type = ?
      ORDER BY weight DESC
    `).all(...uncached, edgeType) as EdgeRow[];
  }

  // Group rows by node in a single pass instead of rescanning per node.
  const byNode = new Map<number, { docId: number; weight: number }[]>();
  for (const id of uncached) byNode.set(id, []);
  for (const row of outbound.concat(inbound)) {
    byNode.get(row.source_id)?.push({ docId: row.docId, weight: row.weight });
  }

  for (const [nodeId, edges] of byNode) {
    // Rank outbound and inbound edges together before truncating; slicing the
    // plain concatenation would let light outbound edges crowd out heavier
    // inbound ones. The sort is stable, so ties keep outbound-first order.
    edges.sort((a, b) => b.weight - a.weight);

    let perType = edgeCache.get(nodeId);
    if (!perType) {
      perType = new Map();
      edgeCache.set(nodeId, perType);
    }
    perType.set(edgeType, edges.slice(0, topK));
  }
}
|
|
300
|
+
|
|
301
|
+
/**
 * Execute a single meta-path traversal, pushing score mass outward hop by hop.
 *
 * Seeds start with their full score as residual mass and keep `score * alpha`
 * as their own result. At each hop, every node whose residual exceeds
 * `threshold` passes `(1 - alpha)` of its mass to its neighbours of that hop's
 * edge type, split in proportion to edge weight. Each receiving node adds
 * `share * alpha` to its result and carries the full share as residual into the
 * next hop. Mass at a node with no edges (or zero total weight) is dropped.
 *
 * @param db - Database instance
 * @param anchors - Seed nodes with initial scores
 * @param metaPath - Edge type sequence to follow at each hop
 * @param edgeCache - Shared edge cache (hop-synchronized)
 * @param alpha - Teleport probability (default 0.15)
 * @param threshold - Mass pruning threshold (default 1e-4)
 * @returns Nodes whose accumulated score exceeds `threshold`, highest first.
 *   `hops` is the hop at which the node was first reached (0 for seeds);
 *   `viaRelation` is the whole meta-path joined with `→`.
 */
function executeMetaPath(
  db: Database,
  anchors: { docId: number; score: number }[],
  metaPath: MetaPath,
  edgeCache: EdgeCache,
  alpha: number = 0.15,
  threshold: number = 1e-4
): TraversalNode[] {
  const scores = new Map<number, number>();   // docId → accumulated score
  const firstHop = new Map<number, number>(); // docId → hop at which it was first reached

  // Initialize residual with anchor scores
  let residual = new Map<number, number>();
  for (const anchor of anchors) {
    residual.set(anchor.docId, anchor.score);
    scores.set(anchor.docId, anchor.score * alpha); // teleport portion stays at seed
    firstHop.set(anchor.docId, 0);
  }

  // Walk each hop in the meta-path
  for (let hop = 0; hop < metaPath.length; hop++) {
    const edgeType = metaPath[hop]!;
    const active = [...residual].filter(([, mass]) => mass > threshold);
    if (active.length === 0) break;

    // Batch-load edges for this hop (shared cache)
    batchLoadEdges(db, active.map(([id]) => id), edgeType, edgeCache);

    const nextResidual = new Map<number, number>();

    for (const [nodeId, mass] of active) {
      const edges = edgeCache.get(nodeId)?.get(edgeType) ?? [];
      if (edges.length === 0) continue;

      const totalWeight = edges.reduce((sum, e) => sum + e.weight, 0);
      if (totalWeight === 0) continue;

      // Split the propagated mass across neighbours in proportion to edge weight
      const propagated = mass * (1 - alpha);
      for (const edge of edges) {
        const share = (propagated * edge.weight) / totalWeight;
        nextResidual.set(edge.docId, (nextResidual.get(edge.docId) ?? 0) + share);

        // Accumulate in results (teleport portion)
        scores.set(edge.docId, (scores.get(edge.docId) ?? 0) + share * alpha);

        // Remember the real distance instead of reporting the full path
        // length for every node, seeds included.
        if (!firstHop.has(edge.docId)) firstHop.set(edge.docId, hop + 1);
      }
    }

    residual = nextResidual;
  }

  // Convert to TraversalNode array
  return [...scores]
    .filter(([, score]) => score > threshold)
    .map(([docId, score]) => ({
      docId,
      path: getDocPath(db, docId),
      score,
      hops: firstHop.get(docId) ?? metaPath.length,
      viaRelation: metaPath.join('→'),
    }))
    .sort((a, b) => b.score - a.score);
}
|
|
377
|
+
|
|
378
|
+
/**
 * MPFP (Multi-Path Fact Propagation) traversal.
 *
 * Resolves the seed hashes to document IDs, runs every meta-path selected for
 * the intent one after another over a shared edge cache, and fuses the per-path
 * results by keeping, for each document, the node with the highest score.
 *
 * @param db - Database instance
 * @param anchors - Seed documents (from BM25/vector search), identified by hash
 * @param intent - Query intent for pattern selection
 * @param budget - Maximum total nodes to return
 * @returns Fused nodes, highest score first, truncated to `budget`. Empty when
 *   none of the anchor hashes resolves to a document.
 */
export function mpfpTraversal(
  db: Database,
  anchors: { hash: string; score: number }[],
  intent: IntentType,
  budget: number = 30
): TraversalNode[] {
  // Convert hashes to IDs, dropping anchors that do not resolve
  const seeds = anchors.flatMap(({ hash, score }) => {
    const docId = getDocIdFromHash(db, hash);
    return docId !== null ? [{ docId, score }] : [];
  });
  if (seeds.length === 0) return [];

  // One cache for all patterns so a (node, edge type) pair is queried once
  const sharedEdges: EdgeCache = new Map();

  // Fuse via max-score (not RRF): the propagation mass is an absolute quantity
  // whose magnitude carries signal, and rank-only fusion would erase the gap
  // between a strong hit (0.9) and a barely-surviving tail hit (0.01).
  // Meta-paths are alternative explanations, so the best supporting path wins.
  const best = new Map<number, TraversalNode>();
  for (const metaPath of getMetaPathsForIntent(intent)) {
    // Synchronous on purpose — SQLite is single-threaded anyway
    for (const node of executeMetaPath(db, seeds, metaPath, sharedEdges)) {
      const incumbent = best.get(node.docId);
      if (incumbent && !(node.score > incumbent.score)) continue;
      best.set(node.docId, node);
    }
  }

  const ranked = [...best.values()];
  ranked.sort((a, b) => b.score - a.score);
  return ranked.slice(0, budget);
}
|
|
431
|
+
|
|
432
|
+
// =============================================================================
|
|
433
|
+
// Merge Helpers
|
|
434
|
+
// =============================================================================
|
|
435
|
+
|
|
204
436
|
/**
|
|
205
437
|
* Merge graph traversal results with original search results.
|
|
206
438
|
* Returns results with both hash and score for re-integration.
|
|
@@ -52,6 +52,11 @@ function getTierConfig(score: number): { snippetLen: number; showMeta: boolean;
|
|
|
52
52
|
// Directories to never surface
|
|
53
53
|
const FILTERED_PATHS = ["_PRIVATE/", "experiments/", "_clawmem/"];
|
|
54
54
|
|
|
55
|
+
// Memory nudge: after this many context-surfacing prompts without a lifecycle
// tool being used, the agent is reminded to manage memory. Configurable through
// CLAWMEM_NUDGE_INTERVAL; defaults to 15 when unset or empty.
const NUDGE_INTERVAL = Number.parseInt(process.env.CLAWMEM_NUDGE_INTERVAL || "15", 10);

// hook_name values in context_usage that count as "the agent managed memory"
const LIFECYCLE_HOOK_NAMES = [
  "memory_pin",
  "memory_forget",
  "memory_snooze",
  "lifecycle-archive",
];

// Reminder text injected when a nudge is due
const NUDGE_TEXT = "You haven't managed memory recently. If vault-context is surfacing noise → snooze it. If a critical decision was just made → pin it. If stale knowledge appeared → forget it.";
|
|
59
|
+
|
|
55
60
|
// File path patterns to extract from prompts (E13 replacement: file-aware UserPromptSubmit)
|
|
56
61
|
const FILE_PATH_RE = /(?:^|\s)((?:\/[\w.@-]+)+(?:\.\w+)?|[\w.@-]+\.(?:ts|js|py|md|sh|yaml|yml|json|toml|rs|go|tsx|jsx|css|html))\b/g;
|
|
57
62
|
|
|
@@ -331,12 +336,15 @@ export async function contextSurfacing(
|
|
|
331
336
|
// This makes routing instructions salient at the moment of tool selection (per research)
|
|
332
337
|
const routingHint = detectRoutingHint(prompt);
|
|
333
338
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
);
|
|
339
|
+
// Memory nudge: periodically remind agent to use lifecycle tools
|
|
340
|
+
const nudge = NUDGE_INTERVAL > 0 ? shouldNudge(store) : null;
|
|
341
|
+
|
|
342
|
+
const parts: string[] = [];
|
|
343
|
+
if (routingHint) parts.push(`<vault-routing>${routingHint}</vault-routing>`);
|
|
344
|
+
parts.push(`<vault-context>\n${context}\n</vault-context>`);
|
|
345
|
+
if (nudge) parts.push(`<vault-nudge>${NUDGE_TEXT}</vault-nudge>`);
|
|
346
|
+
|
|
347
|
+
return makeContextOutput("context-surfacing", parts.join("\n"));
|
|
340
348
|
}
|
|
341
349
|
|
|
342
350
|
// =============================================================================
|
|
@@ -422,3 +430,28 @@ function buildContext(
|
|
|
422
430
|
tokens: totalTokens,
|
|
423
431
|
};
|
|
424
432
|
}
|
|
433
|
+
|
|
434
|
+
/**
 * Decide whether the agent is due a reminder to use the memory lifecycle tools.
 *
 * Looks up the highest `context_usage.id` whose `hook_name` is one of
 * LIFECYCLE_HOOK_NAMES, then counts the `context-surfacing` rows with a larger
 * id. When no lifecycle row exists, every `context-surfacing` row counts.
 * NOTE(review): this relies on `id` growing with insertion order — confirm
 * against the table definition.
 *
 * @param store - Store whose `db` holds the `context_usage` table
 * @returns true when that count has reached NUDGE_INTERVAL; false otherwise,
 *   and also false on any database error.
 */
function shouldNudge(store: Store): boolean {
  try {
    const placeholders = LIFECYCLE_HOOK_NAMES.map(() => "?").join(",");

    // Most recent lifecycle tool use, if any
    const lastLifecycle = store.db.prepare(`
      SELECT MAX(id) as max_id FROM context_usage
      WHERE hook_name IN (${placeholders})
    `).get(...LIFECYCLE_HOOK_NAMES) as { max_id: number | null } | undefined;
    const sinceId = lastLifecycle?.max_id ?? 0;

    // context-surfacing invocations recorded after it
    const surfaced = store.db.prepare(`
      SELECT COUNT(*) as cnt FROM context_usage
      WHERE hook_name = 'context-surfacing' AND id > ?
    `).get(sinceId) as { cnt: number } | undefined;
    const invocations = surfaced?.cnt ?? 0;

    return invocations >= NUDGE_INTERVAL;
  } catch {
    // DB error — fail silent, no nudge
    return false;
  }
}
|
|
@@ -254,13 +254,35 @@ Only include pairs with confidence >= 0.7. Return [] if no relationships found.
|
|
|
254
254
|
if (rel.relation === "contradiction") {
|
|
255
255
|
// Lower old doc confidence by 0.25 (floor 0.2)
|
|
256
256
|
const currentConfidence = existingDoc.confidence ?? 0.5;
|
|
257
|
+
const newConfidence = Math.max(0.2, currentConfidence - 0.25);
|
|
257
258
|
store.updateDocumentMeta(existingDoc.id, {
|
|
258
|
-
confidence:
|
|
259
|
+
confidence: newConfidence,
|
|
259
260
|
});
|
|
260
261
|
contradictionCount++;
|
|
261
262
|
console.error(
|
|
262
263
|
`[decision-extractor] CONTRADICTION: "${newFacts[rel.new_idx]}" vs "${oldDoc.displayPath}" (conf: ${rel.confidence})`
|
|
263
264
|
);
|
|
265
|
+
|
|
266
|
+
// Soft invalidation: if confidence drops to floor AND content is observation type,
|
|
267
|
+
// mark as invalidated (Pattern I — prevents stale contradicted knowledge from surfacing)
|
|
268
|
+
if (newConfidence <= 0.2) {
|
|
269
|
+
try {
|
|
270
|
+
// Find the new contradicting observation's doc ID (if already persisted in this session)
|
|
271
|
+
const newObsDoc = store.db.prepare(`
|
|
272
|
+
SELECT id FROM documents
|
|
273
|
+
WHERE collection = '_clawmem' AND path LIKE ? AND active = 1
|
|
274
|
+
ORDER BY created_at DESC LIMIT 1
|
|
275
|
+
`).get(`%${sessionId.slice(0, 8)}%decision%`) as { id: number } | undefined;
|
|
276
|
+
|
|
277
|
+
store.db.prepare(`
|
|
278
|
+
UPDATE documents
|
|
279
|
+
SET invalidated_at = datetime('now'),
|
|
280
|
+
invalidated_by = ?
|
|
281
|
+
WHERE id = ? AND invalidated_at IS NULL AND content_type = 'observation'
|
|
282
|
+
`).run(newObsDoc?.id || null, existingDoc.id);
|
|
283
|
+
} catch { /* non-fatal — invalidation is best-effort */ }
|
|
284
|
+
}
|
|
285
|
+
|
|
264
286
|
} else if (rel.relation === "update") {
|
|
265
287
|
// Lower old doc confidence by 0.15 (floor 0.3)
|
|
266
288
|
const currentConfidence = existingDoc.confidence ?? 0.5;
|
|
@@ -313,7 +335,7 @@ export async function decisionExtractor(
|
|
|
313
335
|
const doc = store.findActiveDocument("_clawmem", obsPath);
|
|
314
336
|
if (doc) {
|
|
315
337
|
store.updateDocumentMeta(doc.id, {
|
|
316
|
-
content_type: obs.type === "decision" ? "decision" : "
|
|
338
|
+
content_type: obs.type === "decision" ? "decision" : "observation",
|
|
317
339
|
confidence: 0.80,
|
|
318
340
|
});
|
|
319
341
|
store.updateObservationFields(obsPath, "_clawmem", {
|
|
@@ -51,6 +51,33 @@ export async function stalenessCheck(
|
|
|
51
51
|
}
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
+
// Auto-archive if lifecycle policy is configured (runs regardless of stale report results)
|
|
55
|
+
try {
|
|
56
|
+
const { loadVaultConfig } = await import("../config.ts");
|
|
57
|
+
const config = loadVaultConfig();
|
|
58
|
+
if (config.lifecycle) {
|
|
59
|
+
const candidates = store.getArchiveCandidates(config.lifecycle);
|
|
60
|
+
if (candidates.length > 0 && !config.lifecycle.dry_run) {
|
|
61
|
+
const archived = store.archiveDocuments(candidates.map(c => c.id));
|
|
62
|
+
if (archived > 0 && input.sessionId) {
|
|
63
|
+
store.insertUsage({
|
|
64
|
+
sessionId: input.sessionId,
|
|
65
|
+
timestamp: now.toISOString(),
|
|
66
|
+
hookName: "lifecycle-archive",
|
|
67
|
+
injectedPaths: candidates.map(c => `${c.collection}/${c.path}`),
|
|
68
|
+
estimatedTokens: 0,
|
|
69
|
+
wasReferenced: 0,
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
if (config.lifecycle.purge_after_days && !config.lifecycle.dry_run) {
|
|
74
|
+
store.purgeArchivedDocuments(config.lifecycle.purge_after_days);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
} catch {
|
|
78
|
+
// Fail-open: lifecycle errors never block the hook
|
|
79
|
+
}
|
|
80
|
+
|
|
54
81
|
if (allStale.size === 0) return makeEmptyOutput("staleness-check");
|
|
55
82
|
|
|
56
83
|
// Build context within budget
|
package/src/hooks.ts
CHANGED
|
@@ -84,6 +84,7 @@ const HOOK_EVENT_MAP: Record<string, string | null> = {
|
|
|
84
84
|
"feedback-loop": null, // Stop — no hookSpecificOutput
|
|
85
85
|
"precompact-extract": null, // PreCompact — side-effect only, no context injection
|
|
86
86
|
"postcompact-inject": "SessionStart", // SessionStart(compact) — injects additionalContext
|
|
87
|
+
"curator-nudge": "SessionStart", // SessionStart — surfaces curator report actions
|
|
87
88
|
"pretool-inject": null, // PreToolUse — disabled (cannot inject additionalContext; E13 folded into context-surfacing)
|
|
88
89
|
};
|
|
89
90
|
|
package/src/intent.ts
CHANGED
|
@@ -29,40 +29,114 @@ const ENTITY_PATTERNS = /\b(who|person|team|project|(?:@|#)\w+|relationship|ment
|
|
|
29
29
|
// Temporal extraction patterns
|
|
30
30
|
type TemporalExtractor = (now: Date, match?: RegExpMatchArray) => { start?: string; end?: string };
|
|
31
31
|
|
|
32
|
+
// Three-letter lowercase month abbreviation → zero-based month index
const MONTH_MAP: Record<string, number> = {
  jan: 0, feb: 1, mar: 2, apr: 3, may: 4, jun: 5,
  jul: 6, aug: 7, sep: 8, oct: 9, nov: 10, dec: 11,
};

/**
 * Render a Date as YYYY-MM-DD using its local-timezone calendar fields.
 * Going through toISOString() instead would report the UTC date, which can be
 * a day off from the local one.
 */
function localDateStr(d: Date): string {
  const twoDigits = (n: number): string => String(n).padStart(2, '0');
  return [d.getFullYear(), twoDigits(d.getMonth() + 1), twoDigits(d.getDate())].join('-');
}
|
|
38
|
+
|
|
32
39
|
/**
 * Return a copy of `from` moved back by `months` calendar months, clamping the
 * day to the last day of the target month. A bare `setMonth(getMonth() - n)`
 * overflows instead: March 31 minus one month becomes "February 31", which
 * Date normalises to early March.
 */
function shiftMonthsBack(from: Date, months: number): Date {
  const shifted = new Date(from);
  const day = shifted.getDate();
  shifted.setDate(1); // park on the 1st so changing the month cannot overflow
  shifted.setMonth(shifted.getMonth() - months);
  const lastDay = new Date(shifted.getFullYear(), shifted.getMonth() + 1, 0).getDate();
  shifted.setDate(Math.min(day, lastDay));
  return shifted;
}

/**
 * Relative/absolute time phrases and the extractors that turn them into a
 * local-date range (YYYY-MM-DD strings). Each extractor receives the current
 * time and the regex match. Order matters to consumers that stop at the first
 * matching pattern.
 */
const TEMPORAL_RELATIVE: [RegExp, TemporalExtractor][] = [
  [/\btoday\b/i, (now: Date) => {
    const d = localDateStr(now);
    return { start: d, end: d };
  }],
  [/\byesterday\b/i, (now: Date) => {
    const s = new Date(now); s.setDate(s.getDate() - 1);
    return { start: localDateStr(s), end: localDateStr(s) };
  }],
  [/\bthis week\b/i, (now: Date) => {
    // Week starts on Sunday (getDay() === 0)
    const s = new Date(now); s.setDate(s.getDate() - s.getDay());
    return { start: localDateStr(s), end: localDateStr(now) };
  }],
  [/\blast week\b/i, (now: Date) => {
    const s = new Date(now); s.setDate(s.getDate() - 7);
    return { start: localDateStr(s), end: localDateStr(now) };
  }],
  [/\bthis month\b/i, (now: Date) => {
    const s = new Date(now.getFullYear(), now.getMonth(), 1);
    return { start: localDateStr(s), end: localDateStr(now) };
  }],
  [/\blast month\b/i, (now: Date) => {
    const s = shiftMonthsBack(now, 1);
    return { start: localDateStr(s), end: localDateStr(now) };
  }],
  [/\b(\d+)\s*days?\s*ago\b/i, (now: Date, m?: RegExpMatchArray) => {
    const s = new Date(now); s.setDate(s.getDate() - parseInt(m?.[1] ?? "1", 10));
    return { start: localDateStr(s), end: localDateStr(now) };
  }],
  [/\b(\d+)\s*weeks?\s*ago\b/i, (now: Date, m?: RegExpMatchArray) => {
    const s = new Date(now); s.setDate(s.getDate() - parseInt(m?.[1] ?? "1", 10) * 7);
    return { start: localDateStr(s), end: localDateStr(now) };
  }],
  [/\b(\d+)\s*months?\s*ago\b/i, (now: Date, m?: RegExpMatchArray) => {
    const s = shiftMonthsBack(now, parseInt(m?.[1] ?? "1", 10));
    return { start: localDateStr(s), end: localDateStr(now) };
  }],
  [/\bsince\s+(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s*(\d{4})?\b/i, (now: Date, m?: RegExpMatchArray) => {
    const mo = MONTH_MAP[(m?.[1] ?? "jan").slice(0, 3).toLowerCase()] ?? 0;
    let yr = m?.[2] ? parseInt(m[2], 10) : now.getFullYear();
    // "since November" asked in February means last November; without this
    // the range would start after it ends.
    if (!m?.[2] && mo > now.getMonth()) yr -= 1;
    const s = new Date(yr, mo, 1);
    return { start: localDateStr(s), end: localDateStr(now) };
  }],
  [/\bin\s+(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s*(\d{4})?\b/i, (now: Date, m?: RegExpMatchArray) => {
    const mo = MONTH_MAP[(m?.[1] ?? "jan").slice(0, 3).toLowerCase()] ?? 0;
    const yr = m?.[2] ? parseInt(m[2], 10) : now.getFullYear();
    const s = new Date(yr, mo, 1);
    const e = new Date(yr, mo + 1, 0); // day 0 of the next month = last day of this one
    return { start: localDateStr(s), end: localDateStr(e) };
  }],
  [/\blast\s+(\d+)\s*days?\b/i, (now: Date, m?: RegExpMatchArray) => {
    const s = new Date(now); s.setDate(s.getDate() - parseInt(m?.[1] ?? "7", 10));
    return { start: localDateStr(s), end: localDateStr(now) };
  }],
];
|
|
58
94
|
|
|
95
|
+
/**
 * Turn a local calendar date (YYYY-MM-DD) into the UTC ISO timestamp of that
 * day's first instant in the local timezone, so it can be compared against
 * UTC `modified_at` values.
 */
function localDateToUtcStart(localDate: string): string {
  // A date-time string without an offset is interpreted as local time
  return new Date(`${localDate}T00:00:00`).toISOString();
}
|
|
103
|
+
|
|
104
|
+
/**
 * Turn a local calendar date (YYYY-MM-DD) into the UTC ISO timestamp of that
 * day's last millisecond (23:59:59.999) in the local timezone.
 */
function localDateToUtcEnd(localDate: string): string {
  // A date-time string without an offset is interpreted as local time
  return new Date(`${localDate}T23:59:59.999`).toISOString();
}
|
|
111
|
+
|
|
112
|
+
/**
 * Find a time constraint in a query using the TEMPORAL_RELATIVE patterns.
 * Pure regex — no LLM involved.
 *
 * Patterns are tried in table order against the lower-cased query. The first
 * one whose extractor yields both a start and an end date wins; a match that
 * yields an incomplete range is skipped and the search continues.
 *
 * @param query - Raw user query
 * @returns `{ start, end }` as UTC ISO timestamps spanning the local start day
 *   through the end of the local end day, or null when no pattern applies.
 */
export function extractTemporalConstraint(query: string): { start: string; end: string } | null {
  const lowered = query.toLowerCase();

  for (const [pattern, extractor] of TEMPORAL_RELATIVE) {
    const match = lowered.match(pattern);
    if (!match) continue;

    const range = extractor(new Date(), match);
    if (!range.start || !range.end) continue;

    return {
      start: localDateToUtcStart(range.start),
      end: localDateToUtcEnd(range.end),
    };
  }

  return null;
}
|
|
132
|
+
|
|
59
133
|
/**
|
|
60
134
|
* Fast heuristic intent classification (no LLM, instant).
|
|
61
135
|
*/
|
|
62
136
|
function classifyIntentHeuristic(query: string): IntentResult {
|
|
63
137
|
const q = query.toLowerCase();
|
|
64
138
|
|
|
65
|
-
// Extract temporal info
|
|
139
|
+
// Extract temporal info (local date strings for intent scoring; UTC conversion at query time)
|
|
66
140
|
let temporal_start: string | undefined;
|
|
67
141
|
let temporal_end: string | undefined;
|
|
68
142
|
const now = new Date();
|