@context-vault/core 2.14.0 → 2.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/capture/index.js +11 -0
- package/src/consolidation/index.js +112 -0
- package/src/constants.js +7 -2
- package/src/core/categories.js +10 -0
- package/src/core/config.js +46 -2
- package/src/core/status.js +28 -2
- package/src/index/db.js +102 -9
- package/src/index/index.js +48 -21
- package/src/index.js +4 -0
- package/src/retrieve/index.js +268 -64
- package/src/server/tools/context-status.js +7 -0
- package/src/server/tools/create-snapshot.js +12 -3
- package/src/server/tools/get-context.js +311 -11
- package/src/server/tools/ingest-project.js +244 -0
- package/src/server/tools/list-buckets.js +116 -0
- package/src/server/tools/save-context.js +190 -19
- package/src/server/tools/session-start.js +285 -0
- package/src/server/tools.js +6 -0
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
+
import { createHash } from "node:crypto";
|
|
3
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
4
|
+
import { resolve } from "node:path";
|
|
2
5
|
import { hybridSearch } from "../../retrieve/index.js";
|
|
3
6
|
import { categoryFor } from "../../core/categories.js";
|
|
4
7
|
import { normalizeKind } from "../../core/files.js";
|
|
@@ -6,6 +9,33 @@ import { ok, err } from "../helpers.js";
|
|
|
6
9
|
import { isEmbedAvailable } from "../../index/embed.js";
|
|
7
10
|
|
|
8
11
|
const STALE_DUPLICATE_DAYS = 7;
|
|
12
|
+
const DEFAULT_PIVOT_COUNT = 2;
|
|
13
|
+
const SKELETON_BODY_CHARS = 100;
|
|
14
|
+
const CONSOLIDATION_TAG_THRESHOLD = 10;
|
|
15
|
+
const CONSOLIDATION_SNAPSHOT_MAX_AGE_DAYS = 7;
|
|
16
|
+
const BRIEF_SCORE_BOOST = 0.05;
|
|
17
|
+
|
|
18
|
+
/**
 * Build a short preview ("skeleton") of an entry body.
 *
 * Bodies of at most SKELETON_BODY_CHARS UTF-16 code units are returned
 * unchanged. Longer bodies are cut to that length and then shortened further to
 * the last sentence end (". " or ".\n") or, failing that, the last space —
 * but only when that boundary lies beyond 40% of the limit, so the preview
 * never collapses to a few characters. "..." is always appended to a truncated
 * body (after a sentence end the period is kept, giving "....").
 *
 * @param {string|null|undefined} body - Entry body text
 * @returns {string} The preview, or "" when body is empty/nullish
 */
export function skeletonBody(body) {
  if (!body) return "";
  if (body.length <= SKELETON_BODY_CHARS) return body;

  let slice = body.slice(0, SKELETON_BODY_CHARS);

  // String#slice counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair. Drop a dangling high surrogate rather than
  // emitting half of an emoji / astral character.
  const lastUnit = slice.charCodeAt(slice.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    slice = slice.slice(0, -1);
  }

  // A boundary is only accepted if it keeps more than 40% of the budget.
  const minBreak = SKELETON_BODY_CHARS * 0.4;

  const sentenceEnd = Math.max(
    slice.lastIndexOf(". "),
    slice.lastIndexOf(".\n"),
  );
  if (sentenceEnd > minBreak) {
    // +1 keeps the period that ends the sentence.
    return slice.slice(0, sentenceEnd + 1) + "...";
  }

  const wordEnd = slice.lastIndexOf(" ");
  if (wordEnd > minBreak) {
    return slice.slice(0, wordEnd) + "...";
  }

  return slice + "...";
}
|
|
9
39
|
|
|
10
40
|
/**
|
|
11
41
|
* Detect conflicts among a set of search result entries.
|
|
@@ -86,6 +116,131 @@ export function detectConflicts(entries, _ctx) {
|
|
|
86
116
|
return conflicts;
|
|
87
117
|
}
|
|
88
118
|
|
|
119
|
+
/**
 * Detect tag clusters that would benefit from consolidation via create_snapshot.
 * A suggestion is emitted when a tag appears on threshold+ entries in the full
 * vault AND no recent brief (kind='brief') exists for that tag within the
 * staleness window.
 *
 * Tag counts are derived from the full vault (not just the search result set)
 * so the check reflects the true size of the knowledge cluster. Only tags that
 * appear in the current search results are evaluated — this keeps the check
 * targeted to what the user is actually working with.
 *
 * The check is best-effort: entries whose `tags` column is not a JSON array of
 * strings are ignored, and any database error skips the affected tag (or, if
 * the statements cannot be prepared at all, yields no suggestions).
 *
 * @param {Array} entries - Search result rows (used to select candidate tags)
 * @param {import('node:sqlite').DatabaseSync} db - Database handle for vault-wide counts and brief lookups
 * @param {number|undefined} userId - Optional user_id scope; when undefined only rows with user_id IS NULL are considered
 * @param {{ tagThreshold?: number, maxAgeDays?: number }} opts - Configurable thresholds
 * @returns {Array<{tag: string, entry_count: number, last_snapshot_age_days: number|null}>}
 */
export function detectConsolidationHints(entries, db, userId, opts = {}) {
  const MS_PER_DAY = 86400000;
  const tagThreshold = opts.tagThreshold ?? CONSOLIDATION_TAG_THRESHOLD;
  const maxAgeDays = opts.maxAgeDays ?? CONSOLIDATION_SNAPSHOT_MAX_AGE_DAYS;

  // Collect candidate tags from non-brief results. A single malformed row must
  // not abort the whole (opportunistic) check, so parse failures are skipped.
  const candidateTags = new Set();
  for (const entry of entries) {
    if (entry.kind === "brief" || !entry.tags) continue;
    let entryTags;
    try {
      entryTags = JSON.parse(entry.tags);
    } catch {
      continue;
    }
    if (!Array.isArray(entryTags)) continue;
    for (const tag of entryTags) {
      if (typeof tag === "string") candidateTags.add(tag);
    }
  }

  if (candidateTags.size === 0) return [];

  const hasUser = userId !== undefined;
  const userClause = hasUser ? " AND user_id = ?" : " AND user_id IS NULL";

  // Both statements are identical for every tag, so prepare them once.
  let countStmt;
  let briefStmt;
  try {
    countStmt = db.prepare(
      `SELECT COUNT(*) as c FROM vault WHERE kind != 'brief' AND tags LIKE ? ESCAPE '\\'${userClause} AND (expires_at IS NULL OR expires_at > datetime('now')) AND superseded_by IS NULL`,
    );
    briefStmt = db.prepare(
      `SELECT created_at FROM vault WHERE kind = 'brief' AND tags LIKE ? ESCAPE '\\'${userClause} ORDER BY created_at DESC LIMIT 1`,
    );
  } catch {
    return [];
  }

  const suggestions = [];
  const now = Date.now();
  const cutoffMs = now - maxAgeDays * MS_PER_DAY;

  for (const tag of candidateTags) {
    // Match the tag as it appears inside the JSON-encoded tags column:
    // JSON.stringify adds the quotes and JSON escapes, then LIKE wildcards
    // (and the escape character itself) are neutralised so that tags such as
    // "100%" or "a_b" only match themselves.
    const pattern = `%${JSON.stringify(tag).replace(/[\\%_]/g, "\\$&")}%`;
    const params = hasUser ? [pattern, userId] : [pattern];

    let vaultCount = 0;
    try {
      vaultCount = countStmt.get(...params)?.c ?? 0;
    } catch {
      continue;
    }

    if (vaultCount < tagThreshold) continue;

    let lastSnapshotAgeDays = null;
    try {
      const recentBrief = briefStmt.get(...params);
      if (recentBrief) {
        const createdMs = new Date(recentBrief.created_at).getTime();
        // Compare instants numerically: a lexical comparison against an ISO
        // cutoff is only correct when created_at uses the exact same format.
        // An unparsable timestamp is treated as "age unknown".
        if (Number.isFinite(createdMs)) {
          if (createdMs >= cutoffMs) continue; // a fresh brief already exists
          lastSnapshotAgeDays = Math.round((now - createdMs) / MS_PER_DAY);
        }
      }
    } catch {
      continue;
    }

    suggestions.push({
      tag,
      entry_count: vaultCount,
      last_snapshot_age_days: lastSnapshotAgeDays,
    });
  }

  return suggestions;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Report whether any source file recorded on an entry no longer matches the
 * SHA-256 hash stored with it.
 *
 * `entry.source_files` is expected to be a JSON array of `{ path, hash }`
 * records. Paths that do not start with "/" are resolved against the current
 * working directory. The first record whose file is missing or whose content
 * hash differs decides the result. Records that cannot be checked (bad shape,
 * unreadable file) are skipped — the check is best-effort and never throws.
 *
 * @param {object} entry - DB row with source_files JSON column
 * @returns {{ stale: boolean, stale_reason: string } | null} Staleness info, or null when nothing stale was found
 */
function checkStaleness(entry) {
  const raw = entry.source_files;
  if (!raw) return null;

  let tracked;
  try {
    tracked = JSON.parse(raw);
  } catch {
    return null;
  }
  if (!Array.isArray(tracked) || tracked.length === 0) return null;

  for (const record of tracked) {
    try {
      let target = record.path;
      if (!target.startsWith("/")) {
        target = resolve(process.cwd(), target);
      }

      if (!existsSync(target)) {
        return { stale: true, stale_reason: "source file not found" };
      }

      const digest = createHash("sha256")
        .update(readFileSync(target))
        .digest("hex");
      if (digest !== record.hash) {
        return {
          stale: true,
          stale_reason: "source file modified since observation",
        };
      }
    } catch {
      // Unreadable or malformed record: ignore it and keep checking the rest.
    }
  }

  return null;
}
|
|
243
|
+
|
|
89
244
|
export const name = "get_context";
|
|
90
245
|
|
|
91
246
|
export const description =
|
|
@@ -113,7 +268,15 @@ export const inputSchema = {
|
|
|
113
268
|
tags: z
|
|
114
269
|
.array(z.string())
|
|
115
270
|
.optional()
|
|
116
|
-
.describe(
|
|
271
|
+
.describe(
|
|
272
|
+
"Filter by tags (entries must match at least one). Use 'bucket:' prefixed tags for project-scoped retrieval (e.g., ['bucket:autohub']).",
|
|
273
|
+
),
|
|
274
|
+
buckets: z
|
|
275
|
+
.array(z.string())
|
|
276
|
+
.optional()
|
|
277
|
+
.describe(
|
|
278
|
+
"Filter by project-scoped buckets. Each name expands to a 'bucket:<name>' tag. Composes with 'tags' via OR (entries matching any tag or any bucket are included).",
|
|
279
|
+
),
|
|
117
280
|
since: z
|
|
118
281
|
.string()
|
|
119
282
|
.optional()
|
|
@@ -135,6 +298,30 @@ export const inputSchema = {
|
|
|
135
298
|
.describe(
|
|
136
299
|
"If true, compare results for contradicting entries and append a conflicts array. Flags superseded entries still in results and stale duplicates (same kind+tags, updated_at >7 days apart). No LLM calls — pure DB logic.",
|
|
137
300
|
),
|
|
301
|
+
max_tokens: z
|
|
302
|
+
.number()
|
|
303
|
+
.optional()
|
|
304
|
+
.describe(
|
|
305
|
+
"Limit output to entries that fit within this token budget (rough estimate: 1 token ≈ 4 chars). Entries are packed greedily by relevance rank. At least 1 result is always returned. Response metadata includes tokens_used and tokens_budget.",
|
|
306
|
+
),
|
|
307
|
+
pivot_count: z
|
|
308
|
+
.number()
|
|
309
|
+
.optional()
|
|
310
|
+
.describe(
|
|
311
|
+
"Skeleton mode: top pivot_count entries by relevance are returned with full body. Remaining entries are returned as skeletons (title + tags + first ~100 chars of body). Default: 2. Set to 0 to skeleton all results, or a high number to disable.",
|
|
312
|
+
),
|
|
313
|
+
include_ephemeral: z
|
|
314
|
+
.boolean()
|
|
315
|
+
.optional()
|
|
316
|
+
.describe(
|
|
317
|
+
"If true, include ephemeral tier entries in results. Default: false — only working and durable tiers are returned.",
|
|
318
|
+
),
|
|
319
|
+
include_events: z
|
|
320
|
+
.boolean()
|
|
321
|
+
.optional()
|
|
322
|
+
.describe(
|
|
323
|
+
"If true, include event category entries in semantic search results. Default: false — events are excluded from query-based search but remain accessible via category/tag filters.",
|
|
324
|
+
),
|
|
138
325
|
};
|
|
139
326
|
|
|
140
327
|
/**
|
|
@@ -149,11 +336,16 @@ export async function handler(
|
|
|
149
336
|
category,
|
|
150
337
|
identity_key,
|
|
151
338
|
tags,
|
|
339
|
+
buckets,
|
|
152
340
|
since,
|
|
153
341
|
until,
|
|
154
342
|
limit,
|
|
155
343
|
include_superseded,
|
|
156
344
|
detect_conflicts,
|
|
345
|
+
max_tokens,
|
|
346
|
+
pivot_count,
|
|
347
|
+
include_ephemeral,
|
|
348
|
+
include_events,
|
|
157
349
|
},
|
|
158
350
|
ctx,
|
|
159
351
|
{ ensureIndexed, reindexFailed },
|
|
@@ -162,8 +354,12 @@ export async function handler(
|
|
|
162
354
|
const userId = ctx.userId !== undefined ? ctx.userId : undefined;
|
|
163
355
|
|
|
164
356
|
const hasQuery = query?.trim();
|
|
357
|
+
const shouldExcludeEvents = hasQuery && !include_events && !category;
|
|
358
|
+
// Expand buckets to bucket: prefixed tags and merge with explicit tags
|
|
359
|
+
const bucketTags = buckets?.length ? buckets.map((b) => `bucket:${b}`) : [];
|
|
360
|
+
const effectiveTags = [...(tags ?? []), ...bucketTags];
|
|
165
361
|
const hasFilters =
|
|
166
|
-
kind || category ||
|
|
362
|
+
kind || category || effectiveTags.length || since || until || identity_key;
|
|
167
363
|
if (!hasQuery && !hasFilters)
|
|
168
364
|
return err(
|
|
169
365
|
"Required: query or at least one filter (kind, category, tags, since, until, identity_key)",
|
|
@@ -215,7 +411,7 @@ export async function handler(
|
|
|
215
411
|
const effectiveLimit = limit || 10;
|
|
216
412
|
// When tag-filtering, over-fetch to compensate for post-filter reduction
|
|
217
413
|
const MAX_FETCH_LIMIT = 500;
|
|
218
|
-
const fetchLimit =
|
|
414
|
+
const fetchLimit = effectiveTags.length
|
|
219
415
|
? Math.min(effectiveLimit * 10, MAX_FETCH_LIMIT)
|
|
220
416
|
: effectiveLimit;
|
|
221
417
|
|
|
@@ -225,6 +421,7 @@ export async function handler(
|
|
|
225
421
|
const sorted = await hybridSearch(ctx, query, {
|
|
226
422
|
kindFilter,
|
|
227
423
|
categoryFilter: category || null,
|
|
424
|
+
excludeEvents: shouldExcludeEvents,
|
|
228
425
|
since: effectiveSince,
|
|
229
426
|
until: effectiveUntil,
|
|
230
427
|
limit: fetchLimit,
|
|
@@ -234,11 +431,11 @@ export async function handler(
|
|
|
234
431
|
});
|
|
235
432
|
|
|
236
433
|
// Post-filter by tags if provided, then apply requested limit
|
|
237
|
-
filtered =
|
|
434
|
+
filtered = effectiveTags.length
|
|
238
435
|
? sorted
|
|
239
436
|
.filter((r) => {
|
|
240
437
|
const entryTags = r.tags ? JSON.parse(r.tags) : [];
|
|
241
|
-
return
|
|
438
|
+
return effectiveTags.some((t) => entryTags.includes(t));
|
|
242
439
|
})
|
|
243
440
|
.slice(0, effectiveLimit)
|
|
244
441
|
: sorted;
|
|
@@ -267,6 +464,9 @@ export async function handler(
|
|
|
267
464
|
params.push(effectiveUntil);
|
|
268
465
|
}
|
|
269
466
|
clauses.push("(expires_at IS NULL OR expires_at > datetime('now'))");
|
|
467
|
+
if (!include_superseded) {
|
|
468
|
+
clauses.push("superseded_by IS NULL");
|
|
469
|
+
}
|
|
270
470
|
const where = clauses.length ? `WHERE ${clauses.join(" AND ")}` : "";
|
|
271
471
|
params.push(fetchLimit);
|
|
272
472
|
const rows = ctx.db
|
|
@@ -274,11 +474,11 @@ export async function handler(
|
|
|
274
474
|
.all(...params);
|
|
275
475
|
|
|
276
476
|
// Post-filter by tags if provided, then apply requested limit
|
|
277
|
-
filtered =
|
|
477
|
+
filtered = effectiveTags.length
|
|
278
478
|
? rows
|
|
279
479
|
.filter((r) => {
|
|
280
480
|
const entryTags = r.tags ? JSON.parse(r.tags) : [];
|
|
281
|
-
return
|
|
481
|
+
return effectiveTags.some((t) => entryTags.includes(t));
|
|
282
482
|
})
|
|
283
483
|
.slice(0, effectiveLimit)
|
|
284
484
|
: rows;
|
|
@@ -287,6 +487,23 @@ export async function handler(
|
|
|
287
487
|
for (const r of filtered) r.score = 0;
|
|
288
488
|
}
|
|
289
489
|
|
|
490
|
+
// Brief score boost: briefs rank slightly higher so consolidated snapshots
|
|
491
|
+
// surface above the individual entries they summarize.
|
|
492
|
+
for (const r of filtered) {
|
|
493
|
+
if (r.kind === "brief") r.score = (r.score || 0) + BRIEF_SCORE_BOOST;
|
|
494
|
+
}
|
|
495
|
+
filtered.sort((a, b) => b.score - a.score);
|
|
496
|
+
|
|
497
|
+
// Tier filter: exclude ephemeral entries by default (NULL tier treated as working)
|
|
498
|
+
if (!include_ephemeral) {
|
|
499
|
+
filtered = filtered.filter((r) => r.tier !== "ephemeral");
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
// Event category filter: exclude events from semantic search by default
|
|
503
|
+
if (shouldExcludeEvents) {
|
|
504
|
+
filtered = filtered.filter((r) => r.category !== "event");
|
|
505
|
+
}
|
|
506
|
+
|
|
290
507
|
if (!filtered.length) {
|
|
291
508
|
if (autoWindowed) {
|
|
292
509
|
const days = config.eventDecayDays || 30;
|
|
@@ -315,6 +532,29 @@ export async function handler(
|
|
|
315
532
|
}
|
|
316
533
|
}
|
|
317
534
|
|
|
535
|
+
// Token-budgeted packing
|
|
536
|
+
let tokensBudget = null;
|
|
537
|
+
let tokensUsed = null;
|
|
538
|
+
if (max_tokens != null && max_tokens > 0) {
|
|
539
|
+
tokensBudget = max_tokens;
|
|
540
|
+
const packed = [];
|
|
541
|
+
let used = 0;
|
|
542
|
+
for (const entry of filtered) {
|
|
543
|
+
const entryTokens = Math.ceil((entry.body?.length || 0) / 4);
|
|
544
|
+
if (packed.length === 0 || used + entryTokens <= tokensBudget) {
|
|
545
|
+
packed.push(entry);
|
|
546
|
+
used += entryTokens;
|
|
547
|
+
}
|
|
548
|
+
if (used >= tokensBudget) break;
|
|
549
|
+
}
|
|
550
|
+
tokensUsed = used;
|
|
551
|
+
filtered = packed;
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
// Skeleton mode: determine pivot threshold
|
|
555
|
+
const effectivePivot =
|
|
556
|
+
pivot_count != null ? pivot_count : DEFAULT_PIVOT_COUNT;
|
|
557
|
+
|
|
318
558
|
// Conflict detection
|
|
319
559
|
const conflicts = detect_conflicts ? detectConflicts(filtered, ctx) : [];
|
|
320
560
|
|
|
@@ -329,6 +569,11 @@ export async function handler(
|
|
|
329
569
|
);
|
|
330
570
|
const heading = hasQuery ? `Results for "${query}"` : "Filtered entries";
|
|
331
571
|
lines.push(`## ${heading} (${filtered.length} matches)\n`);
|
|
572
|
+
if (tokensBudget != null) {
|
|
573
|
+
lines.push(
|
|
574
|
+
`> Token budget: ${tokensUsed} / ${tokensBudget} tokens used.\n`,
|
|
575
|
+
);
|
|
576
|
+
}
|
|
332
577
|
if (autoWindowed) {
|
|
333
578
|
const days = config.eventDecayDays || 30;
|
|
334
579
|
lines.push(
|
|
@@ -337,23 +582,36 @@ export async function handler(
|
|
|
337
582
|
}
|
|
338
583
|
for (let i = 0; i < filtered.length; i++) {
|
|
339
584
|
const r = filtered[i];
|
|
585
|
+
const isSkeleton = i >= effectivePivot;
|
|
340
586
|
const entryTags = r.tags ? JSON.parse(r.tags) : [];
|
|
341
587
|
const tagStr = entryTags.length ? entryTags.join(", ") : "none";
|
|
342
588
|
const relPath =
|
|
343
589
|
r.file_path && config.vaultDir
|
|
344
590
|
? r.file_path.replace(config.vaultDir + "/", "")
|
|
345
591
|
: r.file_path || "n/a";
|
|
592
|
+
const skeletonLabel = isSkeleton ? " ⊘ skeleton" : "";
|
|
346
593
|
lines.push(
|
|
347
|
-
`### [${i + 1}/${filtered.length}] ${r.title || "(untitled)"} [${r.kind}/${r.category}]`,
|
|
594
|
+
`### [${i + 1}/${filtered.length}] ${r.title || "(untitled)"} [${r.kind}/${r.category}]${skeletonLabel}`,
|
|
348
595
|
);
|
|
349
596
|
const dateStr =
|
|
350
597
|
r.updated_at && r.updated_at !== r.created_at
|
|
351
598
|
? `${r.created_at} (updated ${r.updated_at})`
|
|
352
599
|
: r.created_at || "";
|
|
600
|
+
const tierStr = r.tier ? ` · tier: ${r.tier}` : "";
|
|
353
601
|
lines.push(
|
|
354
|
-
`${r.score.toFixed(3)} · ${tagStr} · ${relPath} · ${dateStr} · id: \`${r.id}\``,
|
|
602
|
+
`${r.score.toFixed(3)} · ${tagStr} · ${relPath} · ${dateStr} · skeleton: ${isSkeleton}${tierStr} · id: \`${r.id}\``,
|
|
355
603
|
);
|
|
356
|
-
|
|
604
|
+
const stalenessResult = checkStaleness(r);
|
|
605
|
+
if (stalenessResult) {
|
|
606
|
+
r.stale = true;
|
|
607
|
+
r.stale_reason = stalenessResult.stale_reason;
|
|
608
|
+
lines.push(`> ⚠ **Stale**: ${stalenessResult.stale_reason}`);
|
|
609
|
+
}
|
|
610
|
+
if (isSkeleton) {
|
|
611
|
+
lines.push(skeletonBody(r.body));
|
|
612
|
+
} else {
|
|
613
|
+
lines.push(r.body?.slice(0, 300) + (r.body?.length > 300 ? "..." : ""));
|
|
614
|
+
}
|
|
357
615
|
lines.push("");
|
|
358
616
|
}
|
|
359
617
|
|
|
@@ -374,5 +632,47 @@ export async function handler(
|
|
|
374
632
|
}
|
|
375
633
|
}
|
|
376
634
|
|
|
377
|
-
|
|
635
|
+
// Consolidation suggestion detection — lazy, opportunistic, vault-wide
|
|
636
|
+
const consolidationOpts = {
|
|
637
|
+
tagThreshold:
|
|
638
|
+
config.consolidation?.tagThreshold ?? CONSOLIDATION_TAG_THRESHOLD,
|
|
639
|
+
maxAgeDays:
|
|
640
|
+
config.consolidation?.maxAgeDays ?? CONSOLIDATION_SNAPSHOT_MAX_AGE_DAYS,
|
|
641
|
+
};
|
|
642
|
+
const consolidationSuggestions = detectConsolidationHints(
|
|
643
|
+
filtered,
|
|
644
|
+
ctx.db,
|
|
645
|
+
userId,
|
|
646
|
+
consolidationOpts,
|
|
647
|
+
);
|
|
648
|
+
|
|
649
|
+
// Auto-consolidate: fire-and-forget create_snapshot for eligible tags
|
|
650
|
+
if (
|
|
651
|
+
config.consolidation?.autoConsolidate &&
|
|
652
|
+
consolidationSuggestions.length > 0
|
|
653
|
+
) {
|
|
654
|
+
const { handler: snapshotHandler } = await import("./create-snapshot.js");
|
|
655
|
+
for (const suggestion of consolidationSuggestions) {
|
|
656
|
+
snapshotHandler({ topic: suggestion.tag, tags: [suggestion.tag] }, ctx, {
|
|
657
|
+
ensureIndexed: async () => {},
|
|
658
|
+
}).catch(() => {});
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
const result = ok(lines.join("\n"));
|
|
663
|
+
const meta = {};
|
|
664
|
+
if (tokensBudget != null) {
|
|
665
|
+
meta.tokens_used = tokensUsed;
|
|
666
|
+
meta.tokens_budget = tokensBudget;
|
|
667
|
+
}
|
|
668
|
+
if (buckets?.length) {
|
|
669
|
+
meta.buckets = buckets;
|
|
670
|
+
}
|
|
671
|
+
if (consolidationSuggestions.length > 0) {
|
|
672
|
+
meta.consolidation_suggestions = consolidationSuggestions;
|
|
673
|
+
}
|
|
674
|
+
if (Object.keys(meta).length > 0) {
|
|
675
|
+
result._meta = meta;
|
|
676
|
+
}
|
|
677
|
+
return result;
|
|
378
678
|
}
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
3
|
+
import { execSync } from "node:child_process";
|
|
4
|
+
import { join, basename } from "node:path";
|
|
5
|
+
import { captureAndIndex } from "../../capture/index.js";
|
|
6
|
+
import { ok, err, ensureVaultExists } from "../helpers.js";
|
|
7
|
+
|
|
8
|
+
// Tool identifier exported by this module.
export const name = "ingest_project";

// Human-readable summary of what the tool does.
export const description =
  "Scan a local project directory and register it as a project entity in the vault. Extracts metadata from package.json, git history, and README. Also creates a bucket entity for project-scoped tagging.";

// zod schema for the handler's arguments: `path` is required, `tags` and
// `pillar` are optional.
export const inputSchema = {
  path: z.string().describe("Absolute path to the project directory to ingest"),
  tags: z
    .array(z.string())
    .optional()
    .describe("Additional tags to apply (bucket tags are auto-generated)"),
  pillar: z
    .string()
    .optional()
    .describe("Parent pillar/domain name — creates a bucket:pillar tag"),
};
|
|
24
|
+
|
|
25
|
+
/**
 * Read a file as UTF-8 text without ever throwing.
 *
 * @param {string} filePath - File to read
 * @returns {string|null} The file contents, or null if the read fails for any reason
 */
function safeRead(filePath) {
  let text = null;
  try {
    text = readFileSync(filePath, "utf-8");
  } catch {
    // Missing or unreadable file: fall through with null.
  }
  return text;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Run a shell command synchronously and return its trimmed stdout.
 *
 * All three stdio streams are piped, so the child's stderr is captured instead
 * of being forwarded to this process.
 *
 * @param {string} cmd - Command line to execute
 * @param {string} cwd - Working directory for the command
 * @returns {string|null} Trimmed stdout, or null if the command fails in any way
 */
function safeExec(cmd, cwd) {
  const options = {
    cwd,
    encoding: "utf-8",
    stdio: ["pipe", "pipe", "pipe"],
  };
  try {
    const stdout = execSync(cmd, options);
    return stdout.trim();
  } catch {
    return null;
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Infer a project's tech stack from marker files and package.json dependencies.
 *
 * Marker files are probed in the project root (pyproject.toml / setup.py,
 * Cargo.toml, go.mod, tsconfig.json). When a parsed package.json is supplied,
 * "javascript" is always reported and the merged dependencies +
 * devDependencies are checked for well-known packages.
 *
 * @param {string} projectPath - Project root directory
 * @param {object|null} pkgJson - Parsed package.json, or null/undefined if absent
 * @returns {string[]} De-duplicated stack labels in detection order
 */
function detectTechStack(projectPath, pkgJson) {
  const hasFile = (fileName) => existsSync(join(projectPath, fileName));
  const found = [];

  if (hasFile("pyproject.toml") || hasFile("setup.py")) found.push("python");
  if (hasFile("Cargo.toml")) found.push("rust");
  if (hasFile("go.mod")) found.push("go");

  if (pkgJson) {
    found.push("javascript");

    const deps = {
      ...(pkgJson.dependencies || {}),
      ...(pkgJson.devDependencies || {}),
    };

    if (deps.typescript || hasFile("tsconfig.json")) found.push("typescript");

    // [label, packages that imply it] — order defines the output order.
    const markers = [
      ["react", ["react", "react-dom"]],
      ["nextjs", ["next"]],
      ["vue", ["vue"]],
      ["svelte", ["svelte"]],
      ["express", ["express"]],
      ["fastify", ["fastify"]],
      ["hono", ["hono"]],
      ["vite", ["vite"]],
      ["electron", ["electron"]],
      ["tauri", ["tauri", "@tauri-apps/api"]],
    ];
    for (const [label, packages] of markers) {
      if (packages.some((pkg) => deps[pkg])) found.push(label);
    }
  }

  return [...new Set(found)];
}
|
|
76
|
+
|
|
77
|
+
/**
 * Pull a one-line description out of the project's README.
 *
 * Reads README.md (falling back to readme.md) and returns the first line that
 * is neither blank nor a Markdown heading, trimmed and capped at 200
 * characters.
 *
 * @param {string} projectPath - Project root directory
 * @returns {string|null} The description line, or null if there is no README or no suitable line
 */
function extractReadmeDescription(projectPath) {
  let readme = safeRead(join(projectPath, "README.md"));
  if (!readme) readme = safeRead(join(projectPath, "readme.md"));
  if (!readme) return null;

  const firstContentLine = readme
    .split("\n")
    .map((line) => line.trim())
    .find((line) => line !== "" && !line.startsWith("#"));

  return firstContentLine === undefined ? null : firstContentLine.slice(0, 200);
}
|
|
87
|
+
|
|
88
|
+
/**
 * Render the Markdown body stored for a project entity.
 *
 * The body is a level-2 heading with the project name, an optional description
 * paragraph, and a "Metadata" bullet list. Path and CLAUDE.md presence are
 * always listed; repo, stack and last commit only when available.
 *
 * @param {object} info
 * @param {string} info.projectName
 * @param {string|null} info.description
 * @param {string[]} info.techStack
 * @param {string|null} info.repoUrl
 * @param {string|null} info.lastCommit
 * @param {string} info.projectPath
 * @param {boolean} info.hasClaudeMd
 * @returns {string} Markdown text, lines joined with "\n"
 */
function buildProjectBody({ projectName, description, techStack, repoUrl, lastCommit, projectPath, hasClaudeMd }) {
  const rows = [
    `## ${projectName}`,
    ...(description ? ["", description] : []),
    "",
    "### Metadata",
    `- **Path**: \`${projectPath}\``,
    ...(repoUrl ? [`- **Repo**: ${repoUrl}`] : []),
    ...(techStack.length ? [`- **Stack**: ${techStack.join(", ")}`] : []),
    ...(lastCommit ? [`- **Last commit**: ${lastCommit}`] : []),
    `- **CLAUDE.md**: ${hasClaudeMd ? "yes" : "no"}`,
  ];
  return rows.join("\n");
}
|
|
100
|
+
|
|
101
|
+
/**
 * Register a local project directory in the vault.
 *
 * Collects metadata from the directory (package.json, README, marker files,
 * git remote and last commit date, CLAUDE.md presence), saves it as a
 * `project` entity keyed by a slug of the project name, and creates a matching
 * `bucket` entity unless one with the same identity key already exists.
 *
 * Returns an INVALID_INPUT error when the path is missing, does not exist, or
 * no slug can be derived from the project name.
 *
 * @param {object} args
 * @param {string} args.path - Absolute path to the project directory
 * @param {string[]} [args.tags] - Extra tags added after the generated bucket tags
 * @param {string} [args.pillar] - Parent pillar; adds a `bucket:<pillar>` tag
 * @param {import('../types.js').BaseCtx & Partial<import('../types.js').HostedCtxExtensions>} ctx
 * @param {import('../types.js').ToolShared} shared
 */
export async function handler({ path: projectPath, tags, pillar }, ctx, { ensureIndexed }) {
  const { config } = ctx;
  const userId = ctx.userId !== undefined ? ctx.userId : undefined;

  const vaultErr = ensureVaultExists(config);
  if (vaultErr) return vaultErr;

  if (!projectPath?.trim()) {
    return err("Required: path (absolute path to project directory)", "INVALID_INPUT");
  }
  if (!existsSync(projectPath)) {
    return err(`Directory not found: ${projectPath}`, "INVALID_INPUT");
  }

  await ensureIndexed();

  // Read package.json if present; an unreadable or invalid file is treated as absent.
  let pkgJson = null;
  const pkgPath = join(projectPath, "package.json");
  if (existsSync(pkgPath)) {
    try {
      pkgJson = JSON.parse(readFileSync(pkgPath, "utf-8"));
    } catch {
      pkgJson = null;
    }
  }

  // Derive project name: package name without its npm scope, else the directory name.
  // package.json is untrusted input — only a non-empty string name is usable.
  let projectName = basename(projectPath);
  if (typeof pkgJson?.name === "string" && pkgJson.name) {
    projectName = pkgJson.name.replace(/^@[^/]+\//, "");
  }

  // Slug-safe identity_key: lowercase, runs of other characters collapsed to "-",
  // no leading/trailing "-".
  const identityKey = projectName.toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
  if (!identityKey) {
    // Names without any ASCII letter or digit would all collapse to "" and share
    // the tag "bucket:", so unrelated projects would collide on one bucket.
    return err(
      `Cannot derive a project identifier from name "${projectName}" — it must contain at least one ASCII letter or digit (set "name" in package.json)`,
      "INVALID_INPUT",
    );
  }

  // Description: package.json > README (non-string package.json values are ignored)
  const pkgDescription =
    typeof pkgJson?.description === "string" ? pkgJson.description : null;
  const description =
    pkgDescription || extractReadmeDescription(projectPath) || null;

  // Tech stack detection
  const techStack = detectTechStack(projectPath, pkgJson);

  // Git metadata (only queried when a .git entry exists in the project root)
  const isGitRepo = existsSync(join(projectPath, ".git"));
  const repoUrl = isGitRepo
    ? safeExec("git remote get-url origin", projectPath)
    : null;
  const lastCommit = isGitRepo
    ? safeExec("git log -1 --format=%ci", projectPath)
    : null;

  // CLAUDE.md presence
  const hasClaudeMd = existsSync(join(projectPath, "CLAUDE.md"));

  // Build tags: generated bucket tags first, then caller tags, de-duplicated
  const bucketTag = `bucket:${identityKey}`;
  const autoTags = [bucketTag];
  if (pillar) autoTags.push(`bucket:${pillar}`);
  const allTags = [...new Set([...autoTags, ...(tags || [])])];

  // Build body
  const body = buildProjectBody({
    projectName,
    description,
    techStack,
    repoUrl,
    lastCommit,
    projectPath,
    hasClaudeMd,
  });

  // Build meta
  const meta = {
    path: projectPath,
    ...(repoUrl ? { repo_url: repoUrl } : {}),
    ...(techStack.length ? { tech_stack: techStack } : {}),
    has_claude_md: hasClaudeMd,
  };

  // Save project entity
  const projectEntry = await captureAndIndex(ctx, {
    kind: "project",
    title: projectName,
    body,
    tags: allTags,
    identity_key: identityKey,
    meta,
    userId,
  });

  // Save bucket entity if it doesn't already exist.
  // NOTE(review): without a userId this lookup matches buckets of any user
  // (no user_id filter at all) — confirm that is intended.
  const bucketUserClause = userId !== undefined ? "AND user_id = ?" : "";
  const bucketParams = userId !== undefined ? [bucketTag, userId] : [bucketTag];
  const bucketExists = ctx.db
    .prepare(
      `SELECT 1 FROM vault WHERE kind = 'bucket' AND identity_key = ? ${bucketUserClause} LIMIT 1`,
    )
    .get(...bucketParams);

  let bucketEntry = null;
  if (!bucketExists) {
    bucketEntry = await captureAndIndex(ctx, {
      kind: "bucket",
      title: projectName,
      body: `Bucket for project: ${projectName}`,
      tags: allTags,
      identity_key: bucketTag,
      meta: { project_path: projectPath },
      userId,
    });
  }

  // Report paths relative to the vault directory.
  const relPath = projectEntry.filePath
    ? projectEntry.filePath.replace(config.vaultDir + "/", "")
    : projectEntry.filePath;

  const parts = [
    `✓ Ingested project → ${relPath}`,
    `  id: ${projectEntry.id}`,
    `  title: ${projectEntry.title}`,
    `  tags: ${allTags.join(", ")}`,
    ...(techStack.length ? [`  stack: ${techStack.join(", ")}`] : []),
    ...(repoUrl ? [`  repo: ${repoUrl}`] : []),
  ];

  if (bucketEntry) {
    const bucketRelPath = bucketEntry.filePath
      ? bucketEntry.filePath.replace(config.vaultDir + "/", "")
      : bucketEntry.filePath;
    parts.push(``, `✓ Created bucket → ${bucketRelPath}`);
    parts.push(`  id: ${bucketEntry.id}`);
  } else {
    parts.push(``, `  (bucket '${bucketTag}' already exists — skipped)`);
  }

  parts.push("", "_Use get_context with bucket tag to retrieve project-scoped entries._");
  return ok(parts.join("\n"));
}
|