@context-vault/core 2.14.0 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/capture/index.js +11 -0
- package/src/consolidation/index.js +112 -0
- package/src/core/categories.js +10 -0
- package/src/core/config.js +37 -0
- package/src/index/db.js +102 -9
- package/src/index/index.js +24 -1
- package/src/index.js +4 -0
- package/src/retrieve/index.js +261 -64
- package/src/server/tools/create-snapshot.js +12 -3
- package/src/server/tools/get-context.js +297 -11
- package/src/server/tools/ingest-project.js +244 -0
- package/src/server/tools/list-buckets.js +116 -0
- package/src/server/tools/save-context.js +190 -19
- package/src/server/tools/session-start.js +285 -0
- package/src/server/tools.js +6 -0
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
|
+
import { createHash } from "node:crypto";
|
|
3
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
4
|
+
import { resolve } from "node:path";
|
|
2
5
|
import { hybridSearch } from "../../retrieve/index.js";
|
|
3
6
|
import { categoryFor } from "../../core/categories.js";
|
|
4
7
|
import { normalizeKind } from "../../core/files.js";
|
|
@@ -6,6 +9,33 @@ import { ok, err } from "../helpers.js";
|
|
|
6
9
|
import { isEmbedAvailable } from "../../index/embed.js";
|
|
7
10
|
|
|
8
11
|
const STALE_DUPLICATE_DAYS = 7;
|
|
12
|
+
const DEFAULT_PIVOT_COUNT = 2; // fallback for the pivot_count input: results at index >= this value are rendered as skeletons
|
|
13
|
+
const SKELETON_BODY_CHARS = 100; // maximum body characters skeletonBody() keeps before appending "..."
|
|
14
|
+
const CONSOLIDATION_TAG_THRESHOLD = 10; // default tagThreshold: a tag needs at least this many vault entries to be suggested
|
|
15
|
+
const CONSOLIDATION_SNAPSHOT_MAX_AGE_DAYS = 7; // default maxAgeDays: a brief created within this many days suppresses the suggestion
|
|
16
|
+
const BRIEF_SCORE_BOOST = 0.05; // added to the score of kind === "brief" rows before results are re-sorted
|
|
17
|
+
|
|
18
|
+
/**
 * Truncate a body string to roughly `maxChars` UTF-16 code units for skeleton
 * output, preferring to cut at a sentence end and otherwise at a word boundary.
 *
 * A body that already fits is returned unchanged. A truncated body always has
 * "..." appended. A missing or empty body yields "".
 *
 * @param {string|null|undefined} body - Entry body text.
 * @param {number} [maxChars=SKELETON_BODY_CHARS] - Maximum number of characters
 *   kept before the ellipsis.
 * @returns {string} The original body, or its truncated form followed by "...".
 */
export function skeletonBody(body, maxChars = SKELETON_BODY_CHARS) {
  if (!body) return "";
  if (body.length <= maxChars) return body;

  let slice = body.slice(0, maxChars);
  // A boundary is only accepted when it keeps more than 40% of the budget;
  // otherwise an early period or space would leave a uselessly short preview.
  const minKept = maxChars * 0.4;

  const sentenceEnd = Math.max(
    slice.lastIndexOf(". "),
    slice.lastIndexOf(".\n"),
  );
  if (sentenceEnd > minKept) {
    // +1 keeps the period that terminates the sentence.
    return `${slice.slice(0, sentenceEnd + 1)}...`;
  }

  const wordEnd = slice.lastIndexOf(" ");
  if (wordEnd > minKept) {
    return `${slice.slice(0, wordEnd)}...`;
  }

  // Hard cut: if the slice ends on the first half of a surrogate pair, drop it
  // so the output never contains a lone surrogate (a broken character).
  const lastUnit = slice.charCodeAt(slice.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    slice = slice.slice(0, -1);
  }
  return `${slice}...`;
}
|
|
9
39
|
|
|
10
40
|
/**
|
|
11
41
|
* Detect conflicts among a set of search result entries.
|
|
@@ -86,6 +116,131 @@ export function detectConflicts(entries, _ctx) {
|
|
|
86
116
|
return conflicts;
|
|
87
117
|
}
|
|
88
118
|
|
|
119
|
+
/**
 * Escape a string for literal use inside a SQL LIKE pattern that declares
 * `ESCAPE '\'`, so that `%`, `_` and `\` in the input match only themselves.
 *
 * @param {string} text - Raw text to embed in a LIKE pattern.
 * @returns {string} The text with LIKE metacharacters backslash-escaped.
 */
function escapeLikePattern(text) {
  return text.replace(/[\\%_]/g, "\\$&");
}

/**
 * Detect tag clusters that would benefit from consolidation via create_snapshot.
 * A suggestion is emitted when a tag appears on threshold+ entries in the full
 * vault AND no recent brief (kind='brief') exists for that tag within the
 * staleness window.
 *
 * Tag counts are derived from the full vault (not just the search result set)
 * so the check reflects the true size of the knowledge cluster. Only tags that
 * appear in the current search results are evaluated — this keeps the check
 * targeted to what the user is actually working with.
 *
 * Best-effort: rows whose `tags` column is not a JSON array are ignored, and
 * any database error skips the affected tag instead of throwing.
 *
 * @param {Array} entries - Search result rows (used to select candidate tags)
 * @param {import('node:sqlite').DatabaseSync} db - Database handle for vault-wide counts and brief lookups
 * @param {number|undefined} userId - Optional user_id scope; when undefined only rows with user_id IS NULL are considered
 * @param {{ tagThreshold?: number, maxAgeDays?: number }} opts - Configurable thresholds
 * @returns {Array<{tag: string, entry_count: number, last_snapshot_age_days: number|null}>}
 */
export function detectConsolidationHints(entries, db, userId, opts = {}) {
  const tagThreshold = opts.tagThreshold ?? CONSOLIDATION_TAG_THRESHOLD;
  const maxAgeDays = opts.maxAgeDays ?? CONSOLIDATION_SNAPSHOT_MAX_AGE_DAYS;
  const MS_PER_DAY = 86400000;

  // Collect candidate tags from non-brief result rows.
  const candidateTags = new Set();
  for (const entry of entries) {
    if (entry.kind === "brief" || !entry.tags) continue;
    let entryTags;
    try {
      entryTags = JSON.parse(entry.tags);
    } catch {
      continue; // malformed tags column: skip the row rather than fail the whole lookup
    }
    if (!Array.isArray(entryTags)) continue;
    for (const tag of entryTags) {
      if (typeof tag === "string") candidateTags.add(tag);
    }
  }

  if (candidateTags.size === 0) return [];

  const isUserScoped = userId !== undefined;
  const userClause = isUserScoped ? " AND user_id = ?" : " AND user_id IS NULL";

  // Both statements are identical for every tag, so prepare them once.
  let countStmt;
  let briefStmt;
  try {
    countStmt = db.prepare(
      `SELECT COUNT(*) as c FROM vault WHERE kind != 'brief' AND tags LIKE ? ESCAPE '\\'${userClause} AND (expires_at IS NULL OR expires_at > datetime('now')) AND superseded_by IS NULL`,
    );
    briefStmt = db.prepare(
      `SELECT created_at FROM vault WHERE kind = 'brief' AND tags LIKE ? ESCAPE '\\'${userClause} ORDER BY created_at DESC LIMIT 1`,
    );
  } catch {
    return [];
  }

  const now = Date.now();
  const cutoffMs = now - maxAgeDays * MS_PER_DAY;
  const suggestions = [];

  for (const tag of candidateTags) {
    // Match the tag exactly as it is serialized inside the JSON tags column.
    // Escaping keeps "_" and "%" in a tag from acting as LIKE wildcards, which
    // would otherwise count entries that carry a different tag.
    const pattern = `%${escapeLikePattern(JSON.stringify(tag))}%`;
    const params = isUserScoped ? [pattern, userId] : [pattern];

    let vaultCount;
    try {
      vaultCount = countStmt.get(...params)?.c ?? 0;
    } catch {
      continue;
    }
    if (vaultCount < tagThreshold) continue;

    let recentBrief;
    try {
      recentBrief = briefStmt.get(...params);
    } catch {
      continue;
    }

    let lastSnapshotAgeDays = null;
    if (recentBrief) {
      // Compare parsed timestamps rather than raw strings so the result does
      // not depend on the stored text using the same format as toISOString().
      const createdMs = new Date(recentBrief.created_at).getTime();
      if (!Number.isNaN(createdMs)) {
        if (createdMs >= cutoffMs) continue; // a fresh brief already covers this tag
        lastSnapshotAgeDays = Math.round((now - createdMs) / MS_PER_DAY);
      }
      // An unparseable timestamp is treated as "age unknown" (null).
    }

    suggestions.push({
      tag,
      entry_count: vaultCount,
      last_snapshot_age_days: lastSnapshotAgeDays,
    });
  }

  return suggestions;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Check if an entry's source files have changed since the entry was saved.
 *
 * `entry.source_files` is expected to be a JSON array of `{ path, hash }`
 * records, where `hash` is the hex SHA-256 of the file contents at save time.
 * Relative paths are resolved against the current working directory.
 *
 * Best-effort: a missing or unparseable column, a malformed record, or any
 * read error never throws. A record without a stored hash can still be flagged
 * as "not found", but is never reported as modified because there is nothing
 * to compare against.
 *
 * @param {object} entry - DB row with source_files JSON column
 * @returns {{ stale: boolean, stale_reason: string } | null} A stale marker for
 *   the first missing or changed file, or null when nothing stale was detected.
 */
function checkStaleness(entry) {
  if (!entry.source_files) return null;

  let sourceFiles;
  try {
    sourceFiles = JSON.parse(entry.source_files);
  } catch {
    return null;
  }
  if (!Array.isArray(sourceFiles) || sourceFiles.length === 0) return null;

  for (const sf of sourceFiles) {
    // Skip malformed records explicitly instead of relying on a thrown TypeError.
    if (typeof sf?.path !== "string") continue;

    try {
      // resolve() returns an absolute path unchanged and anchors a relative one
      // at process.cwd(), so no manual "starts with /" check is needed.
      const absPath = resolve(sf.path);
      if (!existsSync(absPath)) {
        return { stale: true, stale_reason: "source file not found" };
      }

      if (typeof sf.hash !== "string") continue; // no baseline hash to compare

      const currentHash = createHash("sha256")
        .update(readFileSync(absPath))
        .digest("hex");
      if (currentHash !== sf.hash) {
        return {
          stale: true,
          stale_reason: "source file modified since observation",
        };
      }
    } catch {
      // skip this file on any error — best-effort
    }
  }
  return null;
}
|
|
243
|
+
|
|
89
244
|
export const name = "get_context";
|
|
90
245
|
|
|
91
246
|
export const description =
|
|
@@ -113,7 +268,15 @@ export const inputSchema = {
|
|
|
113
268
|
tags: z
|
|
114
269
|
.array(z.string())
|
|
115
270
|
.optional()
|
|
116
|
-
.describe(
|
|
271
|
+
.describe(
|
|
272
|
+
"Filter by tags (entries must match at least one). Use 'bucket:' prefixed tags for project-scoped retrieval (e.g., ['bucket:autohub']).",
|
|
273
|
+
),
|
|
274
|
+
buckets: z
|
|
275
|
+
.array(z.string())
|
|
276
|
+
.optional()
|
|
277
|
+
.describe(
|
|
278
|
+
"Filter by project-scoped buckets. Each name expands to a 'bucket:<name>' tag. Composes with 'tags' via OR (entries matching any tag or any bucket are included).",
|
|
279
|
+
),
|
|
117
280
|
since: z
|
|
118
281
|
.string()
|
|
119
282
|
.optional()
|
|
@@ -135,6 +298,24 @@ export const inputSchema = {
|
|
|
135
298
|
.describe(
|
|
136
299
|
"If true, compare results for contradicting entries and append a conflicts array. Flags superseded entries still in results and stale duplicates (same kind+tags, updated_at >7 days apart). No LLM calls — pure DB logic.",
|
|
137
300
|
),
|
|
301
|
+
max_tokens: z
|
|
302
|
+
.number()
|
|
303
|
+
.optional()
|
|
304
|
+
.describe(
|
|
305
|
+
"Limit output to entries that fit within this token budget (rough estimate: 1 token ≈ 4 chars). Entries are packed greedily by relevance rank. At least 1 result is always returned. Response metadata includes tokens_used and tokens_budget.",
|
|
306
|
+
),
|
|
307
|
+
pivot_count: z
|
|
308
|
+
.number()
|
|
309
|
+
.optional()
|
|
310
|
+
.describe(
|
|
311
|
+
"Skeleton mode: top pivot_count entries by relevance are returned with full body. Remaining entries are returned as skeletons (title + tags + first ~100 chars of body). Default: 2. Set to 0 to skeleton all results, or a high number to disable.",
|
|
312
|
+
),
|
|
313
|
+
include_ephemeral: z
|
|
314
|
+
.boolean()
|
|
315
|
+
.optional()
|
|
316
|
+
.describe(
|
|
317
|
+
"If true, include ephemeral tier entries in results. Default: false — only working and durable tiers are returned.",
|
|
318
|
+
),
|
|
138
319
|
};
|
|
139
320
|
|
|
140
321
|
/**
|
|
@@ -149,11 +330,15 @@ export async function handler(
|
|
|
149
330
|
category,
|
|
150
331
|
identity_key,
|
|
151
332
|
tags,
|
|
333
|
+
buckets,
|
|
152
334
|
since,
|
|
153
335
|
until,
|
|
154
336
|
limit,
|
|
155
337
|
include_superseded,
|
|
156
338
|
detect_conflicts,
|
|
339
|
+
max_tokens,
|
|
340
|
+
pivot_count,
|
|
341
|
+
include_ephemeral,
|
|
157
342
|
},
|
|
158
343
|
ctx,
|
|
159
344
|
{ ensureIndexed, reindexFailed },
|
|
@@ -162,8 +347,11 @@ export async function handler(
|
|
|
162
347
|
const userId = ctx.userId !== undefined ? ctx.userId : undefined;
|
|
163
348
|
|
|
164
349
|
const hasQuery = query?.trim();
|
|
350
|
+
// Expand buckets to bucket: prefixed tags and merge with explicit tags
|
|
351
|
+
const bucketTags = buckets?.length ? buckets.map((b) => `bucket:${b}`) : [];
|
|
352
|
+
const effectiveTags = [...(tags ?? []), ...bucketTags];
|
|
165
353
|
const hasFilters =
|
|
166
|
-
kind || category ||
|
|
354
|
+
kind || category || effectiveTags.length || since || until || identity_key;
|
|
167
355
|
if (!hasQuery && !hasFilters)
|
|
168
356
|
return err(
|
|
169
357
|
"Required: query or at least one filter (kind, category, tags, since, until, identity_key)",
|
|
@@ -215,7 +403,7 @@ export async function handler(
|
|
|
215
403
|
const effectiveLimit = limit || 10;
|
|
216
404
|
// When tag-filtering, over-fetch to compensate for post-filter reduction
|
|
217
405
|
const MAX_FETCH_LIMIT = 500;
|
|
218
|
-
const fetchLimit =
|
|
406
|
+
const fetchLimit = effectiveTags.length
|
|
219
407
|
? Math.min(effectiveLimit * 10, MAX_FETCH_LIMIT)
|
|
220
408
|
: effectiveLimit;
|
|
221
409
|
|
|
@@ -234,11 +422,11 @@ export async function handler(
|
|
|
234
422
|
});
|
|
235
423
|
|
|
236
424
|
// Post-filter by tags if provided, then apply requested limit
|
|
237
|
-
filtered =
|
|
425
|
+
filtered = effectiveTags.length
|
|
238
426
|
? sorted
|
|
239
427
|
.filter((r) => {
|
|
240
428
|
const entryTags = r.tags ? JSON.parse(r.tags) : [];
|
|
241
|
-
return
|
|
429
|
+
return effectiveTags.some((t) => entryTags.includes(t));
|
|
242
430
|
})
|
|
243
431
|
.slice(0, effectiveLimit)
|
|
244
432
|
: sorted;
|
|
@@ -267,6 +455,9 @@ export async function handler(
|
|
|
267
455
|
params.push(effectiveUntil);
|
|
268
456
|
}
|
|
269
457
|
clauses.push("(expires_at IS NULL OR expires_at > datetime('now'))");
|
|
458
|
+
if (!include_superseded) {
|
|
459
|
+
clauses.push("superseded_by IS NULL");
|
|
460
|
+
}
|
|
270
461
|
const where = clauses.length ? `WHERE ${clauses.join(" AND ")}` : "";
|
|
271
462
|
params.push(fetchLimit);
|
|
272
463
|
const rows = ctx.db
|
|
@@ -274,11 +465,11 @@ export async function handler(
|
|
|
274
465
|
.all(...params);
|
|
275
466
|
|
|
276
467
|
// Post-filter by tags if provided, then apply requested limit
|
|
277
|
-
filtered =
|
|
468
|
+
filtered = effectiveTags.length
|
|
278
469
|
? rows
|
|
279
470
|
.filter((r) => {
|
|
280
471
|
const entryTags = r.tags ? JSON.parse(r.tags) : [];
|
|
281
|
-
return
|
|
472
|
+
return effectiveTags.some((t) => entryTags.includes(t));
|
|
282
473
|
})
|
|
283
474
|
.slice(0, effectiveLimit)
|
|
284
475
|
: rows;
|
|
@@ -287,6 +478,18 @@ export async function handler(
|
|
|
287
478
|
for (const r of filtered) r.score = 0;
|
|
288
479
|
}
|
|
289
480
|
|
|
481
|
+
// Brief score boost: briefs rank slightly higher so consolidated snapshots
|
|
482
|
+
// surface above the individual entries they summarize.
|
|
483
|
+
for (const r of filtered) {
|
|
484
|
+
if (r.kind === "brief") r.score = (r.score || 0) + BRIEF_SCORE_BOOST;
|
|
485
|
+
}
|
|
486
|
+
filtered.sort((a, b) => b.score - a.score);
|
|
487
|
+
|
|
488
|
+
// Tier filter: exclude ephemeral entries by default (NULL tier treated as working)
|
|
489
|
+
if (!include_ephemeral) {
|
|
490
|
+
filtered = filtered.filter((r) => r.tier !== "ephemeral");
|
|
491
|
+
}
|
|
492
|
+
|
|
290
493
|
if (!filtered.length) {
|
|
291
494
|
if (autoWindowed) {
|
|
292
495
|
const days = config.eventDecayDays || 30;
|
|
@@ -315,6 +518,29 @@ export async function handler(
|
|
|
315
518
|
}
|
|
316
519
|
}
|
|
317
520
|
|
|
521
|
+
// Token-budgeted packing
|
|
522
|
+
let tokensBudget = null;
|
|
523
|
+
let tokensUsed = null;
|
|
524
|
+
if (max_tokens != null && max_tokens > 0) {
|
|
525
|
+
tokensBudget = max_tokens;
|
|
526
|
+
const packed = [];
|
|
527
|
+
let used = 0;
|
|
528
|
+
for (const entry of filtered) {
|
|
529
|
+
const entryTokens = Math.ceil((entry.body?.length || 0) / 4);
|
|
530
|
+
if (packed.length === 0 || used + entryTokens <= tokensBudget) {
|
|
531
|
+
packed.push(entry);
|
|
532
|
+
used += entryTokens;
|
|
533
|
+
}
|
|
534
|
+
if (used >= tokensBudget) break;
|
|
535
|
+
}
|
|
536
|
+
tokensUsed = used;
|
|
537
|
+
filtered = packed;
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
// Skeleton mode: determine pivot threshold
|
|
541
|
+
const effectivePivot =
|
|
542
|
+
pivot_count != null ? pivot_count : DEFAULT_PIVOT_COUNT;
|
|
543
|
+
|
|
318
544
|
// Conflict detection
|
|
319
545
|
const conflicts = detect_conflicts ? detectConflicts(filtered, ctx) : [];
|
|
320
546
|
|
|
@@ -329,6 +555,11 @@ export async function handler(
|
|
|
329
555
|
);
|
|
330
556
|
const heading = hasQuery ? `Results for "${query}"` : "Filtered entries";
|
|
331
557
|
lines.push(`## ${heading} (${filtered.length} matches)\n`);
|
|
558
|
+
if (tokensBudget != null) {
|
|
559
|
+
lines.push(
|
|
560
|
+
`> Token budget: ${tokensUsed} / ${tokensBudget} tokens used.\n`,
|
|
561
|
+
);
|
|
562
|
+
}
|
|
332
563
|
if (autoWindowed) {
|
|
333
564
|
const days = config.eventDecayDays || 30;
|
|
334
565
|
lines.push(
|
|
@@ -337,23 +568,36 @@ export async function handler(
|
|
|
337
568
|
}
|
|
338
569
|
for (let i = 0; i < filtered.length; i++) {
|
|
339
570
|
const r = filtered[i];
|
|
571
|
+
const isSkeleton = i >= effectivePivot;
|
|
340
572
|
const entryTags = r.tags ? JSON.parse(r.tags) : [];
|
|
341
573
|
const tagStr = entryTags.length ? entryTags.join(", ") : "none";
|
|
342
574
|
const relPath =
|
|
343
575
|
r.file_path && config.vaultDir
|
|
344
576
|
? r.file_path.replace(config.vaultDir + "/", "")
|
|
345
577
|
: r.file_path || "n/a";
|
|
578
|
+
const skeletonLabel = isSkeleton ? " ⊘ skeleton" : "";
|
|
346
579
|
lines.push(
|
|
347
|
-
`### [${i + 1}/${filtered.length}] ${r.title || "(untitled)"} [${r.kind}/${r.category}]`,
|
|
580
|
+
`### [${i + 1}/${filtered.length}] ${r.title || "(untitled)"} [${r.kind}/${r.category}]${skeletonLabel}`,
|
|
348
581
|
);
|
|
349
582
|
const dateStr =
|
|
350
583
|
r.updated_at && r.updated_at !== r.created_at
|
|
351
584
|
? `${r.created_at} (updated ${r.updated_at})`
|
|
352
585
|
: r.created_at || "";
|
|
586
|
+
const tierStr = r.tier ? ` · tier: ${r.tier}` : "";
|
|
353
587
|
lines.push(
|
|
354
|
-
`${r.score.toFixed(3)} · ${tagStr} · ${relPath} · ${dateStr} · id: \`${r.id}\``,
|
|
588
|
+
`${r.score.toFixed(3)} · ${tagStr} · ${relPath} · ${dateStr} · skeleton: ${isSkeleton}${tierStr} · id: \`${r.id}\``,
|
|
355
589
|
);
|
|
356
|
-
|
|
590
|
+
const stalenessResult = checkStaleness(r);
|
|
591
|
+
if (stalenessResult) {
|
|
592
|
+
r.stale = true;
|
|
593
|
+
r.stale_reason = stalenessResult.stale_reason;
|
|
594
|
+
lines.push(`> ⚠ **Stale**: ${stalenessResult.stale_reason}`);
|
|
595
|
+
}
|
|
596
|
+
if (isSkeleton) {
|
|
597
|
+
lines.push(skeletonBody(r.body));
|
|
598
|
+
} else {
|
|
599
|
+
lines.push(r.body?.slice(0, 300) + (r.body?.length > 300 ? "..." : ""));
|
|
600
|
+
}
|
|
357
601
|
lines.push("");
|
|
358
602
|
}
|
|
359
603
|
|
|
@@ -374,5 +618,47 @@ export async function handler(
|
|
|
374
618
|
}
|
|
375
619
|
}
|
|
376
620
|
|
|
377
|
-
|
|
621
|
+
// Consolidation suggestion detection — lazy, opportunistic, vault-wide
|
|
622
|
+
const consolidationOpts = {
|
|
623
|
+
tagThreshold:
|
|
624
|
+
config.consolidation?.tagThreshold ?? CONSOLIDATION_TAG_THRESHOLD,
|
|
625
|
+
maxAgeDays:
|
|
626
|
+
config.consolidation?.maxAgeDays ?? CONSOLIDATION_SNAPSHOT_MAX_AGE_DAYS,
|
|
627
|
+
};
|
|
628
|
+
const consolidationSuggestions = detectConsolidationHints(
|
|
629
|
+
filtered,
|
|
630
|
+
ctx.db,
|
|
631
|
+
userId,
|
|
632
|
+
consolidationOpts,
|
|
633
|
+
);
|
|
634
|
+
|
|
635
|
+
// Auto-consolidate: fire-and-forget create_snapshot for eligible tags
|
|
636
|
+
if (
|
|
637
|
+
config.consolidation?.autoConsolidate &&
|
|
638
|
+
consolidationSuggestions.length > 0
|
|
639
|
+
) {
|
|
640
|
+
const { handler: snapshotHandler } = await import("./create-snapshot.js");
|
|
641
|
+
for (const suggestion of consolidationSuggestions) {
|
|
642
|
+
snapshotHandler({ topic: suggestion.tag, tags: [suggestion.tag] }, ctx, {
|
|
643
|
+
ensureIndexed: async () => {},
|
|
644
|
+
}).catch(() => {});
|
|
645
|
+
}
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
const result = ok(lines.join("\n"));
|
|
649
|
+
const meta = {};
|
|
650
|
+
if (tokensBudget != null) {
|
|
651
|
+
meta.tokens_used = tokensUsed;
|
|
652
|
+
meta.tokens_budget = tokensBudget;
|
|
653
|
+
}
|
|
654
|
+
if (buckets?.length) {
|
|
655
|
+
meta.buckets = buckets;
|
|
656
|
+
}
|
|
657
|
+
if (consolidationSuggestions.length > 0) {
|
|
658
|
+
meta.consolidation_suggestions = consolidationSuggestions;
|
|
659
|
+
}
|
|
660
|
+
if (Object.keys(meta).length > 0) {
|
|
661
|
+
result._meta = meta;
|
|
662
|
+
}
|
|
663
|
+
return result;
|
|
378
664
|
}
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { readFileSync, existsSync } from "node:fs";
|
|
3
|
+
import { execSync } from "node:child_process";
|
|
4
|
+
import { join, basename } from "node:path";
|
|
5
|
+
import { captureAndIndex } from "../../capture/index.js";
|
|
6
|
+
import { ok, err, ensureVaultExists } from "../helpers.js";
|
|
7
|
+
|
|
8
|
+
export const name = "ingest_project";
|
|
9
|
+
|
|
10
|
+
export const description =
|
|
11
|
+
"Scan a local project directory and register it as a project entity in the vault. Extracts metadata from package.json, git history, and README. Also creates a bucket entity for project-scoped tagging.";
|
|
12
|
+
|
|
13
|
+
/**
 * Input schema for the ingest_project tool.
 * Only `path` is required; `tags` and `pillar` are optional.
 */
export const inputSchema = {
  // Project root to scan.
  path: z.string().describe("Absolute path to the project directory to ingest"),
  // Extra tags, merged with the auto-generated bucket tags.
  tags: z.array(z.string()).optional().describe("Additional tags to apply (bucket tags are auto-generated)"),
  // Optional parent grouping, turned into an additional bucket tag.
  pillar: z.string().optional().describe("Parent pillar/domain name — creates a bucket:pillar tag"),
};
|
|
24
|
+
|
|
25
|
+
/**
 * Read a file as UTF-8 text without ever throwing.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string|null} The file contents, or null when the read fails for
 *   any reason.
 */
function safeRead(filePath) {
  let text = null;
  try {
    text = readFileSync(filePath, { encoding: "utf-8" });
  } catch {
    // Unreadable file: fall through and report null.
  }
  return text;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Run a shell command synchronously and return its trimmed stdout.
 *
 * All three stdio streams are piped, so the child's stderr is captured rather
 * than written to this process's stderr.
 *
 * @param {string} cmd - Command line to execute.
 * @param {string} cwd - Working directory for the command.
 * @returns {string|null} Trimmed stdout, or null when the command cannot be
 *   started or exits with a failure.
 */
function safeExec(cmd, cwd) {
  const options = {
    cwd,
    encoding: "utf-8",
    stdio: ["pipe", "pipe", "pipe"],
  };
  try {
    const stdout = execSync(cmd, options);
    return stdout.trim();
  } catch {
    return null;
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Infer a project's technology labels from marker files in its root and from
 * the dependencies listed in its package.json.
 *
 * Marker files are checked first (python, rust, go). When a parsed
 * package.json is supplied, "javascript" is added, followed by "typescript"
 * and one label per recognised framework or tool found in `dependencies` or
 * `devDependencies`.
 *
 * @param {string} projectPath - Project root directory.
 * @param {object|null} pkgJson - Parsed package.json, or null when absent.
 * @returns {string[]} De-duplicated labels in detection order.
 */
function detectTechStack(projectPath, pkgJson) {
  const hasFile = (...names) =>
    names.some((fileName) => existsSync(join(projectPath, fileName)));

  const labels = [];
  if (hasFile("pyproject.toml", "setup.py")) labels.push("python");
  if (hasFile("Cargo.toml")) labels.push("rust");
  if (hasFile("go.mod")) labels.push("go");

  if (pkgJson) {
    labels.push("javascript");

    const allDeps = {
      ...(pkgJson.dependencies || {}),
      ...(pkgJson.devDependencies || {}),
    };

    if (allDeps.typescript || hasFile("tsconfig.json")) {
      labels.push("typescript");
    }

    // label -> package names whose presence implies it; order fixes the output order.
    const labelPackages = [
      ["react", ["react", "react-dom"]],
      ["nextjs", ["next"]],
      ["vue", ["vue"]],
      ["svelte", ["svelte"]],
      ["express", ["express"]],
      ["fastify", ["fastify"]],
      ["hono", ["hono"]],
      ["vite", ["vite"]],
      ["electron", ["electron"]],
      ["tauri", ["tauri", "@tauri-apps/api"]],
    ];
    for (const [label, packages] of labelPackages) {
      if (packages.some((pkg) => allDeps[pkg])) labels.push(label);
    }
  }

  return [...new Set(labels)];
}
|
|
76
|
+
|
|
77
|
+
/**
 * Pull a one-line description out of a project's README.
 *
 * Reads README.md (falling back to readme.md) and returns the first line that
 * is neither blank nor a markdown heading, trimmed and capped at 200
 * characters.
 *
 * @param {string} projectPath - Project root directory.
 * @returns {string|null} The description line, or null when no README could be
 *   read or it contains only blank lines and headings.
 */
function extractReadmeDescription(projectPath) {
  const readme =
    safeRead(join(projectPath, "README.md")) ||
    safeRead(join(projectPath, "readme.md"));
  if (!readme) return null;

  const firstProseLine = readme
    .split("\n")
    .map((line) => line.trim())
    .find((line) => line && !line.startsWith("#"));

  return firstProseLine ? firstProseLine.slice(0, 200) : null;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Render the markdown body stored for a project entity: a heading, an optional
 * description paragraph, and a "Metadata" bullet list.
 *
 * The Repo, Stack and Last commit bullets are omitted when their value is
 * empty; Path and CLAUDE.md are always present.
 *
 * @param {object} info
 * @param {string} info.projectName - Heading text.
 * @param {string|null} info.description - Optional description paragraph.
 * @param {string[]} info.techStack - Detected technology labels.
 * @param {string|null} info.repoUrl - Repository URL, if any.
 * @param {string|null} info.lastCommit - Last commit timestamp, if any.
 * @param {string} info.projectPath - Project root directory.
 * @param {boolean} info.hasClaudeMd - Whether a CLAUDE.md file was found.
 * @returns {string} Markdown text with lines joined by "\n".
 */
function buildProjectBody({ projectName, description, techStack, repoUrl, lastCommit, projectPath, hasClaudeMd }) {
  const intro = description ? ["", description] : [];

  const bullets = [
    `- **Path**: \`${projectPath}\``,
    repoUrl ? `- **Repo**: ${repoUrl}` : null,
    techStack.length ? `- **Stack**: ${techStack.join(", ")}` : null,
    lastCommit ? `- **Last commit**: ${lastCommit}` : null,
    `- **CLAUDE.md**: ${hasClaudeMd ? "yes" : "no"}`,
  ].filter((line) => line !== null);

  return [`## ${projectName}`, ...intro, "", "### Metadata", ...bullets].join("\n");
}
|
|
100
|
+
|
|
101
|
+
/**
 * Ingest a local project directory into the vault.
 *
 * Steps, in order:
 *   1. Validate that the vault exists and that `path` is non-blank and present on disk.
 *   2. Gather metadata: package.json (name, description, dependencies), the
 *      README description, marker-file tech stack, git remote URL and last
 *      commit date, and whether a CLAUDE.md file exists.
 *   3. Save a `project` entity keyed by a slug of the project name.
 *   4. Save a `bucket` entity keyed by `bucket:<slug>` unless one already exists.
 *
 * A malformed package.json is tolerated: unparseable JSON is ignored, and a
 * `name` or `description` that is not a string is disregarded instead of
 * crashing or leaking into the stored body.
 *
 * @param {object} args
 * @param {string} args.path - Absolute path to the project directory.
 * @param {string[]} [args.tags] - Extra tags merged with the auto-generated bucket tags.
 * @param {string} [args.pillar] - Optional parent pillar; adds a `bucket:<pillar>` tag.
 * @param {import('../types.js').BaseCtx & Partial<import('../types.js').HostedCtxExtensions>} ctx
 * @param {import('../types.js').ToolShared} shared
 * @returns The `ok(...)` summary on success, or an `err(...)` / vault error result.
 */
export async function handler({ path: projectPath, tags, pillar }, ctx, { ensureIndexed }) {
  const { config } = ctx;
  const userId = ctx.userId;

  const vaultErr = ensureVaultExists(config);
  if (vaultErr) return vaultErr;

  if (!projectPath?.trim()) {
    return err("Required: path (absolute path to project directory)", "INVALID_INPUT");
  }
  if (!existsSync(projectPath)) {
    return err(`Directory not found: ${projectPath}`, "INVALID_INPUT");
  }

  await ensureIndexed();

  // Read package.json if present
  let pkgJson = null;
  const pkgPath = join(projectPath, "package.json");
  if (existsSync(pkgPath)) {
    try {
      pkgJson = JSON.parse(readFileSync(pkgPath, "utf-8"));
    } catch {
      pkgJson = null;
    }
  }

  // Only trust string fields: package.json is user-controlled and a non-string
  // name would otherwise throw on .replace below.
  const pkgName = typeof pkgJson?.name === "string" ? pkgJson.name : "";
  const pkgDescription =
    typeof pkgJson?.description === "string" ? pkgJson.description : "";

  // Derive project name: package name without its npm scope, else the directory name
  const projectName = pkgName
    ? pkgName.replace(/^@[^/]+\//, "")
    : basename(projectPath);

  // Slug-safe identity_key
  const identityKey = projectName
    .toLowerCase()
    .replace(/[^a-z0-9-]/g, "-")
    .replace(/-+/g, "-")
    .replace(/^-|-$/g, "");

  // Description: package.json > README
  const description =
    pkgDescription || extractReadmeDescription(projectPath) || null;

  // Tech stack detection
  const techStack = detectTechStack(projectPath, pkgJson);

  // Git metadata (only queried when a .git entry exists in the project root)
  const isGitRepo = existsSync(join(projectPath, ".git"));
  const repoUrl = isGitRepo
    ? safeExec("git remote get-url origin", projectPath)
    : null;
  const lastCommit = isGitRepo
    ? safeExec("git log -1 --format=%ci", projectPath)
    : null;

  // CLAUDE.md presence
  const hasClaudeMd = existsSync(join(projectPath, "CLAUDE.md"));

  // Build tags: auto-generated bucket tags first, then caller tags, de-duplicated
  const bucketTag = `bucket:${identityKey}`;
  const autoTags = [bucketTag];
  if (pillar) autoTags.push(`bucket:${pillar}`);
  const allTags = [...new Set([...autoTags, ...(tags || [])])];

  // Build body
  const body = buildProjectBody({
    projectName,
    description,
    techStack,
    repoUrl,
    lastCommit,
    projectPath,
    hasClaudeMd,
  });

  // Build meta
  const meta = {
    path: projectPath,
    ...(repoUrl ? { repo_url: repoUrl } : {}),
    ...(techStack.length ? { tech_stack: techStack } : {}),
    has_claude_md: hasClaudeMd,
  };

  // Save project entity
  const projectEntry = await captureAndIndex(ctx, {
    kind: "project",
    title: projectName,
    body,
    tags: allTags,
    identity_key: identityKey,
    meta,
    userId,
  });

  // Save bucket entity if it doesn't already exist
  const bucketUserClause = userId !== undefined ? "AND user_id = ?" : "";
  const bucketParams = userId !== undefined ? [bucketTag, userId] : [bucketTag];
  const bucketExists = ctx.db
    .prepare(
      `SELECT 1 FROM vault WHERE kind = 'bucket' AND identity_key = ? ${bucketUserClause} LIMIT 1`,
    )
    .get(...bucketParams);

  let bucketEntry = null;
  if (!bucketExists) {
    bucketEntry = await captureAndIndex(ctx, {
      kind: "bucket",
      title: projectName,
      body: `Bucket for project: ${projectName}`,
      tags: allTags,
      identity_key: bucketTag,
      meta: { project_path: projectPath },
      userId,
    });
  }

  // Report paths relative to the vault directory
  const relPath = projectEntry.filePath
    ? projectEntry.filePath.replace(config.vaultDir + "/", "")
    : projectEntry.filePath;

  const parts = [
    `✓ Ingested project → ${relPath}`,
    ` id: ${projectEntry.id}`,
    ` title: ${projectEntry.title}`,
    ` tags: ${allTags.join(", ")}`,
    ...(techStack.length ? [` stack: ${techStack.join(", ")}`] : []),
    ...(repoUrl ? [` repo: ${repoUrl}`] : []),
  ];

  if (bucketEntry) {
    const bucketRelPath = bucketEntry.filePath
      ? bucketEntry.filePath.replace(config.vaultDir + "/", "")
      : bucketEntry.filePath;
    parts.push(``, `✓ Created bucket → ${bucketRelPath}`);
    parts.push(` id: ${bucketEntry.id}`);
  } else {
    parts.push(``, ` (bucket '${bucketTag}' already exists — skipped)`);
  }

  parts.push("", "_Use get_context with bucket tag to retrieve project-scoped entries._");
  return ok(parts.join("\n"));
}
|