akm-cli 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/dist/cli.js +43 -11
- package/dist/commands/history.js +2 -7
- package/dist/commands/info.js +2 -2
- package/dist/commands/installed-stashes.js +44 -0
- package/dist/commands/search.js +2 -2
- package/dist/commands/show.js +4 -19
- package/dist/core/config.js +13 -1
- package/dist/indexer/db-search.js +17 -38
- package/dist/indexer/db.js +51 -1
- package/dist/indexer/indexer.js +312 -115
- package/dist/indexer/manifest.js +18 -23
- package/dist/indexer/metadata.js +253 -21
- package/dist/indexer/search-source.js +10 -4
- package/dist/output/cli-hints.js +3 -2
- package/dist/output/renderers.js +22 -49
- package/dist/registry/build-index.js +13 -18
- package/dist/setup/setup.js +216 -84
- package/dist/sources/providers/git.js +14 -2
- package/dist/wiki/wiki.js +11 -1
- package/dist/workflows/parser.js +19 -4
- package/dist/workflows/runs.js +3 -3
- package/docs/README.md +3 -3
- package/docs/migration/release-notes/0.7.0.md +8 -0
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED

@@ -6,6 +6,32 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ## [Unreleased]
 
+## [0.7.3] - 2026-05-05
+
+### Added
+
+- **`akm index --enrich` opt-in for LLM passes** — index-time enrichment work such as metadata enhancement, memory inference, and graph extraction now runs only when explicitly requested with `--enrich`. Default indexing is faster and no longer surprises operators with LLM-backed work during normal maintenance runs.
+- **Config backup snapshots before writes** — config writes now create AKM cache backups so setup/config flows have a recovery path if a config is overwritten or corrupted during development or testing.
+
+### Changed
+
+- **Setup wizard UX refresh** — `akm setup` now better reflects the real configured state: source prompts are ordered more sensibly, configured and preserved stash information is surfaced, agent defaults can be selected explicitly (including disabled), and post-setup indexing does not implicitly enable enrichment.
+- **CI workflows updated for current GitHub Actions runtimes** — CI, release, and publishing workflows now use current action majors (`checkout@v5`, `cache@v5`, `setup-node@v5`, `upload-artifact@v5`, `download-artifact@v6`) to stay off deprecated Node 20 action runtimes.
+- **Technical investigation notes updated** — the index investigation note now reflects the latest `.stash.json` migration status, current green CI runs, and the narrowed remaining compatibility surface ahead of `v0.8.0`.
+
+### Fixed
+
+- **Embedding-dimension drift on read-only DB opens** — read/telemetry paths no longer mutate the live index schema with the default embedding dimension. `akm info`, search/show parity paths, and related readers now preserve the configured embedding shape instead of downgrading vector tables.
+- **Incremental index churn across multiple source layouts** — incremental indexing is now significantly more stable for filename-less legacy metadata, wiki-root sources, repo-root git stash layouts, non-indexed companion files, and cross-source dedupe cases.
+- **Git source indexing for repo-root stashes** — git-backed sources no longer assume a `<repo>/content` subtree; repo-root stash layouts are indexed correctly and cached mirrors are treated as fresh instead of being needlessly refreshed.
+- **`show` metadata no longer depends on `.stash.json`** — command and skill summary/show metadata now comes from file-local frontmatter and renderer parsing rather than the deprecated disk fallback sidecar.
+- **`.stash.json` no longer drives incremental stale detection** — editing `.stash.json` alone no longer forces directories to rescan during incremental indexing.
+
+### Internal
+
+- **Ranking and scoring fixtures migrated toward file-local metadata** — routine benchmark and regression fixtures now prefer markdown frontmatter or inline script metadata, with `.stash.json` retained only for intentional legacy-compatibility coverage that still exercises explicit-file override behavior.
+- **Production-path ranking regression coverage** — ranking regression tests now build their fixture index through the production indexer rather than a custom `.stash.json` crawler, reducing fixture drift and improving confidence in the real indexing/search path.
+
 ### Added
 
 - **One-shot URL ingest for `akm import` and `akm wiki stash`** — both commands now accept a single HTTP/HTTPS URL in addition to file paths and stdin. `akm import <url>` fetches the exact page, converts it to markdown, and writes it into `knowledge/` using a URL-path-derived default name. `akm wiki stash <wiki> <url>` fetches the exact page, converts it to markdown, and writes it into `wikis/<wiki>/raw/`. Neither command registers a persistent website source or crawls linked pages.
@@ -13,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
 ### Changed
 
 - **Shared website ingest boundary** — website URL validation, single-page fetch/convert, and website mirror generation now live in a dedicated shared ingest module. The website source provider is a thin adapter, and `akm add`, `akm import`, and `akm wiki stash` all reuse the same core website-ingest path.
+- **`.stash.json` docs deprecation timeline** — the docs now explicitly state that `.stash.json` is deprecated, remains only as a 0.7.x compatibility bridge, and will be removed in v0.8.0 to match the current aggressive pre-release phase-out posture.
 
 ## [0.7.0]
 
package/dist/cli.js
CHANGED

@@ -1,6 +1,7 @@
 #!/usr/bin/env bun
 import fs from "node:fs";
 import path from "node:path";
+import * as p from "@clack/prompts";
 import { defineCommand, runMain } from "citty";
 import { generateBashCompletions, installBashCompletions } from "./commands/completions";
 import { getConfigValue, listConfig, setConfigValue, unsetConfigValue } from "./commands/config-cli";
@@ -32,7 +33,7 @@ import { appendEvent } from "./core/events";
 import { getCacheDir, getDbPath, getDefaultStashDir } from "./core/paths";
 import { setQuiet, setVerbose, warn } from "./core/warn";
 import { resolveWriteTarget, writeAssetToSource } from "./core/write-source";
-import { closeDatabase, findEntryIdByRef,
+import { closeDatabase, findEntryIdByRef, openExistingDatabase } from "./indexer/db";
 import { akmIndex } from "./indexer/indexer";
 import { resolveSourceEntries } from "./indexer/search-source";
 import { insertUsageEvent } from "./indexer/usage-events";
@@ -139,22 +140,53 @@ const indexCommand = defineCommand({
     meta: { name: "index", description: "Build search index (incremental by default; --full forces full reindex)" },
     args: {
         full: { type: "boolean", description: "Force full reindex", default: false },
-
+        enrich: { type: "boolean", description: "Enable LLM inference and enrichment passes", default: false },
+        verbose: { type: "boolean", description: "Print phase-by-phase indexing progress to stderr", default: false },
     },
     async run({ args }) {
         await runWithJsonErrors(async () => {
+            const outputMode = getOutputMode();
             const controller = new AbortController();
             const abort = () => controller.abort(new Error("index interrupted"));
             process.once("SIGINT", abort);
             process.once("SIGTERM", abort);
-            const
-
-
-
-
-
-
-
+            const spin = !args.verbose && outputMode.format === "text" ? p.spinner() : null;
+            if (spin) {
+                spin.start(`Building search index${args.full ? " (full rebuild)" : ""}...`);
+            }
+            let latestMessage = "";
+            try {
+                const result = await akmIndex({
+                    full: args.full,
+                    enrich: args.enrich,
+                    onProgress: ({ message, processed, total }) => {
+                        latestMessage = message;
+                        const progressPrefix = processed !== undefined && total !== undefined ? `[${processed}/${total}] ` : "";
+                        if (args.verbose) {
+                            console.error(`[index] ${progressPrefix}${message}`);
+                        }
+                        else if (spin) {
+                            spin.stop(`${progressPrefix}${message}`);
+                            spin.start(`${progressPrefix}${message}`);
+                        }
+                    },
+                    signal: controller.signal,
+                });
+                if (spin) {
+                    spin.stop(`Indexed ${result.totalEntries} assets.`);
+                }
+                output("index", result);
+            }
+            catch (error) {
+                if (spin) {
+                    spin.stop(latestMessage ? `Indexing failed after: ${latestMessage}` : "Indexing failed.");
+                }
+                throw error;
+            }
+            finally {
+                process.off("SIGINT", abort);
+                process.off("SIGTERM", abort);
+            }
         });
     },
 });
@@ -947,7 +979,7 @@ const feedbackCommand = defineCommand({
     }
     const signal = args.positive ? "positive" : "negative";
     const metadata = args.note ? JSON.stringify({ note: args.note }) : undefined;
-    const db =
+    const db = openExistingDatabase();
     try {
         const entryId = findEntryIdByRef(db, ref);
         if (entryId === undefined) {
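
The new `index` command wiring above reduces to a small programmatic contract. Below is a minimal sketch of driving `akmIndex` the same way, using only the options visible in this diff (`full`, `enrich`, `onProgress`, `signal`) and the `totalEntries` field of the result; everything else is illustrative rather than taken from the package.

```js
// Sketch of calling the indexer the way the new CLI wiring does.
// Only full/enrich/onProgress/signal and result.totalEntries come from this diff.
import { akmIndex } from "./indexer/indexer";

const controller = new AbortController();
process.once("SIGINT", () => controller.abort(new Error("index interrupted")));

const result = await akmIndex({
  full: false,   // incremental by default
  enrich: false, // LLM enrichment passes stay off unless requested
  onProgress: ({ message, processed, total }) => {
    const prefix = processed !== undefined && total !== undefined ? `[${processed}/${total}] ` : "";
    console.error(`[index] ${prefix}${message}`);
  },
  signal: controller.signal,
});
console.error(`Indexed ${result.totalEntries} assets.`);
```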
package/dist/commands/history.js
CHANGED

@@ -16,8 +16,7 @@
 import { parseAssetRef } from "../core/asset-ref";
 import { UsageError } from "../core/errors";
 import { readEvents } from "../core/events";
-import { closeDatabase,
-import { ensureUsageEventsSchema } from "../indexer/usage-events";
+import { closeDatabase, openExistingDatabase } from "../indexer/db";
 // Proposal lifecycle event types emitted by the proposal substrate (#225).
 const PROPOSAL_EVENT_TYPES = new Set(["promoted", "rejected"]);
 // ── Helpers ──────────────────────────────────────────────────────────────────
@@ -106,13 +105,9 @@ export async function akmHistory(options = {}) {
         normalizedRef = trimmed;
     }
     const sinceNormalized = options.since !== undefined ? normalizeSince(options.since) : undefined;
-    const db = options.db ??
+    const db = options.db ?? openExistingDatabase();
     const ownsDb = options.db === undefined;
     try {
-        // The schema is normally created during `akm index`; ensure it exists so
-        // `akm history` works on a freshly-initialised stash that has never been
-        // indexed (and just returns an empty list rather than an error).
-        ensureUsageEventsSchema(db);
         const conditions = [];
         const params = [];
         if (normalizedRef !== undefined) {
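
The `options.db ?? openExistingDatabase()` / `ownsDb` pattern above recurs across the read paths touched in this release. A hedged sketch of that shape, assuming only the helpers shown in the diff; the wrapper name `withReadDb` is hypothetical.

```js
// Reuse a caller-provided handle when given one; otherwise open the existing
// index without touching its schema, and close only what we opened.
import { closeDatabase, openExistingDatabase } from "../indexer/db";

export async function withReadDb(options, fn) {
  const db = options.db ?? openExistingDatabase();
  const ownsDb = options.db === undefined;
  try {
    return await fn(db);
  }
  finally {
    if (ownsDb)
      closeDatabase(db);
  }
}
```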
package/dist/commands/info.js
CHANGED

@@ -2,7 +2,7 @@ import fs from "node:fs";
 import { getAssetTypes } from "../core/asset-spec";
 import { loadConfig } from "../core/config";
 import { getDbPath } from "../core/paths";
-import { closeDatabase, getEntryCount, getMeta, isVecAvailable,
+import { closeDatabase, getEntryCount, getMeta, isVecAvailable, openExistingDatabase } from "../indexer/db";
 import { getEffectiveSemanticStatus, readSemanticStatus } from "../indexer/semantic-status";
 import { pkgVersion } from "../version";
 /**
@@ -74,7 +74,7 @@ function readIndexStats(dbPath) {
 }
 let db;
 try {
-    db =
+    db = openExistingDatabase(resolvedPath);
     const entryCount = getEntryCount(db);
     const lastBuiltAt = getMeta(db, "builtAt") ?? null;
     const vecAvailable = isVecAvailable(db);

package/dist/commands/installed-stashes.js
CHANGED

@@ -12,6 +12,7 @@ import { NotFoundError, UsageError } from "../core/errors";
 import { akmIndex } from "../indexer/indexer";
 import { removeLockEntry, upsertLockEntry } from "../integrations/lockfile";
 import { parseRegistryRef } from "../registry/resolve";
+import { parseGitRepoUrl, syncMirroredRepo } from "../sources/providers/git";
 import { syncFromRef } from "../sources/providers/sync-from-ref";
 import { ensureWebsiteMirror } from "../sources/website-ingest";
 import { listWikis, resolveWikisRoot } from "../wiki/wiki";
@@ -173,6 +174,49 @@ export async function akmUpdate(input) {
     const stashes = config.sources ?? config.stashes ?? [];
     const isUrl = target.startsWith("http://") || target.startsWith("https://");
     const resolvedPath = !isUrl ? path.resolve(target) : undefined;
+    const gitMatch = stashes.find((s) => {
+        if (s.type !== "git")
+            return false;
+        if (isUrl && s.url === target)
+            return true;
+        if (resolvedPath && s.path && path.resolve(s.path) === resolvedPath)
+            return true;
+        if (s.name === target)
+            return true;
+        if (s.url) {
+            try {
+                const repo = parseGitRepoUrl(s.url);
+                if (repo.canonicalUrl === target)
+                    return true;
+            }
+            catch {
+                // Ignore malformed config here; later provider sync will surface it.
+            }
+        }
+        return false;
+    });
+    if (gitMatch) {
+        await syncMirroredRepo(gitMatch, { force: true, writable: gitMatch.writable === true });
+        const index = await akmIndex({ stashDir, full: true });
+        const updatedConfig = loadConfig();
+        return {
+            schemaVersion: 1,
+            stashDir,
+            target,
+            all,
+            processed: [],
+            config: {
+                sourceCount: (updatedConfig.sources ?? updatedConfig.stashes ?? []).length,
+                installedKitCount: updatedConfig.installed?.length ?? 0,
+            },
+            index: {
+                mode: index.mode,
+                totalEntries: index.totalEntries,
+                directoriesScanned: index.directoriesScanned,
+                directoriesSkipped: index.directoriesSkipped,
+            },
+        };
+    }
     const websiteMatch = stashes.find((s) => {
         if (s.type !== "website")
             return false;
package/dist/commands/search.js
CHANGED

@@ -11,7 +11,7 @@
 import { loadConfig } from "../core/config";
 import { UsageError } from "../core/errors";
 import { appendEvent } from "../core/events";
-import { closeDatabase,
+import { closeDatabase, openExistingDatabase } from "../indexer/db";
 import { searchLocal } from "../indexer/db-search";
 import { resolveSourceEntries } from "../indexer/search-source";
 // Eagerly import source providers to trigger self-registration before the
@@ -169,7 +169,7 @@ function logSearchEvent(query, response, existingDb) {
         metadata: { query, hitCount: stashHits.length, resultRefs: stashHits.map((h) => h.ref) },
     });
     try {
-        const db = existingDb ??
+        const db = existingDb ?? openExistingDatabase();
         try {
             const resolved = resolveEntryIds(db, stashHits.slice(0, 50));
             for (const { entryId, ref } of resolved) {
package/dist/commands/show.js
CHANGED

@@ -26,10 +26,9 @@ import { loadConfig } from "../core/config";
 import { NotFoundError, UsageError } from "../core/errors";
 import { appendEvent, readEvents } from "../core/events";
 import { parseFrontmatter, toStringOrUndefined } from "../core/frontmatter";
-import { closeDatabase, findEntryIdByRef,
+import { closeDatabase, findEntryIdByRef, openExistingDatabase } from "../indexer/db";
 import { buildFileContext, buildRenderContext, getRenderer, runMatchers } from "../indexer/file-context";
 import { lookup } from "../indexer/indexer";
-import { loadStashFile } from "../indexer/metadata";
 import { buildEditHint, findSourceForPath, isEditable, resolveSourceEntries } from "../indexer/search-source";
 import { insertUsageEvent } from "../indexer/usage-events";
 import { resolveSourcesForOrigin } from "../registry/origin-resolve";
@@ -203,7 +202,7 @@ function logShowEvent(ref, existingDb) {
     const parsed = parseAssetRef(ref);
     appendEvent({ eventType: "show", ref, metadata: { type: parsed.type, name: parsed.name } });
     try {
-        const db = existingDb ??
+        const db = existingDb ?? openExistingDatabase();
         try {
             insertUsageEvent(db, {
                 event_type: "show",
@@ -369,33 +368,19 @@ function buildBriefResponse(full, assetPath) {
  *
  * Strips content/template/prompt and returns only metadata fields:
  * type, name, path, description, tags, parameters, action.
- * Enriches description and tags from
+ * Enriches description and tags from rendered content when available.
  *
  * The resulting JSON should be under 200 tokens.
  */
 function buildSummaryResponse(full, assetPath) {
     let description = full.description;
-
+    const tags = full.tags;
     if (assetPath) {
         const textContent = full.content ?? full.template ?? full.prompt;
         if (textContent && !description) {
            const parsed = parseFrontmatter(textContent);
            description = toStringOrUndefined(parsed.data.description);
         }
-        const dir = path.dirname(assetPath);
-        const stashFile = loadStashFile(dir);
-        if (stashFile) {
-            const fileName = path.basename(assetPath);
-            const entry = stashFile.entries.find((e) => e.filename === fileName);
-            if (entry) {
-                if (!description && entry.description) {
-                    description = entry.description;
-                }
-                if (!tags && entry.tags) {
-                    tags = entry.tags;
-                }
-            }
-        }
     }
     const summary = {
         type: full.type,
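
With the `.stash.json` fallback removed, the summary path above leans entirely on frontmatter. A small sketch of that derivation, reusing `parseFrontmatter` and `toStringOrUndefined` as imported in `show.js`; the helper name `deriveDescription` and the exact shape of `full` are assumptions for illustration.

```js
// Frontmatter-first description lookup: prefer the asset's own metadata,
// fall back to parsing the rendered text, never consult a sidecar file.
import { parseFrontmatter, toStringOrUndefined } from "../core/frontmatter";

function deriveDescription(full) {
  if (full.description)
    return full.description;
  const textContent = full.content ?? full.template ?? full.prompt;
  if (!textContent)
    return undefined;
  const parsed = parseFrontmatter(textContent);
  return toStringOrUndefined(parsed.data.description);
}
```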
package/dist/core/config.js
CHANGED

@@ -4,7 +4,7 @@ import path from "node:path";
 import { parseAgentConfig } from "../integrations/agent/config";
 import { filterNonEmptyStrings } from "./common";
 import { ConfigError } from "./errors";
-import { getConfigDir as _getConfigDir, getConfigPath as _getConfigPath } from "./paths";
+import { getConfigDir as _getConfigDir, getConfigPath as _getConfigPath, getCacheDir } from "./paths";
 import { warn } from "./warn";
 // ── Defaults ────────────────────────────────────────────────────────────────
 export const DEFAULT_CONFIG = {
@@ -108,9 +108,21 @@ export function saveConfig(config) {
     const configPath = getConfigPath();
     const dir = path.dirname(configPath);
     fs.mkdirSync(dir, { recursive: true });
+    backupExistingConfig(configPath);
     const sanitized = sanitizeConfigForWrite(config);
     writeConfigObject(configPath, sanitized);
 }
+function backupExistingConfig(configPath) {
+    if (!fs.existsSync(configPath))
+        return;
+    const backupDir = path.join(getCacheDir(), "config-backups");
+    fs.mkdirSync(backupDir, { recursive: true });
+    const timestamp = new Date().toISOString().replace(/[.:]/g, "-");
+    const backupPath = path.join(backupDir, `config-${timestamp}.json`);
+    fs.copyFileSync(configPath, backupPath);
+    const latestPath = path.join(backupDir, "config.latest.json");
+    fs.copyFileSync(configPath, latestPath);
+}
 /**
  * Strip apiKey fields before writing config to disk.
  * API keys should be provided via environment variables
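
From `backupExistingConfig` above, each write leaves two snapshots under the cache directory: a timestamped `config-<timestamp>.json` and a rolling `config.latest.json`. A hedged recovery sketch (not an akm command) that copies the latest snapshot back over the live config using the same path helpers:

```js
// Restore the most recent config snapshot; paths follow backupExistingConfig above.
import fs from "node:fs";
import path from "node:path";
import { getCacheDir, getConfigPath } from "./paths";

const latest = path.join(getCacheDir(), "config-backups", "config.latest.json");
if (fs.existsSync(latest)) {
  fs.copyFileSync(latest, getConfigPath());
}
```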
package/dist/indexer/db-search.js
CHANGED

@@ -17,7 +17,7 @@ import { defaultRendererRegistry } from "../core/asset-registry";
 import { deriveCanonicalAssetNameFromStashRoot } from "../core/asset-spec";
 import { getDbPath } from "../core/paths";
 import { warn } from "../core/warn";
-import { closeDatabase, getAllEntries, getEntryById, getEntryCount, getMeta, getUtilityScoresByIds,
+import { closeDatabase, getAllEntries, getEntryById, getEntryCount, getMeta, getUtilityScoresByIds, openExistingDatabase, sanitizeFtsQuery, searchFts, searchVec, } from "./db";
 import { getRenderer } from "./file-context";
 import { computeGraphBoost, loadGraphBoostContext } from "./graph-boost";
 import { generateMetadataFlat, isProposedQuality, loadStashFile, shouldIndexStashFile, } from "./metadata";
@@ -69,8 +69,7 @@ export async function searchLocal(input) {
     const dbPath = getDbPath();
     try {
         if (fs.existsSync(dbPath)) {
-            const
-            const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
+            const db = openExistingDatabase(dbPath);
             try {
                 const entryCount = getEntryCount(db);
                 const storedStashDir = getMeta(db, "stashDir");
@@ -706,44 +705,20 @@ async function indexAssets(stashDir, type, sources) {
             dirGroups.set(ctx.parentDirAbs, [ctx.absPath]);
         }
     }
     for (const [dirPath, files] of dirGroups) {
-
-
-
-
-
-
-
-
-        }
-        }
-        }
-        else {
-            const generated = await generateMetadataFlat(stashDir, files);
-            if (generated.entries.length === 0)
-                continue;
-            stash = generated;
-        }
-        // Build a lookup for matching filename-less entries to actual files
-        const fileBasenameMap = new Map();
-        for (const file of files) {
-            const base = path.basename(file, path.extname(file));
-            if (!fileBasenameMap.has(base))
-                fileBasenameMap.set(base, file);
-        }
+        const generated = await generateMetadataFlat(stashDir, files);
+        const legacyOverrides = loadStashFile(dirPath, { requireFilename: true });
+        const mergedEntries = legacyOverrides
+            ? generated.entries.map((entry) => mergeLegacyEntry(entry, legacyOverrides.entries))
+            : generated.entries;
+        const stash = mergedEntries.length > 0 ? { entries: mergedEntries } : legacyOverrides;
+        if (!stash || stash.entries.length === 0)
+            continue;
         for (const entry of stash.entries) {
             if (filterType && entry.type !== filterType)
                 continue;
-
-
-
-            }
-            else {
-                // Try matching entry name to a file by basename
-                entryPath =
-                    fileBasenameMap.get(entry.name) ??
-                    fileBasenameMap.get(entry.name.split("/").pop() ?? "") ??
-                    (files[0] || dirPath);
-            }
+            if (!entry.filename)
+                continue;
+            const entryPath = path.join(dirPath, entry.filename);
             if (!shouldIndexStashFile(stashDir, entryPath))
                 continue;
             assets.push({ entry, path: entryPath });
@@ -751,6 +726,10 @@ async function indexAssets(stashDir, type, sources) {
     }
     return assets;
 }
+function mergeLegacyEntry(entry, legacyEntries) {
+    const legacy = legacyEntries.find((candidate) => candidate.filename === entry.filename);
+    return legacy ? { ...entry, ...legacy, filename: entry.filename } : entry;
+}
 async function indexWikiRootAssets(wikiRoot, wikiName, type) {
     if (type !== "any" && type !== "wiki")
         return [];
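
The `mergeLegacyEntry` helper added above defines the remaining role of `.stash.json`: an explicit, filename-keyed override that can never redirect an entry to a different file. A worked example with made-up values:

```js
// Generated frontmatter-derived entry vs. a legacy .stash.json override.
const generatedEntry = { filename: "deploy.md", name: "deploy", description: "Generated summary", type: "command" };
const legacyEntries = [{ filename: "deploy.md", description: "Hand-written summary", tags: ["ops"] }];

// Same merge as mergeLegacyEntry: legacy fields win, filename is pinned.
const legacy = legacyEntries.find((candidate) => candidate.filename === generatedEntry.filename);
const merged = legacy ? { ...generatedEntry, ...legacy, filename: generatedEntry.filename } : generatedEntry;
// merged.description === "Hand-written summary", merged.tags === ["ops"],
// merged.filename stays "deploy.md".
```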
package/dist/indexer/db.js
CHANGED

@@ -9,7 +9,7 @@ import { cosineSimilarity } from "../llm/embedders/types";
 import { buildSearchFields } from "./search-fields";
 import { ensureUsageEventsSchema } from "./usage-events";
 // ── Constants ───────────────────────────────────────────────────────────────
-export const DB_VERSION =
+export const DB_VERSION = 10;
 export const EMBEDDING_DIM = 384;
 // ── Database lifecycle ──────────────────────────────────────────────────────
 export function openDatabase(dbPath, options) {
@@ -29,6 +29,17 @@ export function openDatabase(dbPath, options) {
     warnIfVecMissing(db, { once: true });
     return db;
 }
+export function openExistingDatabase(dbPath) {
+    const resolvedPath = dbPath ?? getDbPath();
+    const db = new Database(resolvedPath);
+    db.exec("PRAGMA journal_mode = WAL");
+    db.exec("PRAGMA busy_timeout = 5000");
+    db.exec("PRAGMA foreign_keys = ON");
+    // Existing-DB callers must not mutate schema or embedding metadata on open,
+    // but some paths still need write access to usage_events and other tables.
+    loadVecExtension(db);
+    return db;
+}
 export function closeDatabase(db) {
     db.close();
 }
@@ -163,6 +174,15 @@ function ensureSchema(db, embeddingDim) {
         updated_at TEXT NOT NULL DEFAULT (datetime('now')),
         FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
     );
+    `);
+    db.exec(`
+    CREATE TABLE IF NOT EXISTS index_dir_state (
+        dir_path TEXT PRIMARY KEY,
+        file_set_hash TEXT NOT NULL,
+        file_mtime_max_ms REAL NOT NULL,
+        reason TEXT NOT NULL,
+        updated_at TEXT NOT NULL
+    );
     `);
     // FTS-dirty queue. Created here (not lazily on first upsert) so the
     // per-entry write path doesn't issue a CREATE TABLE IF NOT EXISTS on
@@ -261,6 +281,7 @@ function handleVersionUpgrade(db) {
     db.exec("DROP TABLE IF EXISTS embeddings");
     db.exec("DROP TABLE IF EXISTS entries_vec");
     db.exec("DROP TABLE IF EXISTS entries_fts");
+    db.exec("DROP TABLE IF EXISTS index_dir_state");
     db.exec("DROP INDEX IF EXISTS idx_entries_dir");
     db.exec("DROP INDEX IF EXISTS idx_entries_type");
     db.exec("DROP TABLE IF EXISTS entries");
@@ -332,6 +353,35 @@ export function getMeta(db, key) {
 export function setMeta(db, key, value) {
     db.prepare("INSERT OR REPLACE INTO index_meta (key, value) VALUES (?, ?)").run(key, value);
 }
+export function getIndexDirState(db, dirPath) {
+    const row = db
+        .prepare("SELECT dir_path, file_set_hash, file_mtime_max_ms, reason, updated_at FROM index_dir_state WHERE dir_path = ?")
+        .get(dirPath);
+    if (!row)
+        return undefined;
+    return {
+        dirPath: row.dir_path,
+        fileSetHash: row.file_set_hash,
+        fileMtimeMaxMs: row.file_mtime_max_ms,
+        reason: row.reason,
+        updatedAt: row.updated_at,
+    };
+}
+export function upsertIndexDirState(db, state) {
+    db.prepare(`INSERT INTO index_dir_state (dir_path, file_set_hash, file_mtime_max_ms, reason, updated_at)
+        VALUES (?, ?, ?, ?, ?)
+        ON CONFLICT(dir_path) DO UPDATE SET
+            file_set_hash = excluded.file_set_hash,
+            file_mtime_max_ms = excluded.file_mtime_max_ms,
+            reason = excluded.reason,
+            updated_at = excluded.updated_at`).run(state.dirPath, state.fileSetHash, state.fileMtimeMaxMs, state.reason, new Date().toISOString());
+}
+export function deleteIndexDirState(db, dirPath) {
+    db.prepare("DELETE FROM index_dir_state WHERE dir_path = ?").run(dirPath);
+}
+export function deleteIndexDirStatesByStashDir(db, stashDir) {
+    db.prepare("DELETE FROM index_dir_state WHERE dir_path = ? OR dir_path LIKE ?").run(stashDir, `${stashDir}${path.sep}%`);
+}
 // ── Entry operations ────────────────────────────────────────────────────────
 /**
  * Insert or update an entry in the `entries` table. Returns the row id.
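
The `index_dir_state` helpers added above give the indexer a per-directory fingerprint for deciding whether a rescan is needed. A sketch of one plausible use, assuming a hash over sorted `name:mtime` pairs; the real skip heuristic lives in `dist/indexer/indexer.js`, which is not part of this excerpt.

```js
// Decide whether a directory changed since the last incremental pass by
// comparing a fingerprint of its file names and mtimes against index_dir_state.
import crypto from "node:crypto";
import fs from "node:fs";
import path from "node:path";
import { getIndexDirState, upsertIndexDirState } from "./db";

function snapshotDir(dirPath) {
  const files = fs.readdirSync(dirPath).sort();
  const stats = files.map((f) => fs.statSync(path.join(dirPath, f)));
  const fileSetHash = crypto
    .createHash("sha1")
    .update(files.map((f, i) => `${f}:${stats[i].mtimeMs}`).join("\n"))
    .digest("hex");
  const fileMtimeMaxMs = Math.max(0, ...stats.map((s) => s.mtimeMs));
  return { fileSetHash, fileMtimeMaxMs };
}

function dirNeedsRescan(db, dirPath) {
  const current = snapshotDir(dirPath);
  const previous = getIndexDirState(db, dirPath);
  if (previous && previous.fileSetHash === current.fileSetHash)
    return false; // unchanged since the last incremental pass
  upsertIndexDirState(db, { dirPath, ...current, reason: previous ? "changed" : "new" });
  return true;
}
```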
|