akm-cli 0.7.4 → 0.8.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +224 -1
- package/README.md +22 -6
- package/SECURITY.md +93 -0
- package/dist/cli/config-migrate.js +144 -0
- package/dist/cli/config-validate.js +39 -0
- package/dist/cli/confirm.js +73 -0
- package/dist/cli/parse-args.js +133 -0
- package/dist/cli/shared.js +129 -0
- package/dist/cli.js +2631 -1440
- package/dist/commands/add-cli.js +279 -0
- package/dist/commands/agent-dispatch.js +110 -0
- package/dist/commands/agent-support.js +68 -0
- package/dist/commands/completions.js +3 -0
- package/dist/commands/config-cli.js +130 -534
- package/dist/commands/consolidate.js +2122 -0
- package/dist/commands/curate.js +45 -3
- package/dist/commands/db-cli.js +23 -0
- package/dist/commands/distill-promotion-policy.js +660 -0
- package/dist/commands/distill.js +1081 -73
- package/dist/commands/env.js +213 -0
- package/dist/commands/eval-cases.js +43 -0
- package/dist/commands/events.js +15 -24
- package/dist/commands/extract-cli.js +127 -0
- package/dist/commands/extract-prompt.js +204 -0
- package/dist/commands/extract.js +477 -0
- package/dist/commands/feedback-cli.js +331 -0
- package/dist/commands/graph.js +477 -0
- package/dist/commands/health.js +1302 -0
- package/dist/commands/help/help-accept.md +12 -0
- package/dist/commands/help/help-improve.md +69 -0
- package/dist/commands/help/help-proposals.md +18 -0
- package/dist/commands/help/help-propose.md +17 -0
- package/dist/commands/help/help-reject.md +11 -0
- package/dist/commands/history.js +54 -46
- package/dist/commands/improve-auto-accept.js +97 -0
- package/dist/commands/improve-cli.js +217 -0
- package/dist/commands/improve-profiles.js +166 -0
- package/dist/commands/improve-result-file.js +167 -0
- package/dist/commands/improve.js +2373 -0
- package/dist/commands/info.js +5 -2
- package/dist/commands/init.js +50 -2
- package/dist/commands/installed-stashes.js +102 -139
- package/dist/commands/knowledge.js +136 -0
- package/dist/commands/lint/agent-linter.js +49 -0
- package/dist/commands/lint/base-linter.js +479 -0
- package/dist/commands/lint/command-linter.js +49 -0
- package/dist/commands/lint/default-linter.js +16 -0
- package/dist/commands/lint/env-key-rules.js +154 -0
- package/dist/commands/lint/index.js +196 -0
- package/dist/commands/lint/knowledge-linter.js +16 -0
- package/dist/commands/lint/markdown-insertion.js +343 -0
- package/dist/commands/lint/memory-linter.js +61 -0
- package/dist/commands/lint/registry.js +36 -0
- package/dist/commands/lint/skill-linter.js +45 -0
- package/dist/commands/lint/task-linter.js +50 -0
- package/dist/commands/lint/types.js +4 -0
- package/dist/commands/lint/workflow-linter.js +56 -0
- package/dist/commands/lint.js +4 -0
- package/dist/commands/migration-help.js +3 -0
- package/dist/commands/proposal.js +67 -12
- package/dist/commands/propose.js +120 -45
- package/dist/commands/reflect.js +1104 -60
- package/dist/commands/registry-cli.js +150 -0
- package/dist/commands/registry-search.js +5 -2
- package/dist/commands/remember-cli.js +257 -0
- package/dist/commands/remember.js +70 -7
- package/dist/commands/schema-repair.js +203 -0
- package/dist/commands/search.js +115 -14
- package/dist/commands/secret.js +173 -0
- package/dist/commands/self-update.js +3 -0
- package/dist/commands/show.js +158 -60
- package/dist/commands/source-add.js +17 -45
- package/dist/commands/source-clone.js +3 -0
- package/dist/commands/source-manage.js +14 -19
- package/dist/commands/tasks.js +437 -0
- package/dist/commands/url-checker.js +42 -0
- package/dist/core/action-contributors.js +28 -0
- package/dist/core/asset-ref.js +17 -2
- package/dist/core/asset-registry.js +12 -17
- package/dist/core/asset-serialize.js +88 -0
- package/dist/core/asset-spec.js +67 -1
- package/dist/core/common.js +182 -0
- package/dist/core/concurrent.js +25 -0
- package/dist/core/config-io.js +347 -0
- package/dist/core/config-migration.js +622 -0
- package/dist/core/config-schema.js +534 -0
- package/dist/core/config-sources.js +108 -0
- package/dist/core/config-types.js +4 -0
- package/dist/core/config-walker.js +337 -0
- package/dist/core/config.js +364 -968
- package/dist/core/errors.js +42 -20
- package/dist/core/events.js +105 -135
- package/dist/core/file-lock.js +104 -0
- package/dist/core/frontmatter.js +75 -8
- package/dist/core/lesson-lint.js +3 -0
- package/dist/core/markdown.js +20 -0
- package/dist/core/memory-belief.js +62 -0
- package/dist/core/memory-contradiction-detect.js +274 -0
- package/dist/core/memory-improve.js +806 -0
- package/dist/core/parse.js +158 -0
- package/dist/core/paths.js +280 -14
- package/dist/core/proposal-quality-validators.js +380 -0
- package/dist/core/proposal-validators.js +69 -0
- package/dist/core/proposals.js +512 -42
- package/dist/core/state-db.js +1068 -0
- package/dist/core/text-truncation.js +107 -0
- package/dist/core/time.js +54 -0
- package/dist/core/tty.js +59 -0
- package/dist/core/warn.js +64 -1
- package/dist/core/write-source.js +3 -0
- package/dist/indexer/db-backup.js +391 -0
- package/dist/indexer/db-search.js +198 -489
- package/dist/indexer/db.js +990 -108
- package/dist/indexer/ensure-index.js +136 -0
- package/dist/indexer/file-context.js +3 -0
- package/dist/indexer/graph-boost.js +376 -101
- package/dist/indexer/graph-db.js +391 -0
- package/dist/indexer/graph-dedup.js +95 -0
- package/dist/indexer/graph-extraction.js +550 -114
- package/dist/indexer/index-context.js +4 -0
- package/dist/indexer/indexer.js +547 -309
- package/dist/indexer/llm-cache.js +52 -0
- package/dist/indexer/manifest.js +3 -0
- package/dist/indexer/matchers.js +167 -160
- package/dist/indexer/memory-inference.js +152 -74
- package/dist/indexer/metadata-contributors.js +29 -0
- package/dist/indexer/metadata.js +275 -196
- package/dist/indexer/path-resolver.js +92 -0
- package/dist/indexer/project-context.js +192 -0
- package/dist/indexer/ranking-contributors.js +331 -0
- package/dist/indexer/ranking.js +81 -0
- package/dist/indexer/search-fields.js +5 -9
- package/dist/indexer/search-hit-enrichers.js +111 -0
- package/dist/indexer/search-source.js +44 -10
- package/dist/indexer/semantic-status.js +6 -17
- package/dist/indexer/staleness-detect.js +447 -0
- package/dist/indexer/usage-events.js +12 -9
- package/dist/indexer/walker.js +28 -0
- package/dist/integrations/agent/builders.js +135 -0
- package/dist/integrations/agent/config.js +122 -230
- package/dist/integrations/agent/detect.js +3 -0
- package/dist/integrations/agent/index.js +7 -13
- package/dist/integrations/agent/model-aliases.js +55 -0
- package/dist/integrations/agent/profiles.js +70 -5
- package/dist/integrations/agent/prompts.js +250 -36
- package/dist/integrations/agent/runner.js +151 -0
- package/dist/integrations/agent/sdk-runner.js +126 -0
- package/dist/integrations/agent/spawn.js +183 -35
- package/dist/integrations/github.js +3 -0
- package/dist/integrations/lockfile.js +32 -69
- package/dist/integrations/session-logs/index.js +69 -0
- package/dist/integrations/session-logs/inline-refs.js +35 -0
- package/dist/integrations/session-logs/pre-filter.js +152 -0
- package/dist/integrations/session-logs/providers/claude-code.js +282 -0
- package/dist/integrations/session-logs/providers/opencode.js +258 -0
- package/dist/integrations/session-logs/types.js +4 -0
- package/dist/llm/call-ai.js +62 -0
- package/dist/llm/client.js +79 -88
- package/dist/llm/embedder.js +20 -29
- package/dist/llm/embedders/cache.js +3 -7
- package/dist/llm/embedders/local.js +42 -1
- package/dist/llm/embedders/remote.js +20 -8
- package/dist/llm/embedders/types.js +3 -7
- package/dist/llm/feature-gate.js +95 -48
- package/dist/llm/graph-extract.js +676 -72
- package/dist/llm/index-passes.js +44 -29
- package/dist/llm/memory-infer.js +80 -71
- package/dist/llm/metadata-enhance.js +42 -29
- package/dist/llm/prompts/extract-session.md +80 -0
- package/dist/llm/prompts/graph-extract-user-prompt.md +35 -0
- package/dist/output/cli-hints-full.md +292 -0
- package/dist/output/cli-hints-short.md +66 -0
- package/dist/output/cli-hints.js +7 -311
- package/dist/output/context.js +60 -8
- package/dist/output/renderers.js +306 -258
- package/dist/output/shapes/curate.js +56 -0
- package/dist/output/shapes/distill.js +10 -0
- package/dist/output/shapes/env-list.js +19 -0
- package/dist/output/shapes/events.js +11 -0
- package/dist/output/shapes/helpers.js +424 -0
- package/dist/output/shapes/history.js +7 -0
- package/dist/output/shapes/passthrough.js +102 -0
- package/dist/output/shapes/proposal-accept.js +7 -0
- package/dist/output/shapes/proposal-diff.js +7 -0
- package/dist/output/shapes/proposal-list.js +7 -0
- package/dist/output/shapes/proposal-producer.js +11 -0
- package/dist/output/shapes/proposal-reject.js +7 -0
- package/dist/output/shapes/proposal-show.js +7 -0
- package/dist/output/shapes/registry-search.js +6 -0
- package/dist/output/shapes/registry.js +30 -0
- package/dist/output/shapes/search.js +6 -0
- package/dist/output/shapes/secret-list.js +19 -0
- package/dist/output/shapes/show.js +6 -0
- package/dist/output/shapes/vault-list.js +19 -0
- package/dist/output/shapes.js +51 -511
- package/dist/output/text/add.js +6 -0
- package/dist/output/text/clone.js +6 -0
- package/dist/output/text/config.js +6 -0
- package/dist/output/text/curate.js +6 -0
- package/dist/output/text/distill.js +7 -0
- package/dist/output/text/enable-disable.js +7 -0
- package/dist/output/text/events.js +10 -0
- package/dist/output/text/feedback.js +6 -0
- package/dist/output/text/helpers.js +1039 -0
- package/dist/output/text/history.js +7 -0
- package/dist/output/text/import.js +6 -0
- package/dist/output/text/index.js +6 -0
- package/dist/output/text/info.js +6 -0
- package/dist/output/text/init.js +6 -0
- package/dist/output/text/list.js +6 -0
- package/dist/output/text/proposal-producer.js +8 -0
- package/dist/output/text/proposal.js +11 -0
- package/dist/output/text/registry-commands.js +11 -0
- package/dist/output/text/registry.js +30 -0
- package/dist/output/text/remember.js +6 -0
- package/dist/output/text/remove.js +6 -0
- package/dist/output/text/save.js +6 -0
- package/dist/output/text/search.js +6 -0
- package/dist/output/text/show.js +6 -0
- package/dist/output/text/update.js +6 -0
- package/dist/output/text/upgrade.js +6 -0
- package/dist/output/text/vault.js +16 -0
- package/dist/output/text/wiki.js +15 -0
- package/dist/output/text/workflow.js +14 -0
- package/dist/output/text.js +44 -1093
- package/dist/registry/build-index.js +3 -0
- package/dist/registry/create-provider-registry.js +3 -0
- package/dist/registry/factory.js +4 -1
- package/dist/registry/origin-resolve.js +3 -0
- package/dist/registry/providers/index.js +3 -0
- package/dist/registry/providers/skills-sh.js +71 -50
- package/dist/registry/providers/static-index.js +53 -48
- package/dist/registry/providers/types.js +3 -24
- package/dist/registry/resolve.js +11 -16
- package/dist/registry/types.js +3 -0
- package/dist/scripts/migrate-storage.js +17750 -0
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
- package/dist/scripts/migrations/v16-to-v17.js +141 -0
- package/dist/setup/detect.js +3 -0
- package/dist/setup/ripgrep-install.js +3 -0
- package/dist/setup/ripgrep-resolve.js +3 -0
- package/dist/setup/setup.js +775 -37
- package/dist/setup/steps.js +3 -15
- package/dist/sources/include.js +3 -0
- package/dist/sources/provider-factory.js +5 -12
- package/dist/sources/provider.js +3 -20
- package/dist/sources/providers/filesystem.js +19 -23
- package/dist/sources/providers/git.js +179 -20
- package/dist/sources/providers/index.js +3 -0
- package/dist/sources/providers/install-types.js +3 -13
- package/dist/sources/providers/npm.js +3 -4
- package/dist/sources/providers/provider-utils.js +3 -0
- package/dist/sources/providers/sync-from-ref.js +3 -11
- package/dist/sources/providers/tar-utils.js +3 -0
- package/dist/sources/providers/website.js +18 -22
- package/dist/sources/resolve.js +3 -0
- package/dist/sources/types.js +3 -0
- package/dist/sources/website-ingest.js +7 -0
- package/dist/tasks/backends/cron.js +203 -0
- package/dist/tasks/backends/exec-utils.js +28 -0
- package/dist/tasks/backends/index.js +24 -0
- package/dist/tasks/backends/launchd-template.xml +19 -0
- package/dist/tasks/backends/launchd.js +187 -0
- package/dist/tasks/backends/schtasks-template.xml +29 -0
- package/dist/tasks/backends/schtasks.js +215 -0
- package/dist/tasks/parser.js +211 -0
- package/dist/tasks/resolveAkmBin.js +87 -0
- package/dist/tasks/runner.js +458 -0
- package/dist/tasks/schedule.js +227 -0
- package/dist/tasks/schema.js +15 -0
- package/dist/tasks/validator.js +62 -0
- package/dist/version.js +3 -0
- package/dist/wiki/index-template.md +12 -0
- package/dist/wiki/ingest-workflow-template.md +54 -0
- package/dist/wiki/log-template.md +8 -0
- package/dist/wiki/schema-template.md +61 -0
- package/dist/wiki/wiki-templates.js +15 -0
- package/dist/wiki/wiki.js +13 -61
- package/dist/workflows/authoring.js +8 -25
- package/dist/workflows/cli.js +3 -0
- package/dist/workflows/db.js +141 -2
- package/dist/workflows/document-cache.js +3 -10
- package/dist/workflows/parser.js +3 -0
- package/dist/workflows/renderer.js +11 -3
- package/dist/workflows/runs.js +91 -89
- package/dist/workflows/schema.js +3 -0
- package/dist/workflows/scope-key.js +79 -0
- package/dist/workflows/validator.js +4 -8
- package/dist/workflows/workflow-template.md +24 -0
- package/docs/README.md +10 -2
- package/docs/data-and-telemetry.md +225 -0
- package/docs/migration/release-notes/0.7.0.md +1 -1
- package/docs/migration/release-notes/0.7.4.md +1 -1
- package/docs/migration/release-notes/0.7.5.md +20 -0
- package/docs/migration/release-notes/0.8.0.md +48 -0
- package/docs/migration/v0.7-to-v0.8.md +1307 -0
- package/package.json +29 -11
- package/dist/commands/install-audit.js +0 -381
- package/dist/commands/vault.js +0 -333
- package/dist/templates/wiki-templates.js +0 -100
package/dist/indexer/indexer.js
CHANGED
|
@@ -1,14 +1,18 @@
|
|
|
1
|
+
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
|
+
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
|
+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
1
4
|
import fs from "node:fs";
|
|
2
5
|
import path from "node:path";
|
|
6
|
+
import { SCRIPT_EXTENSIONS } from "../core/asset-spec";
|
|
3
7
|
import { isHttpUrl, resolveStashDir, toErrorMessage } from "../core/common";
|
|
8
|
+
import { concurrentMap } from "../core/concurrent";
|
|
4
9
|
import { getDbPath } from "../core/paths";
|
|
5
10
|
import { isVerbose, warn, warnVerbose } from "../core/warn";
|
|
6
11
|
import { resolveIndexPassLLM } from "../llm/index-passes";
|
|
7
12
|
import { takeWorkflowDocument } from "../workflows/document-cache";
|
|
8
|
-
import { closeDatabase, deleteEntriesByDir, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, warnIfVecMissing, } from "./db";
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
|
|
13
|
+
import { clearStaleCacheEntries, closeDatabase, deleteEntriesByDir, deleteEntriesByIds, deleteEntriesByStashDir, deleteIndexDirStatesByStashDir, getAllEntriesForEmbedding, getEmbeddingCount, getEntriesByDir, getEntryCount, getIndexDirState, getMeta, isVecAvailable, openDatabase, openExistingDatabase, rebuildFts, relinkUsageEvents, setMeta, upsertEmbedding, upsertEntry, upsertIndexDirState, upsertUtilityScore, upsertWorkflowDocument, warnIfVecMissing, } from "./db";
|
|
14
|
+
import { deleteStoredGraph } from "./graph-db";
|
|
15
|
+
import { applyCuratedFrontmatter, applyWikiFrontmatter, generateMetadataFlat, isEnrichmentComplete, isWorkflowSkipWarning, loadStashFile, shouldIndexStashFile, } from "./metadata";
|
|
12
16
|
import { buildSearchText } from "./search-fields";
|
|
13
17
|
import { classifySemanticFailure, clearSemanticStatus, deriveSemanticProviderFingerprint, writeSemanticStatus, } from "./semantic-status";
|
|
14
18
|
import { ensureUsageEventsSchema, purgeOldUsageEvents } from "./usage-events";
|
|
@@ -18,19 +22,222 @@ function throwIfAborted(signal) {
|
|
|
18
22
|
throw signal.reason instanceof Error ? signal.reason : new Error("index interrupted");
|
|
19
23
|
}
|
|
20
24
|
}
|
|
25
|
+
/**
 * Pick the default number of concurrent LLM requests for a given LLM config.
 *
 * Resolution order:
 *   1. An explicit numeric `llmConfig.concurrency` is returned as-is.
 *   2. No config / no endpoint, or an endpoint that is not a parseable URL → 1.
 *   3. Endpoints on the local machine (`localhost`, `*.localhost`,
 *      `127.0.0.1`, IPv6 loopback `::1`) → 1.
 *   4. Any other endpoint → 4.
 *
 * @param llmConfig Optional object; only `concurrency` and `endpoint` are read.
 * @returns {number} The concurrency to use.
 */
function getDefaultLlmConcurrency(llmConfig) {
    if (typeof llmConfig?.concurrency === "number")
        return llmConfig.concurrency;
    if (!llmConfig?.endpoint)
        return 1;
    let host;
    try {
        host = new URL(llmConfig.endpoint).hostname.toLowerCase();
    }
    catch {
        // Unparseable endpoint: fall back to the conservative default.
        return 1;
    }
    // `URL#hostname` serializes IPv6 addresses with square brackets, so the
    // loopback address arrives here as "[::1]". The bare "::1" form is kept
    // for backward compatibility with the previous check.
    const isLoopback = host === "localhost" ||
        host === "127.0.0.1" ||
        host === "[::1]" ||
        host === "::1" ||
        host.endsWith(".localhost");
    return isLoopback ? 1 : 4;
}
|
|
41
|
+
// ── Phase functions ──────────────────────────────────────────────────────────
|
|
42
|
+
/**
 * Source cache phase: on incremental (non-full) runs, purge index data that
 * belongs to stash directories recorded by the previous run but absent from
 * the current source list (e.g. after `akm remove`).
 *
 * Source caches themselves are hydrated by akmIndex() before this phase is
 * called, so no cache work happens here.
 *
 * Reads `ctx.db`, `ctx.sourceDirs`, `ctx.isIncremental`, `ctx.full`.
 * Sets `ctx.hadRemovedSources = true` when at least one recorded directory is
 * no longer configured; runWalkPhase() consumes that flag.
 */
async function runSourceCachePhase(ctx) {
    const { db, sourceDirs, isIncremental, full } = ctx;
    if (!isIncremental || full)
        return;
    const recordedJson = getMeta(db, "stashDirs");
    if (!recordedJson)
        return;
    // Decode the previously recorded directory list; anything malformed is
    // reported and treated as an empty list.
    let recordedDirs = [];
    try {
        const decoded = JSON.parse(recordedJson);
        if (!Array.isArray(decoded)) {
            warn("index_meta stashDirs value is not an array — treating as empty");
        }
        else {
            recordedDirs = decoded.filter((entry) => typeof entry === "string");
        }
    }
    catch {
        warn("index_meta stashDirs value is corrupt JSON — treating as empty");
    }
    const stillConfigured = new Set(sourceDirs);
    const removedDirs = recordedDirs.filter((dir) => !stillConfigured.has(dir));
    for (const dir of removedDirs) {
        ctx.hadRemovedSources = true;
        deleteEntriesByStashDir(db, dir);
        deleteIndexDirStatesByStashDir(db, dir);
        deleteStoredGraph(db, dir);
    }
}
|
|
81
|
+
/**
 * Walk phase: run indexEntries() over the configured sources, publish its
 * counters on the context, then run LLM enrichment for the directories the
 * walk flagged.
 *
 * Writes `ctx.scannedDirs`, `ctx.skippedDirs`, `ctx.generatedCount`,
 * `ctx.walkWarnings`, and `ctx.dirsNeedingLlm` for downstream phases, and
 * stamps `ctx.timing.tWalkStart`, `tWalkEnd`, and `tLlmEnd`.
 *
 * Abort checks (throwIfAborted) run before the walk and again before the
 * enrichment step.
 */
async function runWalkPhase(ctx) {
    const { db, sources, isIncremental, builtAtMs, hadRemovedSources, full, reEnrich, signal, onProgress, config } = ctx;
    const dirNoun = (count) => (count === 1 ? "directory" : "directories");
    throwIfAborted(signal);
    ctx.timing.tWalkStart = Date.now();
    // A full run, or any non-incremental run, asks indexEntries for a full delete.
    const walk = await indexEntries(db, sources, isIncremental, builtAtMs, hadRemovedSources, full || !isIncremental, onProgress);
    const { scannedDirs, skippedDirs, dirsNeedingLlm, warnings } = walk;
    ctx.scannedDirs = scannedDirs;
    ctx.skippedDirs = skippedDirs;
    ctx.generatedCount = walk.generatedCount;
    ctx.walkWarnings = warnings;
    ctx.dirsNeedingLlm = dirsNeedingLlm;
    onProgress({
        phase: "scan",
        message: `Scanned ${scannedDirs} ${dirNoun(scannedDirs)} and skipped ${skippedDirs}.`,
    });
    // Workflow validation noise gate (issue #273): at default verbosity the
    // per-spec warnings are collapsed into one summary line. In verbose mode
    // the per-spec lines were already printed at generation time, so nothing
    // is emitted here.
    if (!isVerbose()) {
        const skippedWorkflowCount = warnings.filter(isWorkflowSkipWarning).length;
        if (skippedWorkflowCount > 0) {
            const noun = skippedWorkflowCount === 1 ? "workflow spec" : "workflow specs";
            warn(`${skippedWorkflowCount} ${noun} skipped due to validation errors; rerun with --verbose (or AKM_VERBOSE=1) to see details.`);
        }
    }
    ctx.timing.tWalkEnd = Date.now();
    throwIfAborted(signal);
    // LLM enrichment for the directories flagged by the walk.
    await enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, true, reEnrich);
    const enrichmentLlm = resolveIndexPassLLM("enrichment", config);
    const reviewed = dirsNeedingLlm.length;
    const llmMessage = enrichmentLlm
        ? `LLM enhancement reviewed ${reviewed} ${dirNoun(reviewed)}.`
        : "LLM enhancement disabled.";
    onProgress({ phase: "llm", message: llmMessage });
    ctx.timing.tLlmEnd = Date.now();
}
|
|
128
|
+
/**
 * Embedding phase: run generateEmbeddingsForDb() for the open index database
 * and keep its outcome on `ctx.embeddingResult` for the finalize phase.
 * Also stamps `ctx.timing.tEmbedEnd`.
 *
 * Throws via throwIfAborted() when the run's signal is already aborted.
 */
async function runEmbeddingPhase(ctx) {
    const { db, config, signal, onProgress } = ctx;
    throwIfAborted(signal);
    const outcome = await generateEmbeddingsForDb(db, config, onProgress);
    ctx.embeddingResult = outcome;
    ctx.timing.tEmbedEnd = Date.now();
}
|
|
138
|
+
/**
 * Finalize phase: rebuild FTS, re-link usage events, recompute utility scores,
 * purge stale LLM cache rows, regenerate wiki indexes, update index metadata,
 * persist the semantic-search status, and emit the "verify" progress event.
 *
 * Reads `ctx.db`, `ctx.config`, `ctx.sourceDirs`, `ctx.isIncremental`,
 * `ctx.stashDir`, `ctx.signal`, `ctx.onProgress`, and `ctx.embeddingResult`.
 * Writes `ctx.timing.tFtsEnd`, `ctx._verification`, and `ctx._totalEntries`.
 *
 * The LLM-cache purge and the wiki index regeneration are best-effort: a
 * failure in either never aborts the run, but the reason is reported through
 * warnVerbose() so it is not lost entirely.
 *
 * Throws via throwIfAborted() when the signal aborts before the metadata
 * update; errors from the remaining steps propagate to the caller.
 */
async function runFinalizePhase(ctx) {
    const { db, config, sourceDirs, isIncremental, stashDir, signal, onProgress } = ctx;
    const reasonOf = (err) => (err instanceof Error ? err.message : String(err));
    // Rebuild FTS after all inserts. Use incremental mode when this whole
    // index run is incremental — only entries touched by `upsertEntry`
    // since the last rebuild are re-indexed.
    rebuildFts(db, { incremental: isIncremental });
    onProgress({
        phase: "fts",
        message: isIncremental ? "Rebuilt full-text search index (dirty rows only)." : "Rebuilt full-text search index.",
    });
    ctx.timing.tFtsEnd = Date.now();
    // Re-link detached usage_events and recompute utility scores.
    relinkUsageEvents(db);
    recomputeUtilityScores(db);
    // Purge LLM cache entries for assets that no longer exist in the index.
    try {
        clearStaleCacheEntries(db);
    }
    catch (err) {
        // Best-effort: keep going, but leave a trace for verbose runs.
        warnVerbose(`Stale LLM cache cleanup skipped: ${reasonOf(err)}`);
    }
    // Regenerate each wiki's index.md from its pages' frontmatter. Best-effort.
    try {
        const { regenerateAllWikiIndexes } = await import("../wiki/wiki.js");
        regenerateAllWikiIndexes(stashDir);
    }
    catch (err) {
        // Best-effort: keep going, but leave a trace for verbose runs.
        warnVerbose(`Wiki index regeneration skipped: ${reasonOf(err)}`);
    }
    throwIfAborted(signal);
    // Update index metadata. A missing embedding result counts as a failure.
    const embeddingResult = ctx.embeddingResult ?? { success: false };
    setMeta(db, "builtAt", new Date().toISOString());
    setMeta(db, "stashDir", stashDir);
    setMeta(db, "stashDirs", JSON.stringify(sourceDirs));
    setMeta(db, "hasEmbeddings", embeddingResult.success ? "1" : "0");
    warnIfVecMissing(db);
    const totalEntries = getEntryCount(db);
    const verification = verifyIndexState(db, config, totalEntries, embeddingResult);
    if (config.semanticSearchMode === "off") {
        clearSemanticStatus();
    }
    else {
        // With semantic search not switched off, a "disabled" verification
        // status is recorded as "pending".
        writeSemanticStatus({
            status: verification.semanticStatus === "disabled" ? "pending" : verification.semanticStatus,
            ...(embeddingResult.reason ? { reason: embeddingResult.reason } : {}),
            ...(embeddingResult.message ? { message: embeddingResult.message } : {}),
            providerFingerprint: deriveSemanticProviderFingerprint(config.embedding),
            lastCheckedAt: new Date().toISOString(),
            entryCount: verification.entryCount,
            embeddingCount: verification.embeddingCount,
        });
    }
    onProgress({ phase: "verify", message: verification.message });
    // Store verification result and totalEntries on ctx for the caller to use
    ctx._verification = verification;
    ctx._totalEntries = totalEntries;
}
|
|
202
|
+
// ── Clean pass ───────────────────────────────────────────────────────────────
|
|
203
|
+
/**
 * Post-index clean pass: look at every row of the `entries` table and find the
 * ones whose `file_path` no longer exists on disk.
 *
 * Rows without a non-blank string `file_path` (remote/virtual entries) are
 * never checked. Missing rows are deleted unless `dryRun` is truthy; in a dry
 * run they are only reported.
 *
 * @param db Database handle exposing `prepare(sql).all()`.
 * @param dryRun When truthy, report what would be removed without deleting.
 * @returns `{ checked, removed, removedRefs, dryRun }` — `checked` is the
 *   number of local rows inspected, `removed` is 0 in a dry run and the number
 *   of missing rows otherwise, `removedRefs` lists the `entry_key` of every
 *   missing row (also in a dry run).
 */
function runCleanPass(db, dryRun) {
    const rows = db.prepare("SELECT id, entry_key AS ref, file_path AS path FROM entries").all();
    let checked = 0;
    const staleIds = [];
    const staleRefs = [];
    for (const row of rows) {
        // Skip remote/virtual rows: only non-blank string paths are local.
        if (typeof row.path !== "string" || row.path.trim() === "")
            continue;
        checked += 1;
        if (fs.existsSync(row.path))
            continue;
        staleIds.push(row.id);
        staleRefs.push(row.ref);
    }
    if (!dryRun && staleIds.length > 0) {
        deleteEntriesByIds(db, staleIds);
    }
    return {
        checked,
        removed: dryRun ? 0 : staleIds.length,
        removedRefs: staleRefs,
        dryRun,
    };
}
|
|
21
225
|
// ── Indexer ──────────────────────────────────────────────────────────────────
|
|
22
226
|
export async function akmIndex(options) {
|
|
23
227
|
const stashDir = options?.stashDir || resolveStashDir();
|
|
24
228
|
const onProgress = options?.onProgress ?? (() => { });
|
|
25
229
|
const signal = options?.signal;
|
|
26
|
-
const
|
|
230
|
+
const reEnrich = options?.reEnrich === true;
|
|
231
|
+
const full = options?.full === true;
|
|
232
|
+
const clean = options?.clean === true;
|
|
233
|
+
const dryRun = options?.dryRun === true;
|
|
27
234
|
// Load config and resolve all stash sources
|
|
28
235
|
const { loadConfig } = await import("../core/config.js");
|
|
29
236
|
const config = loadConfig();
|
|
30
237
|
// Ensure git stash caches are extracted before resolving stash dirs,
|
|
31
238
|
// so their content directories exist on disk for the walker to discover.
|
|
32
239
|
const { ensureSourceCaches, resolveSourceEntries } = await import("./search-source.js");
|
|
33
|
-
await ensureSourceCaches(config, { force:
|
|
240
|
+
await ensureSourceCaches(config, { force: full });
|
|
34
241
|
const allSourceEntries = resolveSourceEntries(stashDir, config);
|
|
35
242
|
const allSourceDirs = allSourceEntries.map((s) => s.path);
|
|
36
243
|
const t0 = Date.now();
|
|
@@ -39,11 +246,41 @@ export async function akmIndex(options) {
|
|
|
39
246
|
const embeddingDim = config.embedding?.dimension;
|
|
40
247
|
const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
|
|
41
248
|
try {
|
|
42
|
-
//
|
|
249
|
+
// Determine incremental vs full mode
|
|
43
250
|
const prevStashDir = getMeta(db, "stashDir");
|
|
44
251
|
const prevBuiltAt = getMeta(db, "builtAt");
|
|
45
|
-
const isIncremental = !
|
|
252
|
+
const isIncremental = !full && prevStashDir === stashDir && !!prevBuiltAt;
|
|
46
253
|
const builtAtMs = isIncremental && prevBuiltAt ? new Date(prevBuiltAt).getTime() : 0;
|
|
254
|
+
// Assemble the run context
|
|
255
|
+
const ctx = {
|
|
256
|
+
db,
|
|
257
|
+
config,
|
|
258
|
+
sources: allSourceEntries,
|
|
259
|
+
sourceDirs: allSourceDirs,
|
|
260
|
+
full,
|
|
261
|
+
reEnrich,
|
|
262
|
+
stashDir,
|
|
263
|
+
onProgress,
|
|
264
|
+
signal,
|
|
265
|
+
timing: {
|
|
266
|
+
t0,
|
|
267
|
+
tWalkStart: t0,
|
|
268
|
+
tWalkEnd: t0,
|
|
269
|
+
tLlmEnd: t0,
|
|
270
|
+
tFtsEnd: t0,
|
|
271
|
+
tEmbedEnd: t0,
|
|
272
|
+
},
|
|
273
|
+
isIncremental,
|
|
274
|
+
builtAtMs,
|
|
275
|
+
hadRemovedSources: false,
|
|
276
|
+
scannedDirs: 0,
|
|
277
|
+
skippedDirs: 0,
|
|
278
|
+
generatedCount: 0,
|
|
279
|
+
walkWarnings: [],
|
|
280
|
+
dirsNeedingLlm: [],
|
|
281
|
+
embeddingResult: null,
|
|
282
|
+
graphExtractionResult: null,
|
|
283
|
+
};
|
|
47
284
|
onProgress({
|
|
48
285
|
phase: "summary",
|
|
49
286
|
message: buildIndexSummaryMessage({
|
|
@@ -51,219 +288,44 @@ export async function akmIndex(options) {
|
|
|
51
288
|
sourcesCount: allSourceDirs.length,
|
|
52
289
|
semanticSearchMode: config.semanticSearchMode,
|
|
53
290
|
embeddingProvider: getEmbeddingProvider(config.embedding),
|
|
54
|
-
llmEnabled:
|
|
291
|
+
llmEnabled: !!resolveIndexPassLLM("enrichment", config),
|
|
55
292
|
vecAvailable: isVecAvailable(db),
|
|
56
293
|
}),
|
|
57
294
|
});
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
295
|
+
// ── Phase sequence ───────────────────────────────────────────────────────
|
|
296
|
+
await runSourceCachePhase(ctx);
|
|
297
|
+
await runWalkPhase(ctx);
|
|
298
|
+
await runEmbeddingPhase(ctx);
|
|
299
|
+
await runFinalizePhase(ctx);
|
|
300
|
+
// ────────────────────────────────────────────────────────────────────────
|
|
301
|
+
const { _verification: verification, _totalEntries: totalEntries } = ctx;
|
|
302
|
+
const { timing } = ctx;
|
|
303
|
+
// ── Clean pass ───────────────────────────────────────────────────────────
|
|
304
|
+
// After the normal index completes, remove entries whose source files no
|
|
305
|
+
// longer exist on disk. Remote entries (empty file_path) are skipped.
|
|
306
|
+
let cleanResult;
|
|
307
|
+
if (clean) {
|
|
308
|
+
cleanResult = runCleanPass(db, dryRun);
|
|
63
309
|
}
|
|
64
|
-
|
|
65
|
-
// Incremental: purge entries from stash dirs that have been removed
|
|
66
|
-
// (e.g. after `akm remove`) so orphaned entries don't linger.
|
|
67
|
-
const prevStashDirsJson = getMeta(db, "stashDirs");
|
|
68
|
-
if (prevStashDirsJson) {
|
|
69
|
-
let prevStashDirs = [];
|
|
70
|
-
try {
|
|
71
|
-
const parsed = JSON.parse(prevStashDirsJson);
|
|
72
|
-
if (Array.isArray(parsed)) {
|
|
73
|
-
prevStashDirs = parsed.filter((d) => typeof d === "string");
|
|
74
|
-
}
|
|
75
|
-
else {
|
|
76
|
-
warn("index_meta stashDirs value is not an array — treating as empty");
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
catch {
|
|
80
|
-
warn("index_meta stashDirs value is corrupt JSON — treating as empty");
|
|
81
|
-
}
|
|
82
|
-
const currentSet = new Set(allSourceDirs);
|
|
83
|
-
for (const dir of prevStashDirs) {
|
|
84
|
-
if (!currentSet.has(dir)) {
|
|
85
|
-
hadRemovedSources = true;
|
|
86
|
-
deleteEntriesByStashDir(db, dir);
|
|
87
|
-
deleteIndexDirStatesByStashDir(db, dir);
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
throwIfAborted(signal);
|
|
93
|
-
// Memory inference pass (#201). Runs before the walk so any derived-memory
|
|
94
|
-
// children that get written are picked up by the walker in this same run
|
|
95
|
-
// and don't have to wait for the next `akm index`. Gated entirely by
|
|
96
|
-
// `resolveIndexPassLLM("memory", config)` — when the user has no
|
|
97
|
-
// `akm.llm` block or has set `index.memory.llm = false`, this is a no-op
|
|
98
|
-
// and existing inferred children are left in place.
|
|
99
|
-
if (enrich) {
|
|
100
|
-
try {
|
|
101
|
-
const inferenceResult = await runMemoryInferencePass(config, allSourceEntries, signal);
|
|
102
|
-
if (inferenceResult.writtenFacts > 0) {
|
|
103
|
-
onProgress({
|
|
104
|
-
phase: "llm",
|
|
105
|
-
message: `Memory inference wrote ${inferenceResult.writtenFacts} derived memor${inferenceResult.writtenFacts === 1 ? "y" : "ies"} from ${inferenceResult.splitParents} parent memor${inferenceResult.splitParents === 1 ? "y" : "ies"}.`,
|
|
106
|
-
});
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
catch (err) {
|
|
110
|
-
warn(`Memory inference pass aborted: ${err instanceof Error ? err.message : String(err)}`);
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
else {
|
|
114
|
-
onProgress({
|
|
115
|
-
phase: "llm",
|
|
116
|
-
message: "LLM passes disabled; rerun with --enrich to enable inference and enrichment.",
|
|
117
|
-
});
|
|
118
|
-
}
|
|
119
|
-
// Graph extraction pass (#207). Runs after memory inference so any
|
|
120
|
-
// atomic-fact children that just got written are visible to the graph
|
|
121
|
-
// walk. Persists `<stashRoot>/.akm/graph.json` — an indexer artifact,
|
|
122
|
-
// NOT a user-visible asset, so it is not routed through
|
|
123
|
-
// writeAssetToSource. The artifact feeds the existing FTS5+boosts
|
|
124
|
-
// pipeline as a single boost component (see graph-boost.ts); there is
|
|
125
|
-
// no parallel scoring track. Disabled when either gate (the locked
|
|
126
|
-
// `llm.features.graph_extraction` feature flag or the per-pass
|
|
127
|
-
// `index.graph.llm` toggle) is off; the existing graph file is
|
|
128
|
-
// preserved on disk in that case.
|
|
129
|
-
if (enrich) {
|
|
130
|
-
try {
|
|
131
|
-
const graphResult = await runGraphExtractionPass(config, allSourceEntries, signal);
|
|
132
|
-
if (graphResult.written) {
|
|
133
|
-
onProgress({
|
|
134
|
-
phase: "llm",
|
|
135
|
-
message: `Graph extraction wrote ${graphResult.totalEntities} entit${graphResult.totalEntities === 1 ? "y" : "ies"} and ${graphResult.totalRelations} relation${graphResult.totalRelations === 1 ? "" : "s"} from ${graphResult.extracted} file${graphResult.extracted === 1 ? "" : "s"}.`,
|
|
136
|
-
});
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
catch (err) {
|
|
140
|
-
warn(`Graph extraction pass aborted: ${err instanceof Error ? err.message : String(err)}`);
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
throwIfAborted(signal);
|
|
144
|
-
const tWalkStart = Date.now();
|
|
145
|
-
// Walk stash dirs and index entries.
|
|
146
|
-
// doFullDelete=true merges the wipe into the same transaction as the
|
|
147
|
-
// inserts so readers never see an empty database mid-rebuild.
|
|
148
|
-
const doFullDelete = options?.full || !isIncremental;
|
|
149
|
-
const { scannedDirs, skippedDirs, generatedCount, dirsNeedingLlm, warnings } = await indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadRemovedSources, doFullDelete, onProgress);
|
|
150
|
-
onProgress({
|
|
151
|
-
phase: "scan",
|
|
152
|
-
message: `Scanned ${scannedDirs} ${scannedDirs === 1 ? "directory" : "directories"} and skipped ${skippedDirs}.`,
|
|
153
|
-
});
|
|
154
|
-
// Workflow validation noise gate (issue #273): per-spec stderr lines from
|
|
155
|
-
// `buildMetadataSkipWarning` are suppressed at default verbosity in
|
|
156
|
-
// `metadata.ts`. Replace them with a single summary line so operators
|
|
157
|
-
// running a cold-start search against a fresh registry-cloned source
|
|
158
|
-
// don't get the impression akm is broken. Verbose mode keeps the
|
|
159
|
-
// per-spec output instead of (not in addition to) the summary.
|
|
160
|
-
if (!isVerbose()) {
|
|
161
|
-
const skippedWorkflowCount = warnings.filter(isWorkflowSkipWarning).length;
|
|
162
|
-
if (skippedWorkflowCount > 0) {
|
|
163
|
-
const noun = skippedWorkflowCount === 1 ? "workflow spec" : "workflow specs";
|
|
164
|
-
warn(`${skippedWorkflowCount} ${noun} skipped due to validation errors; ` +
|
|
165
|
-
"rerun with --verbose (or AKM_VERBOSE=1) to see details.");
|
|
166
|
-
}
|
|
167
|
-
}
|
|
168
|
-
const tWalkEnd = Date.now();
|
|
169
|
-
throwIfAborted(signal);
|
|
170
|
-
// Enhance entries with LLM if configured
|
|
171
|
-
await enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich);
|
|
172
|
-
onProgress({
|
|
173
|
-
phase: "llm",
|
|
174
|
-
message: enrich && resolveIndexPassLLM("enrichment", config)
|
|
175
|
-
? `LLM enhancement reviewed ${dirsNeedingLlm.length} ${dirsNeedingLlm.length === 1 ? "directory" : "directories"}.`
|
|
176
|
-
: "LLM enhancement disabled.",
|
|
177
|
-
});
|
|
178
|
-
const tLlmEnd = Date.now();
|
|
179
|
-
throwIfAborted(signal);
|
|
180
|
-
// Rebuild FTS after all inserts. Use incremental mode when this whole
|
|
181
|
-
// index run is incremental — only entries touched by `upsertEntry`
|
|
182
|
-
// since the last rebuild are re-indexed, instead of re-scanning every
|
|
183
|
-
// row on every `akm index` invocation.
|
|
184
|
-
rebuildFts(db, { incremental: isIncremental });
|
|
185
|
-
onProgress({
|
|
186
|
-
phase: "fts",
|
|
187
|
-
message: isIncremental ? "Rebuilt full-text search index (dirty rows only)." : "Rebuilt full-text search index.",
|
|
188
|
-
});
|
|
189
|
-
const tFtsEnd = Date.now();
|
|
190
|
-
// Re-link detached usage_events to their new entry_ids via entry_ref.
|
|
191
|
-
// entry_ref is "type:name" (e.g., "skill:code-review"), entry_key is "stashDir:type:name".
|
|
192
|
-
// Use substr to extract the "type:name" suffix from entry_key for exact comparison
|
|
193
|
-
// (avoids LIKE which would require escaping % and _ in user-facing names).
|
|
194
|
-
try {
|
|
195
|
-
db.exec(`
|
|
196
|
-
UPDATE usage_events SET entry_id = (
|
|
197
|
-
SELECT e.id FROM entries e
|
|
198
|
-
WHERE substr(e.entry_key, length(e.entry_key) - length(usage_events.entry_ref)) = ':' || usage_events.entry_ref
|
|
199
|
-
LIMIT 1
|
|
200
|
-
)
|
|
201
|
-
WHERE entry_id IS NULL AND entry_ref IS NOT NULL
|
|
202
|
-
`);
|
|
203
|
-
}
|
|
204
|
-
catch {
|
|
205
|
-
/* ignore if table doesn't exist yet */
|
|
206
|
-
}
|
|
207
|
-
// Recompute utility scores from usage_events after FTS rebuild
|
|
208
|
-
recomputeUtilityScores(db);
|
|
209
|
-
// Regenerate each wiki's index.md from its pages' frontmatter. Best-effort
|
|
210
|
-
// — errors are caught inside regenerateAllWikiIndexes and never block the
|
|
211
|
-
// index run. The primary stash is the only target: additional sources
|
|
212
|
-
// are read-only caches, and regenerating their indexes would mutate
|
|
213
|
-
// cache content.
|
|
214
|
-
try {
|
|
215
|
-
const { regenerateAllWikiIndexes } = await import("../wiki/wiki.js");
|
|
216
|
-
regenerateAllWikiIndexes(stashDir);
|
|
217
|
-
}
|
|
218
|
-
catch {
|
|
219
|
-
/* best-effort */
|
|
220
|
-
}
|
|
221
|
-
throwIfAborted(signal);
|
|
222
|
-
// Generate embeddings if semantic search is enabled
|
|
223
|
-
const embeddingResult = await generateEmbeddingsForDb(db, config, onProgress);
|
|
224
|
-
const tEmbedEnd = Date.now();
|
|
225
|
-
// Update metadata
|
|
226
|
-
setMeta(db, "builtAt", new Date().toISOString());
|
|
227
|
-
setMeta(db, "stashDir", stashDir);
|
|
228
|
-
setMeta(db, "stashDirs", JSON.stringify(allSourceDirs));
|
|
229
|
-
setMeta(db, "hasEmbeddings", embeddingResult.success ? "1" : "0");
|
|
230
|
-
const totalEntries = getEntryCount(db);
|
|
231
|
-
// Warn on every index run if using JS fallback with many entries
|
|
232
|
-
warnIfVecMissing(db);
|
|
233
|
-
const tEnd = Date.now();
|
|
234
|
-
const verification = verifyIndexState(db, config, totalEntries, embeddingResult);
|
|
235
|
-
if (config.semanticSearchMode === "off") {
|
|
236
|
-
clearSemanticStatus();
|
|
237
|
-
}
|
|
238
|
-
else {
|
|
239
|
-
writeSemanticStatus({
|
|
240
|
-
status: verification.semanticStatus === "disabled" ? "pending" : verification.semanticStatus,
|
|
241
|
-
...(embeddingResult.reason ? { reason: embeddingResult.reason } : {}),
|
|
242
|
-
...(embeddingResult.message ? { message: embeddingResult.message } : {}),
|
|
243
|
-
providerFingerprint: deriveSemanticProviderFingerprint(config.embedding),
|
|
244
|
-
lastCheckedAt: new Date().toISOString(),
|
|
245
|
-
entryCount: verification.entryCount,
|
|
246
|
-
embeddingCount: verification.embeddingCount,
|
|
247
|
-
});
|
|
248
|
-
}
|
|
249
|
-
onProgress({ phase: "verify", message: verification.message });
|
|
310
|
+
// ────────────────────────────────────────────────────────────────────────
|
|
250
311
|
return {
|
|
251
312
|
stashDir,
|
|
252
313
|
totalEntries,
|
|
253
|
-
generatedMetadata: generatedCount,
|
|
314
|
+
generatedMetadata: ctx.generatedCount,
|
|
254
315
|
indexPath: dbPath,
|
|
255
316
|
mode: isIncremental ? "incremental" : "full",
|
|
256
|
-
directoriesScanned: scannedDirs,
|
|
257
|
-
directoriesSkipped: skippedDirs,
|
|
258
|
-
...(
|
|
317
|
+
directoriesScanned: ctx.scannedDirs,
|
|
318
|
+
directoriesSkipped: ctx.skippedDirs,
|
|
319
|
+
...(ctx.walkWarnings.length > 0 ? { warnings: ctx.walkWarnings } : {}),
|
|
259
320
|
verification,
|
|
260
321
|
timing: {
|
|
261
|
-
totalMs:
|
|
262
|
-
walkMs: tWalkEnd - tWalkStart,
|
|
263
|
-
llmMs: tLlmEnd - tWalkEnd,
|
|
264
|
-
embedMs: tEmbedEnd - tFtsEnd,
|
|
265
|
-
ftsMs: tFtsEnd - tLlmEnd,
|
|
322
|
+
totalMs: Date.now() - timing.t0,
|
|
323
|
+
walkMs: timing.tWalkEnd - timing.tWalkStart,
|
|
324
|
+
llmMs: timing.tLlmEnd - timing.tWalkEnd,
|
|
325
|
+
embedMs: timing.tEmbedEnd - timing.tFtsEnd,
|
|
326
|
+
ftsMs: timing.tFtsEnd - timing.tLlmEnd,
|
|
266
327
|
},
|
|
328
|
+
...(cleanResult !== undefined ? { clean: cleanResult } : {}),
|
|
267
329
|
};
|
|
268
330
|
}
|
|
269
331
|
finally {
|
|
@@ -500,8 +562,10 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
|
|
|
500
562
|
if (stash) {
|
|
501
563
|
for (const entry of stash.entries) {
|
|
502
564
|
const entryPath = entry.filename ? path.join(dirPath, entry.filename) : null;
|
|
503
|
-
if (!entryPath)
|
|
504
|
-
|
|
565
|
+
if (!entryPath) {
|
|
566
|
+
warn(`Skipping entry with no resolvable path in ${dirPath}`);
|
|
567
|
+
continue;
|
|
568
|
+
}
|
|
505
569
|
if (!shouldIndexStashFile(currentStashDir, entryPath))
|
|
506
570
|
continue;
|
|
507
571
|
// Skip if a higher-priority stash root already indexed this asset
|
|
@@ -523,7 +587,9 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
|
|
|
523
587
|
}
|
|
524
588
|
}
|
|
525
589
|
}
|
|
526
|
-
// Collect dirs needing LLM enhancement during the first walk
|
|
590
|
+
// Collect dirs needing LLM enhancement during the first walk.
|
|
591
|
+
// Only dirs with "generated" entries need enrichment (unless reEnrich
|
|
592
|
+
// forces re-processing of already-enriched entries).
|
|
527
593
|
if (stash.entries.some((e) => e.quality === "generated")) {
|
|
528
594
|
dirsNeedingLlm.push({ dirPath, files, currentStashDir, stash });
|
|
529
595
|
}
|
|
@@ -541,7 +607,20 @@ async function indexEntries(db, allSourceEntries, isIncremental, builtAtMs, hadR
|
|
|
541
607
|
reason: persistedReason,
|
|
542
608
|
});
|
|
543
609
|
if (persistedRows === 0) {
|
|
544
|
-
|
|
610
|
+
// Warn only when the dir had files that *could* produce entries (.md or
|
|
611
|
+
// known script extensions). Dirs with only non-indexable types (.json,
|
|
612
|
+
// .yaml, .conf, .env, .gitkeep) or deduped-only rows are expected and
|
|
613
|
+
// not actionable at normal log level.
|
|
614
|
+
const hasIndexableExtension = files.some((f) => {
|
|
615
|
+
const ext = path.extname(f).toLowerCase();
|
|
616
|
+
return ext === ".md" || SCRIPT_EXTENSIONS.has(ext);
|
|
617
|
+
});
|
|
618
|
+
if (persistedReason !== "deduped-zero-row" && hasIndexableExtension) {
|
|
619
|
+
warn(`[index] zero-row ${dirPath}: ${persistedReason}`);
|
|
620
|
+
}
|
|
621
|
+
else {
|
|
622
|
+
warnVerbose(`[index] zero-row ${dirPath}: ${persistedReason}`);
|
|
623
|
+
}
|
|
545
624
|
}
|
|
546
625
|
}
|
|
547
626
|
});
|
|
@@ -640,9 +719,7 @@ function inferZeroRowReason(stash, priorReason, warnings, dirPath, dedupedRows)
|
|
|
640
719
|
return "empty-generated-set";
|
|
641
720
|
return `zero-row:${priorReason?.kind ?? "unknown"}`;
|
|
642
721
|
}
|
|
643
|
-
async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal,
|
|
644
|
-
if (!enrich)
|
|
645
|
-
return;
|
|
722
|
+
async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, onProgress, signal, _enrich = false, reEnrich = false) {
|
|
646
723
|
// Resolve per-pass LLM config via the unified shim. Returns undefined when
|
|
647
724
|
// either no `akm.llm` is configured or the user opted this pass out via
|
|
648
725
|
// `index.enrichment.llm = false`. (#208)
|
|
@@ -653,24 +730,142 @@ async function enhanceDirsWithLlm(db, config, dirsNeedingLlm, signal, enrich = f
|
|
|
653
730
|
// as a single visible warning instead of silently degrading every entry
|
|
654
731
|
// and leaving the user wondering why nothing got enhanced.
|
|
655
732
|
const summary = { attempted: 0, succeeded: 0, failureSamples: [] };
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
733
|
+
let completedDirs = 0;
|
|
734
|
+
let completedEntries = 0;
|
|
735
|
+
const totalDirs = dirsNeedingLlm.length;
|
|
736
|
+
const totalEntries = dirsNeedingLlm.reduce((sum, { stash }) => {
|
|
737
|
+
const entriesToEnhance = stash.entries.filter((e) => {
|
|
738
|
+
if (e.quality !== "generated" && !(reEnrich && e.quality === "enriched"))
|
|
739
|
+
return false;
|
|
740
|
+
if (!reEnrich && isEnrichmentComplete(e))
|
|
741
|
+
return false;
|
|
742
|
+
return true;
|
|
743
|
+
});
|
|
744
|
+
return sum + entriesToEnhance.length;
|
|
745
|
+
}, 0);
|
|
746
|
+
// P3 — wall-clock budget for the enrichment pass. Defaults to llm.timeoutMs
|
|
747
|
+
// (or 10 minutes if not set). Users can extend this via llm.timeoutMs in
|
|
748
|
+
// config — no separate knob needed.
|
|
749
|
+
const budgetMs = (llmConfig.timeoutMs ?? 10 * 60 * 1000) * Math.max(totalEntries, 1);
|
|
750
|
+
const enrichDeadline = AbortSignal.timeout(budgetMs);
|
|
751
|
+
let deadlineHit = false;
|
|
752
|
+
const enrichSignal = (() => {
|
|
753
|
+
if (!signal)
|
|
754
|
+
return enrichDeadline;
|
|
755
|
+
// Combine: abort when either fires.
|
|
756
|
+
const controller = new AbortController();
|
|
757
|
+
const onAbort = () => controller.abort();
|
|
758
|
+
signal.addEventListener("abort", onAbort, { once: true });
|
|
759
|
+
enrichDeadline.addEventListener("abort", () => {
|
|
760
|
+
deadlineHit = true;
|
|
761
|
+
controller.abort();
|
|
762
|
+
}, { once: true });
|
|
763
|
+
return controller.signal;
|
|
764
|
+
})();
|
|
765
|
+
if (totalEntries > 0) {
|
|
766
|
+
onProgress?.({
|
|
767
|
+
phase: "llm",
|
|
768
|
+
message: `LLM enhancement starting for ${totalEntries} entr${totalEntries === 1 ? "y" : "ies"} ` +
|
|
769
|
+
`across ${totalDirs} director${totalDirs === 1 ? "y" : "ies"} (concurrency ${getDefaultLlmConcurrency(llmConfig)}).`,
|
|
770
|
+
processed: 0,
|
|
771
|
+
total: totalEntries,
|
|
772
|
+
});
|
|
773
|
+
}
|
|
774
|
+
let currentDirLabel;
|
|
775
|
+
let lastProgressAt = Date.now();
|
|
776
|
+
let heartbeatTimer;
|
|
777
|
+
if (totalEntries > 0 && onProgress) {
|
|
778
|
+
heartbeatTimer = setInterval(() => {
|
|
779
|
+
if (Date.now() - lastProgressAt < 15000)
|
|
780
|
+
return;
|
|
781
|
+
onProgress({
|
|
782
|
+
phase: "llm",
|
|
783
|
+
message: `Still enriching ${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}` +
|
|
784
|
+
(currentDirLabel ? `; waiting on ${currentDirLabel}` : "") +
|
|
785
|
+
".",
|
|
786
|
+
processed: completedEntries,
|
|
787
|
+
total: totalEntries,
|
|
788
|
+
});
|
|
789
|
+
lastProgressAt = Date.now();
|
|
790
|
+
}, 15000);
|
|
791
|
+
}
|
|
792
|
+
try {
|
|
793
|
+
await concurrentMap(dirsNeedingLlm, async ({ dirPath, files, currentStashDir, stash: originalStash }) => {
|
|
794
|
+
if (enrichSignal.aborted)
|
|
795
|
+
return undefined;
|
|
796
|
+
// Only enhance generated entries (or all when reEnrich=true);
|
|
797
|
+
// user-provided overrides should not be overwritten.
|
|
798
|
+
// Skip entries that are already fully enriched (description + tags + searchHints)
|
|
799
|
+
// unless the caller explicitly requests re-enrichment via reEnrich=true.
|
|
800
|
+
const entriesToEnhance = originalStash.entries.filter((e) => {
|
|
801
|
+
if (e.quality !== "generated" && !(reEnrich && e.quality === "enriched"))
|
|
802
|
+
return false;
|
|
803
|
+
if (!reEnrich && isEnrichmentComplete(e)) {
|
|
804
|
+
warnVerbose(`[akm] skipping LLM enrichment for "${e.name}" — entry already complete`);
|
|
805
|
+
return false;
|
|
806
|
+
}
|
|
807
|
+
return true;
|
|
808
|
+
});
|
|
809
|
+
if (entriesToEnhance.length === 0)
|
|
810
|
+
return undefined;
|
|
811
|
+
currentDirLabel = path.relative(currentStashDir, dirPath) || ".";
|
|
812
|
+
onProgress?.({
|
|
813
|
+
phase: "llm",
|
|
814
|
+
message: `Enhancing ${currentDirLabel} ` +
|
|
815
|
+
`(${entriesToEnhance.length} entr${entriesToEnhance.length === 1 ? "y" : "ies"}).`,
|
|
816
|
+
processed: completedEntries,
|
|
817
|
+
total: totalEntries,
|
|
818
|
+
});
|
|
819
|
+
lastProgressAt = Date.now();
|
|
820
|
+
const targetStash = { entries: entriesToEnhance };
|
|
821
|
+
const entryKeys = entriesToEnhance.map((e) => `${currentStashDir}:${e.type}:${e.name}`);
|
|
822
|
+
const enhanced = await enhanceStashWithLlm(llmConfig, targetStash, files, summary, enrichSignal, db, entryKeys, reEnrich, config, (event) => {
|
|
823
|
+
completedEntries++;
|
|
824
|
+
lastProgressAt = Date.now();
|
|
825
|
+
onProgress?.({
|
|
826
|
+
phase: "llm",
|
|
827
|
+
message: `Enhanced ${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"}; ` +
|
|
828
|
+
`${completedDirs}/${totalDirs} director${totalDirs === 1 ? "y" : "ies"} complete` +
|
|
829
|
+
(event.entryName ? `; current ${event.entryName}` : "") +
|
|
830
|
+
(currentDirLabel ? ` in ${currentDirLabel}` : "") +
|
|
831
|
+
(event.outcome === "cache-hit" ? " (cache hit)" : ""),
|
|
832
|
+
processed: completedEntries,
|
|
833
|
+
total: totalEntries,
|
|
834
|
+
});
|
|
835
|
+
});
|
|
836
|
+
// Re-upsert the enhanced entries in a single transaction so a crash
|
|
837
|
+
// cannot leave half the entries updated and the rest stale.
|
|
838
|
+
db.transaction(() => {
|
|
839
|
+
for (const entry of enhanced.entries) {
|
|
840
|
+
const entryPath = entry.filename ? path.join(dirPath, entry.filename) : files[0] || dirPath;
|
|
841
|
+
const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
|
|
842
|
+
const searchText = buildSearchText(entry);
|
|
843
|
+
upsertEntry(db, entryKey, dirPath, entryPath, currentStashDir, attachFileSize(entry, entryPath), searchText);
|
|
844
|
+
}
|
|
845
|
+
})();
|
|
846
|
+
completedDirs++;
|
|
847
|
+
lastProgressAt = Date.now();
|
|
848
|
+
onProgress?.({
|
|
849
|
+
phase: "llm",
|
|
850
|
+
message: `Completed ${completedDirs}/${totalDirs} director${totalDirs === 1 ? "y" : "ies"}; ` +
|
|
851
|
+
`${completedEntries}/${totalEntries} entr${totalEntries === 1 ? "y" : "ies"} processed.`,
|
|
852
|
+
processed: completedEntries,
|
|
853
|
+
total: totalEntries,
|
|
854
|
+
});
|
|
855
|
+
return undefined;
|
|
856
|
+
},
|
|
857
|
+
// Default concurrency of 4 works well for cloud LLM APIs. Local model
|
|
858
|
+
// servers (LM Studio, Ollama) run one inference at a time — set
|
|
859
|
+
// `llm.concurrency: 1` in config.json to avoid "Model reloaded" / 500
|
|
860
|
+
// errors from concurrent request overload.
|
|
861
|
+
getDefaultLlmConcurrency(llmConfig));
|
|
862
|
+
}
|
|
863
|
+
finally {
|
|
864
|
+
if (heartbeatTimer)
|
|
865
|
+
clearInterval(heartbeatTimer);
|
|
866
|
+
}
|
|
867
|
+
if (deadlineHit) {
|
|
868
|
+
warn("[akm] LLM enrichment budget exceeded. Re-run `akm index` to continue. Increase llm.timeoutMs for a larger budget.");
|
|
674
869
|
}
|
|
675
870
|
if (summary.attempted > 0 && summary.succeeded === 0) {
|
|
676
871
|
const sample = summary.failureSamples.length ? ` Example: ${summary.failureSamples[0]}` : "";
|
|
@@ -742,14 +937,24 @@ async function generateEmbeddingsForDb(db, config, onProgress, signal) {
|
|
|
742
937
|
throwIfAborted(signal);
|
|
743
938
|
// Wrap all embedding upserts in a single transaction so partial
|
|
744
939
|
// state is rolled back on failure rather than leaving the table half-filled.
|
|
940
|
+
let storedCount = 0;
|
|
941
|
+
let skippedCount = 0;
|
|
745
942
|
db.transaction(() => {
|
|
746
943
|
for (let i = 0; i < allEntries.length; i++) {
|
|
747
|
-
upsertEmbedding(db, allEntries[i].id, embeddings[i])
|
|
944
|
+
if (upsertEmbedding(db, allEntries[i].id, embeddings[i])) {
|
|
945
|
+
storedCount++;
|
|
946
|
+
}
|
|
947
|
+
else {
|
|
948
|
+
skippedCount++;
|
|
949
|
+
}
|
|
748
950
|
}
|
|
749
951
|
})();
|
|
952
|
+
if (skippedCount > 0) {
|
|
953
|
+
warn(`[embed] ${skippedCount} embedding${skippedCount === 1 ? "" : "s"} skipped (entry deleted between queue and write)`);
|
|
954
|
+
}
|
|
750
955
|
onProgress({
|
|
751
956
|
phase: "embeddings",
|
|
752
|
-
message: `Stored ${
|
|
957
|
+
message: `Stored ${storedCount} embedding${storedCount === 1 ? "" : "s"}.`,
|
|
753
958
|
});
|
|
754
959
|
setMeta(db, "embeddingFingerprint", currentFingerprint);
|
|
755
960
|
return { success: true };
|
|
@@ -769,14 +974,6 @@ async function generateEmbeddingsForDb(db, config, onProgress, signal) {
|
|
|
769
974
|
}
|
|
770
975
|
}
|
|
771
976
|
// ── Helpers ─────────────────────────────────────────────────────────────────
|
|
772
|
-
function getAllEntriesForEmbedding(db) {
|
|
773
|
-
return db
|
|
774
|
-
.prepare(`
|
|
775
|
-
SELECT e.id, e.search_text AS searchText, e.entry_key AS entryKey, e.file_path AS filePath FROM entries e
|
|
776
|
-
WHERE NOT EXISTS (SELECT 1 FROM embeddings b WHERE b.id = e.id)
|
|
777
|
-
`)
|
|
778
|
-
.all();
|
|
779
|
-
}
|
|
780
977
|
function attachFileSize(entry, entryPath) {
|
|
781
978
|
try {
|
|
782
979
|
return { ...entry, fileSize: fs.statSync(entryPath).size };
|
|
@@ -785,28 +982,6 @@ function attachFileSize(entry, entryPath) {
|
|
|
785
982
|
return entry;
|
|
786
983
|
}
|
|
787
984
|
}
|
|
788
|
-
function upsertWorkflowDocument(db, entryId, doc, content) {
|
|
789
|
-
const sourceHash = computeSourceHash(content);
|
|
790
|
-
db.prepare(`INSERT INTO workflow_documents (entry_id, schema_version, document_json, source_path, source_hash, updated_at)
|
|
791
|
-
VALUES (?, ?, ?, ?, ?, ?)
|
|
792
|
-
ON CONFLICT(entry_id) DO UPDATE SET
|
|
793
|
-
schema_version = excluded.schema_version,
|
|
794
|
-
document_json = excluded.document_json,
|
|
795
|
-
source_path = excluded.source_path,
|
|
796
|
-
source_hash = excluded.source_hash,
|
|
797
|
-
updated_at = excluded.updated_at`).run(entryId, doc.schemaVersion, JSON.stringify(doc), doc.source.path, sourceHash, new Date().toISOString());
|
|
798
|
-
}
|
|
799
|
-
function computeSourceHash(content) {
|
|
800
|
-
// Cheap, stable identity for the source markdown — used by future
|
|
801
|
-
// incremental fast-paths that skip re-validation when content is unchanged.
|
|
802
|
-
// Not security-sensitive; FNV-1a over the bytes is sufficient.
|
|
803
|
-
let hash = 0x811c9dc5;
|
|
804
|
-
for (let i = 0; i < content.length; i++) {
|
|
805
|
-
hash ^= content[i];
|
|
806
|
-
hash = Math.imul(hash, 0x01000193);
|
|
807
|
-
}
|
|
808
|
-
return (hash >>> 0).toString(16);
|
|
809
|
-
}
|
|
810
985
|
function buildIndexSummaryMessage(options) {
|
|
811
986
|
const stashSourceLabel = options.sourcesCount === 1 ? "stash source" : "stash sources";
|
|
812
987
|
const semanticDetail = getSemanticSearchLabel(options.semanticSearchMode, options.embeddingProvider, options.vecAvailable);
|
|
@@ -899,11 +1074,12 @@ function resolveIndexedFiles(dirPath, files, stash) {
|
|
|
899
1074
|
}
|
|
900
1075
|
return resolved.size > 0 ? [...resolved] : files;
|
|
901
1076
|
}
|
|
902
|
-
async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
|
|
1077
|
+
async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal, db, entryKeys, reEnrich, akmConfig, onEntryDone) {
|
|
903
1078
|
const { enhanceMetadata } = await import("../llm/metadata-enhance");
|
|
904
|
-
const
|
|
905
|
-
|
|
906
|
-
|
|
1079
|
+
const { computeBodyHash, getLlmCacheEntry, upsertLlmCacheEntry } = await import("./db.js");
|
|
1080
|
+
const results = await concurrentMap(stash.entries, async (entry, idx) => {
|
|
1081
|
+
if (signal?.aborted)
|
|
1082
|
+
return entry;
|
|
907
1083
|
summary.attempted++;
|
|
908
1084
|
try {
|
|
909
1085
|
const entryFile = entry.filename
|
|
@@ -915,10 +1091,38 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
|
|
|
915
1091
|
fileContent = fs.readFileSync(entryFile, "utf8");
|
|
916
1092
|
}
|
|
917
1093
|
catch {
|
|
918
|
-
|
|
1094
|
+
warn(`Could not read file for LLM enrichment: ${entry.filename ?? entry.name}`);
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
// Incremental cache: skip LLM call when file body is unchanged and
|
|
1098
|
+
// --re-enrich was not requested. The cache key is the entry_key
|
|
1099
|
+
// (stashDir:type:name) which is stable across index runs.
|
|
1100
|
+
const cacheBody = fileContent ?? `${entry.name}\n${entry.description ?? ""}`;
|
|
1101
|
+
const bodyHash = computeBodyHash(cacheBody);
|
|
1102
|
+
const cacheKey = entryKeys?.[idx] ?? `${entry.type}:${entry.name}`;
|
|
1103
|
+
if (db && !reEnrich) {
|
|
1104
|
+
const cached = getLlmCacheEntry(db, cacheKey, bodyHash);
|
|
1105
|
+
if (cached) {
|
|
1106
|
+
try {
|
|
1107
|
+
const parsed = JSON.parse(cached.resultJson);
|
|
1108
|
+
const updated = { ...entry };
|
|
1109
|
+
if (parsed.description)
|
|
1110
|
+
updated.description = parsed.description;
|
|
1111
|
+
if (parsed.searchHints?.length)
|
|
1112
|
+
updated.searchHints = parsed.searchHints;
|
|
1113
|
+
if (parsed.tags?.length)
|
|
1114
|
+
updated.tags = parsed.tags;
|
|
1115
|
+
updated.quality = "enriched";
|
|
1116
|
+
summary.succeeded++;
|
|
1117
|
+
onEntryDone?.({ entryName: entry.name, outcome: "cache-hit" });
|
|
1118
|
+
return updated;
|
|
1119
|
+
}
|
|
1120
|
+
catch {
|
|
1121
|
+
warn(`LLM enrichment cache entry corrupt for ${entry.name}; re-running enrichment`);
|
|
1122
|
+
}
|
|
919
1123
|
}
|
|
920
1124
|
}
|
|
921
|
-
const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal);
|
|
1125
|
+
const improvements = await enhanceMetadata(llmConfig, entry, fileContent, signal, akmConfig);
|
|
922
1126
|
const updated = { ...entry };
|
|
923
1127
|
if (improvements.description)
|
|
924
1128
|
updated.description = improvements.description;
|
|
@@ -926,19 +1130,39 @@ async function enhanceStashWithLlm(llmConfig, stash, files, summary, signal) {
|
|
|
926
1130
|
updated.searchHints = improvements.searchHints;
|
|
927
1131
|
if (improvements.tags?.length)
|
|
928
1132
|
updated.tags = improvements.tags;
|
|
929
|
-
|
|
1133
|
+
// Mark as enriched so subsequent index runs skip re-enrichment (P2)
|
|
1134
|
+
updated.quality = "enriched";
|
|
1135
|
+
// Persist to cache so the next run can skip the LLM call when the
|
|
1136
|
+
// file body has not changed.
|
|
1137
|
+
if (db) {
|
|
1138
|
+
upsertLlmCacheEntry(db, cacheKey, bodyHash, JSON.stringify({
|
|
1139
|
+
description: improvements.description,
|
|
1140
|
+
searchHints: improvements.searchHints,
|
|
1141
|
+
tags: improvements.tags,
|
|
1142
|
+
}));
|
|
1143
|
+
}
|
|
930
1144
|
summary.succeeded++;
|
|
1145
|
+
onEntryDone?.({ entryName: entry.name, outcome: "llm" });
|
|
1146
|
+
return updated;
|
|
931
1147
|
}
|
|
932
1148
|
catch (err) {
|
|
933
|
-
enhanced.push(entry);
|
|
934
1149
|
const msg = toErrorMessage(err);
|
|
935
1150
|
// failureSamples is bounded to 3 items, so a linear scan is cheaper
|
|
936
1151
|
// than maintaining a parallel Set for membership checks (#177 review).
|
|
937
1152
|
if (summary.failureSamples.length < 3 && !summary.failureSamples.includes(msg)) {
|
|
938
1153
|
summary.failureSamples.push(msg);
|
|
939
1154
|
}
|
|
1155
|
+
onEntryDone?.({ entryName: entry.name, outcome: "failed" });
|
|
1156
|
+
return entry;
|
|
940
1157
|
}
|
|
941
|
-
}
|
|
1158
|
+
},
|
|
1159
|
+
// Default concurrency of 4 works well for cloud LLM APIs. Set
|
|
1160
|
+
// `llm.concurrency: 1` in config.json for local model servers.
|
|
1161
|
+
getDefaultLlmConcurrency(llmConfig));
|
|
1162
|
+
// concurrentMap returns Array<T | undefined>; filter out undefined slots
|
|
1163
|
+
// (which can only occur if the callback itself returned undefined, which
|
|
1164
|
+
// it never does above — but TypeScript needs the filter for type safety).
|
|
1165
|
+
const enhanced = results.map((r, i) => r ?? stash.entries[i]);
|
|
942
1166
|
return { entries: enhanced };
|
|
943
1167
|
}
|
|
944
1168
|
/**
|
|
@@ -1018,13 +1242,13 @@ export async function lookup(ref) {
|
|
|
1018
1242
|
const dbPath = getDbPath();
|
|
1019
1243
|
const db = openExistingDatabase(dbPath);
|
|
1020
1244
|
try {
|
|
1021
|
-
// entry_key shape: `${stashDir}:${type}:${name}`. Suffix-match on
|
|
1022
|
-
// `:type:name` so we can scope by source dir as a prefix when origin is
|
|
1023
|
-
// supplied. Use parameterised queries throughout — names may include
|
|
1024
|
-
// user-supplied glob characters.
|
|
1025
1245
|
const escapeLike = (value) => value.replace(/\\/g, "\\\\").replace(/%/g, "\\%").replace(/_/g, "\\_");
|
|
1026
|
-
|
|
1027
|
-
|
|
1246
|
+
// Canonical names strip .md for markdown assets, but users often pass
|
|
1247
|
+
// refs with .md (e.g. command:release.md). Normalize by trying both.
|
|
1248
|
+
const nameVariants = [ref.name];
|
|
1249
|
+
if (ref.name.endsWith(".md")) {
|
|
1250
|
+
nameVariants.push(ref.name.slice(0, -3));
|
|
1251
|
+
}
|
|
1028
1252
|
const candidateDirs = (() => {
|
|
1029
1253
|
if (!ref.origin)
|
|
1030
1254
|
return sources.map((s) => s.path);
|
|
@@ -1035,20 +1259,24 @@ export async function lookup(ref) {
|
|
|
1035
1259
|
})();
|
|
1036
1260
|
if (candidateDirs.length === 0)
|
|
1037
1261
|
return null;
|
|
1038
|
-
for (const
|
|
1039
|
-
const
|
|
1040
|
-
const
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1262
|
+
for (const name of nameVariants) {
|
|
1263
|
+
const suffix = `:${ref.type}:${name}`;
|
|
1264
|
+
const escapedSuffix = escapeLike(suffix);
|
|
1265
|
+
for (const dir of candidateDirs) {
|
|
1266
|
+
const escapedDir = escapeLike(dir);
|
|
1267
|
+
const row = db
|
|
1268
|
+
.prepare("SELECT entry_key AS entryKey, file_path AS filePath, stash_dir AS stashDir, entry_type AS type FROM entries " +
|
|
1269
|
+
"WHERE entry_key LIKE ? ESCAPE '\\' AND entry_type = ? LIMIT 1")
|
|
1270
|
+
.get(`${escapedDir}${escapedSuffix}`, ref.type);
|
|
1271
|
+
if (row) {
|
|
1272
|
+
return {
|
|
1273
|
+
entryKey: row.entryKey,
|
|
1274
|
+
filePath: row.filePath,
|
|
1275
|
+
stashDir: row.stashDir,
|
|
1276
|
+
type: row.type,
|
|
1277
|
+
name: ref.name,
|
|
1278
|
+
};
|
|
1279
|
+
}
|
|
1052
1280
|
}
|
|
1053
1281
|
}
|
|
1054
1282
|
return null;
|
|
@@ -1093,18 +1321,25 @@ export function recomputeUtilityScores(db) {
|
|
|
1093
1321
|
const emaDecay = EMA_DECAY ** elapsedDays;
|
|
1094
1322
|
const emaNew = 1 - emaDecay; // complement so weights still sum to 1
|
|
1095
1323
|
// Single aggregate query instead of N+1 per-entry queries.
|
|
1096
|
-
// Only processes entries that actually have usage events
|
|
1324
|
+
// Only processes entries that actually have usage events AND still exist
|
|
1325
|
+
// in `entries`. The latter check is critical: usage_events has no FK to
|
|
1326
|
+
// entries, so its entry_id can become stale (entry deleted, re-keyed,
|
|
1327
|
+
// moved between sources). Without the JOIN, writing the derived row to
|
|
1328
|
+
// utility_scores (which DOES have an FK) raises "FOREIGN KEY constraint
|
|
1329
|
+
// failed" and rolls back the whole finalize transaction — failing every
|
|
1330
|
+
// index run.
|
|
1097
1331
|
const usageRows = db
|
|
1098
1332
|
.prepare(`
|
|
1099
|
-
SELECT entry_id,
|
|
1100
|
-
SUM(CASE WHEN event_type = 'search' THEN 1 ELSE 0 END) AS search_count,
|
|
1101
|
-
SUM(CASE WHEN event_type = 'show' THEN 1 ELSE 0 END) AS show_count,
|
|
1102
|
-
SUM(CASE WHEN event_type = 'feedback' AND signal = 'positive' THEN 1 ELSE 0 END) AS positive_feedback_count,
|
|
1103
|
-
SUM(CASE WHEN event_type = 'feedback' AND signal = 'negative' THEN 1 ELSE 0 END) AS negative_feedback_count,
|
|
1104
|
-
MAX(created_at) AS last_used_at
|
|
1105
|
-
FROM usage_events
|
|
1106
|
-
|
|
1107
|
-
|
|
1333
|
+
SELECT u.entry_id,
|
|
1334
|
+
SUM(CASE WHEN u.event_type = 'search' THEN 1 ELSE 0 END) AS search_count,
|
|
1335
|
+
SUM(CASE WHEN u.event_type = 'show' THEN 1 ELSE 0 END) AS show_count,
|
|
1336
|
+
SUM(CASE WHEN u.event_type = 'feedback' AND u.signal = 'positive' THEN 1 ELSE 0 END) AS positive_feedback_count,
|
|
1337
|
+
SUM(CASE WHEN u.event_type = 'feedback' AND u.signal = 'negative' THEN 1 ELSE 0 END) AS negative_feedback_count,
|
|
1338
|
+
MAX(u.created_at) AS last_used_at
|
|
1339
|
+
FROM usage_events u
|
|
1340
|
+
JOIN entries e ON e.id = u.entry_id
|
|
1341
|
+
WHERE u.entry_id IS NOT NULL
|
|
1342
|
+
GROUP BY u.entry_id
|
|
1108
1343
|
`)
|
|
1109
1344
|
.all();
|
|
1110
1345
|
if (usageRows.length === 0) {
|
|
@@ -1113,23 +1348,26 @@ export function recomputeUtilityScores(db) {
|
|
|
1113
1348
|
}
|
|
1114
1349
|
// Batch-load existing utility scores
|
|
1115
1350
|
const existingScores = new Map();
|
|
1116
|
-
const scoreRows = db.prepare("SELECT entry_id, utility FROM utility_scores").all();
|
|
1351
|
+
const scoreRows = db.prepare("SELECT entry_id, utility, last_used_at FROM utility_scores").all();
|
|
1117
1352
|
for (const row of scoreRows) {
|
|
1118
|
-
existingScores.set(row.entry_id, row.utility);
|
|
1353
|
+
existingScores.set(row.entry_id, { utility: row.utility, lastUsedAt: row.last_used_at ?? undefined });
|
|
1119
1354
|
}
|
|
1355
|
+
const now = new Date().toISOString();
|
|
1120
1356
|
for (const row of usageRows) {
|
|
1121
1357
|
const selectRate = row.search_count > 0 ? Math.min(1, row.show_count / row.search_count) : 0;
|
|
1122
1358
|
const feedbackTotal = row.positive_feedback_count + row.negative_feedback_count;
|
|
1123
1359
|
const feedbackRate = feedbackTotal > 0 ? Math.max(0, row.positive_feedback_count - row.negative_feedback_count) / feedbackTotal : 0;
|
|
1124
1360
|
const effectiveRate = Math.max(selectRate, feedbackRate);
|
|
1125
|
-
const
|
|
1361
|
+
const existing = existingScores.get(row.entry_id);
|
|
1362
|
+
const prevUtility = existing?.utility ?? 0;
|
|
1126
1363
|
const utility = prevUtility * emaDecay + effectiveRate * emaNew;
|
|
1364
|
+
const lastUsedAt = effectiveRate > 0.5 ? now : (existing?.lastUsedAt ?? undefined);
|
|
1127
1365
|
upsertUtilityScore(db, row.entry_id, {
|
|
1128
1366
|
utility,
|
|
1129
1367
|
showCount: row.show_count,
|
|
1130
1368
|
searchCount: row.search_count,
|
|
1131
1369
|
selectRate,
|
|
1132
|
-
lastUsedAt
|
|
1370
|
+
lastUsedAt,
|
|
1133
1371
|
});
|
|
1134
1372
|
}
|
|
1135
1373
|
setMeta(db, "last_utility_computed_at", new Date().toISOString());
|