akm-cli 0.8.7 → 0.8.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +428 -0
- package/dist/assets/help/help-proposals.md +1 -2
- package/dist/assets/hints/cli-hints-full.md +34 -19
- package/dist/assets/hints/cli-hints-short.md +1 -1
- package/dist/assets/profiles/catchup.json +13 -0
- package/dist/assets/profiles/consolidate.json +13 -0
- package/dist/assets/profiles/frequent.json +13 -0
- package/dist/assets/tasks/core/backup.yml +4 -0
- package/dist/assets/tasks/core/extract.yml +4 -0
- package/dist/assets/tasks/core/improve.yml +4 -0
- package/dist/assets/tasks/core/index-refresh.yml +4 -0
- package/dist/assets/tasks/core/sync.yml +4 -0
- package/dist/assets/tasks/core/update-stashes.yml +4 -0
- package/dist/assets/tasks/core/version-check.yml +4 -0
- package/dist/assets/templates/html/default.html +78 -0
- package/dist/assets/templates/html/health.html +560 -0
- package/dist/assets/templates/html/vendor/echarts.min.js +45 -0
- package/dist/cli/config-migrate.js +6 -6
- package/dist/cli/config-validate.js +4 -4
- package/dist/cli/confirm.js +3 -3
- package/dist/cli/parse-args.js +1 -1
- package/dist/cli/shared.js +72 -19
- package/dist/cli-node.mjs +26 -0
- package/dist/cli.js +206 -3866
- package/dist/commands/{agent-dispatch.js → agent/agent-dispatch.js} +6 -6
- package/dist/commands/{agent-support.js → agent/agent-support.js} +2 -2
- package/dist/commands/agent/contribute-cli.js +200 -0
- package/dist/commands/completions.js +1 -1
- package/dist/commands/config-cli.js +230 -3
- package/dist/commands/db-cli.js +2 -2
- package/dist/commands/env/env-cli.js +529 -0
- package/dist/commands/env/env.js +410 -0
- package/dist/commands/env/secret-cli.js +259 -0
- package/dist/commands/{secret.js → env/secret.js} +6 -47
- package/dist/commands/events.js +4 -4
- package/dist/commands/feedback-cli.js +18 -34
- package/dist/commands/graph/graph-cli.js +132 -0
- package/dist/commands/{graph.js → graph/graph.js} +22 -16
- package/dist/commands/health/checks.js +279 -0
- package/dist/commands/health/html-report.js +448 -0
- package/dist/commands/health.js +189 -266
- package/dist/commands/{consolidate.js → improve/consolidate.js} +48 -36
- package/dist/commands/{distill-promotion-policy.js → improve/distill-promotion-policy.js} +3 -3
- package/dist/commands/{distill.js → improve/distill.js} +39 -18
- package/dist/commands/{eval-cases.js → improve/eval-cases.js} +1 -1
- package/dist/commands/{extract-cli.js → improve/extract-cli.js} +4 -4
- package/dist/commands/{extract-prompt.js → improve/extract-prompt.js} +2 -2
- package/dist/commands/{extract.js → improve/extract.js} +221 -26
- package/dist/commands/{improve-auto-accept.js → improve/improve-auto-accept.js} +30 -4
- package/dist/commands/{improve-cli.js → improve/improve-cli.js} +44 -22
- package/dist/commands/{improve-profiles.js → improve/improve-profiles.js} +13 -7
- package/dist/commands/{improve-result-file.js → improve/improve-result-file.js} +1 -1
- package/dist/commands/{improve.js → improve/improve.js} +672 -292
- package/dist/{core → commands/improve/memory}/memory-belief.js +2 -2
- package/dist/{core → commands/improve/memory}/memory-contradiction-detect.js +5 -5
- package/dist/{core → commands/improve/memory}/memory-improve.js +4 -4
- package/dist/commands/improve/reflect-noise.js +0 -0
- package/dist/commands/{reflect.js → improve/reflect.js} +58 -28
- package/dist/commands/improve/session-asset.js +248 -0
- package/dist/commands/lint/agent-linter.js +1 -1
- package/dist/commands/lint/base-linter.js +55 -37
- package/dist/commands/lint/command-linter.js +1 -1
- package/dist/commands/lint/default-linter.js +1 -1
- package/dist/commands/lint/env-key-rules.js +1 -1
- package/dist/commands/lint/index.js +19 -25
- package/dist/commands/lint/knowledge-linter.js +1 -1
- package/dist/commands/lint/memory-linter.js +1 -1
- package/dist/commands/lint/registry.js +8 -8
- package/dist/commands/lint/skill-linter.js +1 -1
- package/dist/commands/lint/task-linter.js +1 -1
- package/dist/commands/lint/workflow-linter.js +1 -1
- package/dist/commands/lint.js +1 -1
- package/dist/commands/observability-cli.js +244 -0
- package/dist/commands/proposal/drain-policies.js +3 -3
- package/dist/commands/proposal/drain.js +87 -15
- package/dist/commands/proposal/proposal-cli.js +490 -0
- package/dist/commands/{proposal.js → proposal/proposal.js} +17 -6
- package/dist/commands/{propose.js → proposal/propose.js} +11 -11
- package/dist/{core → commands/proposal/validators}/proposal-quality-validators.js +8 -3
- package/dist/{core → commands/proposal/validators}/proposal-validators.js +5 -5
- package/dist/{core → commands/proposal/validators}/proposals.js +374 -345
- package/dist/commands/{curate.js → read/curate.js} +7 -7
- package/dist/commands/{knowledge.js → read/knowledge.js} +22 -9
- package/dist/commands/{registry-search.js → read/registry-search.js} +5 -5
- package/dist/commands/{remember-cli.js → read/remember-cli.js} +15 -7
- package/dist/commands/read/search-cli.js +207 -0
- package/dist/commands/{search.js → read/search.js} +22 -27
- package/dist/commands/{show.js → read/show.js} +31 -45
- package/dist/commands/registry-cli.js +8 -8
- package/dist/commands/remember.js +14 -10
- package/dist/commands/sources/add-cli.js +293 -0
- package/dist/commands/{history.js → sources/history.js} +27 -25
- package/dist/commands/{info.js → sources/info.js} +6 -6
- package/dist/commands/{init.js → sources/init.js} +6 -6
- package/dist/commands/{installed-stashes.js → sources/installed-stashes.js} +12 -12
- package/dist/commands/{migration-help.js → sources/migration-help.js} +3 -2
- package/dist/commands/{schema-repair.js → sources/schema-repair.js} +8 -8
- package/dist/commands/{self-update.js → sources/self-update.js} +10 -9
- package/dist/commands/{source-add.js → sources/source-add.js} +10 -10
- package/dist/commands/{source-clone.js → sources/source-clone.js} +7 -7
- package/dist/commands/{source-manage.js → sources/source-manage.js} +4 -4
- package/dist/commands/sources/sources-cli.js +305 -0
- package/dist/commands/sources/stash-cli.js +219 -0
- package/dist/commands/{stash-skeleton.js → sources/stash-skeleton.js} +2 -1
- package/dist/commands/tasks/default-tasks.js +173 -0
- package/dist/commands/tasks/tasks-cli.js +210 -0
- package/dist/commands/{tasks.js → tasks/tasks.js} +14 -14
- package/dist/commands/wiki-cli.js +307 -0
- package/dist/commands/workflow-cli.js +329 -0
- package/dist/core/action-contributors.js +1 -1
- package/dist/core/assert.js +40 -0
- package/dist/core/asset/asset-create.js +54 -0
- package/dist/core/{asset-ref.js → asset/asset-ref.js} +21 -4
- package/dist/core/{asset-registry.js → asset/asset-registry.js} +3 -3
- package/dist/core/{asset-spec.js → asset/asset-spec.js} +17 -31
- package/dist/core/{markdown.js → asset/markdown.js} +1 -1
- package/dist/core/{stash-meta.js → asset/stash-meta.js} +1 -1
- package/dist/core/best-effort.js +64 -0
- package/dist/core/common.js +32 -18
- package/dist/core/{config-io.js → config/config-io.js} +29 -19
- package/dist/core/{config-migration.js → config/config-migration.js} +11 -9
- package/dist/core/{config-schema.js → config/config-schema.js} +50 -7
- package/dist/core/config/config-types.js +16 -0
- package/dist/core/{config-walker.js → config/config-walker.js} +2 -2
- package/dist/core/{config.js → config/config.js} +10 -8
- package/dist/core/env-secret-ref.js +90 -0
- package/dist/core/errors.js +13 -3
- package/dist/core/events.js +27 -4
- package/dist/core/file-lock.js +1 -1
- package/dist/core/improve-types.js +48 -0
- package/dist/core/lesson-lint.js +2 -2
- package/dist/core/logs-db.js +304 -0
- package/dist/core/paths.js +2 -2
- package/dist/core/ripgrep/install.js +2 -2
- package/dist/core/ripgrep/resolve.js +2 -2
- package/dist/core/state-db.js +195 -60
- package/dist/core/text-truncation.js +148 -0
- package/dist/core/time.js +1 -1
- package/dist/core/write-source.js +98 -85
- package/dist/indexer/{db-backup.js → db/db-backup.js} +9 -24
- package/dist/indexer/{db.js → db/db.js} +128 -118
- package/dist/indexer/{graph-db.js → db/graph-db.js} +9 -4
- package/dist/indexer/{llm-cache.js → db/llm-cache.js} +15 -12
- package/dist/indexer/ensure-index.js +4 -4
- package/dist/indexer/{graph-boost.js → graph/graph-boost.js} +1 -1
- package/dist/indexer/{graph-extraction.js → graph/graph-extraction.js} +55 -13
- package/dist/indexer/indexer.js +37 -30
- package/dist/indexer/init.js +54 -0
- package/dist/indexer/manifest.js +10 -10
- package/dist/indexer/{memory-inference.js → passes/memory-inference.js} +141 -33
- package/dist/indexer/{metadata-contributors.js → passes/metadata-contributors.js} +10 -8
- package/dist/indexer/{metadata.js → passes/metadata.js} +15 -19
- package/dist/indexer/{staleness-detect.js → passes/staleness-detect.js} +53 -12
- package/dist/indexer/{db-search.js → search/db-search.js} +28 -16
- package/dist/indexer/{ranking-contributors.js → search/ranking-contributors.js} +1 -1
- package/dist/indexer/{ranking.js → search/ranking.js} +2 -2
- package/dist/indexer/{search-hit-enrichers.js → search/search-hit-enrichers.js} +3 -3
- package/dist/indexer/{search-source.js → search/search-source.js} +8 -8
- package/dist/indexer/{semantic-status.js → search/semantic-status.js} +3 -3
- package/dist/indexer/usage/unmigrated-vaults-guard.js +94 -0
- package/dist/indexer/{usage-events.js → usage/usage-events.js} +32 -0
- package/dist/indexer/{file-context.js → walk/file-context.js} +10 -15
- package/dist/indexer/{matchers.js → walk/matchers.js} +13 -9
- package/dist/indexer/{path-resolver.js → walk/path-resolver.js} +6 -6
- package/dist/indexer/{project-context.js → walk/project-context.js} +1 -1
- package/dist/indexer/{walker.js → walk/walker.js} +4 -3
- package/dist/integrations/agent/builder-shared.js +39 -0
- package/dist/integrations/agent/builders.js +14 -81
- package/dist/integrations/agent/config.js +6 -4
- package/dist/integrations/agent/detect.js +1 -1
- package/dist/integrations/agent/index.js +23 -8
- package/dist/integrations/agent/prompts.js +2 -3
- package/dist/integrations/agent/runner.js +22 -3
- package/dist/integrations/agent/spawn.js +9 -10
- package/dist/integrations/harnesses/claude/agent-builder.js +48 -0
- package/dist/integrations/harnesses/claude/config-import.js +70 -0
- package/dist/integrations/harnesses/claude/index.js +64 -0
- package/dist/integrations/{session-logs/providers/claude-code.js → harnesses/claude/session-log.js} +32 -5
- package/dist/integrations/harnesses/index.js +144 -0
- package/dist/integrations/harnesses/opencode/agent-builder.js +43 -0
- package/dist/integrations/harnesses/opencode/config-import.js +82 -0
- package/dist/integrations/harnesses/opencode/index.js +59 -0
- package/dist/integrations/{session-logs/providers/opencode.js → harnesses/opencode/session-log.js} +1 -1
- package/dist/integrations/harnesses/opencode-sdk/index.js +49 -0
- package/dist/integrations/harnesses/opencode-sdk/sdk-runner.js +234 -0
- package/dist/integrations/harnesses/types.js +43 -0
- package/dist/integrations/lockfile.js +7 -16
- package/dist/integrations/session-logs/index.js +82 -9
- package/dist/llm/call-ai.js +4 -4
- package/dist/llm/client.js +146 -6
- package/dist/llm/embedder.js +6 -6
- package/dist/llm/embedders/local.js +9 -22
- package/dist/llm/embedders/remote.js +2 -2
- package/dist/llm/embedders/types.js +1 -1
- package/dist/llm/graph-extract.js +31 -12
- package/dist/llm/index-passes.js +1 -1
- package/dist/llm/memory-infer.js +12 -5
- package/dist/llm/metadata-enhance.js +2 -2
- package/dist/llm/usage-persist.js +77 -0
- package/dist/llm/usage-telemetry.js +103 -0
- package/dist/output/context.js +9 -46
- package/dist/output/html-render.js +73 -0
- package/dist/output/renderers.js +88 -58
- package/dist/output/shapes/curate.js +7 -3
- package/dist/output/shapes/distill.js +7 -3
- package/dist/output/shapes/env-list.js +18 -16
- package/dist/output/shapes/events.js +5 -4
- package/dist/output/shapes/helpers.js +19 -5
- package/dist/output/shapes/history.js +7 -3
- package/dist/output/shapes/passthrough.js +8 -11
- package/dist/output/shapes/{proposal-accept.js → proposal/accept.js} +7 -3
- package/dist/output/shapes/{proposal-diff.js → proposal/diff.js} +7 -3
- package/dist/output/shapes/{proposal-list.js → proposal/list.js} +7 -3
- package/dist/output/shapes/{proposal-producer.js → proposal/producer.js} +5 -4
- package/dist/output/shapes/{proposal-reject.js → proposal/reject.js} +7 -3
- package/dist/output/shapes/{proposal-show.js → proposal/show.js} +7 -3
- package/dist/output/shapes/registry-search.js +7 -3
- package/dist/output/shapes/registry.js +12 -0
- package/dist/output/shapes/search.js +7 -3
- package/dist/output/shapes/secret-list.js +18 -16
- package/dist/output/shapes/show.js +7 -3
- package/dist/output/shapes.js +55 -30
- package/dist/output/text/add.js +2 -3
- package/dist/output/text/clone.js +2 -3
- package/dist/output/text/config.js +2 -3
- package/dist/output/text/curate.js +4 -3
- package/dist/output/text/distill.js +2 -3
- package/dist/output/text/enable-disable.js +5 -4
- package/dist/output/text/env.js +13 -0
- package/dist/output/text/events.js +5 -4
- package/dist/output/text/feedback.js +4 -3
- package/dist/output/text/helpers.js +123 -40
- package/dist/output/text/history.js +2 -3
- package/dist/output/text/import.js +2 -3
- package/dist/output/text/index.js +2 -3
- package/dist/output/text/info.js +2 -3
- package/dist/output/text/init.js +2 -3
- package/dist/output/text/list.js +2 -3
- package/dist/output/text/proposal/producer.js +9 -0
- package/dist/output/text/proposal/proposal.js +13 -0
- package/dist/output/text/registry-commands.js +8 -7
- package/dist/output/text/registry.js +12 -0
- package/dist/output/text/remember.js +4 -3
- package/dist/output/text/remove.js +2 -3
- package/dist/output/text/save.js +2 -3
- package/dist/output/text/search.js +4 -3
- package/dist/output/text/show.js +4 -3
- package/dist/output/text/update.js +2 -3
- package/dist/output/text/upgrade.js +2 -3
- package/dist/output/text/wiki.js +12 -11
- package/dist/output/text/workflow.js +12 -10
- package/dist/output/text.js +66 -32
- package/dist/registry/build-index.js +11 -10
- package/dist/registry/factory.js +1 -1
- package/dist/registry/origin-resolve.js +1 -1
- package/dist/registry/providers/index.js +2 -2
- package/dist/registry/providers/skills-sh.js +91 -72
- package/dist/registry/providers/static-index.js +75 -52
- package/dist/registry/resolve.js +3 -3
- package/dist/runtime.js +242 -0
- package/dist/scripts/migrate-storage.js +1654 -683
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +254 -168
- package/dist/setup/detect.js +311 -9
- package/dist/setup/harness-config-import.js +6 -120
- package/dist/setup/setup.js +454 -43
- package/dist/sources/include.js +1 -1
- package/dist/sources/provider-factory.js +2 -2
- package/dist/sources/providers/filesystem.js +3 -3
- package/dist/sources/providers/git.js +9 -9
- package/dist/sources/providers/index.js +4 -4
- package/dist/sources/providers/npm.js +6 -6
- package/dist/sources/providers/provider-utils.js +13 -20
- package/dist/sources/providers/sync-from-ref.js +5 -5
- package/dist/sources/providers/tar-utils.js +2 -2
- package/dist/sources/providers/website.js +2 -2
- package/dist/sources/resolve.js +5 -5
- package/dist/sources/website-ingest.js +5 -5
- package/dist/storage/database.js +102 -0
- package/dist/storage/engines/sqlite-migrations.js +42 -0
- package/dist/storage/locations.js +25 -0
- package/dist/storage/repositories/index-db.js +43 -0
- package/dist/storage/repositories/workflow-runs-repository.js +141 -0
- package/dist/tasks/backends/cron.js +4 -4
- package/dist/tasks/backends/exec-utils.js +32 -0
- package/dist/tasks/backends/index.js +3 -3
- package/dist/tasks/backends/launchd.js +7 -14
- package/dist/tasks/backends/schtasks.js +7 -16
- package/dist/tasks/embedded.js +71 -0
- package/dist/tasks/parser.js +2 -2
- package/dist/tasks/resolveAkmBin.js +1 -1
- package/dist/tasks/runner.js +127 -31
- package/dist/tasks/schedule.js +1 -1
- package/dist/tasks/validator.js +7 -7
- package/dist/text-import-hook.mjs +51 -0
- package/dist/version.js +2 -1
- package/dist/wiki/wiki.js +7 -7
- package/dist/workflows/{authoring.js → authoring/authoring.js} +6 -6
- package/dist/workflows/{scope-key.js → authoring/scope-key.js} +1 -1
- package/dist/workflows/cli.js +1 -1
- package/dist/workflows/db.js +54 -32
- package/dist/workflows/parser.js +4 -4
- package/dist/workflows/renderer.js +5 -5
- package/dist/workflows/runtime/agent-identity.js +56 -0
- package/dist/workflows/runtime/checkin.js +57 -0
- package/dist/workflows/{runs.js → runtime/runs.js} +197 -101
- package/dist/workflows/validate-summary.js +82 -0
- package/docs/README.md +1 -1
- package/docs/data-and-telemetry.md +6 -6
- package/package.json +17 -8
- package/dist/commands/add-cli.js +0 -279
- package/dist/commands/env.js +0 -213
- package/dist/integrations/agent/sdk-runner.js +0 -126
- package/dist/output/shapes/vault-list.js +0 -19
- package/dist/output/text/proposal-producer.js +0 -8
- package/dist/output/text/proposal.js +0 -12
- package/dist/output/text/vault.js +0 -16
- /package/dist/core/{asset-serialize.js → asset/asset-serialize.js} +0 -0
- /package/dist/core/{frontmatter.js → asset/frontmatter.js} +0 -0
- /package/dist/core/{config-sources.js → config/config-sources.js} +0 -0
- /package/dist/indexer/{graph-dedup.js → graph/graph-dedup.js} +0 -0
- /package/dist/{core/config-types.js → indexer/passes/pass-context.js} +0 -0
- /package/dist/indexer/{search-fields.js → search/search-fields.js} +0 -0
- /package/dist/indexer/{index-context.js → walk/index-context.js} +0 -0
- /package/dist/workflows/{document-cache.js → runtime/document-cache.js} +0 -0
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
-
import { Database } from "bun:sqlite";
|
|
5
4
|
import fs from "node:fs";
|
|
6
5
|
import { createRequire } from "node:module";
|
|
7
6
|
import path from "node:path";
|
|
8
|
-
import { parseAssetRef } from "
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
7
|
+
import { parseAssetRef } from "../../core/asset/asset-ref.js";
|
|
8
|
+
import { bestEffort } from "../../core/best-effort.js";
|
|
9
|
+
import { getDbPath } from "../../core/paths.js";
|
|
10
|
+
import { REGISTRY_INDEX_CACHE_DDL } from "../../core/state-db.js";
|
|
11
|
+
import { warn } from "../../core/warn.js";
|
|
12
|
+
import { cosineSimilarity } from "../../llm/embedders/types.js";
|
|
13
|
+
import { sha256Hex } from "../../runtime.js";
|
|
14
|
+
import { openDatabase as openSqlite } from "../../storage/database.js";
|
|
15
|
+
import { buildSearchFields } from "../search/search-fields.js";
|
|
16
|
+
import { ensureUsageEventsSchema } from "../usage/usage-events.js";
|
|
17
|
+
import { backupDataDir, EMBEDDING_DIM_CHANGE_REASON } from "./db-backup.js";
|
|
16
18
|
// ── Constants ───────────────────────────────────────────────────────────────
|
|
17
19
|
export const DB_VERSION = 17;
|
|
18
20
|
export const EMBEDDING_DIM = 384;
|
|
@@ -24,9 +26,9 @@ export function openDatabase(dbPath, options) {
|
|
|
24
26
|
if (!fs.existsSync(dir)) {
|
|
25
27
|
fs.mkdirSync(dir, { recursive: true });
|
|
26
28
|
}
|
|
27
|
-
const db =
|
|
29
|
+
const db = openSqlite(resolvedPath);
|
|
28
30
|
db.exec("PRAGMA journal_mode = WAL");
|
|
29
|
-
db.exec("PRAGMA busy_timeout =
|
|
31
|
+
db.exec("PRAGMA busy_timeout = 30000");
|
|
30
32
|
db.exec("PRAGMA foreign_keys = ON");
|
|
31
33
|
// Try to load sqlite-vec extension
|
|
32
34
|
loadVecExtension(db);
|
|
@@ -52,7 +54,7 @@ export function openDatabase(dbPath, options) {
|
|
|
52
54
|
*/
|
|
53
55
|
function resolveConfiguredEmbeddingDim() {
|
|
54
56
|
try {
|
|
55
|
-
const { loadConfig } = require("
|
|
57
|
+
const { loadConfig } = require("../../core/config/config");
|
|
56
58
|
const dim = loadConfig().embedding?.dimension;
|
|
57
59
|
if (typeof dim === "number" && Number.isInteger(dim) && dim > 0 && dim <= 4096) {
|
|
58
60
|
return dim;
|
|
@@ -65,9 +67,9 @@ function resolveConfiguredEmbeddingDim() {
|
|
|
65
67
|
}
|
|
66
68
|
export function openExistingDatabase(dbPath) {
|
|
67
69
|
const resolvedPath = dbPath ?? getDbPath();
|
|
68
|
-
const db =
|
|
70
|
+
const db = openSqlite(resolvedPath);
|
|
69
71
|
db.exec("PRAGMA journal_mode = WAL");
|
|
70
|
-
db.exec("PRAGMA busy_timeout =
|
|
72
|
+
db.exec("PRAGMA busy_timeout = 30000");
|
|
71
73
|
db.exec("PRAGMA foreign_keys = ON");
|
|
72
74
|
// Existing-DB callers must not mutate schema or embedding metadata on open,
|
|
73
75
|
// but some paths still need write access to usage_events and other tables.
|
|
@@ -83,6 +85,10 @@ function loadVecExtension(db) {
|
|
|
83
85
|
try {
|
|
84
86
|
const esmRequire = createRequire(import.meta.url);
|
|
85
87
|
const sqliteVec = esmRequire("sqlite-vec");
|
|
88
|
+
// `db` here is the genuine underlying driver handle returned by the storage
|
|
89
|
+
// boundary (bun:sqlite on Bun, better-sqlite3 on Node) — only structurally
|
|
90
|
+
// narrowed for callers. sqlite-vec's `load()` accepts either real handle,
|
|
91
|
+
// so no raw-handle escape hatch is required.
|
|
86
92
|
sqliteVec.load(db);
|
|
87
93
|
vecStatus.set(db, true);
|
|
88
94
|
}
|
|
@@ -107,7 +113,7 @@ export function warnIfVecMissing(db, { once } = { once: false }) {
|
|
|
107
113
|
return;
|
|
108
114
|
if (once && vecInitWarnedDbs.has(db))
|
|
109
115
|
return;
|
|
110
|
-
|
|
116
|
+
bestEffort(() => {
|
|
111
117
|
const row = db.prepare("SELECT COUNT(*) AS cnt FROM embeddings").get();
|
|
112
118
|
const count = row?.cnt ?? 0;
|
|
113
119
|
if (count >= VEC_FALLBACK_THRESHOLD) {
|
|
@@ -115,10 +121,7 @@ export function warnIfVecMissing(db, { once } = { once: false }) {
|
|
|
115
121
|
if (once)
|
|
116
122
|
vecInitWarnedDbs.add(db);
|
|
117
123
|
}
|
|
118
|
-
}
|
|
119
|
-
catch {
|
|
120
|
-
/* embeddings table may not exist yet during init */
|
|
121
|
-
}
|
|
124
|
+
}, "embeddings table may not exist yet during init");
|
|
122
125
|
}
|
|
123
126
|
function ensureSchema(db, embeddingDim, options) {
|
|
124
127
|
// Create meta table first so we can check version
|
|
@@ -402,20 +405,10 @@ function ensureSchema(db, embeddingDim, options) {
|
|
|
402
405
|
// uses earlier in this function, just gated on embedding-dim mismatch
|
|
403
406
|
// and tagged so operators can tell the two backup kinds apart.
|
|
404
407
|
backupBeforeEmbeddingDimChange(options?.dataDir, storedDim, String(embeddingDim));
|
|
405
|
-
|
|
406
|
-
db.exec("DROP TABLE IF EXISTS entries_vec");
|
|
407
|
-
}
|
|
408
|
-
catch {
|
|
409
|
-
/* ignore */
|
|
410
|
-
}
|
|
408
|
+
bestEffort(() => db.exec("DROP TABLE IF EXISTS entries_vec"), "drop entries_vec on dim change");
|
|
411
409
|
// Delete stale BLOB embeddings so they don't produce silently wrong
|
|
412
410
|
// similarity scores against the new-dimension vec table.
|
|
413
|
-
|
|
414
|
-
db.exec("DELETE FROM embeddings");
|
|
415
|
-
}
|
|
416
|
-
catch {
|
|
417
|
-
/* ignore */
|
|
418
|
-
}
|
|
411
|
+
bestEffort(() => db.exec("DELETE FROM embeddings"), "delete stale embeddings on dim change");
|
|
419
412
|
setMeta(db, "hasEmbeddings", "0");
|
|
420
413
|
}
|
|
421
414
|
}
|
|
@@ -444,12 +437,7 @@ function ensureSchema(db, embeddingDim, options) {
|
|
|
444
437
|
const storedDim = getMeta(db, "embeddingDim");
|
|
445
438
|
if (storedDim && storedDim !== String(embeddingDim)) {
|
|
446
439
|
backupBeforeEmbeddingDimChange(options?.dataDir, storedDim, String(embeddingDim));
|
|
447
|
-
|
|
448
|
-
db.exec("DELETE FROM embeddings");
|
|
449
|
-
}
|
|
450
|
-
catch {
|
|
451
|
-
/* ignore */
|
|
452
|
-
}
|
|
440
|
+
bestEffort(() => db.exec("DELETE FROM embeddings"), "delete embeddings on explicit dim change");
|
|
453
441
|
setMeta(db, "hasEmbeddings", "0");
|
|
454
442
|
}
|
|
455
443
|
setMeta(db, "embeddingDim", String(embeddingDim));
|
|
@@ -484,12 +472,9 @@ function handleVersionUpgrade(db) {
|
|
|
484
472
|
if (storedVersion === undefined || storedVersion === "" || storedVersion === String(DB_VERSION))
|
|
485
473
|
return [];
|
|
486
474
|
let usageBackup = [];
|
|
487
|
-
|
|
475
|
+
bestEffort(() => {
|
|
488
476
|
usageBackup = db.prepare("SELECT * FROM usage_events").all();
|
|
489
|
-
}
|
|
490
|
-
catch {
|
|
491
|
-
/* table may not exist in older versions */
|
|
492
|
-
}
|
|
477
|
+
}, "usage_events table may not exist in older versions");
|
|
493
478
|
db.exec("DROP TABLE IF EXISTS utility_scores");
|
|
494
479
|
db.exec("DROP TABLE IF EXISTS utility_scores_scoped");
|
|
495
480
|
db.exec("DROP INDEX IF EXISTS idx_utility_scores_scoped_entry_id");
|
|
@@ -720,7 +705,7 @@ function getUpsertStmts(db) {
|
|
|
720
705
|
* data loss. Idempotent: a `PRAGMA table_info` lookup gates the ALTER.
|
|
721
706
|
*/
|
|
722
707
|
function ensureDerivedFromColumn(db) {
|
|
723
|
-
|
|
708
|
+
bestEffort(() => {
|
|
724
709
|
const cols = db.prepare("PRAGMA table_info(entries)").all();
|
|
725
710
|
const hasColumn = cols.some((c) => c.name === "derived_from");
|
|
726
711
|
if (!hasColumn) {
|
|
@@ -728,10 +713,7 @@ function ensureDerivedFromColumn(db) {
|
|
|
728
713
|
}
|
|
729
714
|
// Index creation is idempotent on its own; safe to call unconditionally.
|
|
730
715
|
db.exec("CREATE INDEX IF NOT EXISTS idx_entries_derived_from ON entries(derived_from)");
|
|
731
|
-
}
|
|
732
|
-
catch {
|
|
733
|
-
/* table may not exist on a brand-new DB before CREATE — caller is responsible */
|
|
734
|
-
}
|
|
716
|
+
}, "entries table may not exist on a brand-new DB before CREATE — caller is responsible");
|
|
735
717
|
}
|
|
736
718
|
/**
|
|
737
719
|
* Phase 5A / Advantage D5: look up the derived-memory child row whose
|
|
@@ -798,7 +780,7 @@ export function getPositiveFeedbackCountsByIds(db, ids) {
|
|
|
798
780
|
for (let i = 0; i < ids.length; i += SQLITE_CHUNK_SIZE) {
|
|
799
781
|
const chunk = ids.slice(i, i + SQLITE_CHUNK_SIZE);
|
|
800
782
|
const placeholders = chunk.map(() => "?").join(",");
|
|
801
|
-
|
|
783
|
+
bestEffort(() => {
|
|
802
784
|
const rows = db
|
|
803
785
|
.prepare(`SELECT entry_id, COUNT(*) AS cnt
|
|
804
786
|
FROM usage_events
|
|
@@ -812,10 +794,7 @@ export function getPositiveFeedbackCountsByIds(db, ids) {
|
|
|
812
794
|
result.set(row.entry_id, row.cnt);
|
|
813
795
|
}
|
|
814
796
|
}
|
|
815
|
-
}
|
|
816
|
-
catch {
|
|
817
|
-
/* usage_events table may be missing on legacy DBs — treat as zero counts */
|
|
818
|
-
}
|
|
797
|
+
}, "usage_events table may be missing on legacy DBs — treat as zero counts");
|
|
819
798
|
}
|
|
820
799
|
return result;
|
|
821
800
|
}
|
|
@@ -843,57 +822,22 @@ function deleteRelatedRows(db, ids) {
|
|
|
843
822
|
for (let i = 0; i < numericIds.length; i += SQLITE_CHUNK_SIZE) {
|
|
844
823
|
const chunk = numericIds.slice(i, i + SQLITE_CHUNK_SIZE);
|
|
845
824
|
const placeholders = chunk.map(() => "?").join(",");
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
}
|
|
849
|
-
catch {
|
|
850
|
-
/* fts table may not exist on a brand-new db */
|
|
851
|
-
}
|
|
852
|
-
try {
|
|
853
|
-
db.prepare(`DELETE FROM entries_fts_dirty WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
854
|
-
}
|
|
855
|
-
catch {
|
|
856
|
-
/* dirty table is created lazily by upsertEntry */
|
|
857
|
-
}
|
|
825
|
+
bestEffort(() => db.prepare(`DELETE FROM entries_fts WHERE entry_id IN (${placeholders})`).run(...chunk), "fts table may not exist on a brand-new db");
|
|
826
|
+
bestEffort(() => db.prepare(`DELETE FROM entries_fts_dirty WHERE entry_id IN (${placeholders})`).run(...chunk), "fts dirty table is created lazily by upsertEntry");
|
|
858
827
|
}
|
|
859
828
|
// Process in chunks to stay within SQLITE_MAX_VARIABLE_NUMBER
|
|
860
829
|
for (let i = 0; i < numericIds.length; i += SQLITE_CHUNK_SIZE) {
|
|
861
830
|
const chunk = numericIds.slice(i, i + SQLITE_CHUNK_SIZE);
|
|
862
831
|
const placeholders = chunk.map(() => "?").join(",");
|
|
863
|
-
|
|
864
|
-
db.prepare(`DELETE FROM embeddings WHERE id IN (${placeholders})`).run(...chunk);
|
|
865
|
-
}
|
|
866
|
-
catch {
|
|
867
|
-
/* ignore */
|
|
868
|
-
}
|
|
832
|
+
bestEffort(() => db.prepare(`DELETE FROM embeddings WHERE id IN (${placeholders})`).run(...chunk), "delete embeddings for entries");
|
|
869
833
|
if (vecAvail) {
|
|
870
|
-
|
|
871
|
-
db.prepare(`DELETE FROM entries_vec WHERE id IN (${placeholders})`).run(...chunk);
|
|
872
|
-
}
|
|
873
|
-
catch {
|
|
874
|
-
/* ignore */
|
|
875
|
-
}
|
|
834
|
+
bestEffort(() => db.prepare(`DELETE FROM entries_vec WHERE id IN (${placeholders})`).run(...chunk), "delete entries_vec for entries");
|
|
876
835
|
}
|
|
877
836
|
// Clean up utility scores before deleting entries
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
}
|
|
881
|
-
catch {
|
|
882
|
-
/* ignore */
|
|
883
|
-
}
|
|
884
|
-
try {
|
|
885
|
-
db.prepare(`DELETE FROM utility_scores_scoped WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
886
|
-
}
|
|
887
|
-
catch {
|
|
888
|
-
/* ignore */
|
|
889
|
-
}
|
|
837
|
+
bestEffort(() => db.prepare(`DELETE FROM utility_scores WHERE entry_id IN (${placeholders})`).run(...chunk), "delete utility_scores for entries");
|
|
838
|
+
bestEffort(() => db.prepare(`DELETE FROM utility_scores_scoped WHERE entry_id IN (${placeholders})`).run(...chunk), "delete utility_scores_scoped for entries");
|
|
890
839
|
// Clean up usage events before deleting entries
|
|
891
|
-
|
|
892
|
-
db.prepare(`DELETE FROM usage_events WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
893
|
-
}
|
|
894
|
-
catch {
|
|
895
|
-
/* ignore */
|
|
896
|
-
}
|
|
840
|
+
bestEffort(() => db.prepare(`DELETE FROM usage_events WHERE entry_id IN (${placeholders})`).run(...chunk), "delete usage_events for entries");
|
|
897
841
|
}
|
|
898
842
|
}
|
|
899
843
|
/**
|
|
@@ -1005,15 +949,12 @@ export function upsertEmbedding(db, entryId, embedding) {
|
|
|
1005
949
|
// Wrapped in a transaction so a crash between DELETE and INSERT does not
|
|
1006
950
|
// leave the entry missing from the vec table.
|
|
1007
951
|
if (isVecAvailable(db)) {
|
|
1008
|
-
|
|
952
|
+
bestEffort(() => {
|
|
1009
953
|
db.transaction(() => {
|
|
1010
954
|
db.prepare("DELETE FROM entries_vec WHERE id = ?").run(entryId);
|
|
1011
955
|
db.prepare("INSERT INTO entries_vec (id, embedding) VALUES (?, ?)").run(entryId, buf);
|
|
1012
956
|
})();
|
|
1013
|
-
}
|
|
1014
|
-
catch {
|
|
1015
|
-
/* ignore — vec table unavailable or constraint failure */
|
|
1016
|
-
}
|
|
957
|
+
}, "vec table unavailable or constraint failure");
|
|
1017
958
|
}
|
|
1018
959
|
return true;
|
|
1019
960
|
}
|
|
@@ -1279,6 +1220,10 @@ export function getEntryCount(db) {
|
|
|
1279
1220
|
const row = db.prepare("SELECT COUNT(*) AS cnt FROM entries").get();
|
|
1280
1221
|
return row.cnt;
|
|
1281
1222
|
}
|
|
1223
|
+
export function getEmbeddableEntryCount(db) {
|
|
1224
|
+
const row = db.prepare("SELECT COUNT(*) AS cnt FROM entries").get();
|
|
1225
|
+
return row.cnt;
|
|
1226
|
+
}
|
|
1282
1227
|
export function getEmbeddingCount(db) {
|
|
1283
1228
|
const row = db.prepare("SELECT COUNT(*) AS cnt FROM embeddings").get();
|
|
1284
1229
|
return row.cnt;
|
|
@@ -1304,6 +1249,46 @@ export function getEntriesByDir(db, dirPath) {
|
|
|
1304
1249
|
.all(dirPath);
|
|
1305
1250
|
return parseEntryRows(rows, "getEntriesByDir");
|
|
1306
1251
|
}
|
|
1252
|
+
/**
 * Look up the `entries.id` whose `file_path` equals the given canonical
 * on-disk path exactly. Yields `undefined` when no row matches.
 *
 * Lifted verbatim (WS5) from the inline `SELECT id FROM entries WHERE
 * file_path = ? LIMIT 1` in commands/search.ts so all `entries` SQL lives in
 * this module. Only the plain id value is returned — nothing lazy crosses a
 * connection boundary.
 *
 * @param db Open database handle exposing `prepare(sql).get(...)`.
 * @param filePath Exact `file_path` value to match.
 * @returns The matching row's `id`, or `undefined`.
 */
export function getEntryIdByFilePath(db, filePath) {
    const lookup = db.prepare("SELECT id FROM entries WHERE file_path = ? LIMIT 1");
    const match = lookup.get(filePath);
    return match?.id;
}
|
|
1265
|
+
/**
 * Look up the `entries.file_path` for a primary key. Yields `undefined` when
 * no row matches.
 *
 * Lifted verbatim (WS5) from the inline `SELECT file_path FROM entries WHERE
 * id = ?` in commands/feedback-cli.ts. Unlike {@link getEntryById}, this does
 * NOT parse `entry_json`, so a row with corrupt JSON still yields its path —
 * preserving feedback-cli's pre-extraction behaviour byte-for-byte.
 *
 * @param db Open database handle exposing `prepare(sql).get(...)`.
 * @param id Primary key of the entry.
 * @returns The matching row's `file_path`, or `undefined`.
 */
export function getEntryFilePathById(db, id) {
    const lookup = db.prepare("SELECT file_path FROM entries WHERE id = ?");
    const match = lookup.get(id);
    return match?.file_path;
}
|
|
1278
|
+
/**
 * Fetch every `(file_path, entry_json)` row whose entry belongs to a given
 * stash root — matched either by exact `stash_dir` OR by `file_path` prefix.
 *
 * Lifted (WS5) from the inline query in commands/graph.ts' `buildRefByPath`.
 * The prefix is matched literally: the LIKE metacharacters `%` and `_`, and
 * the escape character `\` itself, are escaped in `stashRoot` so a path such
 * as `/home/me/my_stash` cannot match rows under `/home/me/myXstash`.
 *
 * The full result set is materialised with `.all()` before return so callers
 * can iterate it after the connection closes (WS5 connection-lifetime rule).
 * JSON parsing stays with the caller.
 *
 * @param db Open database handle exposing `prepare(sql).all(...)`.
 * @param stashRoot Stash root directory path.
 * @returns Array of rows carrying `file_path` and `entry_json`.
 */
export function getEntryRefRowsForStashRoot(db, stashRoot) {
    // Escape `\` first-class alongside `%` and `_`: with `ESCAPE '\'` in effect a
    // raw backslash (e.g. in a Windows path) would otherwise swallow the next char.
    const literalPrefix = stashRoot.replace(/[\\%_]/g, "\\$&");
    return db
        .prepare("SELECT file_path, entry_json FROM entries WHERE stash_dir = ? OR file_path LIKE ? ESCAPE '\\'")
        .all(stashRoot, `${literalPrefix}%`);
}
|
|
1307
1292
|
/**
|
|
1308
1293
|
* Get the utility score for an entry, or undefined if none exists.
|
|
1309
1294
|
*/
|
|
@@ -1475,28 +1460,21 @@ export function upsertLlmCacheEntry(db, assetRef, bodyHash, resultJson, cacheVar
|
|
|
1475
1460
|
* matches a live file_path.
|
|
1476
1461
|
*/
|
|
1477
1462
|
export function clearStaleCacheEntries(db) {
|
|
1478
|
-
|
|
1463
|
+
bestEffort(() => {
|
|
1479
1464
|
db.exec(`
|
|
1480
1465
|
DELETE FROM llm_enrichment_cache
|
|
1481
1466
|
WHERE asset_ref NOT IN (SELECT file_path FROM entries)
|
|
1482
1467
|
AND asset_ref NOT IN (SELECT entry_key FROM entries)
|
|
1483
1468
|
`);
|
|
1484
|
-
}
|
|
1485
|
-
catch {
|
|
1486
|
-
/* ignore — table may not exist in very old DBs opened without ensureSchema */
|
|
1487
|
-
}
|
|
1469
|
+
}, "llm_enrichment_cache may not exist in very old DBs opened without ensureSchema");
|
|
1488
1470
|
}
|
|
1489
1471
|
/**
|
|
1490
|
-
* Compute a stable SHA-256 hex digest of a UTF-8 string
|
|
1491
|
-
*
|
|
1492
|
-
*
|
|
1493
|
-
* Bun.CryptoHasher is synchronous and allocation-free compared to Web Crypto,
|
|
1494
|
-
* making it suitable for use inside tight per-asset loops.
|
|
1472
|
+
* Compute a stable SHA-256 hex digest of a UTF-8 string. Used as the body_hash
|
|
1473
|
+
* key in `llm_enrichment_cache`. Routed through the runtime boundary so the
|
|
1474
|
+
* SQLite layer stays free of direct runtime-specific references.
|
|
1495
1475
|
*/
|
|
1496
1476
|
export function computeBodyHash(body) {
|
|
1497
|
-
|
|
1498
|
-
hasher.update(body);
|
|
1499
|
-
return hasher.digest("hex");
|
|
1477
|
+
return sha256Hex(body);
|
|
1500
1478
|
}
|
|
1501
1479
|
/**
|
|
1502
1480
|
* Count search and show events for the given entry refs.
|
|
@@ -1588,7 +1566,6 @@ export function getAllEntriesForEmbedding(db) {
|
|
|
1588
1566
|
.prepare(`
|
|
1589
1567
|
SELECT e.id, e.search_text AS searchText, e.entry_key AS entryKey, e.file_path AS filePath FROM entries e
|
|
1590
1568
|
WHERE NOT EXISTS (SELECT 1 FROM embeddings b WHERE b.id = e.id)
|
|
1591
|
-
AND e.entry_type != 'vault'
|
|
1592
1569
|
`)
|
|
1593
1570
|
.all();
|
|
1594
1571
|
}
|
|
@@ -1751,7 +1728,7 @@ export function applyFeedbackToUtilityScore(db, entryId, positiveCount, negative
|
|
|
1751
1728
|
* history survives a full reindex.
|
|
1752
1729
|
*/
|
|
1753
1730
|
export function relinkUsageEvents(db) {
|
|
1754
|
-
|
|
1731
|
+
bestEffort(() => {
|
|
1755
1732
|
// Step 1: null out stale entry_ids (entry was deleted, re-keyed, etc).
|
|
1756
1733
|
// Leaving them in place would let `recomputeUtilityScores` aggregate
|
|
1757
1734
|
// by an entry_id that no longer exists in `entries`, then trip the FK
|
|
@@ -1776,10 +1753,7 @@ export function relinkUsageEvents(db) {
|
|
|
1776
1753
|
)
|
|
1777
1754
|
WHERE entry_id IS NULL AND entry_ref IS NOT NULL
|
|
1778
1755
|
`);
|
|
1779
|
-
}
|
|
1780
|
-
catch {
|
|
1781
|
-
/* ignore if table doesn't exist yet */
|
|
1782
|
-
}
|
|
1756
|
+
}, "usage_events table may not exist yet during entry_id re-resolution");
|
|
1783
1757
|
}
|
|
1784
1758
|
// ── registry_index_cache helpers ─────────────────────────────────────────────
|
|
1785
1759
|
/**
|
|
@@ -1825,3 +1799,39 @@ export function getRegistryIndexCache(db, registryUrl, maxAgeMs = 3_600_000 /* 1
|
|
|
1825
1799
|
return undefined;
|
|
1826
1800
|
return { indexJson: row.index_json, etag: row.etag, lastModified: row.last_modified };
|
|
1827
1801
|
}
|
|
1802
|
+
/**
 * Walk indexed entries and collect a deduplicated set of tags. When
 * `entryType` is provided, only entries of that type contribute tags.
 *
 * Pure read; never mutates the DB. Used by `akm lessons coverage` (Phase 7A)
 * to compute the diff between all-asset tags and lesson tags. Tags are
 * normalised by trimming and lower-casing, and blank tags are dropped.
 * Rows whose `entry_json` is not valid JSON, does not decode to an object
 * (e.g. `null`), or has a non-array `tags` field are skipped.
 *
 * SQL owner: this module owns ALL raw SQL against the `entries` table (WS5),
 * so the `lessons coverage` read lives here rather than leaking into cli.ts.
 * The result set is fully materialised (`.all()` then iterate) before return.
 *
 * @param db Open database handle exposing `prepare(sql).all(...)`.
 * @param entryType Optional entry type; when falsy, all entries are scanned.
 * @returns A `Set` of normalised tag strings.
 */
export function collectTagSetFromEntries(db, entryType) {
    const tags = new Set();
    const rows = entryType
        ? db.prepare("SELECT entry_json FROM entries WHERE entry_type = ?").all(entryType)
        : db.prepare("SELECT entry_json FROM entries").all();
    for (const row of rows) {
        let parsed;
        try {
            parsed = JSON.parse(row.entry_json);
        }
        catch {
            continue;
        }
        // JSON.parse may legitimately return null or a primitive; optional
        // chaining keeps one odd row from throwing and aborting the whole walk.
        const entryTags = parsed?.tags;
        if (!Array.isArray(entryTags))
            continue;
        for (const tag of entryTags) {
            if (typeof tag !== "string")
                continue;
            const normalised = tag.trim().toLowerCase();
            if (normalised.length > 0) {
                tags.add(normalised);
            }
        }
    }
    return tags;
}
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
4
|
import fs from "node:fs";
|
|
5
|
-
import { rethrowIfTestIsolationError } from "
|
|
6
|
-
import { getDbPath } from "
|
|
7
|
-
import { warn } from "
|
|
8
|
-
import { closeDatabase, openExistingDatabase } from "./db";
|
|
5
|
+
import { rethrowIfTestIsolationError } from "../../core/errors.js";
|
|
6
|
+
import { getDbPath } from "../../core/paths.js";
|
|
7
|
+
import { warn } from "../../core/warn.js";
|
|
8
|
+
import { closeDatabase, openExistingDatabase } from "./db.js";
|
|
9
9
|
function withReadableGraphDb(db, fn) {
|
|
10
10
|
if (db)
|
|
11
11
|
return fn(db);
|
|
@@ -287,6 +287,11 @@ export function loadStoredGraphMeta(stashPath, db) {
|
|
|
287
287
|
cacheMisses: row.cache_misses,
|
|
288
288
|
truncationCount: row.truncation_count,
|
|
289
289
|
failureCount: row.failure_count,
|
|
290
|
+
// `retry_attempts` is not persisted to the graph-meta table (it is
|
|
291
|
+
// surfaced from the run's emitted telemetry into `akm health`, not
|
|
292
|
+
// from the reuse cache). Default to 0 so the loaded shape satisfies
|
|
293
|
+
// GraphExtractionTelemetry.
|
|
294
|
+
retryAttempts: 0,
|
|
290
295
|
},
|
|
291
296
|
};
|
|
292
297
|
}
|
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
-
|
|
4
|
+
/**
|
|
5
|
+
* Generic LLM-result cache wrapper shared across indexer passes.
|
|
6
|
+
*
|
|
7
|
+
* Each pass that calls an LLM and wants to skip re-processing unchanged
|
|
8
|
+
* content can delegate the cache check/write to `withLlmCache` instead of
|
|
9
|
+
* duplicating the hash-compute → lookup → write pattern inline.
|
|
10
|
+
*/
|
|
11
|
+
import { bestEffort } from "../../core/best-effort.js";
|
|
12
|
+
import { computeBodyHash, getLlmCacheEntry, upsertLlmCacheEntry } from "./db.js";
|
|
5
13
|
/**
|
|
6
14
|
* Generic LLM cache wrapper. Returns cached result if body unchanged,
|
|
7
15
|
* otherwise calls llmFn(), caches the result, and returns it.
|
|
@@ -25,7 +33,7 @@ import { computeBodyHash, getLlmCacheEntry, upsertLlmCacheEntry } from "./db";
|
|
|
25
33
|
export async function withLlmCache(db, cacheKey, body, reEnrich, llmFn, validate, precomputedHash, cacheVariant = "", hooks) {
|
|
26
34
|
const bodyHash = precomputedHash ?? computeBodyHash(body);
|
|
27
35
|
if (!reEnrich) {
|
|
28
|
-
|
|
36
|
+
const cacheHit = bestEffort(() => {
|
|
29
37
|
const cached = getLlmCacheEntry(db, cacheKey, bodyHash, cacheVariant);
|
|
30
38
|
if (cached) {
|
|
31
39
|
const result = validate(JSON.parse(cached.resultJson));
|
|
@@ -34,19 +42,14 @@ export async function withLlmCache(db, cacheKey, body, reEnrich, llmFn, validate
|
|
|
34
42
|
return result;
|
|
35
43
|
}
|
|
36
44
|
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
45
|
+
return undefined;
|
|
46
|
+
}, "llm cache read corrupt — fall through to recompute");
|
|
47
|
+
if (cacheHit !== undefined)
|
|
48
|
+
return cacheHit;
|
|
41
49
|
}
|
|
42
50
|
const result = await llmFn();
|
|
43
51
|
if (result !== undefined) {
|
|
44
|
-
|
|
45
|
-
upsertLlmCacheEntry(db, cacheKey, bodyHash, JSON.stringify(result), cacheVariant);
|
|
46
|
-
}
|
|
47
|
-
catch {
|
|
48
|
-
// Cache write failure is non-fatal
|
|
49
|
-
}
|
|
52
|
+
bestEffort(() => upsertLlmCacheEntry(db, cacheKey, bodyHash, JSON.stringify(result), cacheVariant), "llm cache write failure is non-fatal");
|
|
50
53
|
}
|
|
51
54
|
return result;
|
|
52
55
|
}
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
*/
|
|
14
14
|
import fs from "node:fs";
|
|
15
15
|
import path from "node:path";
|
|
16
|
-
import { ASSET_SPECS, TYPE_DIRS } from "../core/asset-spec";
|
|
17
|
-
import { getDbPath } from "../core/paths";
|
|
18
|
-
import { warn } from "../core/warn";
|
|
19
|
-
import { closeDatabase, getEntryCount, getMeta, openExistingDatabase } from "./db";
|
|
16
|
+
import { ASSET_SPECS, TYPE_DIRS } from "../core/asset/asset-spec.js";
|
|
17
|
+
import { getDbPath } from "../core/paths.js";
|
|
18
|
+
import { warn } from "../core/warn.js";
|
|
19
|
+
import { closeDatabase, getEntryCount, getMeta, openExistingDatabase } from "./db/db.js";
|
|
20
20
|
function getIndexableFiles(root, spec) {
|
|
21
21
|
if (!fs.existsSync(root))
|
|
22
22
|
return [];
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
-
import { loadStoredGraphMeta, loadStoredGraphSnapshot } from "
|
|
4
|
+
import { loadStoredGraphMeta, loadStoredGraphSnapshot } from "../db/graph-db.js";
|
|
5
5
|
/**
 * Canonicalise a graph name for comparison: strip surrounding whitespace,
 * then lower-case the remainder.
 *
 * @param value Name string to normalise.
 * @returns The trimmed, lower-cased name.
 */
function normalizeGraphName(value) {
    const trimmed = value.trim();
    return trimmed.toLowerCase();
}
|
|
@@ -1,20 +1,54 @@
|
|
|
1
1
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
+
/**
|
|
5
|
+
* Graph-extraction pass for `akm index` (#207).
|
|
6
|
+
*
|
|
7
|
+
* Walks the primary stash for `memory:` and `knowledge:` assets, asks the
|
|
8
|
+
* configured LLM to extract entities and relations from each one, and
|
|
9
|
+
* persists the result to stash-local SQLite graph tables keyed by stash root.
|
|
10
|
+
* The artifact is consumed by the search
|
|
11
|
+
* pipeline (see `src/indexer/graph-boost.ts`) as a single boost component
|
|
12
|
+
* inside the existing FTS5+boosts loop — there is NO second SearchHit
|
|
13
|
+
* scorer and no parallel ranking track.
|
|
14
|
+
*
|
|
15
|
+
* Disabling — three preconditions must ALL hold for the pass to run:
|
|
16
|
+
* 1. An LLM profile must be configured (no provider = no extraction). When
|
|
17
|
+
* absent, `resolveIndexPassLLM("graph", config)` returns `undefined`
|
|
18
|
+
* and the pass short-circuits.
|
|
19
|
+
* 2. `profiles.improve.default.processes.graphExtraction.enabled !== false`
|
|
20
|
+
* — the feature-gate layer (historically v1 spec §14, since superseded by
|
|
21
|
+
* the 0.8.0 profile shape). Set to `false` to block the pass at the
|
|
22
|
+
* feature-gate layer (no network call may ever issue).
|
|
23
|
+
* 3. `index.graph.llm !== false` — the per-pass opt-out layer (#208).
|
|
24
|
+
* Set to `false` to skip just this pass while leaving other passes
|
|
25
|
+
* that share the same LLM profile enabled.
|
|
26
|
+
* Toggling any one off does NOT delete the existing persisted graph — the
|
|
27
|
+
* user keeps the boost component they already have, it just stops
|
|
28
|
+
* refreshing.
|
|
29
|
+
*
|
|
30
|
+
* Locked v1 contract:
|
|
31
|
+
* - LLM access is exclusively via `resolveIndexPassLLM("graph", config)`.
|
|
32
|
+
* - The graph rows are an indexer artifact, NOT a user-visible
|
|
33
|
+
* asset. It does not have an asset ref, does not appear in search
|
|
34
|
+
* hits, and is not addressable via `akm show`. Direct `fs.writeFile`
|
|
35
|
+
* is therefore the correct primitive — `writeAssetToSource` is
|
|
36
|
+
* reserved for asset writes (CLAUDE.md / spec §10 step 5).
|
|
37
|
+
*/
|
|
4
38
|
import fs from "node:fs";
|
|
5
39
|
import path from "node:path";
|
|
6
|
-
import { TYPE_DIRS } from "
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import { warn, warnVerbose } from "
|
|
11
|
-
import { isProcessEnabled } from "
|
|
12
|
-
import * as graphExtract from "
|
|
13
|
-
import { resolveIndexPassLLM } from "
|
|
14
|
-
import { computeBodyHash, GRAPH_SCHEMA_VERSION, getLlmCacheEntriesByRefs, getLlmCacheEntry, upsertLlmCacheEntry, } from "
|
|
15
|
-
import { loadStoredGraphSnapshot, replaceStoredGraph } from "
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
40
|
+
import { TYPE_DIRS } from "../../core/asset/asset-spec.js";
|
|
41
|
+
import { parseFrontmatter } from "../../core/asset/frontmatter.js";
|
|
42
|
+
import { concurrentMap } from "../../core/concurrent.js";
|
|
43
|
+
import { getIndexPassConfig, resolveBatchSize } from "../../core/config/config.js";
|
|
44
|
+
import { warn, warnVerbose } from "../../core/warn.js";
|
|
45
|
+
import { isProcessEnabled } from "../../llm/feature-gate.js";
|
|
46
|
+
import * as graphExtract from "../../llm/graph-extract.js";
|
|
47
|
+
import { resolveIndexPassLLM } from "../../llm/index-passes.js";
|
|
48
|
+
import { computeBodyHash, GRAPH_SCHEMA_VERSION, getLlmCacheEntriesByRefs, getLlmCacheEntry, upsertLlmCacheEntry, } from "../db/db.js";
|
|
49
|
+
import { loadStoredGraphSnapshot, replaceStoredGraph } from "../db/graph-db.js";
|
|
50
|
+
import { walkMarkdownFiles } from "../walk/walker.js";
|
|
51
|
+
import { deduplicateGraph } from "./graph-dedup.js";
|
|
18
52
|
/** Schema version for the persisted artifact — bumps trigger a full rebuild. */
|
|
19
53
|
export const GRAPH_FILE_SCHEMA_VERSION = GRAPH_SCHEMA_VERSION;
|
|
20
54
|
const EMPTY_QUALITY = {
|
|
@@ -37,6 +71,7 @@ const EMPTY_RESULT = {
|
|
|
37
71
|
cacheMisses: 0,
|
|
38
72
|
truncationCount: 0,
|
|
39
73
|
failureCount: 0,
|
|
74
|
+
retryAttempts: 0,
|
|
40
75
|
},
|
|
41
76
|
warnings: [],
|
|
42
77
|
};
|
|
@@ -231,7 +266,8 @@ function reuseGraphNode(previousNodes, candidate, bodyHash) {
|
|
|
231
266
|
* `extractGraphFromBodies`. Default batch size is 1 (one call per asset —
|
|
232
267
|
* preserves existing behaviour, fully opt-in).
|
|
233
268
|
*/
|
|
234
|
-
export async function runGraphExtractionPass(
|
|
269
|
+
export async function runGraphExtractionPass(ctx) {
|
|
270
|
+
const { config, sources, signal, db, reEnrich, onProgress, options = {} } = ctx;
|
|
235
271
|
// Gate 1 — feature gate via isProcessEnabled, which reads the 0.8.0 path
|
|
236
272
|
// (profiles.improve.default.processes.graphExtraction.enabled). Defaults to
|
|
237
273
|
// enabled when the key is absent.
|
|
@@ -306,11 +342,15 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
306
342
|
cacheMisses: 0,
|
|
307
343
|
truncationCount: 0,
|
|
308
344
|
failureCount: 0,
|
|
345
|
+
htmlErrorCount: 0,
|
|
346
|
+
retryAttempts: 0,
|
|
309
347
|
};
|
|
310
348
|
const canReusePreviousGraph = previousGraph.telemetry?.extractorId === extractorId;
|
|
311
349
|
const runtimeTelemetry = {
|
|
312
350
|
truncationCount: 0,
|
|
313
351
|
failureCount: 0,
|
|
352
|
+
htmlErrorCount: 0,
|
|
353
|
+
retryAttempts: 0,
|
|
314
354
|
filteredGenericEntities: 0,
|
|
315
355
|
filteredInvalidRelations: 0,
|
|
316
356
|
filteredLowConfidenceRelations: 0,
|
|
@@ -555,6 +595,8 @@ export async function runGraphExtractionPass(config, sources, signal, db, reEnri
|
|
|
555
595
|
const deduped = deduplicateGraph(mergedNodes.map((node) => ({ entities: node.entities, relations: node.relations })), assetRefs);
|
|
556
596
|
telemetry.truncationCount = runtimeTelemetry.truncationCount ?? 0;
|
|
557
597
|
telemetry.failureCount = runtimeTelemetry.failureCount ?? 0;
|
|
598
|
+
telemetry.htmlErrorCount = runtimeTelemetry.htmlErrorCount ?? 0;
|
|
599
|
+
telemetry.retryAttempts = runtimeTelemetry.retryAttempts ?? 0;
|
|
558
600
|
const qualityConsidered = mergedNodes.length;
|
|
559
601
|
const qualityExtracted = mergedNodes.filter((node) => node.status === "extracted" && node.entities.length > 0).length;
|
|
560
602
|
const quality = computeGraphQualityTelemetry(qualityConsidered, qualityExtracted, deduped.entities.length, deduped.relations.length);
|