akm-cli 0.9.0-beta.57 → 0.9.0-beta.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/prompts/extract-session.md +5 -1
- package/dist/cli/config-migrate.js +7 -1
- package/dist/commands/config-cli.js +8 -11
- package/dist/commands/health/stash-exposure.js +46 -0
- package/dist/commands/health/windows.js +6 -7
- package/dist/commands/health.js +31 -10
- package/dist/commands/improve/collapse-detector.js +2 -1
- package/dist/commands/improve/consolidate.js +207 -159
- package/dist/commands/improve/distill/promote-memory.js +4 -3
- package/dist/commands/improve/distill/quality-gate.js +7 -4
- package/dist/commands/improve/distill-promotion-policy.js +826 -167
- package/dist/commands/improve/distill.js +26 -12
- package/dist/commands/improve/extract-prompt.js +16 -2
- package/dist/commands/improve/extract.js +16 -8
- package/dist/commands/improve/improve-auto-accept.js +22 -1
- package/dist/commands/improve/loop-stages.js +7 -2
- package/dist/commands/improve/memory/memory-belief.js +14 -15
- package/dist/commands/improve/memory/memory-contradiction-detect.js +60 -32
- package/dist/commands/improve/memory/memory-improve.js +27 -27
- package/dist/commands/improve/preparation.js +4 -0
- package/dist/commands/improve/procedural.js +1 -0
- package/dist/commands/improve/recombine.js +1 -0
- package/dist/commands/improve/reflect-noise.js +1 -1
- package/dist/commands/improve/reflect.js +4 -3
- package/dist/commands/improve/shared.js +9 -6
- package/dist/commands/proposal/drain-policies.js +4 -2
- package/dist/commands/read/remember-cli.js +1 -1
- package/dist/commands/read/show.js +15 -0
- package/dist/commands/remember.js +11 -12
- package/dist/commands/sources/init.js +5 -1
- package/dist/commands/sources/stash-skeleton.js +34 -0
- package/dist/core/asset/frontmatter.js +22 -0
- package/dist/core/common.js +1 -15
- package/dist/core/config/config-io.js +10 -1
- package/dist/core/config/config-migration.js +2 -15
- package/dist/core/config/config-schema.js +15 -3
- package/dist/core/config/config.js +22 -14
- package/dist/core/paths.js +4 -4
- package/dist/core/time.js +53 -0
- package/dist/indexer/db/db.js +51 -46
- package/dist/indexer/indexer.js +77 -65
- package/dist/indexer/search/db-search.js +41 -6
- package/dist/indexer/search/ranking-contributors.js +14 -8
- package/dist/indexer/search/search-source.js +15 -3
- package/dist/llm/feature-gate.js +4 -8
- package/dist/output/renderers.js +4 -0
- package/dist/scripts/migrate-storage.js +83 -59
- package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +6 -0
- package/dist/storage/repositories/registry-cache.js +2 -1
- package/dist/storage/repositories/registry-index-cache-repository.js +46 -0
- package/dist/workflows/runtime/runs.js +6 -1
- package/package.json +1 -1
|
@@ -30,7 +30,7 @@ import { parseFrontmatter } from "../../core/asset/frontmatter.js";
|
|
|
30
30
|
import { stripMarkdownFences } from "../../core/asset/markdown.js";
|
|
31
31
|
import { DESCRIPTION_MAX_CHARS, requiresDescription } from "../../core/authoring-rules.js";
|
|
32
32
|
import { resolveStashDir } from "../../core/common.js";
|
|
33
|
-
import { loadConfig } from "../../core/config/config.js";
|
|
33
|
+
import { getImproveProcessConfig, loadConfig } from "../../core/config/config.js";
|
|
34
34
|
import { ConfigError, UsageError } from "../../core/errors.js";
|
|
35
35
|
import { appendEvent, readEvents } from "../../core/events.js";
|
|
36
36
|
import { lintLessonContent } from "../../core/lesson-lint.js";
|
|
@@ -798,7 +798,7 @@ export async function akmReflect(options = {}) {
|
|
|
798
798
|
}
|
|
799
799
|
else {
|
|
800
800
|
const cfg = options.config ?? loadConfig();
|
|
801
|
-
const reflectProcess = cfg.
|
|
801
|
+
const reflectProcess = getImproveProcessConfig(cfg, "reflect", options.improveProfile);
|
|
802
802
|
// Resolve the runner from the improve profile's reflect entry when present.
|
|
803
803
|
runnerSpec = resolveImproveProcessRunnerFromProfile(reflectProcess, cfg) ?? undefined;
|
|
804
804
|
if (runnerSpec) {
|
|
@@ -1171,7 +1171,8 @@ export async function akmReflect(options = {}) {
|
|
|
1171
1171
|
// `profiles.improve.default.processes.reflect.qualityGate.enabled` or
|
|
1172
1172
|
// `profiles.improve.default.processes.distill.qualityGate.enabled` (the
|
|
1173
1173
|
// `lesson_quality_gate` flag name is the legacy alias still accepted by
|
|
1174
|
-
// `isLlmFeatureEnabled`). Fail-
|
|
1174
|
+
// `isLlmFeatureEnabled`). Fail-CLOSED (07 P0-2): a judge error / no-LLM /
|
|
1175
|
+
// parse failure rejects the proposal rather than passing it through.
|
|
1175
1176
|
// G-Eval (arXiv:2303.16634) — quality judgment before admission.
|
|
1176
1177
|
const runtimeConfig = options.config ??
|
|
1177
1178
|
(() => {
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
2
2
|
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
3
3
|
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
4
|
-
import { getDefaultLlmConfig } from "../../core/config/config.js";
|
|
4
|
+
import { getDefaultLlmConfig, getImproveProcessConfig } from "../../core/config/config.js";
|
|
5
5
|
import { warn } from "../../core/warn.js";
|
|
6
6
|
import { resolveImproveProcessRunnerFromProfile, runnerIsLlm } from "../../integrations/agent/runner.js";
|
|
7
7
|
import { chatCompletion } from "../../llm/client.js";
|
|
@@ -21,13 +21,16 @@ export function refSlug(ref) {
|
|
|
21
21
|
}
|
|
22
22
|
/**
|
|
23
23
|
* Resolve the production LLM seam for an improve process (`recombine` /
|
|
24
|
-
* `procedural`)
|
|
25
|
-
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
24
|
+
* `procedural`). Returns a function that issues one bounded chatCompletion per
|
|
25
|
+
* call, or `undefined` when no LLM is configured (the pass then makes no
|
|
26
|
+
* calls). Previously copied verbatim in recombine.ts and procedural.ts.
|
|
27
|
+
*
|
|
28
|
+
* When `opts.activeProfile` is supplied, its per-process runner override wins
|
|
29
|
+
* over the `default` profile so `akm improve --profile <name>` selects the
|
|
30
|
+
* profile's model; absent falls back to `default`.
|
|
28
31
|
*/
|
|
29
32
|
export function resolveImproveLlmFn(config, opts) {
|
|
30
|
-
const processConfig = config.
|
|
33
|
+
const processConfig = getImproveProcessConfig(config, opts.processKey, opts.activeProfile);
|
|
31
34
|
const runnerSpec = resolveImproveProcessRunnerFromProfile(processConfig, config);
|
|
32
35
|
const llmConfig = runnerSpec && runnerIsLlm(runnerSpec) ? runnerSpec.connection : getDefaultLlmConfig(config);
|
|
33
36
|
if (!llmConfig)
|
|
@@ -39,8 +39,10 @@ const GeneratorSchema = z.enum(PROPOSAL_SOURCES, {
|
|
|
39
39
|
export const PERSONAL_STASH = {
|
|
40
40
|
name: "personal-stash",
|
|
41
41
|
accept: [
|
|
42
|
-
// Extract proposals carry freshly-pulled real content — accept when present
|
|
43
|
-
|
|
42
|
+
// Extract proposals carry freshly-pulled real content — accept when present,
|
|
43
|
+
// but cap the diff for parity with reflect(80)/consolidate(200): an
|
|
44
|
+
// arbitrarily large extract should not auto-promote with zero LLM calls.
|
|
45
|
+
{ generator: "extract", minContentLines: 1, maxDiffLines: 200 },
|
|
44
46
|
// Reflect refinements: accept small ones; larger refinements defer to review.
|
|
45
47
|
{ generator: "reflect", maxDiffLines: 80 },
|
|
46
48
|
// Consolidate within the diff band; mid-band lands in `defer` below.
|
|
@@ -60,7 +60,7 @@ export const rememberCommand = defineJsonCommand({
|
|
|
60
60
|
},
|
|
61
61
|
expires: {
|
|
62
62
|
type: "string",
|
|
63
|
-
description: "Expiry duration shorthand
|
|
63
|
+
description: "Expiry duration shorthand — e.g. 30d, 12h, 5m (minutes), 3M (months). Resolved to an ISO date.",
|
|
64
64
|
},
|
|
65
65
|
source: {
|
|
66
66
|
type: "string",
|
|
@@ -363,6 +363,21 @@ export async function showLocal(input) {
|
|
|
363
363
|
}
|
|
364
364
|
const renderCtx = buildRenderContext(fileCtx, match, allSourceDirs, source?.registryId);
|
|
365
365
|
const response = renderer.buildShowResponse(renderCtx);
|
|
366
|
+
// 07 P1-D: provenance-aware toolPolicy CEILING. An agent's self-declared
|
|
367
|
+
// `tools` frontmatter is honoured ONLY for the operator's own PRIMARY stash —
|
|
368
|
+
// the assets they authored. Every other source is content pulled from
|
|
369
|
+
// elsewhere and must not name its own tool grant: registry-installed packs, a
|
|
370
|
+
// configured secondary source, and even a git source the operator marked
|
|
371
|
+
// `--writable` to contribute edits upstream (writability is "can I push", not
|
|
372
|
+
// "do I trust this content to grant itself tools"). Drop the policy so dispatch
|
|
373
|
+
// falls back to the parent/default grant. Keys off primary-stash identity —
|
|
374
|
+
// `allSources[0]` is always the primary (search-source.ts) — not a
|
|
375
|
+
// name-derived registryId or the orthogonal `writable` bit. `source` undefined
|
|
376
|
+
// (unresolved path) also fails closed.
|
|
377
|
+
const isPrimaryStash = source !== undefined && source.path === allSources[0]?.path;
|
|
378
|
+
if (response.toolPolicy !== undefined && !isPrimaryStash) {
|
|
379
|
+
delete response.toolPolicy;
|
|
380
|
+
}
|
|
366
381
|
const editable = isEditable(assetPath, config);
|
|
367
382
|
const fullResponse = {
|
|
368
383
|
...response,
|
|
@@ -12,25 +12,24 @@ import { serializeFrontmatter } from "../core/asset/asset-serialize.js";
|
|
|
12
12
|
import { toErrorMessage, tryReadStdinText } from "../core/common.js";
|
|
13
13
|
import { getDefaultLlmConfig, loadConfig } from "../core/config/config.js";
|
|
14
14
|
import { UsageError } from "../core/errors.js";
|
|
15
|
+
import { DURATION_UNITS, parseDuration as parseDurationSpec } from "../core/time.js";
|
|
15
16
|
import { warn } from "../core/warn.js";
|
|
16
17
|
import { SCOPE_KEYS } from "../indexer/passes/metadata.js";
|
|
17
18
|
import { parseFlagValue } from "../output/context.js";
|
|
18
19
|
/**
|
|
19
20
|
* Parse a shorthand duration string to a number of milliseconds.
|
|
20
|
-
* Supports: `30d` (days), `12h` (hours),
|
|
21
|
+
* Supports the CLI-wide canonical grammar: `30d` (days), `12h` (hours),
|
|
22
|
+
* `5m` (minutes), `3M` (months, approximated as 30d).
|
|
21
23
|
*/
|
|
22
24
|
export function parseDuration(s) {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
const
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
return n * 60 * 60 * 1000;
|
|
32
|
-
// 'm' = months, approximated as 30 days
|
|
33
|
-
return n * 30 * 24 * 60 * 60 * 1000;
|
|
25
|
+
// Canonical CLI unit grammar: `m` = minutes, `M` = months. Not lower-cased,
|
|
26
|
+
// so case distinguishes the two (`5m` = 5 minutes, `5M` = 5 months). See
|
|
27
|
+
// core/time.ts DURATION_UNITS.
|
|
28
|
+
const ms = parseDurationSpec(s.trim(), DURATION_UNITS);
|
|
29
|
+
if (ms === null) {
|
|
30
|
+
throw new UsageError(`Invalid --expires format "${s}". Use shorthand like 30d, 12h, 5m, or 3M.`, "INVALID_FLAG_VALUE");
|
|
31
|
+
}
|
|
32
|
+
return ms;
|
|
34
33
|
}
|
|
35
34
|
/**
|
|
36
35
|
* Build a YAML frontmatter block from memory metadata.
|
|
@@ -15,7 +15,7 @@ import { loadUserConfig, saveConfig } from "../../core/config/config.js";
|
|
|
15
15
|
import { ConfigError } from "../../core/errors.js";
|
|
16
16
|
import { assertSafeStashDir, getBinDir, getConfigPath, getDefaultStashDir } from "../../core/paths.js";
|
|
17
17
|
import { ensureRg } from "../../core/ripgrep/install.js";
|
|
18
|
-
import { copyStashSkeleton, scaffoldStashMeta } from "./stash-skeleton.js";
|
|
18
|
+
import { copyStashSkeleton, ensureStashGitignore, scaffoldStashMeta } from "./stash-skeleton.js";
|
|
19
19
|
/**
|
|
20
20
|
* Refuse to persist a temporary-directory stashDir to the user's config when
|
|
21
21
|
* running under a test runner AND `--dir <tempdir>` was passed explicitly.
|
|
@@ -92,6 +92,10 @@ async function akmInitReal(options) {
|
|
|
92
92
|
}
|
|
93
93
|
// Ensure the default stash is a local git repo (no remote required)
|
|
94
94
|
ensureGitRepo(stashDir);
|
|
95
|
+
// 08-F1: scaffold a default `.gitignore` that keeps env/ + secrets/ out of git
|
|
96
|
+
// so a `git push` can never leak them. Idempotent + non-clobbering; the user
|
|
97
|
+
// opts into versioning by un-ignoring a path.
|
|
98
|
+
ensureStashGitignore(stashDir);
|
|
95
99
|
// Run seeding UNCONDITIONALLY (not just when the stash was newly created) so
|
|
96
100
|
// re-running `akm init` on an existing stash backfills any missing skeleton
|
|
97
101
|
// files — the README, the per-type SOFT convention templates under
|
|
@@ -71,6 +71,40 @@ export function scaffoldStashMeta(stashDir) {
|
|
|
71
71
|
// Non-fatal — stash is usable without the .meta orientation doc
|
|
72
72
|
}
|
|
73
73
|
}
|
|
74
|
+
/** Marks the akm-authored block in a stash `.gitignore` (idempotency anchor). */
|
|
75
|
+
const STASH_GITIGNORE_MARKER = "# akm: keep secret material out of git by default";
|
|
76
|
+
const STASH_GITIGNORE_BLOCK = [
|
|
77
|
+
STASH_GITIGNORE_MARKER,
|
|
78
|
+
"# env/ and secrets/ assets hold tokens and keys. They are ignored by default",
|
|
79
|
+
"# so `git push` can never leak them. To version a specific one (e.g. for a",
|
|
80
|
+
"# private-remote backup), un-ignore its path below once you accept the risk.",
|
|
81
|
+
"env/",
|
|
82
|
+
"secrets/",
|
|
83
|
+
"",
|
|
84
|
+
].join("\n");
|
|
85
|
+
/**
|
|
86
|
+
* Ensure the stash `.gitignore` keeps `env/` and `secrets/` out of git by
|
|
87
|
+
* default (08-F1: the v0.8.0 `vaults/` → `env/`+`secrets/` migration never
|
|
88
|
+
* carried the ignore rules forward, and init scaffolded none).
|
|
89
|
+
*
|
|
90
|
+
* Idempotent + non-clobbering: creates the file when absent, appends the akm
|
|
91
|
+
* block when the file exists but lacks it (preserving the user's own rules),
|
|
92
|
+
* and no-ops once the marker is present. The user opts INTO versioning by
|
|
93
|
+
* un-ignoring a path.
|
|
94
|
+
*/
|
|
95
|
+
export function ensureStashGitignore(stashDir) {
|
|
96
|
+
try {
|
|
97
|
+
const gitignorePath = path.join(stashDir, ".gitignore");
|
|
98
|
+
const existing = fs.existsSync(gitignorePath) ? fs.readFileSync(gitignorePath, "utf8") : "";
|
|
99
|
+
if (existing.includes(STASH_GITIGNORE_MARKER))
|
|
100
|
+
return;
|
|
101
|
+
const gap = existing.length === 0 ? "" : existing.endsWith("\n") ? "\n" : "\n\n";
|
|
102
|
+
fs.writeFileSync(gitignorePath, `${existing}${gap}${STASH_GITIGNORE_BLOCK}`);
|
|
103
|
+
}
|
|
104
|
+
catch {
|
|
105
|
+
// Non-fatal — the stash is usable without the ignore scaffold.
|
|
106
|
+
}
|
|
107
|
+
}
|
|
74
108
|
const STASH_META_INDEX_TEMPLATE = `---
|
|
75
109
|
# Optional, human-authored orientation for this stash. Not indexed; surfaced
|
|
76
110
|
# on demand via \`akm show meta\` (this file) or \`akm show <stash>//meta\`.
|
|
@@ -8,7 +8,9 @@
|
|
|
8
8
|
* (block scalars, multi-line strings, nested objects, flow sequences, escape
|
|
9
9
|
* sequences) is handled correctly without a brittle hand-rolled state machine.
|
|
10
10
|
*/
|
|
11
|
+
import fs from "node:fs";
|
|
11
12
|
import { parse as yamlParse, stringify as yamlStringify } from "yaml";
|
|
13
|
+
import { assembleAsset } from "./asset-serialize.js";
|
|
12
14
|
/**
|
|
13
15
|
* Parse YAML frontmatter from a Markdown (or similar) string.
|
|
14
16
|
*
|
|
@@ -107,6 +109,26 @@ function parseFrontmatterLenient(frontmatter) {
|
|
|
107
109
|
}
|
|
108
110
|
return data;
|
|
109
111
|
}
|
|
112
|
+
/**
|
|
113
|
+
* Read a file, parse its frontmatter, let `mutator` compute the next
|
|
114
|
+
* frontmatter object, and write the reassembled asset back to disk.
|
|
115
|
+
*
|
|
116
|
+
* This is the shared read→parse→mutate→write primitive. The `mutator` receives
|
|
117
|
+
* the parsed result and returns either the next frontmatter object (to write)
|
|
118
|
+
* or `null` to skip the write entirely (e.g. for idempotent no-ops). The body
|
|
119
|
+
* content is preserved from the parse.
|
|
120
|
+
*
|
|
121
|
+
* @returns `true` if a write occurred, `false` if the mutator returned `null`.
|
|
122
|
+
*/
|
|
123
|
+
export function mutateFrontmatter(filePath, mutator) {
|
|
124
|
+
const raw = fs.readFileSync(filePath, "utf8");
|
|
125
|
+
const parsed = parseFrontmatter(raw);
|
|
126
|
+
const nextFrontmatter = mutator(parsed);
|
|
127
|
+
if (nextFrontmatter === null)
|
|
128
|
+
return false;
|
|
129
|
+
fs.writeFileSync(filePath, assembleAsset(nextFrontmatter, parsed.content), "utf8");
|
|
130
|
+
return true;
|
|
131
|
+
}
|
|
110
132
|
export function parseFrontmatterBlock(raw) {
|
|
111
133
|
// Handle both LF and CRLF line endings throughout.
|
|
112
134
|
// The closing --- may be preceded by \r\n; capture and strip trailing \r
|
package/dist/core/common.js
CHANGED
|
@@ -84,14 +84,7 @@ export function writeFileAtomic(target, content, mode) {
|
|
|
84
84
|
const tmp = `${target}.tmp.${process.pid}.${crypto.randomBytes(8).toString("hex")}`;
|
|
85
85
|
const fd = fs.openSync(tmp, "w", mode ?? 0o600);
|
|
86
86
|
try {
|
|
87
|
-
|
|
88
|
-
// so each call resolves to a single overload. Both write byte-exact.
|
|
89
|
-
if (typeof content === "string") {
|
|
90
|
-
fs.writeSync(fd, content);
|
|
91
|
-
}
|
|
92
|
-
else {
|
|
93
|
-
fs.writeSync(fd, content);
|
|
94
|
-
}
|
|
87
|
+
fs.writeSync(fd, typeof content === "string" ? Buffer.from(content) : content);
|
|
95
88
|
try {
|
|
96
89
|
fs.fdatasyncSync(fd);
|
|
97
90
|
}
|
|
@@ -462,13 +455,6 @@ export function asNonEmptyString(value) {
|
|
|
462
455
|
return trimmed.length > 0 ? trimmed : undefined;
|
|
463
456
|
}
|
|
464
457
|
// ── Generic data utilities ───────────────────────────────────────────────────
|
|
465
|
-
/**
|
|
466
|
-
* Return the trimmed string if non-empty, otherwise `undefined`.
|
|
467
|
-
* Equivalent to `firstString` previously defined in `memory-improve.ts`.
|
|
468
|
-
*/
|
|
469
|
-
export function firstString(value) {
|
|
470
|
-
return typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined;
|
|
471
|
-
}
|
|
472
458
|
/**
|
|
473
459
|
* Coerce an unknown value to a filtered, trimmed string array.
|
|
474
460
|
* Non-strings and empty/whitespace-only entries are dropped.
|
|
@@ -84,12 +84,21 @@ export function backupExistingConfig(configPath) {
|
|
|
84
84
|
if (!fs.existsSync(configPath))
|
|
85
85
|
return undefined;
|
|
86
86
|
const backupDir = path.join(getCacheDir(), "config-backups");
|
|
87
|
-
|
|
87
|
+
// 08-F4: lock the backup dir owner-only up front (0700) — matching the
|
|
88
|
+
// env.ts/secret.ts convention — so no other local user can traverse in during
|
|
89
|
+
// the copy→chmod window. chmod again to tighten a dir from an older version.
|
|
90
|
+
fs.mkdirSync(backupDir, { recursive: true, mode: 0o700 });
|
|
91
|
+
fs.chmodSync(backupDir, 0o700);
|
|
88
92
|
const timestamp = new Date().toISOString().replace(/[.:]/g, "-");
|
|
89
93
|
const timestamped = path.join(backupDir, `config-${timestamp}.json`);
|
|
90
94
|
const latest = path.join(backupDir, "config.latest.json");
|
|
91
95
|
fs.copyFileSync(configPath, timestamped);
|
|
92
96
|
fs.copyFileSync(configPath, latest);
|
|
97
|
+
// 08-F4: a config backup carries the same sensitive fields as the live config
|
|
98
|
+
// (endpoints, tokens). `copyFileSync` inherits the source's (often 0644) mode,
|
|
99
|
+
// so tighten the backups to owner-only — mirrors the env-cli 0600 write floor.
|
|
100
|
+
fs.chmodSync(timestamped, 0o600);
|
|
101
|
+
fs.chmodSync(latest, 0o600);
|
|
93
102
|
pruneOldBackups(backupDir);
|
|
94
103
|
return { timestamped, latest };
|
|
95
104
|
}
|
|
@@ -262,12 +262,6 @@ export function migrateConfigShape(raw, opts) {
|
|
|
262
262
|
if (typeof llmFeatures.metadata_enhance === "boolean")
|
|
263
263
|
me.enabled = llmFeatures.metadata_enhance;
|
|
264
264
|
}
|
|
265
|
-
if ("curate_rerank" in llmFeatures) {
|
|
266
|
-
const search = getObj(result, "search");
|
|
267
|
-
const cr = getObj(search, "curateRerank");
|
|
268
|
-
if (typeof llmFeatures.curate_rerank === "boolean")
|
|
269
|
-
cr.enabled = llmFeatures.curate_rerank;
|
|
270
|
-
}
|
|
271
265
|
if ("lesson_quality_gate" in llmFeatures) {
|
|
272
266
|
const distill = getImproveProcess(result, "distill");
|
|
273
267
|
const qg = getObj(distill, "qualityGate");
|
|
@@ -417,17 +411,10 @@ export function migrateConfigShape(raw, opts) {
|
|
|
417
411
|
}
|
|
418
412
|
if (isObj(features.search)) {
|
|
419
413
|
const fsearch = features.search;
|
|
420
|
-
if ("curate_rerank" in fsearch) {
|
|
421
|
-
const search = getObj(result, "search");
|
|
422
|
-
const cr = getObj(search, "curateRerank");
|
|
423
|
-
const val = fsearch.curate_rerank;
|
|
424
|
-
if (typeof val === "boolean")
|
|
425
|
-
cr.enabled = val;
|
|
426
|
-
else if (isObj(val) && typeof val.enabled === "boolean")
|
|
427
|
-
cr.enabled = val.enabled;
|
|
428
|
-
}
|
|
429
414
|
// Catch-all: unknown features.search.<key> entries land at
|
|
430
415
|
// search.<keyAsCamelCase> (preserving { enabled, options } when present).
|
|
416
|
+
// `curate_rerank` is a removed dead feature — listed here so the catch-all
|
|
417
|
+
// skips it and it is simply dropped (not resurrected under search.curateRerank).
|
|
431
418
|
const knownSearchKeys = new Set(["curate_rerank"]);
|
|
432
419
|
for (const [legacyKey, legacyVal] of Object.entries(fsearch)) {
|
|
433
420
|
if (knownSearchKeys.has(legacyKey))
|
|
@@ -49,8 +49,21 @@ const nonEmptyString = z
|
|
|
49
49
|
const httpUrl = z.string().refine((v) => v.startsWith("http://") || v.startsWith("https://"), {
|
|
50
50
|
message: "endpoint must start with http:// or https://",
|
|
51
51
|
});
|
|
52
|
-
// ── Feedback failure modes
|
|
53
|
-
|
|
52
|
+
// ── Feedback failure modes (F-3 / #384) ─────────────────────────────────────
|
|
53
|
+
/**
|
|
54
|
+
* Curated taxonomy of failure modes for negative feedback.
|
|
55
|
+
*
|
|
56
|
+
* Structured failure modes enable aggregation across feedback events so the
|
|
57
|
+
* distill pipeline can detect that "5 assets failed for the same reason" and
|
|
58
|
+
* act on it — free-text strings about the same issue are not aggregatable.
|
|
59
|
+
*/
|
|
60
|
+
export const FEEDBACK_FAILURE_MODES = [
|
|
61
|
+
"incorrect", // Factually wrong or logically flawed content
|
|
62
|
+
"outdated", // Correct at some point but now stale
|
|
63
|
+
"dangerous", // Could cause harm if followed (security, safety)
|
|
64
|
+
"incomplete", // Missing key steps, context, or caveats
|
|
65
|
+
"redundant", // Duplicates another asset without adding value
|
|
66
|
+
];
|
|
54
67
|
// ── Connection configs (LLM / embedding) ────────────────────────────────────
|
|
55
68
|
const LlmCapabilitiesSchema = z
|
|
56
69
|
.object({
|
|
@@ -552,7 +565,6 @@ export const SearchConfigSchema = z
|
|
|
552
565
|
.object({
|
|
553
566
|
minScore: nonNegativeNumber.optional(),
|
|
554
567
|
defaultExcludeTypes: z.array(nonEmptyString).optional(),
|
|
555
|
-
curateRerank: z.object({ enabled: z.boolean().optional() }).passthrough().optional(),
|
|
556
568
|
graphBoost: SearchGraphBoostSchema.optional(),
|
|
557
569
|
})
|
|
558
570
|
.passthrough();
|
|
@@ -13,20 +13,9 @@ import { warn } from "../warn.js";
|
|
|
13
13
|
// Canonical harness-id source of truth (#565) — runtime value re-export.
|
|
14
14
|
export { VALID_HARNESS_IDS } from "./config-types.js";
|
|
15
15
|
// ── Feedback failure-mode constants (F-3 / #384) ────────────────────────────
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
* Structured failure modes enable aggregation across feedback events so the
|
|
20
|
-
* distill pipeline can detect that "5 assets failed for the same reason" and
|
|
21
|
-
* act on it — free-text strings about the same issue are not aggregatable.
|
|
22
|
-
*/
|
|
23
|
-
export const FEEDBACK_FAILURE_MODES = [
|
|
24
|
-
"incorrect", // Factually wrong or logically flawed content
|
|
25
|
-
"outdated", // Correct at some point but now stale
|
|
26
|
-
"dangerous", // Could cause harm if followed (security, safety)
|
|
27
|
-
"incomplete", // Missing key steps, context, or caveats
|
|
28
|
-
"redundant", // Duplicates another asset without adding value
|
|
29
|
-
];
|
|
16
|
+
// Canonical taxonomy lives in the schema/validator layer; re-exported here so
|
|
17
|
+
// existing `../core/config/config` import sites keep working.
|
|
18
|
+
export { FEEDBACK_FAILURE_MODES } from "./config-schema.js";
|
|
30
19
|
/**
|
|
31
20
|
* Default value for {@link IndexPassConfig.graphExtractionBatchSize}. Chosen
|
|
32
21
|
* empirically: 4 amortises the per-call HTTP overhead 4× while keeping the
|
|
@@ -205,6 +194,25 @@ export function getDefaultLlmConfig(config) {
|
|
|
205
194
|
return undefined;
|
|
206
195
|
return config.profiles?.llm?.[defaultName];
|
|
207
196
|
}
|
|
197
|
+
/**
|
|
198
|
+
* Resolve the per-process config section for an improve process,
|
|
199
|
+
* centralizing the deeply-nested lookup
|
|
200
|
+
* `profile?.processes?.<name>` that was previously copy-pasted across the
|
|
201
|
+
* improve command family (20+ call sites).
|
|
202
|
+
*
|
|
203
|
+
* When an `activeProfile` is supplied (the profile resolved for the current
|
|
204
|
+
* `akm improve --profile <name>` run), its per-process override wins; otherwise
|
|
205
|
+
* — and as a fallback when the active profile does not define the section — the
|
|
206
|
+
* lookup falls back to the `"default"` improve profile from the on-disk config.
|
|
207
|
+
* Callers that have not yet threaded the active profile pass only `config` and
|
|
208
|
+
* get the historical default-profile behavior unchanged.
|
|
209
|
+
*/
|
|
210
|
+
export function getImproveProcessConfig(config, processName, activeProfile) {
|
|
211
|
+
const fromActiveProfile = activeProfile?.processes?.[processName];
|
|
212
|
+
if (fromActiveProfile !== undefined)
|
|
213
|
+
return fromActiveProfile;
|
|
214
|
+
return config.profiles?.improve?.default?.processes?.[processName];
|
|
215
|
+
}
|
|
208
216
|
/**
|
|
209
217
|
* Run `migrateConfigShape` on the raw text and — unless `AKM_NO_AUTO_MIGRATE=1`
|
|
210
218
|
* is set — persist the migrated result. Returns the (possibly migrated) text
|
package/dist/core/paths.js
CHANGED
|
@@ -111,8 +111,8 @@ export function getConfigDir(env = process.env, platform = process.platform) {
|
|
|
111
111
|
}
|
|
112
112
|
return path.join(home, ".config", "akm");
|
|
113
113
|
}
|
|
114
|
-
export function getConfigPath() {
|
|
115
|
-
return path.join(getConfigDir(), "config.json");
|
|
114
|
+
export function getConfigPath(env = process.env) {
|
|
115
|
+
return path.join(getConfigDir(env), "config.json");
|
|
116
116
|
}
|
|
117
117
|
// ── Cache directory ──────────────────────────────────────────────────────────
|
|
118
118
|
export function getCacheDir(env = process.env) {
|
|
@@ -212,8 +212,8 @@ export function getDataDir(env = process.env, platform = process.platform) {
|
|
|
212
212
|
return path.join("/tmp", "akm-data");
|
|
213
213
|
return path.join(home, ".local", "share", "akm");
|
|
214
214
|
}
|
|
215
|
-
export function getDbPath() {
|
|
216
|
-
return path.join(getDataDir(), "index.db");
|
|
215
|
+
export function getDbPath(env = process.env) {
|
|
216
|
+
return path.join(getDataDir(env), "index.db");
|
|
217
217
|
}
|
|
218
218
|
export function getIndexWriterLockPath() {
|
|
219
219
|
return path.join(getDataDir(), "index.db.write.lock");
|
package/dist/core/time.js
CHANGED
|
@@ -9,6 +9,59 @@
|
|
|
9
9
|
* consistently without private re-implementations drifting apart.
|
|
10
10
|
*/
|
|
11
11
|
import { UsageError } from "./errors.js";
|
|
12
|
+
// ── Duration-shorthand parsing ───────────────────────────────────────────────
|
|
13
|
+
const MINUTE_MS = 60 * 1000;
|
|
14
|
+
const HOUR_MS = 60 * 60 * 1000;
|
|
15
|
+
const DAY_MS = 24 * 60 * 60 * 1000;
|
|
16
|
+
/** A month is approximated as 30 days — this shorthand is not calendar-exact. */
|
|
17
|
+
const MONTH_MS = 30 * DAY_MS;
|
|
18
|
+
/**
|
|
19
|
+
* Canonical duration-shorthand unit map shared by every `--since` / `--expires`
|
|
20
|
+
* / `--window-compare` consumer.
|
|
21
|
+
*
|
|
22
|
+
* The grammar is intentionally uniform across the whole CLI:
|
|
23
|
+
* - `m` = MINUTES, `M` = MONTHS (30-day approximation)
|
|
24
|
+
* - `h`/`H` = hours, `d`/`D` = days
|
|
25
|
+
*
|
|
26
|
+
* Matching is CASE-SENSITIVE (see {@link parseDuration}), which is what lets
|
|
27
|
+
* `m` and `M` mean different things. Historically `akm health --since` and
|
|
28
|
+
* `remember --expires` read a case-insensitive `m` as MONTHS while
|
|
29
|
+
* `consolidate` / `--window-compare` read it as MINUTES; that split is now
|
|
30
|
+
* resolved in favour of the conventional `m`=minutes, with `M` reserved for
|
|
31
|
+
* months. Upper-case `H`/`D` aliases are retained so specs that previously
|
|
32
|
+
* relied on the old case-insensitive parsers (e.g. `"7D"`) keep working.
|
|
33
|
+
*/
|
|
34
|
+
export const DURATION_UNITS = {
|
|
35
|
+
m: MINUTE_MS,
|
|
36
|
+
M: MONTH_MS,
|
|
37
|
+
h: HOUR_MS,
|
|
38
|
+
H: HOUR_MS,
|
|
39
|
+
d: DAY_MS,
|
|
40
|
+
D: DAY_MS,
|
|
41
|
+
};
|
|
42
|
+
/**
|
|
43
|
+
* Parse a compact duration shorthand (e.g. `"30d"`, `"12h"`, `"5m"`, `"3M"`)
|
|
44
|
+
* into a number of milliseconds using an explicit `units` map (default
|
|
45
|
+
* {@link DURATION_UNITS}), or return `null` when the input does not match
|
|
46
|
+
* `<digits><letter>` or the unit is not in the map.
|
|
47
|
+
*
|
|
48
|
+
* Matching is CASE-SENSITIVE against the map keys, so `m` (minutes) and `M`
|
|
49
|
+
* (months) are distinct — do NOT lower-case the spec before calling, or the
|
|
50
|
+
* two collapse. Amount is parsed with base-10 `parseInt`; `null` is returned
|
|
51
|
+
* rather than throwing so each caller keeps its own error/fallback policy.
|
|
52
|
+
*/
|
|
53
|
+
export function parseDuration(spec, units = DURATION_UNITS) {
|
|
54
|
+
const match = spec.trim().match(/^(\d+)([a-zA-Z])$/);
|
|
55
|
+
if (!match)
|
|
56
|
+
return null;
|
|
57
|
+
const amount = Number.parseInt(match[1] ?? "", 10);
|
|
58
|
+
if (!Number.isFinite(amount))
|
|
59
|
+
return null;
|
|
60
|
+
const multiplier = units[match[2] ?? ""];
|
|
61
|
+
if (multiplier === undefined)
|
|
62
|
+
return null;
|
|
63
|
+
return amount * multiplier;
|
|
64
|
+
}
|
|
12
65
|
// ── Since-flag parsing ───────────────────────────────────────────────────────
|
|
13
66
|
/**
|
|
14
67
|
* Parse a user-supplied `--since` value and return an ISO-8601 timestamp
|
package/dist/indexer/db/db.js
CHANGED
|
@@ -257,6 +257,49 @@ export function getDerivedForParent(db, parentRef) {
|
|
|
257
257
|
return null;
|
|
258
258
|
}
|
|
259
259
|
}
|
|
260
|
+
/**
 * 03-R3: look up, for each supplied derived-twin row id, the `beliefState`
 * stored on that twin's BASE memory entry.
 *
 * Search ranking calls this for derived-twin belief inheritance: a `.derived`
 * twin carries no belief state of its own, so at search time it takes over the
 * demoting state (contradicted/superseded/…) of its base memory. The twin's
 * `entry_key` is its base's `entry_key` with `.derived` appended (same stash +
 * type prefix, `<name>` vs `<name>.derived`), so the join locates the base by
 * cutting that suffix off — no ref/prefix reconstruction is involved.
 *
 * Best-effort: a failing query (e.g. legacy DB) contributes no inheritance
 * instead of failing the search.
 *
 * @param db - Database handle exposing `prepare(sql).all(...params)`.
 * @param twinIds - Row ids of the derived-twin entries to resolve.
 * @returns Map of twin id → trimmed base beliefState, containing only twins
 *   whose base carries a non-empty string state.
 */
export function getBaseBeliefStatesForDerivedTwins(db, twinIds) {
    const beliefByTwinId = new Map();
    // Work through the ids in SQLITE_CHUNK_SIZE slices, as the sibling
    // bulk-by-id helpers do, so a large `--limit` candidate set never trips
    // SQLITE_MAX_VARIABLE_NUMBER (that error would land in the best-effort
    // catch and silently switch the feature off). An empty id list never
    // enters the loop, so no query is issued for it.
    for (let offset = 0; offset < twinIds.length; offset += SQLITE_CHUNK_SIZE) {
        const idBatch = twinIds.slice(offset, offset + SQLITE_CHUNK_SIZE);
        // One bound parameter per id in this slice.
        const placeholders = idBatch.map(() => "?").join(",");
        const loadBatch = () => {
            const statement = db.prepare(`SELECT twin.id AS twin_id, json_extract(base.entry_json, '$.beliefState') AS belief
FROM entries twin
JOIN entries base
ON base.entry_type = 'memory'
AND base.entry_key = substr(twin.entry_key, 1, length(twin.entry_key) - length('.derived'))
WHERE twin.id IN (${placeholders})
AND twin.entry_key LIKE '%.derived'
AND json_extract(base.entry_json, '$.beliefState') IS NOT NULL`);
            for (const row of statement.all(...idBatch)) {
                // Ignore non-string and whitespace-only states.
                if (typeof row.belief !== "string")
                    continue;
                const state = row.belief.trim();
                if (state.length > 0)
                    beliefByTwinId.set(row.twin_id, state);
            }
        };
        bestEffort(loadBatch, "legacy DB / entry_json without beliefState — treat as no twin inheritance");
    }
    return beliefByTwinId;
}
|
|
260
303
|
/**
|
|
261
304
|
* Phase 2A / Rec 5: bulk-load positive feedback event counts for the given
|
|
262
305
|
* entry ids. Used by the utility-decay forgetting curve to stabilize
|
|
@@ -648,7 +691,8 @@ function runFtsQuery(db, ftsQuery, limit, entryType, excludeTypes) {
|
|
|
648
691
|
}
|
|
649
692
|
return results;
|
|
650
693
|
}
|
|
651
|
-
catch {
|
|
694
|
+
catch (err) {
|
|
695
|
+
warn("[db] runFtsQuery failed:", err instanceof Error ? err.message : String(err));
|
|
652
696
|
return [];
|
|
653
697
|
}
|
|
654
698
|
}
|
|
@@ -747,8 +791,7 @@ export function getEntryCount(db) {
|
|
|
747
791
|
return row.cnt;
|
|
748
792
|
}
|
|
749
793
|
export function getEmbeddableEntryCount(db) {
|
|
750
|
-
|
|
751
|
-
return row.cnt;
|
|
794
|
+
return getEntryCount(db);
|
|
752
795
|
}
|
|
753
796
|
export function getEmbeddingCount(db) {
|
|
754
797
|
const row = db.prepare("SELECT COUNT(*) AS cnt FROM embeddings").get();
|
|
@@ -1327,49 +1370,11 @@ export function relinkUsageEvents(db) {
|
|
|
1327
1370
|
}, "usage_events table may not exist yet during entry_id re-resolution");
|
|
1328
1371
|
}
|
|
1329
1372
|
// ── registry_index_cache helpers ─────────────────────────────────────────────
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
* @param indexJson - Serialised registry index document (JSON string).
|
|
1336
|
-
* @param opts.etag - HTTP ETag from the response (optional).
|
|
1337
|
-
* @param opts.lastModified - HTTP Last-Modified from the response (optional).
|
|
1338
|
-
*/
|
|
1339
|
-
export function upsertRegistryIndexCache(db, registryUrl, indexJson, opts) {
|
|
1340
|
-
db.prepare(`
|
|
1341
|
-
INSERT INTO registry_index_cache (registry_url, fetched_at, etag, last_modified, index_json)
|
|
1342
|
-
VALUES (?, ?, ?, ?, ?)
|
|
1343
|
-
ON CONFLICT(registry_url) DO UPDATE SET
|
|
1344
|
-
fetched_at = excluded.fetched_at,
|
|
1345
|
-
etag = excluded.etag,
|
|
1346
|
-
last_modified = excluded.last_modified,
|
|
1347
|
-
index_json = excluded.index_json
|
|
1348
|
-
`).run(registryUrl, new Date().toISOString(), opts?.etag ?? null, opts?.lastModified ?? null, indexJson);
|
|
1349
|
-
}
|
|
1350
|
-
/**
|
|
1351
|
-
* Look up a cached registry index entry from index.db.
|
|
1352
|
-
* Returns undefined when not found or when the entry is older than `maxAgeMs`.
|
|
1353
|
-
*
|
|
1354
|
-
* TTL check: if `Date.now() - new Date(fetched_at).getTime() > maxAgeMs` the
|
|
1355
|
-
* entry is considered a cache miss and undefined is returned.
|
|
1356
|
-
*
|
|
1357
|
-
* @param db - Open index.db connection.
|
|
1358
|
-
* @param registryUrl - Canonical URL of the registry (primary key).
|
|
1359
|
-
* @param maxAgeMs - Maximum age in milliseconds before the entry is stale (default: 1 hour).
|
|
1360
|
-
*/
|
|
1361
|
-
export function getRegistryIndexCache(db, registryUrl, maxAgeMs = 3_600_000 /* 1 hour */) {
|
|
1362
|
-
const row = db
|
|
1363
|
-
.prepare(`SELECT fetched_at, etag, last_modified, index_json
|
|
1364
|
-
FROM registry_index_cache WHERE registry_url = ?`)
|
|
1365
|
-
.get(registryUrl);
|
|
1366
|
-
if (!row)
|
|
1367
|
-
return undefined;
|
|
1368
|
-
const fetchedAt = Date.parse(row.fetched_at);
|
|
1369
|
-
if (Number.isNaN(fetchedAt) || Date.now() - fetchedAt > maxAgeMs)
|
|
1370
|
-
return undefined;
|
|
1371
|
-
return { indexJson: row.index_json, etag: row.etag, lastModified: row.last_modified };
|
|
1372
|
-
}
|
|
1373
|
+
// The raw SQL for the `registry_index_cache` table now lives in the storage
|
|
1374
|
+
// layer (`src/storage/repositories/registry-index-cache-repository.ts`) so the
|
|
1375
|
+
// dependency arrow points indexer → storage. These thin re-exports preserve the
|
|
1376
|
+
// previously-public symbols for any importer of this module.
|
|
1377
|
+
export { getRegistryIndexCache, upsertRegistryIndexCache, } from "../../storage/repositories/registry-index-cache-repository.js";
|
|
1373
1378
|
/**
|
|
1374
1379
|
* Walk indexed entries and collect a deduplicated set of tags. When
|
|
1375
1380
|
* `entryType` is provided, only entries of that type contribute tags.
|