akm-cli 0.7.5 → 0.8.0-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (300) hide show
  1. package/{.github/CHANGELOG.md → CHANGELOG.md} +192 -2
  2. package/README.md +22 -6
  3. package/SECURITY.md +93 -0
  4. package/dist/cli/config-migrate.js +144 -0
  5. package/dist/cli/config-validate.js +39 -0
  6. package/dist/cli/confirm.js +73 -0
  7. package/dist/cli/parse-args.js +133 -0
  8. package/dist/cli/shared.js +129 -0
  9. package/dist/cli.js +2569 -1449
  10. package/dist/commands/add-cli.js +279 -0
  11. package/dist/commands/agent-dispatch.js +110 -0
  12. package/dist/commands/agent-support.js +68 -0
  13. package/dist/commands/completions.js +3 -0
  14. package/dist/commands/config-cli.js +130 -534
  15. package/dist/commands/consolidate.js +2122 -0
  16. package/dist/commands/curate.js +44 -3
  17. package/dist/commands/db-cli.js +23 -0
  18. package/dist/commands/distill-promotion-policy.js +660 -0
  19. package/dist/commands/distill.js +1075 -77
  20. package/dist/commands/env.js +213 -0
  21. package/dist/commands/eval-cases.js +43 -0
  22. package/dist/commands/events.js +5 -23
  23. package/dist/commands/extract-cli.js +127 -0
  24. package/dist/commands/extract-prompt.js +204 -0
  25. package/dist/commands/extract.js +477 -0
  26. package/dist/commands/feedback-cli.js +331 -0
  27. package/dist/commands/graph.js +477 -0
  28. package/dist/commands/health.js +1302 -0
  29. package/dist/commands/help/help-accept.md +12 -0
  30. package/dist/commands/help/help-improve.md +69 -0
  31. package/dist/commands/help/help-proposals.md +18 -0
  32. package/dist/commands/help/help-propose.md +17 -0
  33. package/dist/commands/help/help-reject.md +11 -0
  34. package/dist/commands/history.js +54 -46
  35. package/dist/commands/improve-auto-accept.js +97 -0
  36. package/dist/commands/improve-cli.js +217 -0
  37. package/dist/commands/improve-profiles.js +166 -0
  38. package/dist/commands/improve-result-file.js +167 -0
  39. package/dist/commands/improve.js +2373 -0
  40. package/dist/commands/info.js +5 -2
  41. package/dist/commands/init.js +50 -2
  42. package/dist/commands/installed-stashes.js +102 -139
  43. package/dist/commands/knowledge.js +136 -0
  44. package/dist/commands/lint/agent-linter.js +49 -0
  45. package/dist/commands/lint/base-linter.js +479 -0
  46. package/dist/commands/lint/command-linter.js +49 -0
  47. package/dist/commands/lint/default-linter.js +16 -0
  48. package/dist/commands/lint/env-key-rules.js +154 -0
  49. package/dist/commands/lint/index.js +196 -0
  50. package/dist/commands/lint/knowledge-linter.js +16 -0
  51. package/dist/commands/lint/markdown-insertion.js +343 -0
  52. package/dist/commands/lint/memory-linter.js +61 -0
  53. package/dist/commands/lint/registry.js +36 -0
  54. package/dist/commands/lint/skill-linter.js +45 -0
  55. package/dist/commands/lint/task-linter.js +50 -0
  56. package/dist/commands/lint/types.js +4 -0
  57. package/dist/commands/lint/workflow-linter.js +56 -0
  58. package/dist/commands/lint.js +4 -0
  59. package/dist/commands/migration-help.js +5 -2
  60. package/dist/commands/proposal.js +67 -12
  61. package/dist/commands/propose.js +86 -31
  62. package/dist/commands/reflect.js +1091 -73
  63. package/dist/commands/registry-cli.js +150 -0
  64. package/dist/commands/registry-search.js +5 -2
  65. package/dist/commands/remember-cli.js +257 -0
  66. package/dist/commands/remember.js +69 -6
  67. package/dist/commands/schema-repair.js +203 -0
  68. package/dist/commands/search.js +115 -14
  69. package/dist/commands/secret.js +173 -0
  70. package/dist/commands/self-update.js +3 -0
  71. package/dist/commands/show.js +148 -25
  72. package/dist/commands/source-add.js +17 -45
  73. package/dist/commands/source-clone.js +3 -0
  74. package/dist/commands/source-manage.js +14 -19
  75. package/dist/commands/tasks.js +437 -0
  76. package/dist/commands/url-checker.js +42 -0
  77. package/dist/core/action-contributors.js +28 -0
  78. package/dist/core/asset-ref.js +17 -2
  79. package/dist/core/asset-registry.js +12 -17
  80. package/dist/core/asset-serialize.js +88 -0
  81. package/dist/core/asset-spec.js +67 -1
  82. package/dist/core/common.js +182 -0
  83. package/dist/core/concurrent.js +25 -0
  84. package/dist/core/config-io.js +347 -0
  85. package/dist/core/config-migration.js +622 -0
  86. package/dist/core/config-schema.js +534 -0
  87. package/dist/core/config-sources.js +108 -0
  88. package/dist/core/config-types.js +4 -0
  89. package/dist/core/config-walker.js +337 -0
  90. package/dist/core/config.js +364 -981
  91. package/dist/core/errors.js +42 -20
  92. package/dist/core/events.js +91 -138
  93. package/dist/core/file-lock.js +104 -0
  94. package/dist/core/frontmatter.js +75 -8
  95. package/dist/core/lesson-lint.js +3 -0
  96. package/dist/core/markdown.js +20 -0
  97. package/dist/core/memory-belief.js +62 -0
  98. package/dist/core/memory-contradiction-detect.js +274 -0
  99. package/dist/core/memory-improve.js +806 -0
  100. package/dist/core/parse.js +158 -0
  101. package/dist/core/paths.js +280 -14
  102. package/dist/core/proposal-quality-validators.js +380 -0
  103. package/dist/core/proposal-validators.js +69 -0
  104. package/dist/core/proposals.js +512 -42
  105. package/dist/core/state-db.js +1068 -0
  106. package/dist/core/text-truncation.js +107 -0
  107. package/dist/core/time.js +54 -0
  108. package/dist/core/tty.js +59 -0
  109. package/dist/core/warn.js +64 -1
  110. package/dist/core/write-source.js +3 -0
  111. package/dist/indexer/db-backup.js +391 -0
  112. package/dist/indexer/db-search.js +163 -254
  113. package/dist/indexer/db.js +975 -103
  114. package/dist/indexer/ensure-index.js +64 -0
  115. package/dist/indexer/file-context.js +3 -0
  116. package/dist/indexer/graph-boost.js +376 -101
  117. package/dist/indexer/graph-db.js +391 -0
  118. package/dist/indexer/graph-dedup.js +95 -0
  119. package/dist/indexer/graph-extraction.js +550 -124
  120. package/dist/indexer/index-context.js +4 -0
  121. package/dist/indexer/indexer.js +523 -301
  122. package/dist/indexer/llm-cache.js +52 -0
  123. package/dist/indexer/manifest.js +3 -0
  124. package/dist/indexer/matchers.js +167 -160
  125. package/dist/indexer/memory-inference.js +152 -74
  126. package/dist/indexer/metadata-contributors.js +29 -0
  127. package/dist/indexer/metadata.js +275 -196
  128. package/dist/indexer/path-resolver.js +92 -0
  129. package/dist/indexer/project-context.js +192 -0
  130. package/dist/indexer/ranking-contributors.js +331 -0
  131. package/dist/indexer/ranking.js +81 -0
  132. package/dist/indexer/search-fields.js +5 -9
  133. package/dist/indexer/search-hit-enrichers.js +111 -0
  134. package/dist/indexer/search-source.js +44 -10
  135. package/dist/indexer/semantic-status.js +6 -17
  136. package/dist/indexer/staleness-detect.js +447 -0
  137. package/dist/indexer/usage-events.js +12 -9
  138. package/dist/indexer/walker.js +28 -0
  139. package/dist/integrations/agent/builders.js +135 -0
  140. package/dist/integrations/agent/config.js +122 -230
  141. package/dist/integrations/agent/detect.js +3 -0
  142. package/dist/integrations/agent/index.js +7 -13
  143. package/dist/integrations/agent/model-aliases.js +55 -0
  144. package/dist/integrations/agent/profiles.js +70 -5
  145. package/dist/integrations/agent/prompts.js +214 -80
  146. package/dist/integrations/agent/runner.js +151 -0
  147. package/dist/integrations/agent/sdk-runner.js +126 -0
  148. package/dist/integrations/agent/spawn.js +118 -23
  149. package/dist/integrations/github.js +3 -0
  150. package/dist/integrations/lockfile.js +32 -69
  151. package/dist/integrations/session-logs/index.js +69 -0
  152. package/dist/integrations/session-logs/inline-refs.js +35 -0
  153. package/dist/integrations/session-logs/pre-filter.js +152 -0
  154. package/dist/integrations/session-logs/providers/claude-code.js +282 -0
  155. package/dist/integrations/session-logs/providers/opencode.js +258 -0
  156. package/dist/integrations/session-logs/types.js +4 -0
  157. package/dist/llm/call-ai.js +62 -0
  158. package/dist/llm/client.js +77 -124
  159. package/dist/llm/embedder.js +20 -29
  160. package/dist/llm/embedders/cache.js +3 -7
  161. package/dist/llm/embedders/local.js +42 -1
  162. package/dist/llm/embedders/remote.js +20 -8
  163. package/dist/llm/embedders/types.js +3 -7
  164. package/dist/llm/feature-gate.js +95 -48
  165. package/dist/llm/graph-extract.js +676 -70
  166. package/dist/llm/index-passes.js +44 -29
  167. package/dist/llm/memory-infer.js +77 -71
  168. package/dist/llm/metadata-enhance.js +42 -29
  169. package/dist/llm/prompts/extract-session.md +80 -0
  170. package/dist/llm/prompts/graph-extract-user-prompt.md +35 -0
  171. package/dist/output/cli-hints-full.md +292 -0
  172. package/dist/output/cli-hints-short.md +66 -0
  173. package/dist/output/cli-hints.js +7 -320
  174. package/dist/output/context.js +60 -8
  175. package/dist/output/renderers.js +300 -257
  176. package/dist/output/shapes/curate.js +56 -0
  177. package/dist/output/shapes/distill.js +10 -0
  178. package/dist/output/shapes/env-list.js +19 -0
  179. package/dist/output/shapes/events.js +11 -0
  180. package/dist/output/shapes/helpers.js +424 -0
  181. package/dist/output/shapes/history.js +7 -0
  182. package/dist/output/shapes/passthrough.js +102 -0
  183. package/dist/output/shapes/proposal-accept.js +7 -0
  184. package/dist/output/shapes/proposal-diff.js +7 -0
  185. package/dist/output/shapes/proposal-list.js +7 -0
  186. package/dist/output/shapes/proposal-producer.js +11 -0
  187. package/dist/output/shapes/proposal-reject.js +7 -0
  188. package/dist/output/shapes/proposal-show.js +7 -0
  189. package/dist/output/shapes/registry-search.js +6 -0
  190. package/dist/output/shapes/registry.js +30 -0
  191. package/dist/output/shapes/search.js +6 -0
  192. package/dist/output/shapes/secret-list.js +19 -0
  193. package/dist/output/shapes/show.js +6 -0
  194. package/dist/output/shapes/vault-list.js +19 -0
  195. package/dist/output/shapes.js +51 -516
  196. package/dist/output/text/add.js +6 -0
  197. package/dist/output/text/clone.js +6 -0
  198. package/dist/output/text/config.js +6 -0
  199. package/dist/output/text/curate.js +6 -0
  200. package/dist/output/text/distill.js +7 -0
  201. package/dist/output/text/enable-disable.js +7 -0
  202. package/dist/output/text/events.js +10 -0
  203. package/dist/output/text/feedback.js +6 -0
  204. package/dist/output/text/helpers.js +1039 -0
  205. package/dist/output/text/history.js +7 -0
  206. package/dist/output/text/import.js +6 -0
  207. package/dist/output/text/index.js +6 -0
  208. package/dist/output/text/info.js +6 -0
  209. package/dist/output/text/init.js +6 -0
  210. package/dist/output/text/list.js +6 -0
  211. package/dist/output/text/proposal-producer.js +8 -0
  212. package/dist/output/text/proposal.js +11 -0
  213. package/dist/output/text/registry-commands.js +11 -0
  214. package/dist/output/text/registry.js +30 -0
  215. package/dist/output/text/remember.js +6 -0
  216. package/dist/output/text/remove.js +6 -0
  217. package/dist/output/text/save.js +6 -0
  218. package/dist/output/text/search.js +6 -0
  219. package/dist/output/text/show.js +6 -0
  220. package/dist/output/text/update.js +6 -0
  221. package/dist/output/text/upgrade.js +6 -0
  222. package/dist/output/text/vault.js +16 -0
  223. package/dist/output/text/wiki.js +15 -0
  224. package/dist/output/text/workflow.js +14 -0
  225. package/dist/output/text.js +44 -1092
  226. package/dist/registry/build-index.js +3 -0
  227. package/dist/registry/create-provider-registry.js +3 -0
  228. package/dist/registry/factory.js +4 -1
  229. package/dist/registry/origin-resolve.js +3 -0
  230. package/dist/registry/providers/index.js +3 -0
  231. package/dist/registry/providers/skills-sh.js +71 -50
  232. package/dist/registry/providers/static-index.js +53 -48
  233. package/dist/registry/providers/types.js +3 -24
  234. package/dist/registry/resolve.js +11 -16
  235. package/dist/registry/types.js +3 -0
  236. package/dist/scripts/migrate-storage.js +17750 -0
  237. package/dist/scripts/migrations/import-fs-improve-runs-to-db.js +9031 -0
  238. package/dist/scripts/migrations/v16-to-v17.js +141 -0
  239. package/dist/setup/detect.js +3 -0
  240. package/dist/setup/ripgrep-install.js +3 -0
  241. package/dist/setup/ripgrep-resolve.js +3 -0
  242. package/dist/setup/setup.js +775 -37
  243. package/dist/setup/steps.js +3 -15
  244. package/dist/sources/include.js +3 -0
  245. package/dist/sources/provider-factory.js +5 -12
  246. package/dist/sources/provider.js +3 -20
  247. package/dist/sources/providers/filesystem.js +19 -23
  248. package/dist/sources/providers/git.js +138 -21
  249. package/dist/sources/providers/index.js +3 -0
  250. package/dist/sources/providers/install-types.js +3 -13
  251. package/dist/sources/providers/npm.js +3 -4
  252. package/dist/sources/providers/provider-utils.js +3 -0
  253. package/dist/sources/providers/sync-from-ref.js +3 -11
  254. package/dist/sources/providers/tar-utils.js +3 -0
  255. package/dist/sources/providers/website.js +18 -22
  256. package/dist/sources/resolve.js +3 -0
  257. package/dist/sources/types.js +3 -0
  258. package/dist/sources/website-ingest.js +7 -0
  259. package/dist/tasks/backends/cron.js +203 -0
  260. package/dist/tasks/backends/exec-utils.js +28 -0
  261. package/dist/tasks/backends/index.js +24 -0
  262. package/dist/tasks/backends/launchd-template.xml +19 -0
  263. package/dist/tasks/backends/launchd.js +187 -0
  264. package/dist/tasks/backends/schtasks-template.xml +29 -0
  265. package/dist/tasks/backends/schtasks.js +215 -0
  266. package/dist/tasks/parser.js +211 -0
  267. package/dist/tasks/resolveAkmBin.js +87 -0
  268. package/dist/tasks/runner.js +458 -0
  269. package/dist/tasks/schedule.js +227 -0
  270. package/dist/tasks/schema.js +15 -0
  271. package/dist/tasks/validator.js +62 -0
  272. package/dist/version.js +3 -0
  273. package/dist/wiki/index-template.md +12 -0
  274. package/dist/wiki/ingest-workflow-template.md +54 -0
  275. package/dist/wiki/log-template.md +8 -0
  276. package/dist/wiki/schema-template.md +61 -0
  277. package/dist/wiki/wiki-templates.js +15 -0
  278. package/dist/wiki/wiki.js +13 -61
  279. package/dist/workflows/authoring.js +8 -25
  280. package/dist/workflows/cli.js +3 -0
  281. package/dist/workflows/db.js +140 -10
  282. package/dist/workflows/document-cache.js +3 -10
  283. package/dist/workflows/parser.js +3 -0
  284. package/dist/workflows/renderer.js +11 -3
  285. package/dist/workflows/runs.js +77 -92
  286. package/dist/workflows/schema.js +3 -0
  287. package/dist/workflows/scope-key.js +3 -0
  288. package/dist/workflows/validator.js +4 -8
  289. package/dist/workflows/workflow-template.md +24 -0
  290. package/docs/README.md +10 -2
  291. package/docs/data-and-telemetry.md +225 -0
  292. package/docs/migration/release-notes/0.7.0.md +1 -1
  293. package/docs/migration/release-notes/0.7.5.md +2 -2
  294. package/docs/migration/release-notes/0.8.0.md +48 -0
  295. package/docs/migration/v0.7-to-v0.8.md +1307 -0
  296. package/package.json +30 -12
  297. package/.github/LICENSE +0 -374
  298. package/dist/commands/install-audit.js +0 -381
  299. package/dist/commands/vault.js +0 -328
  300. package/dist/templates/wiki-templates.js +0 -100
@@ -0,0 +1,1068 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /**
5
+ * state.db — Durable SQLite database for non-regenerable akm state.
6
+ *
7
+ * This module owns the durable tables that replace flat-file storage, including:
8
+ *
9
+ * events — replaces events.jsonl (append-only event bus)
10
+ * proposals — replaces per-uuid JSON directories under .akm/proposals/
11
+ * task_history — replaces per-task JSONL files under <cacheDir>/tasks/history/
12
+ *
13
+ * ## Why a separate database from index.db
14
+ *
15
+ * index.db uses a single DB_VERSION integer: when the version changes it drops
16
+ * ALL tables and recreates them. That is acceptable for the search index because
17
+ * every entry is fully regenerable from the stash on disk. Events, proposals, and
18
+ * task history are NON-REGENERABLE — losing them is data loss. They must live in
19
+ * a database whose schema evolves via incremental, additive migrations that never
20
+ * drop rows.
21
+ *
22
+ * ## Migration-safety contract
23
+ *
24
+ * The `schema_migrations` table records every applied migration by a stable string
25
+ * ID. `runMigrations(db)` is idempotent: new installs run all migrations in order;
26
+ * upgrades run only the ones not yet applied. No migration may DROP a table that
27
+ * holds durable data, RENAME a column, or change a column's type.
28
+ *
29
+ * Permitted schema evolution operations (always migration-safe in SQLite):
30
+ * - ALTER TABLE … ADD COLUMN <name> <type> DEFAULT <value>
31
+ * - CREATE INDEX IF NOT EXISTS …
32
+ * - CREATE TABLE IF NOT EXISTS … (additive new tables)
33
+ *
34
+ * ## Schema design: indexed columns vs. metadata_json
35
+ *
36
+ * Each table holds only the columns needed for indexed queries as first-class
37
+ * columns. All other fields live in a `metadata_json TEXT` column (a JSON object).
38
+ * New fields can be appended to the JSON blob at any time without touching the
39
+ * DDL. This is the same pattern used by `usage_events.metadata` in index.db and
40
+ * by the original events.jsonl format (the `metadata` field was always free-form
41
+ * JSON).
42
+ *
43
+ * ## WAL mode
44
+ *
45
+ * SQLite WAL mode allows concurrent readers while a writer is active and makes
46
+ * crashes safe (the WAL is replayed on next open). The O_APPEND multi-writer model
47
+ * of events.jsonl is replaced by WAL-mode serialised writes — acceptable because
48
+ * CLI commands are almost always single-writer.
49
+ *
50
+ * @module state-db
51
+ */
52
+ import { Database } from "bun:sqlite";
53
+ import fs from "node:fs";
54
+ import path from "node:path";
55
+ import { getDataDir } from "./paths";
56
+ import { error } from "./warn";
57
+ // ── Path helper ──────────────────────────────────────────────────────────────
58
/**
 * Resolve the default on-disk location of the state database.
 *
 * The file is always named `state.db` and sits directly inside the directory
 * returned by `getDataDir()`, so whatever data-root resolution that helper
 * applies is inherited here. Processes that resolve the same data directory
 * therefore end up sharing one state database.
 *
 * @returns {string} The path `<dataDir>/state.db`.
 */
export function getStateDbPath() {
    const dataRoot = getDataDir();
    return path.join(dataRoot, "state.db");
}
67
+ // ── Database open ────────────────────────────────────────────────────────────
68
/**
 * Open the state database, creating its parent directory when missing, apply
 * the connection PRAGMAs, and run any pending schema migrations.
 *
 * @param dbPath - Optional override for the database file path. When omitted
 *   (or nullish) the path from `getStateDbPath()` is used. Pass a tmpdir path
 *   in tests to avoid touching the real user data directory.
 * @returns The opened `Database` handle. The caller owns it and is responsible
 *   for closing it.
 * @throws Rethrows any error raised while applying PRAGMAs or running
 *   migrations. The handle is closed before the error propagates so a failed
 *   open does not leak a connection.
 *
 * PRAGMA rationale (in the order they are applied):
 *
 * busy_timeout = 5000
 *   When another connection holds a conflicting lock, SQLite retries for up
 *   to 5 000 ms before returning SQLITE_BUSY. It is set FIRST so that the
 *   statements below — including the journal-mode switch — also wait on a
 *   concurrent CLI invocation instead of failing immediately under SQLite's
 *   default timeout of 0 ms.
 *
 * journal_mode = WAL
 *   Write-Ahead Logging: readers do not block the writer and the writer does
 *   not block readers. The WAL is replayed on next open after a crash.
 *
 * foreign_keys = ON
 *   Enforces FK constraints at runtime; SQLite leaves them off by default.
 */
export function openStateDatabase(dbPath) {
    const resolvedPath = dbPath ?? getStateDbPath();
    const dir = path.dirname(resolvedPath);
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
    }
    const db = new Database(resolvedPath);
    try {
        // PRAGMAs must run before any DDL or DML.
        db.exec("PRAGMA busy_timeout = 5000");
        db.exec("PRAGMA journal_mode = WAL");
        db.exec("PRAGMA foreign_keys = ON");
        runMigrations(db);
    }
    catch (err) {
        // The caller never receives the handle on failure, so release it here.
        try {
            db.close();
        }
        catch {
            // Ignore close failures so the original error is the one reported.
        }
        throw err;
    }
    return db;
}
107
+ /**
108
+ * All migrations in application order. New migrations are APPENDED to this
109
+ * array — never inserted in the middle or reordered.
110
+ *
111
+ * @see Migration
112
+ */
113
+ const MIGRATIONS = [
114
+ // ── Migration 001 — initial schema ──────────────────────────────────────────
115
+ {
116
+ id: "001-initial-schema",
117
+ up: `
118
+ -- ── events ──────────────────────────────────────────────────────────────
119
+ --
120
+ -- Replaces events.jsonl. Indexed (query) columns:
121
+ -- id INTEGER PK — monotonic rowid; replaces byte-offset cursor.
122
+ -- Callers store this as "sinceId" for resume.
123
+ -- event_type TEXT — indexed; replaces the type filter in readEvents().
124
+ -- ts TEXT — ISO-8601 UTC ms; indexed for range queries.
125
+ -- ref TEXT — nullable asset ref; indexed for ref-scoped queries.
126
+ --
127
+ -- Extensible (metadata_json) columns:
128
+ -- metadata_json TEXT — JSON object storing all non-indexed payload
129
+ -- fields (tags, any future structured fields).
130
+ -- Maps directly to EventEnvelope.metadata.
131
+ --
132
+ -- schema_version mirrors EventEnvelope.schemaVersion — always 1 for v1
133
+ -- rows. Stored as a column (not in the JSON blob) so future schema
134
+ -- changes can be detected and migrated row-by-row if ever needed.
135
+ --
136
+ -- TTL: rows where ts < NOW() - 90 days can be deleted by a maintenance job.
137
+ -- No automatic deletion occurs here — callers call purgeOldEvents().
138
+ --
139
+ -- ADD COLUMN extension points (future migrations):
140
+ -- ALTER TABLE events ADD COLUMN stash_dir TEXT DEFAULT NULL;
141
+ -- ALTER TABLE events ADD COLUMN correlation_id TEXT DEFAULT NULL;
142
+ -- ALTER TABLE events ADD COLUMN schema_version INTEGER NOT NULL DEFAULT 1;
143
+ --
144
+ CREATE TABLE IF NOT EXISTS events (
145
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
146
+ event_type TEXT NOT NULL,
147
+ ts TEXT NOT NULL,
148
+ ref TEXT,
149
+ metadata_json TEXT NOT NULL DEFAULT '{}'
150
+ );
151
+
152
+ -- Query patterns supported by these indexes:
153
+ -- SELECT … WHERE event_type = ? → idx_events_type
154
+ -- SELECT … WHERE ref = ? → idx_events_ref
155
+ -- SELECT … WHERE ts >= ? AND ts <= ? → idx_events_ts
156
+ -- SELECT … WHERE event_type = ? AND ref = ? → idx_events_type (prefix scan) + filter
157
+ -- SELECT … WHERE id > ? → PK (rowid) — no extra index needed
158
+ CREATE INDEX IF NOT EXISTS idx_events_type ON events(event_type);
159
+ CREATE INDEX IF NOT EXISTS idx_events_ref ON events(ref);
160
+ CREATE INDEX IF NOT EXISTS idx_events_ts ON events(ts);
161
+
162
+ -- ── proposals ────────────────────────────────────────────────────────────
163
+ --
164
+ -- Replaces per-uuid JSON directories under <stashDir>/.akm/proposals/.
165
+ --
166
+ -- Indexed (query) columns:
167
+ -- id TEXT PK — UUID (crypto.randomUUID()); stable directory name.
168
+ -- stash_dir TEXT — absolute stash root; multi-stash installs need
169
+ -- this to partition proposal lists per stash.
170
+ -- ref TEXT — target asset ref (e.g. "lesson:alpha");
171
+ -- indexed for ref-scoped queue views.
172
+ -- status TEXT — "pending" | "accepted" | "rejected"; indexed
173
+ -- so pending-queue queries are fast.
174
+ -- source TEXT — human-readable origin tag (e.g. "reflect").
175
+ -- created_at TEXT — ISO-8601; used for ORDER BY created_at ASC.
176
+ -- updated_at TEXT — ISO-8601; updated on accept/reject.
177
+ --
178
+ -- Large payload columns (NOT indexed):
179
+ -- content TEXT — full markdown text; the proposal payload body.
180
+ -- frontmatter_json TEXT — JSON of parsed frontmatter (may be NULL when
181
+ -- the content has no frontmatter block).
182
+ --
183
+ -- Extensible (metadata_json) columns:
184
+ -- metadata_json TEXT — JSON object for future proposal fields.
185
+ -- Current fields stored here: sourceRun, review.
186
+ --
187
+ -- ADD COLUMN extension points (future migrations):
188
+ -- ALTER TABLE proposals ADD COLUMN source_run TEXT DEFAULT NULL;
189
+ -- ALTER TABLE proposals ADD COLUMN review_outcome TEXT DEFAULT NULL;
190
+ -- ALTER TABLE proposals ADD COLUMN review_reason TEXT DEFAULT NULL;
191
+ -- ALTER TABLE proposals ADD COLUMN review_decided_at TEXT DEFAULT NULL;
192
+ -- ALTER TABLE proposals ADD COLUMN archived INTEGER NOT NULL DEFAULT 0;
193
+ --
194
+ CREATE TABLE IF NOT EXISTS proposals (
195
+ id TEXT PRIMARY KEY,
196
+ stash_dir TEXT NOT NULL,
197
+ ref TEXT NOT NULL,
198
+ status TEXT NOT NULL DEFAULT 'pending',
199
+ source TEXT NOT NULL,
200
+ created_at TEXT NOT NULL,
201
+ updated_at TEXT NOT NULL,
202
+ content TEXT NOT NULL DEFAULT '',
203
+ frontmatter_json TEXT,
204
+ metadata_json TEXT NOT NULL DEFAULT '{}'
205
+ );
206
+
207
+ -- Query patterns:
208
+ -- SELECT … WHERE stash_dir = ? AND status = ? → idx_proposals_stash_status
209
+ -- SELECT … WHERE ref = ? AND status = ? → idx_proposals_ref_status
210
+ -- SELECT … WHERE id = ? → PK
211
+ CREATE INDEX IF NOT EXISTS idx_proposals_stash_status
212
+ ON proposals(stash_dir, status);
213
+ CREATE INDEX IF NOT EXISTS idx_proposals_ref_status
214
+ ON proposals(ref, status);
215
+
216
+ -- ── task_history ─────────────────────────────────────────────────────────
217
+ --
218
+ -- Replaces per-task JSONL files under <cacheDir>/tasks/history/.
219
+ --
220
+ -- Indexed (query) columns:
221
+ -- task_id TEXT PK — stable task identifier string.
222
+ -- status TEXT — terminal status (e.g. "completed", "failed",
223
+ -- "cancelled"); indexed for status-scoped queries.
224
+ -- started_at TEXT — ISO-8601; indexed for time-range queries.
225
+ -- target_kind TEXT — kind of the target entity (e.g. "issue",
226
+ -- "workflow", "agent"); indexed for kind-scoped queries.
227
+ -- target_ref TEXT — stable ref of the target entity; indexed for
228
+ -- per-target history lookups.
229
+ --
230
+ -- Non-indexed time columns:
231
+ -- completed_at TEXT — ISO-8601 or NULL if still running.
232
+ -- failed_at TEXT — ISO-8601 or NULL.
233
+ --
234
+ -- Non-indexed diagnostic columns:
235
+ -- log_path TEXT — absolute path to the task log file, if any.
236
+ --
237
+ -- Extensible (metadata_json) columns:
238
+ -- metadata_json TEXT — JSON object for future task fields (exit_code,
239
+ -- runner, priority, parent_task_id, …).
240
+ --
241
+ -- ADD COLUMN extension points (future migrations):
242
+ -- ALTER TABLE task_history ADD COLUMN exit_code INTEGER DEFAULT NULL;
243
+ -- ALTER TABLE task_history ADD COLUMN runner TEXT DEFAULT NULL;
244
+ -- ALTER TABLE task_history ADD COLUMN parent_task_id TEXT DEFAULT NULL;
245
+ -- ALTER TABLE task_history ADD COLUMN priority INTEGER NOT NULL DEFAULT 0;
246
+ --
247
+ CREATE TABLE IF NOT EXISTS task_history (
248
+ task_id TEXT PRIMARY KEY,
249
+ status TEXT NOT NULL,
250
+ started_at TEXT NOT NULL,
251
+ completed_at TEXT,
252
+ failed_at TEXT,
253
+ log_path TEXT,
254
+ target_kind TEXT,
255
+ target_ref TEXT,
256
+ metadata_json TEXT NOT NULL DEFAULT '{}'
257
+ );
258
+
259
+ -- Query patterns:
260
+ -- SELECT … WHERE task_id = ? → PK
261
+ -- SELECT … WHERE started_at >= ? AND started_at <= ? → idx_task_history_started
262
+ -- SELECT … WHERE target_kind = ? AND target_ref = ? → idx_task_history_target
263
+ -- SELECT … WHERE status = ? → idx_task_history_status
264
+ CREATE INDEX IF NOT EXISTS idx_task_history_started
265
+ ON task_history(started_at);
266
+ CREATE INDEX IF NOT EXISTS idx_task_history_target
267
+ ON task_history(target_kind, target_ref);
268
+ CREATE INDEX IF NOT EXISTS idx_task_history_status
269
+ ON task_history(status);
270
+ `,
271
+ },
272
+ // Migration 002 — fix task_history to be a true per-run log.
273
+ //
274
+ // Migration 001 used task_id as PRIMARY KEY, meaning each task had exactly
275
+ // one row and every new run overwrote the previous one. This silently
276
+ // discarded all historical runs — the opposite of a history table.
277
+ //
278
+ // This migration recreates the table with an AUTOINCREMENT id so each run
279
+ // appends a new row. The old single-row table is renamed to task_history_v1, the new
280
+ // table is created, data is copied, and the old table is dropped.
281
+ {
282
+ id: "002-task-history-per-run",
283
+ up: `
284
+ ALTER TABLE task_history RENAME TO task_history_v1;
285
+
286
+ CREATE TABLE task_history (
287
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
288
+ task_id TEXT NOT NULL,
289
+ status TEXT NOT NULL,
290
+ started_at TEXT NOT NULL,
291
+ completed_at TEXT,
292
+ failed_at TEXT,
293
+ log_path TEXT,
294
+ target_kind TEXT,
295
+ target_ref TEXT,
296
+ metadata_json TEXT NOT NULL DEFAULT '{}'
297
+ );
298
+
299
+ INSERT INTO task_history
300
+ (task_id, status, started_at, completed_at, failed_at,
301
+ log_path, target_kind, target_ref, metadata_json)
302
+ SELECT task_id, status, started_at, completed_at, failed_at,
303
+ log_path, target_kind, target_ref, metadata_json
304
+ FROM task_history_v1;
305
+
306
+ DROP TABLE task_history_v1;
307
+
308
+ -- Unique constraint: same task cannot have two runs with the same start time.
309
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_task_history_run
310
+ ON task_history(task_id, started_at);
311
+ CREATE INDEX IF NOT EXISTS idx_task_history_task_id
312
+ ON task_history(task_id);
313
+ CREATE INDEX IF NOT EXISTS idx_task_history_started
314
+ ON task_history(started_at);
315
+ CREATE INDEX IF NOT EXISTS idx_task_history_target
316
+ ON task_history(target_kind, target_ref);
317
+ CREATE INDEX IF NOT EXISTS idx_task_history_status
318
+ ON task_history(status);
319
+ `,
320
+ },
321
+ // ── Migration 003 — improve_runs ────────────────────────────────────────────
322
+ //
323
+ // Records every `akm improve` invocation as a durable row, replacing the
324
+ // legacy `<stash>/.akm/runs/<runId>/improve-result.json` artifact files.
325
+ //
326
+ // The `dry_run` column is FIRST-CLASS and indexed so productivity audits can
327
+ // cleanly filter dry-run probes out of real-run analyses without parsing
328
+ // `result_json`. The dry-run/real-run artifact-trap (recorded in
329
+ // feedback_akm_dryrun_artifact_trap) was the specific motivating bug.
330
+ //
331
+ // Indexed (query) columns:
332
+ // id TEXT PK — runId (`buildImproveRunId()` output).
333
+ // started_at TEXT — ISO-8601; indexed for time-range queries.
334
+ // stash_dir TEXT — absolute stash root; multi-stash scoping.
335
+ // dry_run INTEGER — 0/1; indexed for productivity audits.
336
+ // scope_mode TEXT — "all" | "type" | "ref"; indexed via composite
337
+ // with stash_dir for stash-scoped scope queries.
338
+ //
339
+ // Non-indexed payload:
340
+ // completed_at TEXT — ISO-8601 or NULL if interrupted.
341
+ // profile TEXT — improve profile name (nullable).
342
+ // scope_value TEXT — type name or asset ref (nullable).
343
+ // guidance TEXT — user-provided guidance text, if any.
344
+ // ok INTEGER — 0/1; whether the run produced ok=true.
345
+ // result_json TEXT — full AkmImproveResult JSON.
346
+ // metrics_json TEXT — aggregate counts extracted from result, cheap
347
+ // to query without parsing result_json.
348
+ //
349
+ // Extensible (metadata_json) columns:
350
+ // metadata_json TEXT — JSON object for future improve-run fields.
351
+ //
352
+ // ADD COLUMN extension points (future migrations):
353
+ // ALTER TABLE improve_runs ADD COLUMN duration_ms INTEGER DEFAULT NULL;
354
+ // ALTER TABLE improve_runs ADD COLUMN host TEXT DEFAULT NULL;
355
+ //
356
+ // TTL: rows where started_at < NOW() - 90 days can be deleted by
357
+ // `purgeOldImproveRuns()`. No automatic deletion occurs here.
358
+ {
359
+ id: "003-improve-runs",
360
+ up: `
361
+ CREATE TABLE IF NOT EXISTS improve_runs (
362
+ id TEXT PRIMARY KEY,
363
+ started_at TEXT NOT NULL,
364
+ completed_at TEXT,
365
+ stash_dir TEXT NOT NULL,
366
+ dry_run INTEGER NOT NULL DEFAULT 0,
367
+ profile TEXT,
368
+ scope_mode TEXT NOT NULL,
369
+ scope_value TEXT,
370
+ guidance TEXT,
371
+ ok INTEGER NOT NULL,
372
+ result_json TEXT NOT NULL,
373
+ metrics_json TEXT,
374
+ metadata_json TEXT NOT NULL DEFAULT '{}'
375
+ );
376
+
377
+ -- Query patterns supported:
378
+ -- SELECT … WHERE started_at >= ? AND started_at <= ?
379
+ -- → idx_improve_runs_started
380
+ -- SELECT … WHERE dry_run = 0
381
+ -- → idx_improve_runs_dry_run (productivity audits filter trap)
382
+ -- SELECT … WHERE stash_dir = ? AND scope_mode = ?
383
+ -- → idx_improve_runs_stash_scope
384
+ CREATE INDEX IF NOT EXISTS idx_improve_runs_started
385
+ ON improve_runs(started_at);
386
+ CREATE INDEX IF NOT EXISTS idx_improve_runs_dry_run
387
+ ON improve_runs(dry_run);
388
+ CREATE INDEX IF NOT EXISTS idx_improve_runs_stash_scope
389
+ ON improve_runs(stash_dir, scope_mode);
390
+ `,
391
+ },
392
+ // ── Migration 004 — extract_sessions_seen ───────────────────────────────────
393
+ //
394
+ // Tracks which platform sessions the extractor has processed, so the discovery
395
+ // pass in `akm extract --since <window>` skips sessions whose content hasn't
396
+ // changed since the last successful run. Replaces the akm-plugin
397
+ // session-checkpoint hook's implicit "write-once" memory of what's been
398
+ // captured — but persistent and queryable.
399
+ //
400
+ // Indexed (query) columns:
401
+ // harness TEXT — harness name (claude-code, opencode, ...).
402
+ // session_id TEXT — platform-native session identifier.
403
+ // processed_at TEXT — ISO-8601 UTC; when extract last ran on this session.
404
+ // session_ended_at TEXT — session.endedAt at processing time. When a
405
+ // later listSessions reports a *newer* endedAt
406
+ // for the same session_id, the extractor
407
+ // re-processes the appended events.
408
+ // outcome TEXT — "candidates_queued" | "no_candidates" |
409
+ // "skipped" | "failed".
410
+ //
411
+ // Non-indexed columns:
412
+ // candidate_count INTEGER — number of candidates the LLM produced.
413
+ // proposal_count INTEGER — number of proposals actually queued
414
+ // (candidates may fail downstream validation).
415
+ // rationale TEXT — for "no_candidates", the LLM's explanation.
416
+ // source_run TEXT — sourceRun id for PROV-DM traceability.
417
+ // metadata_json TEXT — future-proofing (pre-filter stats, LLM
418
+ // model+version, prompt token count, etc.).
419
+ //
420
+ // PK: (harness, session_id) — one row per session per harness. A re-extract
421
+ // updates the row in place via INSERT OR REPLACE.
422
+ //
423
+ // TTL: no automatic deletion. Sessions stay tracked as long as the source
424
+ // session files exist on disk. Operator can `DELETE FROM extract_sessions_seen
425
+ // WHERE processed_at < ?` for cleanup if desired.
426
+ {
427
+ id: "004-extract-sessions-seen",
428
+ up: `
429
+ CREATE TABLE IF NOT EXISTS extract_sessions_seen (
430
+ harness TEXT NOT NULL,
431
+ session_id TEXT NOT NULL,
432
+ processed_at TEXT NOT NULL,
433
+ session_ended_at TEXT,
434
+ outcome TEXT NOT NULL,
435
+ candidate_count INTEGER NOT NULL DEFAULT 0,
436
+ proposal_count INTEGER NOT NULL DEFAULT 0,
437
+ rationale TEXT,
438
+ source_run TEXT,
439
+ metadata_json TEXT NOT NULL DEFAULT '{}',
440
+ PRIMARY KEY (harness, session_id)
441
+ );
442
+
443
+ -- Query patterns:
444
+ -- SELECT … WHERE harness = ? → idx_extract_sessions_harness
445
+ -- SELECT … WHERE processed_at >= ? → idx_extract_sessions_processed
446
+ -- SELECT … WHERE harness = ? AND session_id = ? → PK
447
+ CREATE INDEX IF NOT EXISTS idx_extract_sessions_harness
448
+ ON extract_sessions_seen(harness);
449
+ CREATE INDEX IF NOT EXISTS idx_extract_sessions_processed
450
+ ON extract_sessions_seen(processed_at);
451
+ `,
452
+ },
453
+ ];
454
/**
 * Bootstrap the `schema_migrations` bookkeeping table.
 *
 * The statement is `CREATE TABLE IF NOT EXISTS`, so issuing it on every open
 * is a no-op for an existing database and creates the table on a fresh one.
 *
 * @param db - Open database handle exposing `exec()`.
 */
function ensureMigrationsTable(db) {
  // One row per applied migration; `applied_at` is filled in by SQLite.
  const createTableSql = `
    CREATE TABLE IF NOT EXISTS schema_migrations (
      id TEXT PRIMARY KEY,
      applied_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
  `;
  db.exec(createTableSql);
}
466
/**
 * Bring the database schema up to date by applying every migration in
 * `MIGRATIONS` whose id is not yet recorded in `schema_migrations`.
 *
 * Each pending migration runs inside its own transaction: the `up` script is
 * executed first and the bookkeeping row is inserted second, so a failure
 * part-way through migration N leaves migrations 1..N-1 committed and leaves
 * no row for N. N is therefore retried on the next open.
 *
 * The bookkeeping table itself is created up front, outside any migration
 * transaction.
 *
 * @param db - Open database handle (`exec`, `prepare`, `transaction`).
 */
export function runMigrations(db) {
  ensureMigrationsTable(db);

  // Snapshot of what has already been applied; taken once, before any work.
  const alreadyApplied = new Set();
  for (const record of db.prepare("SELECT id FROM schema_migrations").all()) {
    alreadyApplied.add(record.id);
  }

  const pending = MIGRATIONS.filter((candidate) => !alreadyApplied.has(candidate.id));
  for (const migration of pending) {
    const applyMigration = db.transaction(() => {
      db.exec(migration.up);
      db.prepare("INSERT INTO schema_migrations (id) VALUES (?)").run(migration.id);
    });
    applyMigration();
  }
}
490
/**
 * Map a raw `events` table row onto the public event-envelope shape.
 *
 * - `schemaVersion` is always `1`.
 * - `ref` is copied only when the column is not `null`.
 * - `metadata` is attached only when `metadata_json` parses to a value with
 *   at least one own enumerable key, so consumers that test
 *   `envelope.metadata !== undefined` keep working for empty blobs.
 *
 * @param row - Row with `id`, `ts`, `event_type`, `ref` and `metadata_json`.
 * @returns The envelope object.
 */
export function eventRowToEnvelope(row) {
  const envelope = {
    schemaVersion: 1,
    id: row.id,
    ts: row.ts,
    eventType: row.event_type,
  };
  if (row.ref !== null) {
    envelope.ref = row.ref;
  }
  try {
    const decoded = JSON.parse(row.metadata_json);
    if (Object.keys(decoded).length > 0) {
      envelope.metadata = decoded;
    }
  } catch {
    // Unreadable metadata blob: emit the envelope without metadata.
  }
  return envelope;
}
516
/**
 * Convert a raw `proposals` table row to the public `Proposal` shape.
 *
 * Both JSON columns are decoded defensively; a bad blob never makes the row
 * unreadable:
 * - `frontmatter_json`: attached as `payload.frontmatter` only when present
 *   and parseable.
 * - `metadata_json`: supplies the optional `sourceRun` (string only) and
 *   `review` fields. Anything that is not a non-null object (corrupt text,
 *   a literal `null`, a NULL column) is treated as empty metadata.
 *
 * @param row - Row with `id`, `ref`, `status`, `source`, `created_at`,
 *              `updated_at`, `content`, `frontmatter_json`, `metadata_json`.
 * @returns The proposal object.
 */
export function proposalRowToProposal(row) {
  let frontmatter;
  if (row.frontmatter_json) {
    try {
      frontmatter = JSON.parse(row.frontmatter_json);
    } catch {
      /* ignore corrupt frontmatter JSON */
    }
  }

  let meta = {};
  try {
    const parsed = JSON.parse(row.metadata_json);
    // JSON.parse returns null for "null" (and for a null argument); reading
    // `.sourceRun` off that would throw outside this try block.
    if (parsed !== null && typeof parsed === "object") {
      meta = parsed;
    }
  } catch {
    /* ignore corrupt metadata JSON */
  }

  return {
    id: row.id,
    ref: row.ref,
    status: row.status,
    source: row.source,
    ...(typeof meta.sourceRun === "string" ? { sourceRun: meta.sourceRun } : {}),
    createdAt: row.created_at,
    updatedAt: row.updated_at,
    payload: {
      content: row.content,
      ...(frontmatter !== undefined ? { frontmatter } : {}),
    },
    ...(meta.review !== undefined ? { review: meta.review } : {}),
  };
}
551
/**
 * Flatten a public `Proposal` into the column values used by the proposals
 * INSERT/UPDATE statement.
 *
 * `sourceRun` and `review` have no dedicated column; when defined they are
 * folded into `metadata_json`. `frontmatter_json` is `null` when the payload
 * carries no (truthy) frontmatter.
 *
 * @param proposal - The proposal to serialise.
 * @param stashDir - Stash root supplied by the call site; stored as `stash_dir`.
 * @returns Object keyed by column name.
 */
export function proposalToRowValues(proposal, stashDir) {
  const { sourceRun, review } = proposal;
  const overflow = {
    ...(sourceRun !== undefined ? { sourceRun } : {}),
    ...(review !== undefined ? { review } : {}),
  };

  const { content, frontmatter } = proposal.payload;
  let frontmatterJson = null;
  if (frontmatter) {
    frontmatterJson = JSON.stringify(frontmatter);
  }

  return {
    id: proposal.id,
    stash_dir: stashDir,
    ref: proposal.ref,
    status: proposal.status,
    source: proposal.source,
    created_at: proposal.createdAt,
    updated_at: proposal.updatedAt,
    content,
    frontmatter_json: frontmatterJson,
    metadata_json: JSON.stringify(overflow),
  };
}
575
+ // ── events table helpers ─────────────────────────────────────────────────────
576
/**
 * Append one event to the `events` table and return the id the database
 * assigned to it (usable as a `sinceId` cursor for later reads).
 *
 * Best-effort by design: any failure is reported through `error(...)` and
 * the function returns `undefined` rather than throwing, so that event
 * recording can never break the mutation that triggered it.
 *
 * @param db    - Open database handle.
 * @param input - `{ eventType, ts, ref?, metadata? }`; a missing `ref` is
 *                stored as NULL and missing `metadata` as `{}`.
 * @returns The new row id, or `undefined` when the insert failed or returned
 *          no row.
 */
export function insertEvent(db, input) {
  try {
    const statement = db.prepare(`INSERT INTO events (event_type, ts, ref, metadata_json)
         VALUES (?, ?, ?, ?)
         RETURNING id`);
    const refValue = input.ref ?? null;
    const metadataJson = JSON.stringify(input.metadata ?? {});
    const inserted = statement.get(input.eventType, input.ts, refValue, metadataJson);
    return inserted?.id;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    error(`akm: state.db event insert failed (${reason})`);
    return undefined;
  }
}
599
/**
 * Fetch events matching the given filter, ordered by ascending id (emission
 * order).
 *
 * Supported filters, all optional and AND-ed together:
 * - `sinceId`: only rows with `id > sinceId` (ignored unless it is > 0)
 * - `since`: only rows with `ts >= since`
 * - `type`: exact `event_type` match
 * - `ref`: exact `ref` match
 *
 * @param db      - Open database handle.
 * @param options - Filter object (defaults to no filtering).
 * @returns `{ events, nextId }` where `nextId` is the id of the last event
 *          returned, or `options.sinceId` (falling back to 0) when nothing
 *          matched. It can be passed back as the next `sinceId`.
 */
export function readStateEvents(db, options = {}) {
  const clauses = [];
  const bindings = [];
  const addFilter = (clause, value) => {
    clauses.push(clause);
    bindings.push(value);
  };

  if (options.sinceId !== undefined && options.sinceId > 0) addFilter("id > ?", options.sinceId);
  if (options.since) addFilter("ts >= ?", options.since);
  if (options.type) addFilter("event_type = ?", options.type);
  if (options.ref) addFilter("ref = ?", options.ref);

  const where = clauses.length > 0 ? `WHERE ${clauses.join(" AND ")}` : "";
  const statement = db.prepare(`SELECT id, event_type, ts, ref, metadata_json FROM events ${where} ORDER BY id ASC`);
  const events = statement.all(...bindings).map(eventRowToEnvelope);

  let nextId = options.sinceId ?? 0;
  if (events.length > 0) {
    nextId = events[events.length - 1].id;
  }
  return { events, nextId };
}
633
/**
 * Remove events whose `ts` is older than `retentionDays` days before now.
 *
 * A `retentionDays` that is not a finite number, or is zero or negative,
 * means "retention disabled": nothing is deleted and 0 is returned without
 * touching the database.
 *
 * @param db            - Open database handle.
 * @param retentionDays - Age threshold in days (default 90).
 * @returns Number of rows deleted, always as a plain number.
 */
export function purgeOldEvents(db, retentionDays = 90) {
  const retentionEnabled = Number.isFinite(retentionDays) && retentionDays > 0;
  if (!retentionEnabled) {
    return 0;
  }

  const MS_PER_DAY = 86_400_000;
  const cutoffIso = new Date(Date.now() - retentionDays * MS_PER_DAY).toISOString();
  const outcome = db.prepare("DELETE FROM events WHERE ts < ?").run(cutoffIso);

  // `changes` can come back as number or bigint depending on the SQLite
  // binding; callers always get a number.
  const deleted = outcome.changes ?? 0;
  if (typeof deleted === "bigint") {
    return Number(deleted);
  }
  return deleted;
}
652
+ // ── proposals table helpers ──────────────────────────────────────────────────
653
/**
 * Insert a proposal row, or overwrite the existing row with the same `id`.
 *
 * On conflict every column except `id` and `created_at` is replaced with the
 * incoming value, so the original creation timestamp survives updates.
 *
 * @param db       - Open database handle.
 * @param proposal - Public proposal object to persist.
 * @param stashDir - Stash root stored in `stash_dir`.
 */
export function upsertProposal(db, proposal, stashDir) {
  const values = proposalToRowValues(proposal, stashDir);
  // Binding order must match the column list in the INSERT below.
  const columnOrder = [
    "id",
    "stash_dir",
    "ref",
    "status",
    "source",
    "created_at",
    "updated_at",
    "content",
    "frontmatter_json",
    "metadata_json",
  ];
  const statement = db.prepare(`
    INSERT INTO proposals
      (id, stash_dir, ref, status, source, created_at, updated_at, content, frontmatter_json, metadata_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(id) DO UPDATE SET
      stash_dir = excluded.stash_dir,
      ref = excluded.ref,
      status = excluded.status,
      source = excluded.source,
      updated_at = excluded.updated_at,
      content = excluded.content,
      frontmatter_json = excluded.frontmatter_json,
      metadata_json = excluded.metadata_json
  `);
  statement.run(...columnOrder.map((column) => values[column]));
}
674
/**
 * List proposals, oldest first (`created_at ASC`).
 *
 * Optional filters, AND-ed together when present: `stashDir`, `status`,
 * `ref`. Each is an exact match; a falsy value disables that filter.
 *
 * @param db      - Open database handle.
 * @param options - `{ stashDir?, status?, ref? }`.
 * @returns Array of public `Proposal` objects.
 */
export function listStateProposals(db, options = {}) {
  const candidates = [
    ["stash_dir = ?", options.stashDir],
    ["status = ?", options.status],
    ["ref = ?", options.ref],
  ];
  const active = candidates.filter(([, value]) => value);
  const where = active.length > 0 ? `WHERE ${active.map(([clause]) => clause).join(" AND ")}` : "";
  const bindings = active.map(([, value]) => value);

  const statement = db.prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
              content, frontmatter_json, metadata_json
       FROM proposals ${where} ORDER BY created_at ASC`);
  const records = statement.all(...bindings);
  return records.map(proposalRowToProposal);
}
701
/**
 * Fetch one proposal by its id.
 *
 * @param db - Open database handle.
 * @param id - Proposal id (primary key).
 * @returns The public `Proposal`, or `undefined` when no row matches.
 */
export function getStateProposal(db, id) {
  const statement = db.prepare(`SELECT id, stash_dir, ref, status, source, created_at, updated_at,
              content, frontmatter_json, metadata_json
       FROM proposals WHERE id = ?`);
  const found = statement.get(id);
  if (!found) {
    return undefined;
  }
  return proposalRowToProposal(found);
}
712
+ // ── task_history table helpers ───────────────────────────────────────────────
713
/**
 * Record one task run in `task_history`.
 *
 * Despite the name this is INSERT OR IGNORE, not a true upsert: when the
 * insert would violate a constraint (e.g. a run with the same task_id and
 * started_at was already imported by the migration script) the statement is
 * silently skipped and the existing row is left untouched.
 *
 * @param db  - Open database handle.
 * @param row - Column values; `completed_at`, `failed_at`, `log_path`,
 *              `target_kind` and `target_ref` are stored as NULL when absent.
 */
export function upsertTaskHistory(db, row) {
  const statement = db.prepare(`
    INSERT OR IGNORE INTO task_history
      (task_id, status, started_at, completed_at, failed_at, log_path,
       target_kind, target_ref, metadata_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const optionalColumns = ["completed_at", "failed_at", "log_path", "target_kind", "target_ref"];
  const optionalValues = optionalColumns.map((column) => row[column] ?? null);
  statement.run(row.task_id, row.status, row.started_at, ...optionalValues, row.metadata_json);
}
726
/**
 * Return the most recent run (highest `started_at`) recorded for a task.
 *
 * @param db     - Open database handle.
 * @param taskId - The `task_id` to look up.
 * @returns The raw `task_history` row, or `undefined` when the task has no
 *          recorded runs.
 */
export function getTaskHistory(db, taskId) {
  const latest = db
    .prepare(`SELECT id, task_id, status, started_at, completed_at, failed_at, log_path,
              target_kind, target_ref, metadata_json
       FROM task_history WHERE task_id = ? ORDER BY started_at DESC LIMIT 1`)
    .get(taskId);
  // bun:sqlite yields null (not undefined) for "no row"; normalise so the
  // documented `undefined` contract holds, the same way getExtractedSession
  // does.
  return latest ?? undefined;
}
739
/**
 * List the recorded runs of one task, newest first (`started_at DESC`).
 *
 * @param db     - Open database handle.
 * @param taskId - The `task_id` to look up.
 * @param limit  - Maximum number of rows to return (default 50).
 * @returns Array of raw `task_history` rows; empty when the task has none.
 */
export function getTaskHistoryRuns(db, taskId, limit = 50) {
  const statement = db.prepare(`SELECT id, task_id, status, started_at, completed_at, failed_at, log_path,
              target_kind, target_ref, metadata_json
       FROM task_history WHERE task_id = ? ORDER BY started_at DESC LIMIT ?`);
  const runs = statement.all(taskId, limit);
  return runs;
}
749
/**
 * Search `task_history`, newest run first (`started_at DESC`).
 *
 * Optional filters, AND-ed together when present:
 * - `since` / `until`: inclusive bounds on `started_at`
 * - `status`: exact match
 * - `targetKind` / `targetRef`: exact match on `target_kind` / `target_ref`
 *
 * A falsy filter value disables that filter. Note the result rows do not
 * include the `id` column.
 *
 * @param db      - Open database handle.
 * @param options - Filter object (defaults to no filtering).
 * @returns Array of raw rows.
 */
export function queryTaskHistory(db, options = {}) {
  const candidates = [
    ["started_at >= ?", options.since],
    ["started_at <= ?", options.until],
    ["status = ?", options.status],
    ["target_kind = ?", options.targetKind],
    ["target_ref = ?", options.targetRef],
  ];
  const active = candidates.filter(([, value]) => value);
  const where = active.length > 0 ? `WHERE ${active.map(([clause]) => clause).join(" AND ")}` : "";
  const bindings = active.map(([, value]) => value);

  const statement = db.prepare(`SELECT task_id, status, started_at, completed_at, failed_at, log_path,
              target_kind, target_ref, metadata_json
       FROM task_history ${where} ORDER BY started_at DESC`);
  return statement.all(...bindings);
}
782
+ // ── events.jsonl import ──────────────────────────────────────────────────────
783
/**
 * Import all events from an `events.jsonl` file into the `events` table.
 *
 * The old byte-offset `id` is NOT preserved: the database assigns new ids.
 * Callers that persisted a byte-offset cursor must discard it after migration
 * and use the returned `maxId` as the new cursor.
 *
 * **Idempotency**: before inserting, each line is checked against the table
 * using the full `(event_type, ts, ref, metadata_json)` tuple. A line whose
 * exact tuple already exists is counted in `skipped` instead of being
 * inserted, which makes re-running the import safe. The check lives only in
 * this import path; no UNIQUE constraint is relied upon here. Caveat: a line
 * without a string `ts` is stamped with the current time, so such lines are
 * NOT deduplicated across runs.
 *
 * **Malformed lines** are skipped without being counted: blank lines, lines
 * that are not valid JSON, and lines whose JSON value is not an object
 * (`null`, numbers, strings, arrays). A missing or `null` `metadata` is
 * stored as `{}`, matching what `insertEvent` writes.
 *
 * All inserts happen inside a single transaction.
 *
 * @param db        - Open state.db connection.
 * @param jsonlPath - Path to the events.jsonl file to import. A missing file
 *                    is not an error and yields all-zero counts.
 * @returns `{ imported, maxId, skipped }`: rows inserted, the highest id
 *          assigned (0 when nothing was inserted), and rows skipped as
 *          duplicates.
 */
export async function importEventsJsonl(db, jsonlPath) {
  const { readFileSync, existsSync } = await import("node:fs");
  if (!existsSync(jsonlPath)) {
    return { imported: 0, maxId: 0, skipped: 0 };
  }
  const text = readFileSync(jsonlPath, "utf8");
  const lines = text.split("\n").filter((l) => l.trim().length > 0);

  let imported = 0;
  let maxId = 0;
  let skipped = 0;

  const insertStmt = db.prepare(`INSERT INTO events (event_type, ts, ref, metadata_json)
     VALUES (?, ?, ?, ?)
     RETURNING id`);
  // Dedup pre-check on the full tuple, so identical rows are skipped but two
  // genuinely different events sharing (event_type, ts, ref) both survive.
  // `IS` (not `=`) is used for ref so that two NULL refs compare equal.
  const existsStmt = db.prepare(`SELECT 1 FROM events
     WHERE event_type = ?
       AND ts = ?
       AND ref IS ?
       AND metadata_json = ?
     LIMIT 1`);

  db.transaction(() => {
    for (const line of lines) {
      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch {
        continue; // skip malformed lines — same behaviour as readEvents()
      }
      // Valid JSON that is not an object is not an event envelope. In
      // particular `null` would throw on property access below and abort
      // (roll back) the entire import.
      if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        continue;
      }

      const eventType = typeof parsed.eventType === "string" ? parsed.eventType : "unknown";
      const ts = typeof parsed.ts === "string" ? parsed.ts : new Date().toISOString();
      const ref = typeof parsed.ref === "string" ? parsed.ref : null;
      // typeof null === "object", so exclude it explicitly; otherwise the
      // literal string "null" would be stored as the metadata blob.
      const metadata =
        parsed.metadata !== null && typeof parsed.metadata === "object"
          ? JSON.stringify(parsed.metadata)
          : "{}";

      const duplicate = existsStmt.get(eventType, ts, ref, metadata);
      if (duplicate) {
        skipped++;
        continue;
      }
      const result = insertStmt.get(eventType, ts, ref, metadata);
      if (result) {
        imported++;
        if (result.id > maxId) {
          maxId = result.id;
        }
      }
    }
  })();

  return { imported, maxId, skipped };
}
864
/**
 * Derive the small aggregate-counts object stored in `metrics_json` from a
 * full improve result. Pure: no I/O, no mutation of `result`.
 *
 * Counting rules, per entry of `result.actions` by `mode`:
 * - accepted: `reflect`, `distill`, `memory-inference`, `graph-extraction`
 * - rejected: `reflect-cooldown`, `reflect-skipped`, `distill-skipped`
 * - error:    `reflect-failed`, `error`
 * - any other mode (including `memory-prune`, which is bookkeeping rather
 *   than content authoring) contributes to `actionsCount` only.
 *
 * `autoAcceptedCount` is the number of actions whose `result.autoAccepted`
 * is exactly `true` (legacy pre-gate path) plus `result.gateAutoAcceptedCount`
 * when present.
 *
 * @param result - Improve result; non-array `plannedRefs` / `actions` are
 *                 treated as empty.
 * @returns `{ plannedCount, actionsCount, acceptedCount, rejectedCount,
 *          autoAcceptedCount, errorCount }`.
 */
export function computeImproveRunMetrics(result) {
  const bucketByMode = new Map([
    ["reflect", "accepted"],
    ["distill", "accepted"],
    ["memory-inference", "accepted"],
    ["graph-extraction", "accepted"],
    ["reflect-cooldown", "rejected"],
    ["reflect-skipped", "rejected"],
    ["distill-skipped", "rejected"],
    ["reflect-failed", "error"],
    ["error", "error"],
  ]);

  const actionList = Array.isArray(result.actions) ? result.actions : [];
  const tally = { accepted: 0, rejected: 0, error: 0 };
  let legacyAutoAccepted = 0;

  for (const action of actionList) {
    const bucket = bucketByMode.get(action.mode);
    if (bucket !== undefined) {
      tally[bucket] += 1;
    }
    const actionResult = action.result;
    if (actionResult && actionResult.autoAccepted === true) {
      legacyAutoAccepted += 1;
    }
  }

  return {
    plannedCount: Array.isArray(result.plannedRefs) ? result.plannedRefs.length : 0,
    actionsCount: actionList.length,
    acceptedCount: tally.accepted,
    rejectedCount: tally.rejected,
    autoAcceptedCount: legacyAutoAccepted + (result.gateAutoAcceptedCount ?? 0),
    errorCount: tally.error,
  };
}
910
/**
 * Insert a single improve-run row into `improve_runs` using parameterised SQL.
 *
 * Idempotency: this is a plain INSERT (not REPLACE) against a table whose
 * PRIMARY KEY is `id`, so recording the same run id twice surfaces as a
 * SQLite constraint error instead of silently overwriting the earlier record.
 *
 * `metrics` defaults to `computeImproveRunMetrics(input.result)`; pass an
 * explicit object to override the derivation (e.g. in tests).
 *
 * The nullable columns (`completed_at`, `profile`, `scope_value`, `guidance`)
 * are bound as SQL NULL when the corresponding input field is omitted, the
 * same convention the other writers in this module follow.
 *
 * @param db    - Open database handle.
 * @param input - `{ id, startedAt, completedAt?, stashDir, dryRun, profile?,
 *                scopeMode, scopeValue?, guidance?, ok, result, metrics?,
 *                metadata? }`. `dryRun` and `ok` are stored as 0/1.
 */
export function recordImproveRun(db, input) {
  const metricsObj = input.metrics ?? computeImproveRunMetrics(input.result);
  db.prepare(`
    INSERT INTO improve_runs
      (id, started_at, completed_at, stash_dir, dry_run, profile,
       scope_mode, scope_value, guidance, ok, result_json, metrics_json, metadata_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `).run(
    input.id,
    input.startedAt,
    input.completedAt ?? null, // NULL = run was interrupted / not completed
    input.stashDir,
    input.dryRun ? 1 : 0,
    input.profile ?? null,
    input.scopeMode,
    input.scopeValue ?? null,
    input.guidance ?? null,
    input.ok ? 1 : 0,
    JSON.stringify(input.result),
    JSON.stringify(metricsObj),
    JSON.stringify(input.metadata ?? {}),
  );
}
932
/**
 * Remove `improve_runs` rows whose `started_at` is older than `retentionDays`
 * days before now. Counterpart of `purgeOldEvents` for the improve-run table:
 * same default, same return shape, same "disabled" semantics.
 *
 * A `retentionDays` that is not a finite number, or is zero or negative,
 * disables the purge: nothing is deleted and 0 is returned without touching
 * the database.
 *
 * @param db            - Open database handle.
 * @param retentionDays - Age threshold in days (default 90).
 * @returns Number of rows deleted, always as a plain number.
 */
export function purgeOldImproveRuns(db, retentionDays = 90) {
  if (!(Number.isFinite(retentionDays) && retentionDays > 0)) {
    return 0;
  }
  const retentionMs = retentionDays * 86_400_000;
  const cutoffIso = new Date(Date.now() - retentionMs).toISOString();
  const statement = db.prepare("DELETE FROM improve_runs WHERE started_at < ?");
  const deleted = statement.run(cutoffIso).changes ?? 0;
  // Normalise bigint row counts to a plain number.
  return typeof deleted === "bigint" ? Number(deleted) : deleted;
}
948
/**
 * Store the outcome of running extract on one session, replacing any earlier
 * record for the same `(harness, session_id)` via INSERT OR REPLACE so the
 * row always reflects the most recent run.
 *
 * `input.sessionEndedAt` is an epoch-milliseconds number; it is stored as an
 * ISO-8601 string, or as NULL when it is missing or not a finite number.
 * `rationale` and `sourceRun` default to NULL, `metadata` to `{}`.
 *
 * @param db    - Open database handle.
 * @param input - `{ harness, sessionId, processedAt, sessionEndedAt?, outcome,
 *                candidateCount, proposalCount, rationale?, sourceRun?,
 *                metadata? }`.
 */
export function upsertExtractedSession(db, input) {
  const { sessionEndedAt } = input;
  let endedAtIso = null;
  if (typeof sessionEndedAt === "number" && Number.isFinite(sessionEndedAt)) {
    endedAtIso = new Date(sessionEndedAt).toISOString();
  }

  const statement = db.prepare(`
    INSERT OR REPLACE INTO extract_sessions_seen
      (harness, session_id, processed_at, session_ended_at, outcome,
       candidate_count, proposal_count, rationale, source_run, metadata_json)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const bindings = [
    input.harness,
    input.sessionId,
    input.processedAt,
    endedAtIso,
    input.outcome,
    input.candidateCount,
    input.proposalCount,
    input.rationale ?? null,
    input.sourceRun ?? null,
    JSON.stringify(input.metadata ?? {}),
  ];
  statement.run(...bindings);
}
965
/**
 * Look up the stored extract record for one session.
 *
 * @param db        - Open database handle.
 * @param harness   - Harness name (first half of the primary key).
 * @param sessionId - Platform session id (second half of the primary key).
 * @returns The `extract_sessions_seen` row, or `undefined` when the session
 *          has never been processed.
 */
export function getExtractedSession(db, harness, sessionId) {
  const statement = db.prepare("SELECT * FROM extract_sessions_seen WHERE harness = ? AND session_id = ?");
  const found = statement.get(harness, sessionId);
  // bun:sqlite reports "no row" as null; callers are promised undefined.
  if (found === null || found === undefined) {
    return undefined;
  }
  return found;
}
977
/**
 * Fetch the stored extract records for many sessions of one harness at once.
 *
 * The ids are queried in batches of 500 (plus one parameter for the harness)
 * to stay well under SQLite's bound-parameter ceiling.
 *
 * @param db         - Open database handle.
 * @param harness    - Harness name all the sessions belong to.
 * @param sessionIds - Session ids to look up.
 * @returns Map from `session_id` to its row. Ids with no stored record are
 *          simply absent; an empty input yields an empty Map without
 *          querying.
 */
export function getExtractedSessionsMap(db, harness, sessionIds) {
  const bySessionId = new Map();
  if (sessionIds.length === 0) {
    return bySessionId;
  }

  const BATCH_SIZE = 500;
  let offset = 0;
  while (offset < sessionIds.length) {
    const batch = sessionIds.slice(offset, offset + BATCH_SIZE);
    const marks = batch.map(() => "?").join(",");
    const statement = db.prepare(`SELECT * FROM extract_sessions_seen
         WHERE harness = ? AND session_id IN (${marks})`);
    statement.all(harness, ...batch).forEach((record) => {
      bySessionId.set(record.session_id, record);
    });
    offset += BATCH_SIZE;
  }
  return bySessionId;
}
1000
/**
 * Decide whether extract can skip a session because it was already processed
 * and has not gained content since.
 *
 * "New content" means the live `endedAt` is strictly later than the
 * `session_ended_at` recorded at the previous extract.
 *
 * Decision table:
 * - no prior record                                → `false` (process it)
 * - live timestamp missing / not a finite number   → `true`  (cannot tell
 *   whether anything is new, so conservatively skip)
 * - prior record has no parseable `session_ended_at` → `false` (process it)
 * - live endedAt <= recorded endedAt               → `true`  (unchanged)
 * - live endedAt >  recorded endedAt               → `false` (new content)
 *
 * @param prior                - Stored row for the session, or a falsy value.
 * @param liveSessionEndedAtMs - Current session end time in epoch ms.
 * @returns `true` when the caller should skip the session.
 */
export function shouldSkipAlreadyExtractedSession(prior, liveSessionEndedAtMs) {
  if (!prior) {
    return false;
  }

  const liveIsUsable = typeof liveSessionEndedAtMs === "number" && Number.isFinite(liveSessionEndedAtMs);
  if (!liveIsUsable) {
    return true;
  }

  const recorded = prior.session_ended_at;
  if (!recorded) {
    return false;
  }
  const recordedMs = Date.parse(recorded);
  if (!Number.isFinite(recordedMs)) {
    return false;
  }

  const hasNewContent = liveSessionEndedAtMs > recordedMs;
  return !hasNewContent;
}
1027
+ // ── registry_index_cache (goes in index.db, not state.db) ───────────────────
1028
/**
 * DDL for the `registry_index_cache` table that lives in the EXISTING index.db
 * (managed by src/indexer/db.ts), not in state.db.
 *
 * Design: both statements use IF NOT EXISTS, so the script is safe to run
 * repeatedly, either inside ensureSchema() or as a standalone migration.
 * Later schema changes are expected to follow the same migration-safe
 * ADD COLUMN approach (see the extension points below).
 *
 * Purpose: caches the result of resolving and fetching remote registry stash
 * indexes so `akm search` does not hit the network on every invocation.
 *
 * Columns:
 *   registry_url   TEXT PK — canonical URL of the registry; cache key.
 *   fetched_at     TEXT    — ISO-8601; used to detect stale entries (TTL).
 *                            NOT NULL; indexed by idx_registry_cache_fetched.
 *   etag           TEXT    — HTTP ETag for conditional GET (If-None-Match).
 *                            Nullable; not indexed.
 *   last_modified  TEXT    — HTTP Last-Modified for conditional GET.
 *                            Nullable; not indexed.
 *   index_json     TEXT    — JSON blob of the fetched registry index document.
 *                            NOT NULL, defaults to '{}'; not indexed.
 *
 * Only the primary key (`registry_url`) and `fetched_at` are backed by an
 * index in this DDL.
 *
 * ADD COLUMN extension points (future migrations in db.ts):
 *   ALTER TABLE registry_index_cache ADD COLUMN schema_version INTEGER DEFAULT 1;
 *   ALTER TABLE registry_index_cache ADD COLUMN kit_count      INTEGER DEFAULT NULL;
 *   ALTER TABLE registry_index_cache ADD COLUMN error_message  TEXT    DEFAULT NULL;
 *
 * To add this table to index.db, call ensureRegistryIndexCacheSchema(db) from
 * within ensureSchema() in src/indexer/db.ts, or add it as a new CREATE TABLE
 * IF NOT EXISTS block inside the existing ensureSchema() call.
 */
export const REGISTRY_INDEX_CACHE_DDL = `
  CREATE TABLE IF NOT EXISTS registry_index_cache (
    registry_url TEXT PRIMARY KEY,
    fetched_at TEXT NOT NULL,
    etag TEXT,
    last_modified TEXT,
    index_json TEXT NOT NULL DEFAULT '{}'
  );

  CREATE INDEX IF NOT EXISTS idx_registry_cache_fetched
    ON registry_index_cache(fetched_at);
`;