typeclaw 0.36.7 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/README.md +2 -2
  2. package/package.json +3 -2
  3. package/src/agent/index.ts +31 -11
  4. package/src/agent/live-sessions.ts +12 -0
  5. package/src/agent/model-fallback.ts +17 -15
  6. package/src/agent/model-overrides.ts +2 -2
  7. package/src/agent/session-meta.ts +10 -0
  8. package/src/agent/subagents.ts +11 -2
  9. package/src/agent/system-prompt.ts +9 -3
  10. package/src/agent/todo/continuation-policy.ts +6 -3
  11. package/src/agent/todo/continuation-wiring.ts +4 -2
  12. package/src/agent/todo/continuation.ts +3 -3
  13. package/src/agent/tools/todo/index.ts +27 -4
  14. package/src/bundled-plugins/agent-browser/index.ts +33 -108
  15. package/src/bundled-plugins/agent-browser/shim.ts +3 -94
  16. package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
  17. package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
  18. package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
  19. package/src/bundled-plugins/memory/README.md +80 -23
  20. package/src/bundled-plugins/memory/append-tool.ts +74 -53
  21. package/src/bundled-plugins/memory/citation-superset.ts +4 -0
  22. package/src/bundled-plugins/memory/citations.ts +54 -0
  23. package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
  24. package/src/bundled-plugins/memory/dreaming.ts +444 -21
  25. package/src/bundled-plugins/memory/index.ts +544 -400
  26. package/src/bundled-plugins/memory/load-memory.ts +87 -10
  27. package/src/bundled-plugins/memory/load-shards.ts +48 -22
  28. package/src/bundled-plugins/memory/memory-logger.ts +95 -106
  29. package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
  30. package/src/bundled-plugins/memory/parent-link.ts +33 -0
  31. package/src/bundled-plugins/memory/paths.ts +12 -0
  32. package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
  33. package/src/bundled-plugins/memory/references/load-references.ts +212 -0
  34. package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
  35. package/src/bundled-plugins/memory/search-tool.ts +282 -45
  36. package/src/bundled-plugins/memory/stream-events.ts +1 -0
  37. package/src/bundled-plugins/memory/stream-io.ts +28 -3
  38. package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
  39. package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
  40. package/src/bundled-plugins/memory/vector/config.ts +28 -0
  41. package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
  42. package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
  43. package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
  44. package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
  45. package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
  46. package/src/bundled-plugins/memory/vector/passages.ts +125 -0
  47. package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
  48. package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
  49. package/src/bundled-plugins/memory/vector/startup.ts +71 -0
  50. package/src/bundled-plugins/memory/vector/store.ts +203 -0
  51. package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
  52. package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
  53. package/src/channels/router.ts +239 -40
  54. package/src/cli/incomplete-init.ts +57 -0
  55. package/src/cli/init.ts +143 -12
  56. package/src/cli/inspect.ts +11 -5
  57. package/src/cli/model.ts +112 -34
  58. package/src/cli/restart.ts +24 -0
  59. package/src/cli/start.ts +24 -0
  60. package/src/cli/tunnel.ts +53 -8
  61. package/src/config/config.ts +110 -19
  62. package/src/config/index.ts +5 -1
  63. package/src/config/models-mutation.ts +29 -11
  64. package/src/config/providers-mutation.ts +2 -2
  65. package/src/config/providers.ts +146 -12
  66. package/src/container/shared.ts +9 -0
  67. package/src/container/start.ts +87 -4
  68. package/src/cron/consumer.ts +13 -7
  69. package/src/hostd/models.ts +64 -0
  70. package/src/hostd/paths.ts +6 -0
  71. package/src/hostd/portbroker-manager.ts +2 -2
  72. package/src/init/checkpoint.ts +201 -0
  73. package/src/init/dockerfile.ts +164 -51
  74. package/src/init/gitignore.ts +7 -7
  75. package/src/init/index.ts +41 -9
  76. package/src/init/line-auth.ts +50 -21
  77. package/src/init/models-dev.ts +96 -21
  78. package/src/init/oauth-login.ts +3 -3
  79. package/src/init/progress.ts +29 -0
  80. package/src/init/validate-api-key.ts +4 -0
  81. package/src/inspect/index.ts +13 -6
  82. package/src/inspect/item-list.ts +11 -2
  83. package/src/inspect/live-list.ts +65 -0
  84. package/src/inspect/open-item.ts +22 -1
  85. package/src/inspect/session-list.ts +29 -0
  86. package/src/models/embedding-model.ts +114 -0
  87. package/src/models/transformers-version.ts +55 -0
  88. package/src/plugin/types.ts +3 -0
  89. package/src/portbroker/container-server.ts +23 -0
  90. package/src/portbroker/forward-request-bus.ts +35 -0
  91. package/src/portbroker/forward-result-bus.ts +2 -3
  92. package/src/portbroker/hostd-client.ts +182 -36
  93. package/src/portbroker/index.ts +6 -1
  94. package/src/portbroker/protocol.ts +9 -2
  95. package/src/run/channel-session-factory.ts +11 -1
  96. package/src/run/index.ts +41 -7
  97. package/src/server/command-runner.ts +24 -1
  98. package/src/server/index.ts +42 -8
  99. package/src/shared/index.ts +2 -0
  100. package/src/shared/protocol.ts +31 -0
  101. package/src/skills/typeclaw-channels/SKILL.md +4 -4
  102. package/src/skills/typeclaw-config/SKILL.md +2 -2
  103. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  104. package/src/skills/typeclaw-permissions/SKILL.md +3 -3
  105. package/src/skills/typeclaw-skills/SKILL.md +1 -1
  106. package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
  107. package/src/tunnels/providers/cloudflare-quick.ts +65 -7
  108. package/src/tunnels/upstream-probe.ts +25 -0
  109. package/typeclaw.schema.json +156 -67
  110. package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
  111. package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
  112. package/src/portbroker/bind-with-forward.ts +0 -102
@@ -0,0 +1,125 @@
1
+ import { createHash } from 'node:crypto'
2
+
3
+ import { stripCitationLines } from '../citations'
4
+ import { fragmentContentHash } from '../fragment-parser'
5
+ import { loadAllShards, type TopicShard } from '../load-shards'
6
+ import { buildParentLinks } from '../parent-link'
7
+ import { loadAllReferences } from '../references/load-references'
8
+ import { readAllUndreamedStreamDays, type UndreamedStreamDay } from '../stream-io'
9
+ import { EMBEDDING_MODEL_ID } from './embedder'
10
+ import type { VectorStore } from './store'
11
+ import { boundEmbeddableText, fragmentEmbeddableText } from './truncation'
12
+
13
+ export type Passage = {
14
+ id: string
15
+ source: 'topic' | 'stream' | 'reference'
16
+ key: string
17
+ text: string
18
+ contentHash: string
19
+ }
20
+
21
// Builds the passage (embedded text + freshness hash) for one topic shard. This
// is the single place that derivation lives, so the startup index build and
// dreaming's per-pass refresh produce byte-identical rows.
//
// The contentHash is taken over the EMBEDDED text rather than the raw body:
// if the way the text is derived ever changes, every existing topic row must be
// invalidated. A `fragmentContentHash` over the unchanged raw body would not do
// that — `findMissingPassages` would see matching hashes and skip the rows.
export function topicPassage(slug: string, heading: string, body: string): Passage {
  // Heading first, then the body with its citation lines removed.
  const embedded = heading + '\n' + stripCitationLines(body)
  const passage: Passage = {
    id: 'topic:' + slug,
    source: 'topic',
    key: slug,
    text: embedded,
    contentHash: hashContent(embedded),
  }
  return passage
}
30
+
31
// Gathers every passage that should exist in the vector index for `agentDir`:
// topic shards and undreamed stream events first, reference chunks last. The
// three sources are loaded concurrently.
export async function collectPassages(agentDir: string): Promise<Passage[]> {
  const pending = [
    loadAllShards(agentDir),
    readAllUndreamedStreamDays(agentDir),
    referencePassages(agentDir),
  ] as const
  const [shards, streamDays, references] = await Promise.all(pending)
  const memoryPassages = buildPassages(shards, streamDays)
  return memoryPassages.concat(references)
}
39
+
40
// Loads every stored reference under `agentDir` and expands each one into its
// chunk passages, in load order. Demoted references contribute nothing (see
// `referencePassagesForOne`).
export async function referencePassages(agentDir: string): Promise<Passage[]> {
  const collected: Passage[] = []
  const references = await loadAllReferences(agentDir)
  for (const reference of references) {
    const chunks = referencePassagesForOne(reference.slug, reference.body, reference.frontmatter.demoted)
    for (const chunk of chunks) collected.push(chunk)
  }
  return collected
}
46
+
47
// Chunk passages for ONE reference. The on-write hook calls this so a freshly
// stored reference is embedded right away instead of waiting for the next
// startup index build. `referencePassages` (the startup path) goes through this
// same function, so ids, chunk boundaries and hashes agree between the on-write
// rows and a later rebuild — no churn, no stale duplicates.
//
// A demoted reference yields no passages at all; that is the same exclusion the
// startup build applies.
export function referencePassagesForOne(slug: string, body: string, demoted = false): Passage[] {
  const passages: Passage[] = []
  if (!demoted) {
    const chunks = chunkReferenceBody(body)
    for (let index = 0; index < chunks.length; index += 1) {
      const chunk = chunks[index]!
      passages.push({
        id: `reference:${slug}#${index}`,
        source: 'reference',
        key: slug,
        text: chunk,
        contentHash: hashContent(chunk),
      })
    }
  }
  return passages
}
63
+
64
// Returns the passages that still need embedding: those with no row in the
// store, a row stamped with a different embedding model, or a row whose content
// hash no longer matches. Input order is preserved.
export function findMissingPassages(store: VectorStore, passages: Passage[]): Passage[] {
  const indexed = new Map<string, ReturnType<VectorStore['getAllMeta']>[number]>()
  for (const meta of store.getAllMeta()) indexed.set(meta.id, meta)

  const missing: Passage[] = []
  for (const passage of passages) {
    const meta = indexed.get(passage.id)
    const upToDate =
      meta !== undefined && meta.model === EMBEDDING_MODEL_ID && meta.contentHash === passage.contentHash
    if (!upToDate) missing.push(passage)
  }
  return missing
}
71
+
72
// Turns loaded topic shards and undreamed stream days into passages: one per
// topic shard first, then one per embeddable stream event in day/event order.
function buildPassages(shards: TopicShard[], streamDays: UndreamedStreamDay[]): Passage[] {
  const { supersededFragmentIds } = buildParentLinks(shards)

  const passages: Passage[] = shards.map((shard) => topicPassage(shard.slug, shard.frontmatter.heading, shard.body))

  for (const day of streamDays) {
    for (const event of day.events) {
      // Watermarks carry no embeddable text.
      if (event.type === 'watermark') continue

      if (event.type === 'fragment') {
        // Superseded fragments stay cited for GC/history but are not embedded:
        // they must never be a retrieval hook for a belief they no longer back.
        if (supersededFragmentIds.has(event.id)) continue
        const fragmentKey = `${day.date}#${event.id}`
        passages.push({
          id: `stream:${fragmentKey}`,
          source: 'stream',
          key: fragmentKey,
          text: fragmentEmbeddableText(event),
          contentHash: fragmentContentHash(event),
        })
        continue
      }

      // Any other event type has only its text; the key is derived from the
      // first 12 hex characters of the text's hash.
      const digest = hashContent(event.text)
      const legacyKey = `${day.date}#legacy-${digest.slice(0, 12)}`
      passages.push({
        id: `stream:${legacyKey}`,
        source: 'stream',
        key: legacyKey,
        text: event.text,
        contentHash: digest,
      })
    }
  }

  return passages
}
100
+
101
// Splits a reference body into consecutive chunks, each the result of one
// `boundEmbeddableText` call on whatever text is still left. An empty body
// yields a single empty chunk. The last chunk is whatever `boundEmbeddableText`
// returns once it reports the remaining text as not bounded.
//
// NOTE(review): advancing by `text.length` assumes the bounded text is a plain
// prefix of its input — confirm against `boundEmbeddableText`.
function chunkReferenceBody(body: string): string[] {
  if (body.length === 0) return ['']

  const chunks: string[] = []
  let remaining = body
  while (remaining.length > 0) {
    const bounded = boundEmbeddableText(remaining)
    if (!bounded.bounded) {
      chunks.push(bounded.text)
      break
    }

    // If bounding produced nothing, force progress by emitting one whole CODE
    // POINT. Taking a single UTF-16 code unit (`remaining[0]`) would split an
    // astral character into two lone-surrogate chunks, which cannot be encoded
    // as UTF-8 and would be hashed and embedded as replacement characters.
    const piece = bounded.text.length > 0 ? bounded.text : String.fromCodePoint(remaining.codePointAt(0)!)
    chunks.push(piece)
    remaining = remaining.slice(piece.length)
  }
  return chunks
}
122
+
123
// Hex-encoded SHA-256 digest of `content`.
function hashContent(content: string): string {
  const hasher = createHash('sha256')
  hasher.update(content)
  return hasher.digest('hex')
}
@@ -0,0 +1,50 @@
1
+ import { embed, EMBEDDING_MODEL_ID } from './embedder'
2
+ import type { EmbedFn } from './hybrid'
3
+ import { referencePassagesForOne } from './passages'
4
+ import type { VectorStore } from './store'
5
+
6
+ export type ReferenceStoredContext = { slug: string; body: string; demoted?: boolean }
7
+
8
// Embeds a freshly stored reference into the vector index immediately, mirroring
// the stream-fragment on-write hook (`makeAppendHook`). Without this, a reference
// is only embedded at the next startup index build, so it is vector-unretrievable
// for the rest of the container's uptime. Chunks are derived by the same
// `referencePassagesForOne` the startup build uses, so the rows agree.
//
// Re-storing a slug with a shorter body produces fewer chunks; the stale
// higher-index `reference:<slug>#N` rows from the prior body must be pruned or
// they would resurface as orphaned retrieval hooks for content that no longer
// exists. We compute the wanted id set, delete any existing row for this slug
// that is not wanted, and upsert the chunks that changed.
//
// Freshness is decided from the metadata snapshot (`getAllMeta`), never from
// `getByIds`: metadata reads do not decode the embedding BLOB, so a row with a
// malformed blob is overwritten by the re-embed instead of making the hook throw.
export function makeReferenceStoredHook(
  store: VectorStore,
  embedFn: EmbedFn = embed,
): (context: ReferenceStoredContext) => Promise<void> {
  // Chunk ids are `reference:<slug>#<chunkIdx>` with a numeric index.
  const chunkIndexPattern = /^\d+$/

  return async ({ slug, body, demoted }) => {
    const passages = referencePassagesForOne(slug, body, demoted)
    const wantedIds = new Set(passages.map((passage) => passage.id))

    // Rows that belong to THIS slug. Requiring a numeric suffix keeps a slug
    // such as `a` from claiming the rows of a different slug that happens to
    // start with `a#`.
    const prefix = `reference:${slug}#`
    const slugRows = store
      .getAllMeta()
      .filter((row) => row.id.startsWith(prefix) && chunkIndexPattern.test(row.id.slice(prefix.length)))

    const staleIds = slugRows.filter((row) => !wantedIds.has(row.id)).map((row) => row.id)
    if (staleIds.length > 0) store.deleteMany(staleIds)

    const indexed = new Map(slugRows.map((row) => [row.id, row] as const))

    for (const passage of passages) {
      // Skip the embed call when the stored row already matches. The snapshot
      // may be stale if another write landed meanwhile; `store.upsert` repeats
      // this hash/model comparison itself, so the worst case is one redundant
      // embed, never a wrong row.
      const existing = indexed.get(passage.id)
      if (existing?.contentHash === passage.contentHash && existing.model === EMBEDDING_MODEL_ID) continue

      const [embedding] = await embedFn([passage.text], 'passage')
      if (embedding === undefined) continue
      store.upsert({
        id: passage.id,
        source: 'reference',
        key: slug,
        model: EMBEDDING_MODEL_ID,
        dims: embedding.length,
        embedding,
        contentHash: passage.contentHash,
      })
    }
  }
}
@@ -0,0 +1,93 @@
1
+ // E5 embeddings (multilingual-e5-base) compress cosine similarity into a narrow
2
+ // ~0.70-0.85 band even for unrelated pairs — a documented consequence of the
3
+ // low InfoNCE training temperature (tau=0.01). Absolute thresholds therefore
4
+ // cannot tell a real match from baseline noise: an unrelated query's top hit
5
+ // (0.8055) can outscore a genuine match's top hit (0.7786) on a different query.
6
+ //
7
+ // The discriminating signal is QUERY-LOCAL CONTRAST: how far the best score
8
+ // stands above this query's own baseline cluster. A real match lifts top1 well
9
+ // clear of the pack; a no-match leaves top1 buried in the band. Measured on a
10
+ // live 193-topic index, no-match queries land top1-baseline <= 0.051 and
11
+ // has-match queries land >= 0.074, so a 0.06 margin separates them cleanly.
12
+ //
13
+ // The baseline is the MEDIAN of the non-head scores (robust to a near-duplicate
14
+ // cluster inflating a raw mean), which keeps a genuine winner above a crowd of
15
+ // similar topics. Suppression only fires once the non-head pack is large enough
16
+ // to estimate the band: the top HEAD_EXCLUDED_FROM_BASELINE scores are dropped,
17
+ // and at least MIN_BASELINE_PACK must remain. Below that, suppression is skipped
18
+ // — a false negative (injecting one obvious shard) is cheaper than wrongly
19
+ // suppressing the only relevant memory off a noisy 1-4 score tail.
20
+ //
21
+ // The contrast is top1 - median(non-head). A UNIFORM upward shift of the whole
22
+ // band (the "single-domain memory, everything is somewhat related" case) cancels
23
+ // out of that difference and leaves the verdict unchanged, so a concentrated
24
+ // corpus does NOT structurally compress the gap. Only a genuine reduction in
25
+ // rank SPREAD would, and a spread-normalized gate (gap >= k*MAD, or a z-score)
26
+ // is rejected on purpose: a no-match query can also produce a tight non-head
27
+ // pack plus one order-statistic outlier, which is exactly the case the absolute
28
+ // MARGIN suppresses. We keep the absolute margin as the vector-only no-match
29
+ // guard; recovery for a genuinely-suppressed single-domain match belongs in the
30
+ // corroborating keyword lane, not in a weaker semantic threshold.
31
+ const MARGIN = 0.06
32
+ const HEAD_EXCLUDED_FROM_BASELINE = 5
33
+ // Minimum non-head scores required to trust a suppression verdict. A median over
34
+ // 1-4 scores (n=6..9 once the head is dropped) is too noisy to zero out the only
35
+ // memory on, so the gated path needs HEAD_EXCLUDED_FROM_BASELINE + this many.
36
+ const MIN_BASELINE_PACK = 5
37
+ const GATED_TOPIC_FLOOR = HEAD_EXCLUDED_FROM_BASELINE + MIN_BASELINE_PACK
38
+
39
// The contrast reference used when ADMITTING stream rows: the median of the
// available topic scores once the head has been trimmed off. Topics define the
// ambient cosine band; sparse streams consume it but never define it, so a
// nearest-neighbour cluster of fragments can't move the bar.
//
// Returns null when fewer than two topic scores exist — a single score is not
// an ambient band, so an uncorroborated semantic-only stream must not inject
// off it (it can still reach RRF via the keyword lane).
//
// The head trim is ADAPTIVE because streams never pass ungated: a strong top
// topic must NOT raise the stream bar, or a genuinely-fresh fragment would have
// to beat the best existing topic by the full margin and so never inject on a
// small corpus. At least top1 is always dropped, and the exclusion grows up to
// HEAD_EXCLUDED_FROM_BASELINE only while a MIN_BASELINE_PACK-size tail survives:
//   n=2..5 → drop top1, contrast against the remaining ambient topics
//   n=6..9 → drop enough head to keep a MIN_BASELINE_PACK tail
//   n>=10  → drop the full top HEAD_EXCLUDED_FROM_BASELINE
// `topicScores` MUST be sorted descending.
export function streamAdmissionBaseline(topicScores: number[]): number | null {
  const available = topicScores.length
  if (available < 2) return null

  // Clamp the trim: never below 1, then never above the full head size.
  let headTrim = available - MIN_BASELINE_PACK
  if (headTrim < 1) headTrim = 1
  if (headTrim > HEAD_EXCLUDED_FROM_BASELINE) headTrim = HEAD_EXCLUDED_FROM_BASELINE

  return median(topicScores.slice(headTrim))
}
60
+
61
// True when one cosine score stands at least MARGIN above the given baseline.
// Used to admit stream rows against the topic contrast reference: a stream
// candidate survives only if it stands as far above the band as a real topic
// match would. A null baseline (no usable topic band) admits nothing.
export function clearsBaseline(score: number, baseline: number | null): boolean {
  if (baseline === null) return false
  return score - baseline >= MARGIN
}
68
+
69
// Counts how many of the sorted-descending topic cosine scores survive the
// gate, capped at `topK`. Zero means "no relevant memory matched" — a valid,
// expected outcome the caller injects as an empty memory block.
//
// With fewer than GATED_TOPIC_FLOOR scores the non-head tail is too short
// (1-4 scores) for a reliable suppression verdict, so topics pass ungated: a
// false negative of one obvious shard is cheaper than suppressing the only
// memory. `scores` MUST be sorted descending.
export function gateRelevance(scores: number[], topK: number): number {
  const available = scores.length
  if (available === 0 || topK <= 0) return 0
  if (available < GATED_TOPIC_FLOOR) return Math.min(available, topK)

  // Query-local contrast: how far the best score sits above the median of the
  // scores that remain once the head is dropped.
  const best = scores[0]!
  const contrast = best - median(scores.slice(HEAD_EXCLUDED_FROM_BASELINE))
  if (contrast < MARGIN) return 0

  // Keep everything at or above the point halfway between the baseline and top1.
  const cutoff = best - 0.5 * contrast
  let kept = 0
  for (const score of scores) {
    if (score >= cutoff) kept += 1
  }
  return Math.min(kept, topK)
}
87
+
88
// Median of `values` (mean of the two middle values for an even count); 0 for
// an empty list. The input array is not modified.
function median(values: number[]): number {
  const count = values.length
  if (count === 0) return 0

  const ascending = values.slice().sort((left, right) => left - right)
  const upper = (count - (count % 2)) / 2
  if (count % 2 === 1) return ascending[upper]!
  return (ascending[upper - 1]! + ascending[upper]!) / 2
}
@@ -0,0 +1,71 @@
1
+ import { join } from 'node:path'
2
+
3
+ import { EMBEDDING_MODEL_ID, embed } from './embedder'
4
+ import { collectPassages, findMissingPassages, type EmbedFn } from './hybrid'
5
+ import { VectorStore } from './store'
6
+
7
// Brings the on-disk vector index (`<agentDir>/memory/.vectors/index.db`) in
// line with the passages that should currently exist: prunes rows that are no
// longer wanted, then embeds and upserts every missing or outdated passage.
//
// Returns `built: true` with the number of rows written when at least one
// passage was embedded; otherwise `built: false` and `count: 0`. `pruned` is
// the number of stale rows removed either way. The store is always closed.
export async function buildStartupVectorIndex(
  agentDir: string,
  embedFn: EmbedFn = embed,
): Promise<{ built: boolean; pruned: number; count: number }> {
  const dbPath = join(agentDir, 'memory', '.vectors', 'index.db')
  const store = VectorStore.open(dbPath)
  try {
    const wanted = await collectPassages(agentDir)

    // Prune current-model rows whose id left the desired passage set (deleted
    // topics, dreamed-then-GC'd fragments, and — load-bearing here — fragments
    // dreaming marked superseded). Without this, superseded `stream:*` rows stay
    // in the table and can outrank active rows by raw cosine, consuming the
    // finite `topK * 2` candidates before parent-child fusion ever sees them.
    const pruned = pruneStaleRows(store, wanted)

    const pending = findMissingPassages(store, wanted)
    if (pending.length === 0) return { built: false, pruned, count: 0 }

    const texts = pending.map((passage) => passage.text)
    const embeddings = await embedFn(texts, 'passage')

    let count = 0
    for (const [position, passage] of pending.entries()) {
      // A passage the embedder returned nothing for is skipped, not written.
      const embedding = embeddings[position]
      if (embedding === undefined) continue

      store.upsert({
        id: passage.id,
        source: passage.source,
        key: passage.key,
        model: EMBEDDING_MODEL_ID,
        dims: embedding.length,
        embedding,
        contentHash: passage.contentHash,
      })
      count += 1
    }

    if (count === 0) return { built: false, pruned, count: 0 }

    // After a model/dtype switch, rows written under the prior variant can
    // linger with a stale `model` stamp: re-embedded passages overwrite their
    // row by id, but orphans from removed content are never re-embedded, and
    // the prune above only touches current-model rows. query() already excludes
    // other-model rows, so this is hygiene — bounding DB growth across variant
    // switches — not correctness. It runs only after a successful re-embed.
    store.deleteOtherModels(EMBEDDING_MODEL_ID)

    return { built: true, pruned, count }
  } finally {
    store.close()
  }
}
62
+
63
// Deletes rows stamped with the current embedding model whose id is not in the
// wanted passage set, and returns how many were removed. Rows from other models
// are left alone here.
function pruneStaleRows(store: VectorStore, wanted: Awaited<ReturnType<typeof collectPassages>>): number {
  const keep = new Set<string>()
  for (const passage of wanted) keep.add(passage.id)

  const staleIds: string[] = []
  for (const meta of store.getAllMeta()) {
    if (meta.model !== EMBEDDING_MODEL_ID) continue
    if (!keep.has(meta.id)) staleIds.push(meta.id)
  }

  if (staleIds.length > 0) store.deleteMany(staleIds)
  return staleIds.length
}
@@ -0,0 +1,203 @@
1
+ import { Database } from 'bun:sqlite'
2
+ import { mkdirSync } from 'node:fs'
3
+ import { dirname } from 'node:path'
4
+
5
+ export type VectorRow = {
6
+ id: string
7
+ source: 'topic' | 'stream' | 'reference'
8
+ key: string
9
+ model: string
10
+ dims: number
11
+ embedding: Float32Array
12
+ contentHash: string
13
+ updatedAt: string
14
+ }
15
+
16
+ export type VectorMeta = { id: string; model: string; contentHash: string }
17
+
18
+ export type ScoredVectorRow = { row: VectorRow; score: number }
19
+
20
+ type StoredVectorRow = {
21
+ id: string
22
+ source: 'topic' | 'stream' | 'reference'
23
+ key: string
24
+ model: string
25
+ dims: number
26
+ embedding: Uint8Array
27
+ content_hash: string
28
+ updated_at: string
29
+ }
30
+
31
// SQLite-backed store of passage embeddings: one row per passage id in a single
// `vectors` table. Similarity search is a brute-force cosine scan.
export class VectorStore {
  // Opens the database at `dbPath`, creating the parent directory and the
  // `vectors` table when they do not exist yet.
  static open(dbPath: string): VectorStore {
    mkdirSync(dirname(dbPath), { recursive: true })
    const db = new Database(dbPath)
    db.run(`
      CREATE TABLE IF NOT EXISTS vectors (
        id TEXT PRIMARY KEY,
        source TEXT NOT NULL,
        key TEXT NOT NULL,
        model TEXT NOT NULL,
        dims INTEGER NOT NULL,
        embedding BLOB NOT NULL,
        content_hash TEXT NOT NULL,
        updated_at TEXT NOT NULL
      )
    `)
    return new VectorStore(db)
  }

  private constructor(private readonly db: Database) {}

  // Inserts or overwrites the row for `row.id`. A row that already carries the
  // same content hash AND model is left untouched (including its `updated_at`).
  upsert(row: Omit<VectorRow, 'updatedAt'>): void {
    const existing = this.db
      .query<{ content_hash: string; model: string }, [string]>('SELECT content_hash, model FROM vectors WHERE id = ?')
      .get(row.id)

    if (existing?.content_hash === row.contentHash && existing.model === row.model) {
      return
    }

    this.db
      .query(
        `INSERT INTO vectors (id, source, key, model, dims, embedding, content_hash, updated_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?)
         ON CONFLICT(id) DO UPDATE SET
           source = excluded.source,
           key = excluded.key,
           model = excluded.model,
           dims = excluded.dims,
           embedding = excluded.embedding,
           content_hash = excluded.content_hash,
           updated_at = excluded.updated_at`,
      )
      .run(
        row.id,
        row.source,
        row.key,
        row.model,
        row.dims,
        // Store exactly the bytes this Float32Array views, not its whole backing buffer.
        Buffer.from(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength),
        row.contentHash,
        new Date().toISOString(),
      )
  }

  // The `topK` rows most similar to `embedding` among rows of `modelId`, best
  // first. A non-positive `topK` returns nothing without scanning.
  query(embedding: Float32Array, topK: number, modelId: string): VectorRow[] {
    if (topK <= 0) return []
    return this.queryScored(embedding, modelId)
      .slice(0, topK)
      .map(({ row }) => row)
  }

  // Same cosine scan as `query` but returns every compatible row WITH its score
  // and unsliced, so a caller can reason about the full score distribution (the
  // relevance gate's per-query baseline) before deciding how many to keep.
  //
  // Filter by embedding identity, not dims alone: a stale row from a different
  // model/dtype variant can share the same dims but lives in an incompatible
  // vector space, so cosine against it is garbage. Excluding it here keeps a
  // partial re-embed (mixed variants mid-rebuild) at reduced recall, never
  // wrong scores.
  //
  // Rows whose stored blob is not exactly `dims` float32 values are skipped for
  // the same reason: decoding one would either throw (byte length not a
  // multiple of 4) and abort the whole search, or score against a zero-padded
  // or truncated vector.
  queryScored(embedding: Float32Array, modelId: string): ScoredVectorRow[] {
    // The query vector's magnitude is identical for every row in this scan, so
    // hoist it out of the per-row cosine instead of recomputing N times (each
    // recompute is 768 multiply-adds + a sqrt). Behavior is unchanged — same
    // cosine values, fewer operations on the brute-force hot path.
    const queryMagnitude = magnitude(embedding)
    const expectedBytes = embedding.length * Float32Array.BYTES_PER_ELEMENT
    return this.db
      .query<StoredVectorRow, [string, number]>('SELECT * FROM vectors WHERE model = ? AND dims = ?')
      .all(modelId, embedding.length)
      .filter((stored) => stored.embedding.byteLength === expectedBytes)
      .map(toVectorRow)
      .map((row) => ({ row, score: cosineSimilarity(embedding, queryMagnitude, row.embedding) }))
      .sort((a, b) => b.score - a.score)
  }

  // Removes every row whose model stamp differs from `modelId`.
  deleteOtherModels(modelId: string): void {
    this.db.query('DELETE FROM vectors WHERE model != ?').run(modelId)
  }

  // Removes the row with this id, if any.
  delete(id: string): void {
    this.db.query('DELETE FROM vectors WHERE id = ?').run(id)
  }

  // Removes all the given ids inside a single transaction.
  deleteMany(ids: string[]): void {
    const statement = this.db.query('DELETE FROM vectors WHERE id = ?')
    const remove = this.db.transaction((values: string[]) => {
      for (const id of values) statement.run(id)
    })
    remove(ids)
  }

  // Every row, fully decoded, ordered by id.
  getAll(): VectorRow[] {
    return this.db.query<StoredVectorRow, []>('SELECT * FROM vectors ORDER BY id').all().map(toVectorRow)
  }

  // Metadata only — never decodes the embedding BLOB, so a row whose blob is
  // malformed (byte length not a multiple of 4) can't throw here the way
  // getAll's Float32Array decode would.
  getAllMeta(): VectorMeta[] {
    return this.db
      .query<{ id: string; model: string; content_hash: string }, []>(
        'SELECT id, model, content_hash FROM vectors ORDER BY id',
      )
      .all()
      .map((row) => ({ id: row.id, model: row.model, contentHash: row.content_hash }))
  }

  // Fully decoded rows for the given ids, in the order requested; ids with no
  // row are omitted.
  getByIds(ids: string[]): VectorRow[] {
    const statement = this.db.query<StoredVectorRow, [string]>('SELECT * FROM vectors WHERE id = ?')
    return ids.flatMap((id) => {
      const row = statement.get(id)
      return row ? [toVectorRow(row)] : []
    })
  }

  // Closes the underlying database handle.
  close(): void {
    this.db.close()
  }
}
160
+
161
// Converts a raw table row (snake_case columns, BLOB embedding) into the public
// `VectorRow` shape, decoding the embedding into a Float32Array.
function toVectorRow(row: StoredVectorRow): VectorRow {
  const { id, source, key, model, dims, embedding, content_hash, updated_at } = row
  return {
    id,
    source,
    key,
    model,
    dims,
    embedding: blobToFloat32Array(embedding),
    contentHash: content_hash,
    updatedAt: updated_at,
  }
}
173
+
174
// Decodes a stored BLOB into a Float32Array backed by its own buffer. The bytes
// are copied into a fresh ArrayBuffer first, so the float view starts at offset
// 0 whatever the offset of the incoming bytes. Throws a RangeError when the
// byte length is not a multiple of 4.
function blobToFloat32Array(blob: Uint8Array): Float32Array {
  const { buffer, byteOffset, byteLength } = Buffer.from(blob)
  const isolated = buffer.slice(byteOffset, byteOffset + byteLength)
  return new Float32Array(isolated)
}
179
+
180
// Euclidean length (L2 norm) of `v`; 0 for an empty vector.
function magnitude(v: Float32Array): number {
  let total = 0
  for (const component of v) {
    total += component * component
  }
  return Math.sqrt(total)
}
188
+
189
// Cosine similarity between `a` and `b`, given `a`'s precomputed magnitude.
// Only the first `a.length` positions take part: positions missing from a
// shorter `b` count as 0, and any extra positions in a longer `b` are ignored.
// Returns 0 when either magnitude is 0.
function cosineSimilarity(a: Float32Array, aMagnitude: number, b: Float32Array): number {
  if (aMagnitude === 0) return 0

  let dot = 0
  let bSquared = 0
  a.forEach((aValue, index) => {
    const bValue = b[index] ?? 0
    dot += aValue * bValue
    bSquared += bValue * bValue
  })

  const bMagnitude = Math.sqrt(bSquared)
  return bMagnitude === 0 ? 0 : dot / (aMagnitude * bMagnitude)
}
@@ -0,0 +1,124 @@
1
+ // Xenova/multilingual-e5-base is a 512-token model; transformers.js truncates
2
+ // past that limit by default. Inputs longer than the cap would otherwise lose
3
+ // their tail from the embedded match surface SILENTLY. Canonical compact shards
4
+ // (heading + one belief sentence first) sit well under the cap; legacy verbose
5
+ // shards, legacy prose migration events, long fragments, and very long queries
6
+ // can exceed it. This module estimates token length cheaply (no tokenizer) and
7
+ // bounds the embeddable text deterministically so the cut is explicit, not a
8
+ // hidden tokenizer side effect. The dreaming subagent separately compacts the
9
+ // flagged shards over time, but bounding here guarantees no silent loss even
10
+ // for inputs dreaming never rewrites (e.g. raw legacy prose).
11
export const MAX_MODEL_TOKENS = 512

// The E5 prefix ("query: " / "passage: ") is prepended before tokenization and
// eats a couple of tokens from the budget. Subtracting a small reserve keeps
// the bound honest about the text budget the caller actually gets.
const PREFIX_TOKEN_RESERVE = 4

// Char-per-token ratio for a rough, deliberately CONSERVATIVE token estimate.
// multilingual-e5-base tokenizes CJK and other non-Latin scripts into far more
// tokens per character than English, so a single chars/token ratio would badly
// under-count them. We estimate per script: ~3.5 chars/token for Latin-ish
// text, ~1 token per CJK character. Over-estimating (bounding a little early)
// is safer than under-estimating (letting the tokenizer cut silently), so the
// ratios lean toward flagging.
//
// The char ratio ALONE under-counts text made of many short words: the
// tokenizer emits at least one token per whitespace-delimited word, so
// `'a '.repeat(509)` is ~509 tokens but only ~291 by chars/3.5. The non-CJK
// estimate therefore takes the MAX of the char-ratio count and the word count,
// and the inverse (charBudgetForTokens) charges a token at each word start
// too, so a bounded string re-estimates to at most the budget under either
// term.
const LATIN_CHARS_PER_TOKEN = 3.5

// Effective text-token budget once the prefix reserve is removed.
export const TEXT_TOKEN_BUDGET = MAX_MODEL_TOKENS - PREFIX_TOKEN_RESERVE

// CJK Unified Ideographs, Hiragana, Katakana, Hangul — scripts the tokenizer
// splits at roughly one token per character (often more). Counted 1:1.
//
// Two regexes share ONE character class. The non-global one is for
// per-character tests (charBudgetForTokens), where a global regex's stateful
// lastIndex would make repeated `.test()` calls flip. The global one is for
// counting matches across a whole string (estimateTokens); it is built from
// the non-global pattern's source so the two can never drift apart.
const CJK_CHAR_PATTERN = /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uac00-\ud7af\uff66-\uff9f]/u
const CJK_COUNT_PATTERN = new RegExp(CJK_CHAR_PATTERN.source, 'gu')
44
+
45
// Conservative token-count estimate computed without loading the tokenizer
// (loading it would defeat the embedder's deliberate lazy-load of the heavy
// native stack). Each CJK char is charged one token. The remaining text is
// charged the LARGER of its char-ratio estimate and its word count, because
// the tokenizer never emits fewer than one token per whitespace-delimited
// word.
export function estimateTokens(text: string): number {
  const cjkMatches = text.match(CJK_COUNT_PATTERN)
  const cjkTokens = cjkMatches === null ? 0 : cjkMatches.length
  const charRatioTokens = Math.ceil((text.length - cjkTokens) / LATIN_CHARS_PER_TOKEN)
  const wordTokens = countNonCjkWords(text)
  return cjkTokens + (wordTokens > charRatioTokens ? wordTokens : charRatioTokens)
}
56
+
57
// Number of whitespace-delimited words once every CJK char has been replaced
// by a space. CJK chars are already charged one token each elsewhere, so a CJK
// run must not also be counted as a word; replacing it with a space also
// splits any Latin text on either side of it into separate words.
function countNonCjkWords(text: string): number {
  const withoutCjk = text.replace(CJK_COUNT_PATTERN, ' ')
  const words = withoutCjk.match(/\S+/gu)
  return words === null ? 0 : words.length
}
62
+
63
// True when the estimated token count of `text` is strictly greater than
// TEXT_TOKEN_BUDGET; text estimated at exactly the budget is not over.
export function isOverBudget(text: string): boolean {
  const estimated = estimateTokens(text)
  return estimated > TEXT_TOKEN_BUDGET
}
66
+
67
// Builds the embedded text for a stream fragment: the topic, a newline, then
// the body. Topic passages do not use this helper — they go through
// `topicPassage` in passages.ts (which strips citation lines from the embedded
// text), and over-budget detection for topics derives from that same helper so
// the budget check matches what is actually embedded.
export function fragmentEmbeddableText(event: { topic: string; body: string }): string {
  const { topic, body } = event
  return `${topic}\n${body}`
}
74
+
75
// Result of bounding a piece of text to the token budget before embedding.
export type BoundedText = {
  // The text to embed: the input itself, or a trimmed prefix of it.
  text: string
  // Whether the input was trimmed to fit the budget.
  bounded: boolean
  // Token estimate of the ORIGINAL input, before any trimming.
  estimatedTokens: number
}
80
+
81
// Deterministically trims text to the estimated token budget BEFORE embedding,
// so the tokenizer's implicit cut never fires and the truncation point is one
// we own and can record. The cut is a character budget derived from the same
// conservative estimate, and it always keeps a prefix, so the leading
// heading/belief sentence (the load-bearing retrieval signal) survives.
//
// `estimatedTokens` in the result is the estimate for the input as given, not
// for the trimmed text.
export function boundEmbeddableText(text: string): BoundedText {
  const estimatedTokens = estimateTokens(text)
  const bounded = estimatedTokens > TEXT_TOKEN_BUDGET
  const kept = bounded ? text.slice(0, charBudgetForTokens(text, TEXT_TOKEN_BUDGET)) : text
  return { text: kept, bounded, estimatedTokens }
}
94
+
95
// Returns the length, in UTF-16 code units, of the longest prefix of `text`
// whose estimateTokens is still within `tokenBudget`. Recomputes the EXACT
// same estimate incrementally — CJK chars at 1 token, plus max(non-CJK
// char-ratio, word count) — so a bounded prefix can never re-estimate above
// the budget no matter which term dominates. The estimate is monotonic
// non-decreasing in prefix length, so a single forward walk suffices.
//
// The walk is by code point, so the returned length never splits a surrogate
// pair; an astral character contributes 2 units to both the non-CJK count and
// the result, matching estimateTokens' use of `text.length`.
function charBudgetForTokens(text: string, tokenBudget: number): number {
  // Hoisted out of the loop so the regex is built once, not once per
  // character. It is non-global, so `.test()` carries no lastIndex state.
  const whitespace = /\s/u

  let cjkTokens = 0
  let nonCjkUnits = 0
  let wordCount = 0
  let insideWord = false
  let keptUnits = 0

  for (const char of text) {
    const isCjk = CJK_CHAR_PATTERN.test(char)
    // A word char is neither CJK nor whitespace. CJK chars are charged via
    // `cjkTokens`, never as words (mirrors countNonCjkWords).
    const isWordChar = !isCjk && !whitespace.test(char)
    // A word starts at a word char that follows a non-word char.
    const startsWord = isWordChar && !insideWord

    // Tentative totals if this character were kept.
    const candidateCjk = isCjk ? cjkTokens + 1 : cjkTokens
    const candidateNonCjk = isCjk ? nonCjkUnits : nonCjkUnits + char.length
    const candidateWords = startsWord ? wordCount + 1 : wordCount
    const estimate =
      candidateCjk + Math.max(Math.ceil(candidateNonCjk / LATIN_CHARS_PER_TOKEN), candidateWords)

    // First character that would exceed the budget ends the prefix.
    if (estimate > tokenBudget) break

    cjkTokens = candidateCjk
    nonCjkUnits = candidateNonCjk
    wordCount = candidateWords
    insideWord = isWordChar
    keptUnits += char.length
  }

  return keptUnits
}
@@ -56,6 +56,8 @@ const PROCESS_ENV_TARGETS: ReadonlyArray<string> = [
56
56
  'ANTHROPIC_API_KEY',
57
57
  'MINIMAX_API_KEY',
58
58
  'DEEPSEEK_API_KEY',
59
+ 'MOONSHOT_API_KEY',
60
+ 'MOONSHOT_CODING_API_KEY',
59
61
  'GOOGLE_API_KEY',
60
62
  'GEMINI_API_KEY',
61
63
  'AWS_ACCESS_KEY_ID',