npm - typeclaw - Versions diffs - 0.36.7 → 0.37.0 - Mend

typeclaw 0.36.7 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

package/README.md +2 -2
package/package.json +3 -2
package/src/agent/index.ts +31 -11
package/src/agent/live-sessions.ts +12 -0
package/src/agent/model-fallback.ts +17 -15
package/src/agent/model-overrides.ts +2 -2
package/src/agent/session-meta.ts +10 -0
package/src/agent/subagents.ts +11 -2
package/src/agent/system-prompt.ts +9 -3
package/src/agent/todo/continuation-policy.ts +6 -3
package/src/agent/todo/continuation-wiring.ts +4 -2
package/src/agent/todo/continuation.ts +3 -3
package/src/agent/tools/todo/index.ts +27 -4
package/src/bundled-plugins/agent-browser/index.ts +33 -108
package/src/bundled-plugins/agent-browser/shim.ts +3 -94
package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
package/src/bundled-plugins/memory/README.md +80 -23
package/src/bundled-plugins/memory/append-tool.ts +74 -53
package/src/bundled-plugins/memory/citation-superset.ts +4 -0
package/src/bundled-plugins/memory/citations.ts +54 -0
package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
package/src/bundled-plugins/memory/dreaming.ts +444 -21
package/src/bundled-plugins/memory/index.ts +544 -400
package/src/bundled-plugins/memory/load-memory.ts +87 -10
package/src/bundled-plugins/memory/load-shards.ts +48 -22
package/src/bundled-plugins/memory/memory-logger.ts +95 -106
package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
package/src/bundled-plugins/memory/parent-link.ts +33 -0
package/src/bundled-plugins/memory/paths.ts +12 -0
package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
package/src/bundled-plugins/memory/references/load-references.ts +212 -0
package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
package/src/bundled-plugins/memory/search-tool.ts +282 -45
package/src/bundled-plugins/memory/stream-events.ts +1 -0
package/src/bundled-plugins/memory/stream-io.ts +28 -3
package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
package/src/bundled-plugins/memory/vector/config.ts +28 -0
package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
package/src/bundled-plugins/memory/vector/passages.ts +125 -0
package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
package/src/bundled-plugins/memory/vector/startup.ts +71 -0
package/src/bundled-plugins/memory/vector/store.ts +203 -0
package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
package/src/channels/router.ts +239 -40
package/src/cli/incomplete-init.ts +57 -0
package/src/cli/init.ts +143 -12
package/src/cli/inspect.ts +11 -5
package/src/cli/model.ts +112 -34
package/src/cli/restart.ts +24 -0
package/src/cli/start.ts +24 -0
package/src/cli/tunnel.ts +53 -8
package/src/config/config.ts +110 -19
package/src/config/index.ts +5 -1
package/src/config/models-mutation.ts +29 -11
package/src/config/providers-mutation.ts +2 -2
package/src/config/providers.ts +146 -12
package/src/container/shared.ts +9 -0
package/src/container/start.ts +87 -4
package/src/cron/consumer.ts +13 -7
package/src/hostd/models.ts +64 -0
package/src/hostd/paths.ts +6 -0
package/src/hostd/portbroker-manager.ts +2 -2
package/src/init/checkpoint.ts +201 -0
package/src/init/dockerfile.ts +164 -51
package/src/init/gitignore.ts +7 -7
package/src/init/index.ts +41 -9
package/src/init/line-auth.ts +50 -21
package/src/init/models-dev.ts +96 -21
package/src/init/oauth-login.ts +3 -3
package/src/init/progress.ts +29 -0
package/src/init/validate-api-key.ts +4 -0
package/src/inspect/index.ts +13 -6
package/src/inspect/item-list.ts +11 -2
package/src/inspect/live-list.ts +65 -0
package/src/inspect/open-item.ts +22 -1
package/src/inspect/session-list.ts +29 -0
package/src/models/embedding-model.ts +114 -0
package/src/models/transformers-version.ts +55 -0
package/src/plugin/types.ts +3 -0
package/src/portbroker/container-server.ts +23 -0
package/src/portbroker/forward-request-bus.ts +35 -0
package/src/portbroker/forward-result-bus.ts +2 -3
package/src/portbroker/hostd-client.ts +182 -36
package/src/portbroker/index.ts +6 -1
package/src/portbroker/protocol.ts +9 -2
package/src/run/channel-session-factory.ts +11 -1
package/src/run/index.ts +41 -7
package/src/server/command-runner.ts +24 -1
package/src/server/index.ts +42 -8
package/src/shared/index.ts +2 -0
package/src/shared/protocol.ts +31 -0
package/src/skills/typeclaw-channels/SKILL.md +4 -4
package/src/skills/typeclaw-config/SKILL.md +2 -2
package/src/skills/typeclaw-memory/SKILL.md +3 -1
package/src/skills/typeclaw-permissions/SKILL.md +3 -3
package/src/skills/typeclaw-skills/SKILL.md +1 -1
package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
package/src/tunnels/providers/cloudflare-quick.ts +65 -7
package/src/tunnels/upstream-probe.ts +25 -0
package/typeclaw.schema.json +156 -67
package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
package/src/portbroker/bind-with-forward.ts +0 -102

package/src/bundled-plugins/memory/vector/passages.ts ADDED Viewed

@@ -0,0 +1,125 @@
+import { createHash } from 'node:crypto'
+import { stripCitationLines } from '../citations'
+import { fragmentContentHash } from '../fragment-parser'
+import { loadAllShards, type TopicShard } from '../load-shards'
+import { buildParentLinks } from '../parent-link'
+import { loadAllReferences } from '../references/load-references'
+import { readAllUndreamedStreamDays, type UndreamedStreamDay } from '../stream-io'
+import { EMBEDDING_MODEL_ID } from './embedder'
+import type { VectorStore } from './store'
+import { boundEmbeddableText, fragmentEmbeddableText } from './truncation'
+/**
+ * One unit of text to embed into the vector index.
+ */ export type Passage = {
+  id: string // store row id: `topic:<slug>`, `stream:<key>`, or `reference:<slug>#<chunk index>`
+  source: 'topic' | 'stream' | 'reference' // which kind of memory the text came from
+  key: string // topic/reference slug, or `<date>#<fragment id>` / `<date>#legacy-<hash prefix>` for stream rows
+  text: string // the string handed to the embedder
+  contentHash: string // freshness stamp compared by `findMissingPassages` to detect rows needing a re-embed
+}
+// The single source of truth for a topic's embedded text + freshness hash, so the
+// startup index build and dreaming's per-pass refresh stay byte-identical. The
+// contentHash covers the EMBEDDED text (not the raw body): changing how the text
+// is derived must invalidate every existing topic row, whereas `fragmentContentHash`
+// over the unchanged raw body would not, so `findMissingPassages` would skip them.
+export function topicPassage(slug: string, heading: string, body: string): Passage {
+  const text = `${heading}\n${stripCitationLines(body)}` // heading first, then the body passed through stripCitationLines
+  return { id: `topic:${slug}`, source: 'topic', key: slug, text, contentHash: hashContent(text) } // hash is taken over the embedded text itself
+}
+/**
+ * Gathers every passage that should currently be present in the vector index
+ * for `agentDir`: topic shards and undreamed stream events (via `buildPassages`)
+ * followed by the chunks of every reference (via `referencePassages`).
+ *
+ * The three loads do not depend on each other, so they are awaited together.
+ */ export async function collectPassages(agentDir: string): Promise<Passage[]> {
+  const [shards, streamDays, references] = await Promise.all([
+    loadAllShards(agentDir),
+    readAllUndreamedStreamDays(agentDir),
+    referencePassages(agentDir),
+  ])
+  return [...buildPassages(shards, streamDays), ...references] // topic + stream passages first, reference chunks last
+}
+/**
+ * Loads all references under `agentDir` and flattens them into passages by
+ * calling `referencePassagesForOne` for each, so the startup build and the
+ * on-write hook share a single derivation of ids, chunks, and hashes.
+ */ export async function referencePassages(agentDir: string): Promise<Passage[]> {
+  const references = await loadAllReferences(agentDir)
+  return references.flatMap((reference): Passage[] =>
+    referencePassagesForOne(reference.slug, reference.body, reference.frontmatter.demoted),
+  )
+}
+// Passages for a single reference, used by the on-write hook so a freshly stored
+// reference is embedded immediately instead of waiting for the next startup index
+// build. `referencePassages` delegates here, so ids/chunks/hashes are derived identically
+// and on-write rows agree with a later startup rebuild (no churn, no stale duplicates).
+// A demoted reference yields no passages, which is therefore also the exclusion
+// `referencePassages` applies at startup.
+export function referencePassagesForOne(slug: string, body: string, demoted = false): Passage[] {
+  if (demoted) return [] // demoted references are never embedded
+  return chunkReferenceBody(body).map((chunk, chunkIdx) => ({
+    id: `reference:${slug}#${chunkIdx}`, // one row per chunk, numbered from 0 in body order
+    source: 'reference',
+    key: slug,
+    text: chunk,
+    contentHash: hashContent(chunk), // hashed per chunk, so only changed chunks need re-embedding
+  }))
+}
+/**
+ * Returns the passages that still need embedding: those with no row in `store`,
+ * a row stamped with a different embedding model, or a row whose content hash
+ * no longer matches the passage. Input order is preserved.
+ */ export function findMissingPassages(store: VectorStore, passages: Passage[]): Passage[] {
+  const existing = new Map(store.getAllMeta().map((row) => [row.id, row])) // id -> metadata, read once for the whole comparison
+  return passages.filter((passage) => {
+    const row = existing.get(passage.id)
+    return row === undefined || row.model !== EMBEDDING_MODEL_ID || row.contentHash !== passage.contentHash
+  })
+}
+/**
+ * Turns loaded topic shards and undreamed stream days into passages.
+ *
+ * Topics map one-to-one through `topicPassage`. Stream events map by type:
+ * watermarks are skipped, fragments become `stream:<date>#<id>` rows unless they
+ * are superseded, and every other event is treated as legacy text keyed by the
+ * first 12 hex characters of its content hash.
+ */ function buildPassages(shards: TopicShard[], streamDays: UndreamedStreamDay[]): Passage[] {
+  const { supersededFragmentIds } = buildParentLinks(shards)
+  return [
+    ...shards.map((shard): Passage => topicPassage(shard.slug, shard.frontmatter.heading, shard.body)),
+    ...streamDays.flatMap((day) =>
+      day.events.flatMap((event): Passage[] => {
+        if (event.type === 'watermark') return []
+        if (event.type === 'fragment') {
+          // Superseded fragments stay cited for GC/history but are not embedded:
+          // they must never be a retrieval hook for a belief they no longer back.
+          if (supersededFragmentIds.has(event.id)) return []
+          const key = `${day.date}#${event.id}`
+          return [
+            {
+              id: `stream:${key}`,
+              source: 'stream',
+              key,
+              text: fragmentEmbeddableText(event),
+              contentHash: fragmentContentHash(event), // NOTE(review): hashed via fragmentContentHash, not the embedded text as topics are; confirm it changes whenever fragmentEmbeddableText's output would
+            },
+          ]
+        }
+        const key = `${day.date}#legacy-${hashContent(event.text).slice(0, 12)}` // legacy events have no id here, so the key is derived from the text
+        return [{ id: `stream:${key}`, source: 'stream', key, text: event.text, contentHash: hashContent(event.text) }]
+      }),
+    ),
+  ]
+}
+/**
+ * Splits a reference body into consecutive chunks by repeatedly taking the
+ * prefix that `boundEmbeddableText` returns for the remaining text. The chunks
+ * are contiguous slices, so concatenating them reproduces `body`. An empty body
+ * yields a single empty chunk.
+ */ function chunkReferenceBody(body: string): string[] {
+  if (body.length === 0) return ['']
+  const chunks: string[] = []
+  let remaining = body
+  while (remaining.length > 0) {
+    const bounded = boundEmbeddableText(remaining)
+    if (!bounded.bounded) { // the remainder fits the budget: it is the final chunk
+      chunks.push(bounded.text)
+      break
+    }
+    if (bounded.text.length === 0) { // defensive: guarantees forward progress if no prefix fits the budget
+      chunks.push(remaining[0]!) // NOTE(review): takes one UTF-16 code unit, which could split a surrogate pair
+      remaining = remaining.slice(1)
+      continue
+    }
+    chunks.push(bounded.text)
+    remaining = remaining.slice(bounded.text.length)
+  }
+  return chunks
+}
+/** Returns the hex-encoded SHA-256 digest of `content`. */ function hashContent(content: string): string {
+  return createHash('sha256').update(content).digest('hex')
+}

package/src/bundled-plugins/memory/vector/reference-index-on-write.ts ADDED Viewed

@@ -0,0 +1,50 @@
+import { embed, EMBEDDING_MODEL_ID } from './embedder'
+import type { EmbedFn } from './hybrid'
+import { referencePassagesForOne } from './passages'
+import type { VectorStore } from './store'
+/**
+ * Argument of the hook returned by `makeReferenceStoredHook`: the reference's
+ * slug, its full body, and whether it is demoted. `demoted` is forwarded to
+ * `referencePassagesForOne`, whose parameter defaults to false when it is omitted.
+ */ export type ReferenceStoredContext = { slug: string; body: string; demoted?: boolean }
+// Embeds a freshly stored reference into the vector index immediately, mirroring
+// the stream-fragment on-write hook (`makeAppendHook`). Without this, a reference
+// is only embedded at the next startup index build, so it is vector-unretrievable
+// for the rest of the container's uptime. Chunks are derived by the same
+// `referencePassagesForOne` the startup build uses, so the rows agree.
+//
+// Re-storing a slug with a shorter body produces fewer chunks; the stale
+// higher-index `reference:<slug>#N` rows from the prior body must be pruned or
+// they would resurface as orphaned retrieval hooks for content that no longer
+// exists. We compute the wanted id set, upsert changed chunks, and delete any
+// existing row for this slug that is not wanted.
+//
+// `store` is the index to write to. `embedFn` defaults to the real embedder and
+// is injectable. The returned hook resolves once every changed chunk has been
+// embedded and upserted; chunks the embedder returns no vector for are skipped.
+export function makeReferenceStoredHook(
+  store: VectorStore,
+  embedFn: EmbedFn = embed,
+): (context: ReferenceStoredContext) => Promise<void> {
+  return async ({ slug, body, demoted }) => {
+    const passages = referencePassagesForOne(slug, body, demoted)
+    const wantedIds = new Set(passages.map((passage) => passage.id))
+    const prefix = `reference:${slug}#`
+    // One metadata scan serves both stale-row pruning and the freshness check
+    // (the same pattern `findMissingPassages` uses). Metadata never decodes the
+    // embedding BLOB, so a malformed row for this slug cannot make the hook
+    // throw before it gets the chance to overwrite that row, and unchanged
+    // chunks cost no extra query.
+    const existingById = new Map(store.getAllMeta().map((row) => [row.id, row]))
+    const staleIds = [...existingById.keys()].filter((id) => id.startsWith(prefix) && !wantedIds.has(id))
+    if (staleIds.length > 0) store.deleteMany(staleIds)
+    for (const passage of passages) {
+      const existing = existingById.get(passage.id)
+      // Skip chunks that are already embedded with the current model and content.
+      if (existing?.contentHash === passage.contentHash && existing.model === EMBEDDING_MODEL_ID) continue
+      const [embedding] = await embedFn([passage.text], 'passage')
+      if (embedding === undefined) continue
+      store.upsert({
+        id: passage.id,
+        source: 'reference',
+        key: slug,
+        model: EMBEDDING_MODEL_ID,
+        dims: embedding.length,
+        embedding,
+        contentHash: passage.contentHash,
+      })
+    }
+  }
+}

package/src/bundled-plugins/memory/vector/relevance-gate.ts ADDED Viewed

@@ -0,0 +1,93 @@
+// E5 embeddings (multilingual-e5-base) compress cosine similarity into a narrow
+// ~0.70-0.85 band even for unrelated pairs — a documented consequence of the
+// low InfoNCE training temperature (tau=0.01). Absolute thresholds therefore
+// cannot tell a real match from baseline noise: an unrelated query's top hit
+// (0.8055) can outscore a genuine match's top hit (0.7786) on a different query.
+//
+// The discriminating signal is QUERY-LOCAL CONTRAST: how far the best score
+// stands above this query's own baseline cluster. A real match lifts top1 well
+// clear of the pack; a no-match leaves top1 buried in the band. Measured on a
+// live 193-topic index, no-match queries land top1-baseline <= 0.051 and
+// has-match queries land >= 0.074, so a 0.06 margin separates them cleanly.
+//
+// The baseline is the MEDIAN of the non-head scores (robust to a near-duplicate
+// cluster inflating a raw mean), which keeps a genuine winner above a crowd of
+// similar topics. Suppression only fires once the non-head pack is large enough
+// to estimate the band: the top HEAD_EXCLUDED_FROM_BASELINE scores are dropped,
+// and at least MIN_BASELINE_PACK must remain. Below that, suppression is skipped
+// — a false negative (injecting one obvious shard) is cheaper than wrongly
+// suppressing the only relevant memory off a noisy 1-4 score tail.
+//
+// The contrast is top1 - median(non-head). A UNIFORM upward shift of the whole
+// band (the "single-domain memory, everything is somewhat related" case) cancels
+// out of that difference and leaves the verdict unchanged, so a concentrated
+// corpus does NOT structurally compress the gap. Only a genuine reduction in
+// rank SPREAD would, and a spread-normalized gate (gap >= k*MAD, or a z-score)
+// is rejected on purpose: a no-match query can also produce a tight non-head
+// pack plus one order-statistic outlier, which is exactly the case the absolute
+// MARGIN suppresses. We keep the absolute margin as the vector-only no-match
+// guard; recovery for a genuinely-suppressed single-domain match belongs in the
+// corroborating keyword lane, not in a weaker semantic threshold.
+const MARGIN = 0.06 // minimum (score - baseline) gap that counts as standing clear of the band
+const HEAD_EXCLUDED_FROM_BASELINE = 5 // number of top scores dropped before the baseline median is taken
+// Minimum non-head scores required to trust a suppression verdict. A median over
+// 1-4 scores (what n=6..9 leaves once the head is dropped) is too noisy to zero out the only
+// memory on, so the gated path needs HEAD_EXCLUDED_FROM_BASELINE + this many.
+const MIN_BASELINE_PACK = 5
+const GATED_TOPIC_FLOOR = HEAD_EXCLUDED_FROM_BASELINE + MIN_BASELINE_PACK // fewest topic scores for which gateRelevance may suppress
+// The contrast reference for ADMITTING stream rows: the median of the available
+// topic scores with the head trimmed. Topics define the ambient cosine band;
+// sparse streams consume it but never define it, so a nearest-neighbour cluster
+// of fragments can't move the bar. Returns null when fewer than two topic scores
+// exist — one score is not an ambient band, so an uncorroborated semantic-only
+// stream must not inject off it (it can still reach RRF via the keyword lane).
+//
+// The head trim is ADAPTIVE because streams never pass ungated: a strong top
+// topic must NOT raise the stream bar, or a genuinely-fresh fragment would have
+// to beat your best existing topic by the full margin and so never inject on a
+// small corpus. So we always drop at least top1, scaling the exclusion up to
+// HEAD_EXCLUDED_FROM_BASELINE only while a MIN_BASELINE_PACK-size tail survives.
+//   n=2..5  → drop top1, contrast against the remaining ambient topics
+//   n=6..9  → drop enough head to keep a MIN_BASELINE_PACK tail
+//   n>=10   → drop the full top HEAD_EXCLUDED_FROM_BASELINE
+// `topicScores` MUST be sorted descending.
+export function streamAdmissionBaseline(topicScores: number[]): number | null {
+  if (topicScores.length <= 1) return null // zero or one score: no band to contrast against
+  const excluded = Math.min(HEAD_EXCLUDED_FROM_BASELINE, Math.max(1, topicScores.length - MIN_BASELINE_PACK)) // clamped to 1..HEAD_EXCLUDED_FROM_BASELINE
+  return median(topicScores.slice(excluded)) // the slice is never empty here: length >= 2 and excluded < length
+}
+// Whether a single cosine score clears the band by the shared margin. Used to
+// admit stream rows against the topic contrast reference: a stream candidate
+// survives only if it stands as far above the band as a real topic match would.
+// A null baseline (fewer than two topic scores, per streamAdmissionBaseline) admits nothing.
+export function clearsBaseline(score: number, baseline: number | null): boolean {
+  return baseline !== null && score - baseline >= MARGIN
+}
+// Returns how many of the sorted-descending topic cosine scores survive the
+// gate. Zero means "no relevant memory matched" — a valid, expected outcome the
+// caller injects as an empty memory block. Below GATED_TOPIC_FLOOR the non-head
+// tail is too short (1-4 scores) for a reliable suppression verdict, so topics
+// pass ungated (a false negative of one obvious shard is cheaper than
+// suppressing the only memory). `scores` MUST be sorted descending.
+export function gateRelevance(scores: number[], topK: number): number {
+  if (scores.length === 0 || topK <= 0) return 0
+  if (scores.length < GATED_TOPIC_FLOOR) return Math.min(scores.length, topK) // ungated path: keep up to topK
+  const top = scores[0]!
+  const margin = top - median(scores.slice(HEAD_EXCLUDED_FROM_BASELINE)) // query-local contrast: top1 minus the non-head median
+  if (margin < MARGIN) return 0 // top1 does not stand clear of the band: report no match
+  const knee = top - 0.5 * margin // cut-off halfway between the baseline and top1
+  const survivors = scores.filter((score) => score >= knee).length // with sorted input this counts a leading run
+  return Math.min(survivors, topK)
+}
+/**
+ * Median of `values`: the middle element of the sorted copy, or the mean of the
+ * two middle elements for an even count. Does not mutate its input. Returns 0
+ * for an empty array.
+ */ function median(values: number[]): number {
+  if (values.length === 0) return 0
+  const sorted = [...values].sort((a, b) => a - b) // numeric ascending sort on a copy
+  const mid = Math.floor(sorted.length / 2)
+  return sorted.length % 2 === 0 ? (sorted[mid - 1]! + sorted[mid]!) / 2 : sorted[mid]!
+}

package/src/bundled-plugins/memory/vector/startup.ts ADDED Viewed

@@ -0,0 +1,71 @@
+import { join } from 'node:path'
+import { EMBEDDING_MODEL_ID, embed } from './embedder'
+import { collectPassages, findMissingPassages, type EmbedFn } from './hybrid'
+import { VectorStore } from './store'
+/**
+ * Brings the on-disk vector index for `agentDir` in line with the passages that
+ * should exist: prunes current-model rows whose passage is gone, then embeds and
+ * upserts every passage `findMissingPassages` reports.
+ *
+ * @param agentDir - Agent directory; the index is opened at `memory/.vectors/index.db` beneath it.
+ * @param embedFn - Embedding function; defaults to `embed` and can be injected.
+ * @returns `built` is true only when at least one row was written, `pruned` is
+ * the number of stale rows deleted, and `count` is the number of rows written.
+ */ export async function buildStartupVectorIndex(
+  agentDir: string,
+  embedFn: EmbedFn = embed,
+): Promise<{ built: boolean; pruned: number; count: number }> {
+  const store = VectorStore.open(join(agentDir, 'memory', '.vectors', 'index.db'))
+  try {
+    const wanted = await collectPassages(agentDir)
+    // Prune current-model rows whose id left the desired passage set (deleted
+    // topics, dreamed-then-GC'd fragments, and — load-bearing here — fragments
+    // dreaming marked superseded). Without this, superseded `stream:*` rows stay
+    // in the table and can outrank active rows by raw cosine, consuming the
+    // finite `topK * 2` candidates before parent-child fusion ever sees them.
+    const pruned = pruneStaleRows(store, wanted)
+    const passages = findMissingPassages(store, wanted)
+    if (passages.length === 0) return { built: false, pruned, count: 0 } // index already current; deleteOtherModels is not run on this path
+    const embeddings = await embedFn(
+      passages.map((passage) => passage.text),
+      'passage',
+    )
+    let count = 0
+    for (let i = 0; i < passages.length; i++) {
+      const passage = passages[i]!
+      const embedding = embeddings[i]
+      if (embedding === undefined) continue // no vector for this input: the passage stays missing and is retried on a later build
+      store.upsert({
+        id: passage.id,
+        source: passage.source,
+        key: passage.key,
+        model: EMBEDDING_MODEL_ID,
+        dims: embedding.length,
+        embedding,
+        contentHash: passage.contentHash,
+      })
+      count += 1
+    }
+    if (count === 0) return { built: false, pruned, count: 0 }
+    // After a model/dtype switch, rows from the prior variant can linger with a
+    // stale `model` stamp: passages that still exist are overwritten by id above,
+    // but orphans of removed content are not, and pruneStaleRows only touches
+    // current-model rows. query() already excludes them, so this is hygiene (bounding
+    // DB growth across variant switches), not correctness. Runs only after a successful re-embed.
+    store.deleteOtherModels(EMBEDDING_MODEL_ID)
+    return { built: true, pruned, count }
+  } finally {
+    store.close() // always release the database handle, including on the early returns and on errors
+  }
+}
+/**
+ * Deletes rows stamped with the current embedding model whose id is not among
+ * `wanted`, and returns how many were deleted. Rows stamped with any other model
+ * are not touched here.
+ */ function pruneStaleRows(store: VectorStore, wanted: Awaited<ReturnType<typeof collectPassages>>): number {
+  const wantedIds = new Set(wanted.map((passage) => passage.id))
+  const stale = store
+    .getAllMeta()
+    .filter((row) => row.model === EMBEDDING_MODEL_ID && !wantedIds.has(row.id))
+    .map((row) => row.id)
+  if (stale.length > 0) store.deleteMany(stale)
+  return stale.length
+}

package/src/bundled-plugins/memory/vector/store.ts ADDED Viewed

@@ -0,0 +1,203 @@
+import { Database } from 'bun:sqlite'
+import { mkdirSync } from 'node:fs'
+import { dirname } from 'node:path'
+/**
+ * A decoded row of the `vectors` table.
+ */ export type VectorRow = {
+  id: string
+  source: 'topic' | 'stream' | 'reference'
+  key: string
+  model: string // embedding model stamp; queries only compare rows with a matching stamp
+  dims: number
+  embedding: Float32Array // decoded from the stored BLOB
+  contentHash: string
+  updatedAt: string // ISO-8601 timestamp written by `upsert`
+}
+/** Row metadata that can be read without decoding the embedding. */ export type VectorMeta = { id: string; model: string; contentHash: string }
+/** A row paired with its cosine similarity to a query vector. */ export type ScoredVectorRow = { row: VectorRow; score: number }
+/**
+ * Row shape as selected from SQLite: snake_case column names and the embedding
+ * still held as raw bytes. Converted to `VectorRow` by `toVectorRow`.
+ */ type StoredVectorRow = {
+  id: string
+  source: 'topic' | 'stream' | 'reference'
+  key: string
+  model: string
+  dims: number
+  embedding: Uint8Array
+  content_hash: string
+  updated_at: string
+}
+/**
+ * SQLite-backed store of embedding vectors, searched by a brute-force cosine
+ * scan. Instances are created with `VectorStore.open`; call `close` when done.
+ */ export class VectorStore {
+  /**
+   * Creates the parent directory of `dbPath` if needed, opens the database, and
+   * creates the `vectors` table when it does not exist yet.
+   */ static open(dbPath: string): VectorStore {
+    mkdirSync(dirname(dbPath), { recursive: true })
+    const db = new Database(dbPath)
+    db.run(`
+      CREATE TABLE IF NOT EXISTS vectors (
+        id TEXT PRIMARY KEY,
+        source TEXT NOT NULL,
+        key TEXT NOT NULL,
+        model TEXT NOT NULL,
+        dims INTEGER NOT NULL,
+        embedding BLOB NOT NULL,
+        content_hash TEXT NOT NULL,
+        updated_at TEXT NOT NULL
+      )
+    `)
+    return new VectorStore(db)
+  }
+  private constructor(private readonly db: Database) {} // private: construct through VectorStore.open
+  /**
+   * Inserts the row, or replaces the existing row with the same id, stamping
+   * `updated_at` with the current time. Does nothing when a row with the same id
+   * already carries the same content hash and model.
+   */ upsert(row: Omit<VectorRow, 'updatedAt'>): void {
+    const existing = this.db
+      .query<{ content_hash: string; model: string }, [string]>('SELECT content_hash, model FROM vectors WHERE id = ?')
+      .get(row.id)
+    if (existing?.content_hash === row.contentHash && existing.model === row.model) {
+      return
+    }
+    this.db
+      .query(
+        `INSERT INTO vectors (id, source, key, model, dims, embedding, content_hash, updated_at)
+         VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+         ON CONFLICT(id) DO UPDATE SET
+           source = excluded.source,
+           key = excluded.key,
+           model = excluded.model,
+           dims = excluded.dims,
+           embedding = excluded.embedding,
+           content_hash = excluded.content_hash,
+           updated_at = excluded.updated_at`,
+      )
+      .run(
+        row.id,
+        row.source,
+        row.key,
+        row.model,
+        row.dims,
+        Buffer.from(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength), // exactly the bytes this Float32Array covers, honouring its offset into a larger buffer
+        row.contentHash,
+        new Date().toISOString(),
+      )
+  }
+  /**
+   * Returns the `topK` rows most similar to `embedding` among the rows that
+   * `queryScored` considers compatible with `modelId`, best match first.
+   */ query(embedding: Float32Array, topK: number, modelId: string): VectorRow[] {
+    if (topK <= 0) return []
+    return this.queryScored(embedding, modelId)
+      .slice(0, topK)
+      .map(({ row }) => row)
+  }
+  // Same cosine scan as `query` but returns every compatible row WITH its score
+  // and unsliced, so a caller can reason about the full score distribution (the
+  // relevance gate's per-query baseline) before deciding how many to keep.
+  //
+  // Filter by embedding identity, not dims alone: a stale row from a different
+  // model/dtype variant can share the same dims but lives in an incompatible
+  // vector space, so cosine against it is garbage. Excluding it here keeps a
+  // partial re-embed (mixed variants mid-rebuild) at reduced recall, never
+  // wrong scores.
+  queryScored(embedding: Float32Array, modelId: string): ScoredVectorRow[] {
+    // The query vector's magnitude is identical for every row in this scan, so
+    // hoist it out of the per-row cosine instead of recomputing N times (each
+    // recompute is 768 multiply-adds + a sqrt). Behavior is unchanged — same
+    // cosine values, fewer operations on the brute-force hot path.
+    const queryMagnitude = magnitude(embedding)
+    return this.db
+      .query<StoredVectorRow, [string, number]>('SELECT * FROM vectors WHERE model = ? AND dims = ?')
+      .all(modelId, embedding.length)
+      .map(toVectorRow)
+      .map((row) => ({ row, score: cosineSimilarity(embedding, queryMagnitude, row.embedding) }))
+      .sort((a, b) => b.score - a.score) // highest similarity first
+  }
+  /** Deletes every row whose model stamp differs from `modelId`. */ deleteOtherModels(modelId: string): void {
+    this.db.query('DELETE FROM vectors WHERE model != ?').run(modelId)
+  }
+  /** Deletes the row with the given id, if any. */ delete(id: string): void {
+    this.db.query('DELETE FROM vectors WHERE id = ?').run(id)
+  }
+  /** Deletes all given ids inside one transaction, reusing one prepared statement. */ deleteMany(ids: string[]): void {
+    const statement = this.db.query('DELETE FROM vectors WHERE id = ?')
+    const remove = this.db.transaction((values: string[]) => {
+      for (const id of values) statement.run(id)
+    })
+    remove(ids)
+  }
+  /** Returns every row, decoded, ordered by id. */ getAll(): VectorRow[] {
+    return this.db.query<StoredVectorRow, []>('SELECT * FROM vectors ORDER BY id').all().map(toVectorRow)
+  }
+  // Metadata only — never decodes the embedding BLOB, so a row whose blob is
+  // malformed (byte length not a multiple of 4) can't throw here the way
+  // getAll's Float32Array decode would.
+  getAllMeta(): VectorMeta[] {
+    return this.db
+      .query<{ id: string; model: string; content_hash: string }, []>(
+        'SELECT id, model, content_hash FROM vectors ORDER BY id',
+      )
+      .all()
+      .map((row) => ({ id: row.id, model: row.model, contentHash: row.content_hash }))
+  }
+  /**
+   * Looks up rows by id and decodes them. Ids with no row are omitted, so the
+   * result may be shorter than `ids`; found rows follow the order of `ids`.
+   */ getByIds(ids: string[]): VectorRow[] {
+    const statement = this.db.query<StoredVectorRow, [string]>('SELECT * FROM vectors WHERE id = ?')
+    return ids.flatMap((id) => {
+      const row = statement.get(id)
+      return row ? [toVectorRow(row)] : []
+    })
+  }
+  /** Closes the underlying database handle. */ close(): void {
+    this.db.close()
+  }
+}
+/**
+ * Converts a raw SQLite row into a `VectorRow`: renames the snake_case columns
+ * and decodes the embedding BLOB via `blobToFloat32Array`.
+ */ function toVectorRow(row: StoredVectorRow): VectorRow {
+  return {
+    id: row.id,
+    source: row.source,
+    key: row.key,
+    model: row.model,
+    dims: row.dims,
+    embedding: blobToFloat32Array(row.embedding),
+    contentHash: row.content_hash,
+    updatedAt: row.updated_at,
+  }
+}
+/**
+ * Reinterprets a stored BLOB as 32-bit floats. The bytes are copied into a
+ * standalone ArrayBuffer first, so the resulting array starts at byte offset 0
+ * and does not alias the input. The `Float32Array` constructor throws a
+ * RangeError when the byte length is not a multiple of 4.
+ */ function blobToFloat32Array(blob: Uint8Array): Float32Array {
+  const bytes = Buffer.from(blob)
+  const buffer = bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength) // only this Buffer's own bytes, as a fresh ArrayBuffer
+  return new Float32Array(buffer)
+}
+/** Euclidean (L2) norm of `v`: the square root of the sum of squared components. */ function magnitude(v: Float32Array): number {
+  let sumSquares = 0
+  for (let i = 0; i < v.length; i++) {
+    const value = v[i] ?? 0
+    sumSquares += value * value
+  }
+  return Math.sqrt(sumSquares)
+}
+/**
+ * Cosine similarity between `a` and `b`, where `aMagnitude` is the precomputed
+ * norm of `a` supplied by the caller. The loop runs over `a.length`; any index
+ * missing from `b` contributes 0. Returns 0 when either magnitude is 0.
+ */ function cosineSimilarity(a: Float32Array, aMagnitude: number, b: Float32Array): number {
+  let dot = 0
+  let bSumSquares = 0
+  for (let i = 0; i < a.length; i++) { // one pass accumulates both the dot product and b's squared norm
+    const aValue = a[i] ?? 0
+    const bValue = b[i] ?? 0
+    dot += aValue * bValue
+    bSumSquares += bValue * bValue
+  }
+  const bMagnitude = Math.sqrt(bSumSquares)
+  if (aMagnitude === 0 || bMagnitude === 0) return 0 // avoid dividing by zero for an all-zero vector
+  return dot / (aMagnitude * bMagnitude)
+}

package/src/bundled-plugins/memory/vector/truncation.ts ADDED Viewed

@@ -0,0 +1,124 @@
+// Xenova/multilingual-e5-base is a 512-token model; transformers.js truncates
+// past that limit by default. Inputs longer than the cap would otherwise lose
+// their tail from the embedded match surface SILENTLY. Canonical compact shards
+// (heading + one belief sentence first) sit well under the cap; legacy verbose
+// shards, legacy prose migration events, long fragments, and very long queries
+// can exceed it. This module estimates token length cheaply (no tokenizer) and
+// bounds the embeddable text deterministically so the cut is explicit, not a
+// hidden tokenizer side effect. The dreaming subagent separately compacts the
+// flagged shards over time, but bounding here guarantees no silent loss even
+// for inputs dreaming never rewrites (e.g. raw legacy prose).
+export const MAX_MODEL_TOKENS = 512 // the model's input limit in tokens, prefix included
+// The E5 prefix ("query: " / "passage: ") is prepended before tokenization and
+// eats a couple of tokens from the budget. Subtracting a small reserve keeps
+// the bound honest about the text budget the caller actually gets.
+const PREFIX_TOKEN_RESERVE = 4
+// Char-per-token ratio for a rough, deliberately CONSERVATIVE token estimate.
+// multilingual-e5-base tokenizes CJK and other non-Latin scripts into far more
+// tokens per character than English, so a single chars/token ratio would badly
+// under-count them. We estimate per script: ~3.5 chars/token for Latin-ish
+// text, ~1 token per CJK character. Over-estimating (bounding a little early) is
+// safer than under-estimating (letting the tokenizer cut silently), so the
+// ratios lean toward flagging.
+//
+// The char ratio ALONE under-counts many short words: WordPiece emits at least
+// one token per whitespace-delimited word, so `'a '.repeat(509)` is ~509 tokens
+// but only ~291 by chars/3.5. The non-CJK estimate therefore takes the MAX of
+// the char-ratio count and the word count, and the inverse (charBudgetForTokens)
+// charges a token at each word start too, so a bounded string re-estimates to at most the budget under either term.
+// NOTE(review): multilingual-e5-base derives from XLM-R and likely uses SentencePiece, not WordPiece; the per-word lower bound is assumed to hold either way — confirm.
+const LATIN_CHARS_PER_TOKEN = 3.5
+// Effective text-token budget once the prefix reserve is removed.
+export const TEXT_TOKEN_BUDGET = MAX_MODEL_TOKENS - PREFIX_TOKEN_RESERVE
+// CJK Unified Ideographs, Hiragana, Katakana, Hangul — scripts the tokenizer
+// splits at roughly one token per character (often more). Counted 1:1.
+// The `g` variant is for counting matches across a whole string (estimateTokens);
+// the non-global variant is for per-character tests (charBudgetForTokens), where
+// a global regex's stateful lastIndex would make repeated `.test()` calls flip.
+const CJK_COUNT_PATTERN = /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uac00-\ud7af\uff66-\uff9f]/gu
+const CJK_CHAR_PATTERN = /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uac00-\ud7af\uff66-\uff9f]/u
+// A conservative token-count estimate that never loads the tokenizer (which
+// would defeat the embedder's deliberate lazy-load of the heavy native stack).
+// CJK chars count 1 token each; the remaining (non-CJK) text counts the MAX of
+// its char-ratio estimate and its word count, because the tokenizer is assumed never to
+// emit fewer than one token per whitespace-delimited word.
+export function estimateTokens(text: string): number {
+  const cjkCount = (text.match(CJK_COUNT_PATTERN) ?? []).length
+  const nonCjkChars = text.length - cjkCount // UTF-16 length; every range in the CJK pattern is a single code unit, so the subtraction is exact
+  const charBased = Math.ceil(nonCjkChars / LATIN_CHARS_PER_TOKEN)
+  return cjkCount + Math.max(charBased, countNonCjkWords(text))
+}
+// Count whitespace-delimited words AFTER removing CJK chars (those are already
+// charged one token each), so a CJK run isn't double-counted as an extra word.
+function countNonCjkWords(text: string): number {
+  return (text.replace(CJK_COUNT_PATTERN, ' ').match(/\S+/gu) ?? []).length // CJK becomes a space, so non-CJK text on either side of a CJK char counts as separate words
+}
+/** True when the estimated token count of `text` exceeds `TEXT_TOKEN_BUDGET`. */ export function isOverBudget(text: string): boolean {
+  return estimateTokens(text) > TEXT_TOKEN_BUDGET
+}
+// Topic passages go through `topicPassage` in passages.ts (it strips citation
+// lines from the embedded text); over-budget detection for topics derives from
+// that same helper so the budget check matches what is actually embedded. This
+// helper covers stream fragments, whose embedded text is `topic\nbody`.
+export function fragmentEmbeddableText(event: { topic: string; body: string }): string {
+  return `${event.topic}\n${event.body}` // topic on the first line, body after it, with no other processing
+}
+/**
+ * Result of `boundEmbeddableText`.
+ */ export type BoundedText = {
+  text: string // the input itself, or its truncated prefix when `bounded` is true
+  bounded: boolean // true when the input was over budget and has been cut
+  estimatedTokens: number // estimate for the ORIGINAL input, not for the returned text
+}
+// Deterministically trim text to the estimated token budget BEFORE embedding, so
+// the tokenizer's implicit cut never fires and the truncation point is one we
+// own and can record. Bounds on a character budget derived from the same
+// conservative estimate; the leading heading/belief sentence (the load-bearing
+// retrieval signal) always survives because it comes first.
+export function boundEmbeddableText(text: string): BoundedText {
+  const estimatedTokens = estimateTokens(text)
+  if (estimatedTokens <= TEXT_TOKEN_BUDGET) {
+    return { text, bounded: false, estimatedTokens } // within budget: returned untouched
+  }
+  const charBudget = charBudgetForTokens(text, TEXT_TOKEN_BUDGET)
+  return { text: text.slice(0, charBudget), bounded: true, estimatedTokens } // keeps the head of the text and drops the tail
+}
+// Returns the longest prefix length (in chars) whose estimateTokens is still
+// within budget. Recomputes the EXACT same estimate incrementally — CJK chars
+// at 1 token, plus max(non-CJK char-ratio, word count) — so a bounded prefix can
+// never re-estimate above the budget no matter which term dominates. estimate is
+// monotonic non-decreasing in prefix length, so a single forward walk suffices.
+function charBudgetForTokens(text: string, tokenBudget: number): number {
+  let cjk = 0
+  let nonCjk = 0
+  let words = 0
+  let inWord = false
+  let chars = 0
+  for (const char of text) { // iterates by code point, so a surrogate pair is never split
+    const isCjk = CJK_CHAR_PATTERN.test(char)
+    const isSpace = /\s/u.test(char)
+    // A word start is a non-space, non-CJK char that follows a non-word char.
+    // CJK chars are charged via `cjk`, never as words (mirrors countNonCjkWords).
+    const startsWord = !isSpace && !isCjk && !inWord
+    const nextCjk = isCjk ? cjk + 1 : cjk
+    const nextNonCjk = isCjk ? nonCjk : nonCjk + char.length
+    const nextWords = startsWord ? words + 1 : words
+    const estimate = nextCjk + Math.max(Math.ceil(nextNonCjk / LATIN_CHARS_PER_TOKEN), nextWords)
+    if (estimate > tokenBudget) break // including this char would exceed the budget: stop before it
+    cjk = nextCjk
+    nonCjk = nextNonCjk
+    words = nextWords
+    inWord = !isSpace && !isCjk
+    chars += char.length // counted in UTF-16 units so the result is directly usable as a String.slice end index
+  }
+  return chars
+}

package/src/bundled-plugins/security/policies/outbound-secret-scan.ts CHANGED Viewed

@@ -56,6 +56,8 @@ const PROCESS_ENV_TARGETS: ReadonlyArray<string> = [
   'ANTHROPIC_API_KEY',
   'MINIMAX_API_KEY',
   'DEEPSEEK_API_KEY',
+  'MOONSHOT_API_KEY',
+  'MOONSHOT_CODING_API_KEY',
   'GOOGLE_API_KEY',
   'GEMINI_API_KEY',
   'AWS_ACCESS_KEY_ID',