npm - typeclaw - Versions diffs - 0.36.7 → 0.37.0 - Mend

typeclaw 0.36.7 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112)

package/README.md +2 -2
package/package.json +3 -2
package/src/agent/index.ts +31 -11
package/src/agent/live-sessions.ts +12 -0
package/src/agent/model-fallback.ts +17 -15
package/src/agent/model-overrides.ts +2 -2
package/src/agent/session-meta.ts +10 -0
package/src/agent/subagents.ts +11 -2
package/src/agent/system-prompt.ts +9 -3
package/src/agent/todo/continuation-policy.ts +6 -3
package/src/agent/todo/continuation-wiring.ts +4 -2
package/src/agent/todo/continuation.ts +3 -3
package/src/agent/tools/todo/index.ts +27 -4
package/src/bundled-plugins/agent-browser/index.ts +33 -108
package/src/bundled-plugins/agent-browser/shim.ts +3 -94
package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
package/src/bundled-plugins/memory/README.md +80 -23
package/src/bundled-plugins/memory/append-tool.ts +74 -53
package/src/bundled-plugins/memory/citation-superset.ts +4 -0
package/src/bundled-plugins/memory/citations.ts +54 -0
package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
package/src/bundled-plugins/memory/dreaming.ts +444 -21
package/src/bundled-plugins/memory/index.ts +544 -400
package/src/bundled-plugins/memory/load-memory.ts +87 -10
package/src/bundled-plugins/memory/load-shards.ts +48 -22
package/src/bundled-plugins/memory/memory-logger.ts +95 -106
package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
package/src/bundled-plugins/memory/parent-link.ts +33 -0
package/src/bundled-plugins/memory/paths.ts +12 -0
package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
package/src/bundled-plugins/memory/references/load-references.ts +212 -0
package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
package/src/bundled-plugins/memory/search-tool.ts +282 -45
package/src/bundled-plugins/memory/stream-events.ts +1 -0
package/src/bundled-plugins/memory/stream-io.ts +28 -3
package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
package/src/bundled-plugins/memory/vector/config.ts +28 -0
package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
package/src/bundled-plugins/memory/vector/passages.ts +125 -0
package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
package/src/bundled-plugins/memory/vector/startup.ts +71 -0
package/src/bundled-plugins/memory/vector/store.ts +203 -0
package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
package/src/channels/router.ts +239 -40
package/src/cli/incomplete-init.ts +57 -0
package/src/cli/init.ts +143 -12
package/src/cli/inspect.ts +11 -5
package/src/cli/model.ts +112 -34
package/src/cli/restart.ts +24 -0
package/src/cli/start.ts +24 -0
package/src/cli/tunnel.ts +53 -8
package/src/config/config.ts +110 -19
package/src/config/index.ts +5 -1
package/src/config/models-mutation.ts +29 -11
package/src/config/providers-mutation.ts +2 -2
package/src/config/providers.ts +146 -12
package/src/container/shared.ts +9 -0
package/src/container/start.ts +87 -4
package/src/cron/consumer.ts +13 -7
package/src/hostd/models.ts +64 -0
package/src/hostd/paths.ts +6 -0
package/src/hostd/portbroker-manager.ts +2 -2
package/src/init/checkpoint.ts +201 -0
package/src/init/dockerfile.ts +164 -51
package/src/init/gitignore.ts +7 -7
package/src/init/index.ts +41 -9
package/src/init/line-auth.ts +50 -21
package/src/init/models-dev.ts +96 -21
package/src/init/oauth-login.ts +3 -3
package/src/init/progress.ts +29 -0
package/src/init/validate-api-key.ts +4 -0
package/src/inspect/index.ts +13 -6
package/src/inspect/item-list.ts +11 -2
package/src/inspect/live-list.ts +65 -0
package/src/inspect/open-item.ts +22 -1
package/src/inspect/session-list.ts +29 -0
package/src/models/embedding-model.ts +114 -0
package/src/models/transformers-version.ts +55 -0
package/src/plugin/types.ts +3 -0
package/src/portbroker/container-server.ts +23 -0
package/src/portbroker/forward-request-bus.ts +35 -0
package/src/portbroker/forward-result-bus.ts +2 -3
package/src/portbroker/hostd-client.ts +182 -36
package/src/portbroker/index.ts +6 -1
package/src/portbroker/protocol.ts +9 -2
package/src/run/channel-session-factory.ts +11 -1
package/src/run/index.ts +41 -7
package/src/server/command-runner.ts +24 -1
package/src/server/index.ts +42 -8
package/src/shared/index.ts +2 -0
package/src/shared/protocol.ts +31 -0
package/src/skills/typeclaw-channels/SKILL.md +4 -4
package/src/skills/typeclaw-config/SKILL.md +2 -2
package/src/skills/typeclaw-memory/SKILL.md +3 -1
package/src/skills/typeclaw-permissions/SKILL.md +3 -3
package/src/skills/typeclaw-skills/SKILL.md +1 -1
package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
package/src/tunnels/providers/cloudflare-quick.ts +65 -7
package/src/tunnels/upstream-probe.ts +25 -0
package/typeclaw.schema.json +156 -67
package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
package/src/portbroker/bind-with-forward.ts +0 -102

package/src/config/providers.ts CHANGED

@@ -1,4 +1,4 @@
-import type { Api, Model } from '@mariozechner/pi-ai'
+import type { KnownApi, Model } from '@mariozechner/pi-ai'
 // Authentication mechanism a provider supports. `api-key` reads a static key
 // from .env (the original path); `oauth` runs a browser flow at init time and
@@ -18,16 +18,13 @@ type KnownProvider = {
   auth: ReadonlyArray<AuthMethod>
   apiKeyEnv: string | null
   oauthProviderId: string | null
-  models: Record<string, Model<Api>>
+  models: Record<string, Model<KnownApi>>
 }
-// Curated allowlist of providers + models that are wired into the agent
-// runtime. The values here back the Zod enum on every entry in
-// `configSchema.models`, so any model the user can put in `typeclaw.json`
-// (under any profile name) MUST appear here verbatim. The
-// init-time picker may surface additional models from models.dev, but it
-// resolves them through this list before scaffolding (anything missing falls
-// back to a curated default).
+// Curated provider + model table. Provider ids remain the allowlist for
+// `typeclaw.json` refs, while the model entries are the tested defaults and
+// JSON-schema autocomplete set. The init/model pickers may surface additional
+// models from models.dev as long as the provider prefix is one of these ids.
 //
 // Adding a new model: append it to the matching provider's `models` map. Each
 // model object is the literal `Model<...>` that pi-ai consumes — keep it
@@ -703,6 +700,118 @@ export const KNOWN_PROVIDERS = {
       },
     },
   },
+  // Moonshot AI (Kimi) — Open Platform pay-as-you-go API. The platform exposes
+  // an OpenAI-compatible surface at api.moonshot.ai/v1 (Bearer auth +
+  // /chat/completions shape), so models go through pi-ai's `openai-completions`
+  // adapter with a custom baseUrl — same trick as Fireworks, Z.AI, MiniMax, and
+  // DeepSeek. api-key only; the platform ships no OAuth flow.
+  //
+  // Moonshot also offers an Anthropic-compatible route (api.moonshot.ai/anthropic)
+  // on the same key, but it rescales temperature (real = requested × 0.6) and
+  // would be the FIRST `anthropic-messages` transport pointed at a non-Anthropic
+  // baseUrl in this codebase. We deliberately stay on the proven OpenAI-compatible
+  // path so behavior matches every other paygo provider.
+  //
+  // The split with `moonshot-coding` below mirrors `zai` / `zai-coding`: same
+  // upstream vendor, two distinct billing surfaces (Open Platform paygo vs the
+  // Kimi Code subscription) on two distinct base URLs with two distinct env
+  // vars, so a user can hold both keys at once. The Open Platform key does NOT
+  // work against the Kimi Code endpoint, and vice versa.
+  //
+  // Model lineup mirrors the OpenAI-compatible model list on platform.kimi.ai
+  // as of 2026-06-14: kimi-k2.7-code (flagship coding model, always-on thinking,
+  // text+image), kimi-k2.6 (general flagship, text+image), and kimi-k2.5
+  // (general, text+image). All three fold reasoning in via the `thinking`
+  // request parameter, so no separate "thinking" model id is needed. The whole
+  // legacy kimi-k2 series (kimi-k2-thinking, k2-0905/0711/turbo previews) was
+  // officially discontinued on 2026-05-25 and is intentionally omitted, as are
+  // the legacy moonshot-v1-* models. Costs are USD per 1M tokens from
+  // platform.kimi.ai pricing; Moonshot publishes no cache-write surcharge, so
+  // cacheWrite is 0. (pi-ai's `input` array only models text/image — Moonshot's
+  // video input on the K2.x models can't be expressed here, so it is omitted.)
+  moonshot: {
+    id: 'moonshot',
+    name: 'Moonshot (Kimi)',
+    baseUrl: 'https://api.moonshot.ai/v1',
+    auth: ['api-key'],
+    apiKeyEnv: 'MOONSHOT_API_KEY',
+    oauthProviderId: null,
+    models: {
+      'kimi-k2.7-code': {
+        id: 'kimi-k2.7-code',
+        name: 'Kimi K2.7 Code',
+        api: 'openai-completions',
+        provider: 'moonshot',
+        baseUrl: 'https://api.moonshot.ai/v1',
+        reasoning: true,
+        input: ['text', 'image'],
+        cost: { input: 0.6, output: 2.5, cacheRead: 0.15, cacheWrite: 0 },
+        contextWindow: 256000,
+        maxTokens: 64000,
+      },
+      'kimi-k2.6': {
+        id: 'kimi-k2.6',
+        name: 'Kimi K2.6',
+        api: 'openai-completions',
+        provider: 'moonshot',
+        baseUrl: 'https://api.moonshot.ai/v1',
+        reasoning: true,
+        input: ['text', 'image'],
+        cost: { input: 0.6, output: 2.5, cacheRead: 0.15, cacheWrite: 0 },
+        contextWindow: 256000,
+        maxTokens: 64000,
+      },
+      'kimi-k2.5': {
+        id: 'kimi-k2.5',
+        name: 'Kimi K2.5',
+        api: 'openai-completions',
+        provider: 'moonshot',
+        baseUrl: 'https://api.moonshot.ai/v1',
+        reasoning: true,
+        input: ['text', 'image'],
+        cost: { input: 0.6, output: 2.5, cacheRead: 0.15, cacheWrite: 0 },
+        contextWindow: 256000,
+        maxTokens: 64000,
+      },
+    },
+  },
+  // Moonshot AI Kimi Code — the Coding Plan subscription product. Distinct from
+  // the Open Platform above: a separate domain (api.kimi.com/coding/v1), a
+  // separate subscription key created at kimi.com/code/console, and a separate
+  // env var (`MOONSHOT_CODING_API_KEY`) so a user can hold both an Open Platform
+  // paygo key and a Coding Plan key without collisions. Kimi Code exposes an
+  // OpenAI-compatible route (Bearer auth + /chat/completions) alongside its
+  // Anthropic-compatible one; we use the OpenAI-compatible route so it threads
+  // through the same `openai-completions` adapter as every other paygo provider.
+  //
+  // Single model alias: `kimi-for-coding` is a STABLE ALIAS that the Coding Plan
+  // backend routes to the latest underlying model (currently the K2.6 family).
+  // Version-pinned ids are NOT accepted on this endpoint and fail silently, so
+  // the alias is the only id listed. Costs are 0 because the Coding Plan bills a
+  // flat subscription quota, not per-token — there is no per-token price to
+  // attribute (same convention as the Fireworks Fire Pass router above).
+  'moonshot-coding': {
+    id: 'moonshot-coding',
+    name: 'Moonshot (Kimi Coding Plan)',
+    baseUrl: 'https://api.kimi.com/coding/v1',
+    auth: ['api-key'],
+    apiKeyEnv: 'MOONSHOT_CODING_API_KEY',
+    oauthProviderId: null,
+    models: {
+      'kimi-for-coding': {
+        id: 'kimi-for-coding',
+        name: 'Kimi for Coding',
+        api: 'openai-completions',
+        provider: 'moonshot-coding',
+        baseUrl: 'https://api.kimi.com/coding/v1',
+        reasoning: true,
+        input: ['text', 'image'],
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+        contextWindow: 256000,
+        maxTokens: 64000,
+      },
+    },
+  },
 } as const satisfies Record<string, KnownProvider>
 export type KnownProviderId = keyof typeof KNOWN_PROVIDERS
@@ -776,6 +885,15 @@ export const KNOWN_PROVIDER_VENDORS = {
     name: 'DeepSeek',
     providers: ['deepseek'],
   },
+  moonshot: {
+    id: 'moonshot',
+    name: 'Moonshot (Kimi)',
+    providers: ['moonshot', 'moonshot-coding'],
+    variants: {
+      moonshot: { label: 'Pay-as-you-go', hint: 'Moonshot Open Platform API billing' },
+      'moonshot-coding': { label: 'Coding Plan', hint: 'Kimi Code subscription' },
+    },
+  },
 } as const satisfies Record<string, KnownProviderVendor>
 export type KnownProviderVendorId = keyof typeof KNOWN_PROVIDER_VENDORS
@@ -820,6 +938,8 @@ export type KnownModelRef = {
   [P in KnownProviderId]: `${P}/${Extract<keyof (typeof KNOWN_PROVIDERS)[P]['models'], string>}`
 }[KnownProviderId]
+export type ModelRef = string & { readonly __modelRef: unique symbol }
 export function listKnownModelRefs(): KnownModelRef[] {
   const refs: string[] = []
   for (const providerId of Object.keys(KNOWN_PROVIDERS) as KnownProviderId[]) {
@@ -830,19 +950,33 @@ export function listKnownModelRefs(): KnownModelRef[] {
   return refs as KnownModelRef[]
 }
+export function isKnownModelRef(value: string): value is KnownModelRef {
+  return (listKnownModelRefs() as ReadonlyArray<string>).includes(value)
+}
+export function isModelRef(value: string): value is ModelRef {
+  return /^[a-z0-9][a-z0-9-]*\/[^\s/][^\s]*$/.test(value) && knownProviderForModelRef(value) !== null
+}
 // The default we hand to scaffolded `typeclaw.json` and the schema's
 // `model.default`. Lives here (next to the provider table) so adding a model
 // can't drift from the field default — both come from the same module.
 export const DEFAULT_MODEL_REF: KnownModelRef = 'openai/gpt-5.4-nano'
-export function providerForModelRef(ref: KnownModelRef): KnownProviderId {
+export function providerForModelRef(ref: KnownModelRef | ModelRef | string): KnownProviderId {
   // KnownModelRef is `${provider}/${modelId}`, but provider IDs themselves can
   // contain '-' and model IDs can contain '/' (Fireworks). We split on the
   // first slash that follows a registered provider id.
+  const providerId = knownProviderForModelRef(ref)
+  if (providerId !== null) return providerId
+  throw new Error(`Unknown provider in model ref: ${ref}`)
+}
+function knownProviderForModelRef(ref: string): KnownProviderId | null {
   for (const providerId of Object.keys(KNOWN_PROVIDERS) as KnownProviderId[]) {
     if (ref.startsWith(`${providerId}/`)) return providerId
   }
-  throw new Error(`Unknown provider in model ref: ${ref}`)
+  return null
 }
 // Per-provider default for pi-coding-agent's `thinkingLevel` knob. Returning
@@ -857,7 +991,7 @@ export function providerForModelRef(ref: KnownModelRef): KnownProviderId {
 //
 // Anthropic, GLM, and Kimi don't share the padding behavior, so they keep the
 // SDK default.
-export function defaultThinkingLevelForRef(ref: KnownModelRef): 'low' | undefined {
+export function defaultThinkingLevelForRef(ref: KnownModelRef | ModelRef | string): 'low' | undefined {
   const providerId = providerForModelRef(ref)
   if (providerId === 'openai' || providerId === 'openai-codex') return 'low'
   return undefined

package/src/container/shared.ts CHANGED

@@ -98,6 +98,15 @@ export async function checkDockerAvailable(exec: DockerExec = defaultDockerExec)
   }
 }
+// `docker buildx version` exits 0 only when the buildx CLI plugin is installed.
+// `start` uses this to pick the build path: buildx present -> `docker buildx
+// build` with the BuildKit Dockerfile (`--mount=type=cache` + the `# syntax=`
+// pragma, fast cached rebuilds); absent -> a BuildKit-stripped Dockerfile built
+// with the legacy `docker build`. Either way the agent image builds.
+export async function buildxAvailable(exec: DockerExec = defaultDockerExec): Promise<boolean> {
+  return (await exec(['buildx', 'version'])).exitCode === 0
+}
 export function containerNameFromCwd(cwd: string): string {
   return sanitizeContainerName(basename(resolve(cwd)))
 }

package/src/container/start.ts CHANGED

@@ -3,10 +3,13 @@ import { existsSync } from 'node:fs'
 import { readFile, writeFile } from 'node:fs/promises'
 import { isAbsolute, join, resolve } from 'node:path'
+import { agentUsesVector } from '@/bundled-plugins/memory/vector/config'
 import { expandMountPath, loadConfigSync, withDefaultPlugins, type Config } from '@/config'
 import { commitGitignoreWithUntracks, untrackTrulyIgnoredFiles } from '@/git/reconcile-ignored'
 import { commitSystemFile as commitSystemFileShared } from '@/git/system-commit'
 import { send as sendToDaemon } from '@/hostd/client'
+import { ensureModels } from '@/hostd/models'
+import { homeRoot } from '@/hostd/paths'
 import type { HttpInfoResult } from '@/hostd/protocol'
 import { ensureDaemon } from '@/hostd/spawn'
 import {
@@ -27,6 +30,7 @@ import { hostLocaleIsCjk } from '@/shared/host-locale'
 import { CONTAINER_PORT, TUI_TOKEN_LABEL, findFreePort, isPortAllocatedError, resolveTuiToken } from './port'
 import {
+  buildxAvailable,
   classifyRmStderr,
   cleanupRunCorpse,
   containerNameFromCwd,
@@ -280,11 +284,31 @@ export async function start({
       return { ok: false, reason: `dependency install failed: ${deps.reason}` }
     }
     await commitSystemFile(cwd, DEPENDENCY_FILES, upgradeCommitMessage ?? 'Update dependencies')
+    // Probe buildx up front so the Dockerfile we write matches the builder we
+    // will use. buildx present -> emit the BuildKit Dockerfile and build with
+    // `docker buildx build` (fast, cache mounts honored). buildx absent -> emit
+    // the BuildKit-stripped variant and fall back to legacy `docker build`, so
+    // `typeclaw start` still succeeds (just without cross-build apt/bun caches).
+    const hasBuildx = await buildxAvailable(exec)
     // Dockerfile refresh AFTER ensureDeps so the version pin in the FROM
     // line resolves against the agent's installed node_modules/typeclaw —
     // ensures the base image's CLI version matches the runtime the
     // container will actually load.
-    const dockerfileRefresh = await refreshDockerfile(cwd)
+    const dockerfileRefresh = await refreshDockerfile(cwd, { buildKit: hasBuildx })
+    // Provision the embedding model only when THIS agent opts into vector. The
+    // container embedder runs with local_files_only, so the model must already
+    // be on the host's ~/.typeclaw/models cache before the container boots —
+    // otherwise the startup vector index build fails. Kick the download off here
+    // (idempotent + file-locked) so it overlaps the docker build, then await it
+    // just before `docker run`. A vector-opted-out agent never reaches this, so
+    // a host whose containers are all opted out never downloads the ~280 MB
+    // model — including every agent under `typeclaw compose`, since each agent's
+    // start() makes this decision independently. The `.catch` swallow only keeps
+    // an early return between here and the await from logging an unhandled
+    // rejection; the real error is surfaced when we await at the run site below.
+    const modelsReady = agentUsesVector(cwd) ? ensureModels() : null
+    modelsReady?.catch(() => {})
     if (state.exists) {
       // Container holds the name but is not running. Without `--rm`, this is
@@ -362,14 +386,32 @@ export async function start({
     let built = false
     if (plan.needsBuild) {
-      const build = await exec(['build', '-t', plan.imageTag, plan.buildContext], { cwd, inheritStdio: true })
-      if (build.exitCode !== 0) {
+      const buildOk = await runImageBuild({
+        exec,
+        cwd,
+        imageTag: plan.imageTag,
+        buildContext: plan.buildContext,
+        hasBuildx,
+      })
+      if (!buildOk) {
         await cleanupHostDaemonRegistration(containerName, hostd)
         return { ok: false, reason: 'docker build failed' }
       }
       built = true
     }
+    if (modelsReady) {
+      try {
+        await modelsReady
+      } catch (error) {
+        await cleanupHostDaemonRegistration(containerName, hostd)
+        return {
+          ok: false,
+          reason: `embedding model provisioning failed (memory.vector.enabled): ${error instanceof Error ? error.message : String(error)}`,
+        }
+      }
+    }
     let run = await execRunWithConflictRetry(exec, plan.runArgs, cwd, containerName)
     // TOCTOU: another process may have grabbed the port between our probe and
@@ -623,6 +665,16 @@ export async function planStart({
     runArgs.push('-v', mount.readOnly ? `${hostPath}:${target}:ro` : `${hostPath}:${target}`)
   }
+  // Shared model cache mount for embeddings. Gated on vector opt-in: a
+  // vector-opted-out container has no embedder to feed, and the host never
+  // populates ~/.typeclaw/models for it (see ensureModels gating in start()),
+  // so mounting an empty cache would only invite a confusing local_files_only
+  // miss if something inside the container reached for the model anyway.
+  if (agentUsesVector(cwd)) {
+    runArgs.push('-v', `${homeRoot()}/models:/opt/models:ro`)
+    runArgs.push('-e', 'TYPECLAW_MODEL_CACHE=/opt/models')
+  }
   runArgs.push(imageTag)
   return {
@@ -649,11 +701,12 @@ async function resolvePublishHost(exec: DockerExec): Promise<string> {
 // the cheapest correct signal: the build context for `docker build` is the
 // Dockerfile itself, so equal contents definitionally produce an equivalent
 // image.
-export async function refreshDockerfile(cwd: string): Promise<{ changed: boolean }> {
+export async function refreshDockerfile(cwd: string, opts: { buildKit?: boolean } = {}): Promise<{ changed: boolean }> {
   const cfg = await loadTypeclawConfig(cwd)
   const next = buildDockerfile(cfg.docker.file, {
     baseImageVersion: resolveBaseImageVersion(cwd),
     cjkFontsAuto: hostLocaleIsCjk(),
+    buildKit: opts.buildKit,
   })
   const path = join(cwd, DOCKERFILE)
   const prev = await readFile(path, 'utf8').catch(() => null)
@@ -662,6 +715,36 @@ export async function refreshDockerfile(cwd: string): Promise<{ changed: boolean
   return { changed: true }
 }
+// Builds the agent image with a seamless buildx->legacy fallback. The preferred
+// frontend is chosen from `hasBuildx`; if a buildx build FAILS (e.g. the plugin
+// is installed but there is no usable builder/driver), we transparently rewrite
+// the Dockerfile to its BuildKit-stripped form and retry once with the legacy
+// `docker build`. The user sees one successful `typeclaw start` instead of a
+// buildx-specific dead end. A genuine Dockerfile error fails both paths, so the
+// retry costs at most one extra attempt before the real error surfaces.
+async function runImageBuild(args: {
+  exec: DockerExec
+  cwd: string
+  imageTag: string
+  buildContext: string
+  hasBuildx: boolean
+}): Promise<boolean> {
+  const { exec, cwd, imageTag, buildContext, hasBuildx } = args
+  if (hasBuildx) {
+    // `--load` puts the image in the local store so the subsequent `docker run`
+    // finds it. Non-default buildx drivers (docker-container, etc.) export to
+    // the build cache ONLY without it; on the default `docker` driver --load is
+    // already implied, so passing it unconditionally is a safe no-op there.
+    const buildx = await exec(['buildx', 'build', '--load', '-t', imageTag, buildContext], { cwd, inheritStdio: true })
+    if (buildx.exitCode === 0) return true
+    // buildx failed — fall back to the legacy builder against a stripped
+    // Dockerfile so a misconfigured-buildx host still ends up with an image.
+    await refreshDockerfile(cwd, { buildKit: false })
+  }
+  const legacy = await exec(['build', '-t', imageTag, buildContext], { cwd, inheritStdio: true })
+  return legacy.exitCode === 0
+}
 export async function refreshGitignore(cwd: string): Promise<void> {
   const cfg = await loadTypeclawConfig(cwd)
   await writeFile(join(cwd, GITIGNORE_FILE), buildGitignore(cfg.git.ignore))

package/src/cron/consumer.ts CHANGED

@@ -2,7 +2,7 @@ import type { AgentSession } from '@/agent'
 import { promptWithFallback, resolveFallbackChain } from '@/agent/model-fallback'
 import type { SessionOrigin } from '@/agent/session-origin'
 import { getConfig } from '@/config'
-import type { KnownModelRef } from '@/config/providers'
+import type { ModelRef } from '@/config/providers'
 import type { HookBus } from '@/plugin'
 import type { Stream, Unsubscribe } from '@/stream'
@@ -48,7 +48,7 @@ export type CreateCronConsumerOptions = {
   // each attempt to the specified model. Factories that don't honor the
   // override silently lose fallback semantics, so production wiring threads
   // it through to `createSession({ refOverride })`.
-  createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>
+  createSessionForCron: (job: PromptJob, refOverride?: ModelRef) => Promise<CronSession>
   // Builds the `CronHandlerContext` for the job and awaits its `handler`.
   // Wired by `src/run/index.ts` to reuse `runPromptForCommand` /
   // `runExecForCommand` from the command runner so plugin cron handlers and
@@ -161,7 +161,7 @@ export function createCronConsumer({
 async function runPrompt(
   job: PromptJob,
-  createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>,
+  createSessionForCron: (job: PromptJob, refOverride?: ModelRef) => Promise<CronSession>,
   stream: Stream,
   logger: CronConsumerLogger,
 ): Promise<void> {
@@ -198,8 +198,8 @@ async function runPrompt(
 async function runPromptOnce(
   job: PromptJob,
-  refs: KnownModelRef[],
-  createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>,
+  refs: ModelRef[],
+  createSessionForCron: (job: PromptJob, refOverride?: ModelRef) => Promise<CronSession>,
   logger: CronConsumerLogger,
 ): Promise<void> {
   // Per-attempt lifecycle: every session we create gets full
@@ -227,8 +227,13 @@ async function runPromptOnce(
               ...(created.origin !== undefined ? { origin: created.origin } : {}),
             }
           : undefined
+      // Per-turn memory injection for vector agents: the turn-start hook writes
+      // the rendered memory block into `retrievalContext.results`, which we
+      // append to the prompt text below (vector agents have no system-prompt
+      // `# Memory` section). Empty for non-vector agents.
+      const retrievalContext = { results: '' }
       if (created.hooks && turnEvent !== undefined) {
-        await created.hooks.runSessionTurnStart({ ...turnEvent, userPrompt: job.prompt })
+        await created.hooks.runSessionTurnStart({ ...turnEvent, userPrompt: job.prompt, retrievalContext })
       }
       // Bridge the CronSession wrapper into the AgentSession surface the
       // fallback helper expects:
@@ -243,7 +248,8 @@ async function runPromptOnce(
       // regular method that reads `this._eventListeners`. Destructuring drops
       // the receiver.
       const sessionForHelper: AgentSession = {
-        prompt: (text: string) => created.prompt(text),
+        prompt: (text: string) =>
+          created.prompt(retrievalContext.results.length > 0 ? `${text}\n\n${retrievalContext.results}` : text),
         subscribe: created.session?.subscribe.bind(created.session) ?? (() => () => {}),
       } as unknown as AgentSession
       return {

package/src/hostd/models.ts ADDED

@@ -0,0 +1,64 @@
+import { mkdir } from 'node:fs/promises'
+import { join } from 'node:path'
+import { env as transformersEnv, pipeline } from '@huggingface/transformers'
+import lockfile from 'proper-lockfile'
+import { EMBEDDING_MODEL_DTYPE, EMBEDDING_MODEL_NAME, writeModelSentinel } from '@/models/embedding-model'
+import { getResolvedTransformersVersion } from '@/models/transformers-version'
+import { modelsDir } from './paths'
+// q8 → onnx/model_quantized.onnx (~279 MB). Without this, dtype defaults to
+// 'auto', which resolves to fp32 (onnx/model.onnx, 1.11 GB) on CPU/non-WASM
+// devices — 4x the download for no quality gain at this corpus size. The
+// gold-set eval that chose e5-base (recall@3 96.9%) was run on this q8 variant.
+// Shared with the container embedder via @/models/embedding-model: the host
+// downloads what the container loads with local_files_only, so a mismatch
+// makes the container request a file that was never fetched.
+const MODEL_NAME = EMBEDDING_MODEL_NAME
+const MODEL_DTYPE = EMBEDDING_MODEL_DTYPE
+const LOCK_RETRIES = { retries: 60, factor: 1, minTimeout: 100, maxTimeout: 100, randomize: false } as const
+let ensureModelsPromise: Promise<void> | null = null
+let ensureModelsPath: string | null = null
+export function ensureModels(): Promise<void> {
+  const dir = modelsDir()
+  if (ensureModelsPath !== dir) {
+    ensureModelsPath = dir
+    ensureModelsPromise = null
+  }
+  ensureModelsPromise ??= ensureModelsLocked().catch((error: unknown) => {
+    ensureModelsPromise = null
+    throw error
+  })
+  return ensureModelsPromise
+}
+async function ensureModelsLocked(): Promise<void> {
+  const dir = modelsDir()
+  await mkdir(dir, { recursive: true })
+  const release = await lockfile.lock(dir, {
+    lockfilePath: join(dir, '.lock'),
+    realpath: false,
+    retries: LOCK_RETRIES,
+    stale: 30_000,
+  })
+  try {
+    configureTransformers(dir)
+    await pipeline('feature-extraction', MODEL_NAME, { dtype: MODEL_DTYPE })
+    // Stamp the cache with the version that produced it, still under the lock,
+    // so the container can verify the producer matches its consumer before a
+    // local_files_only load (see assertModelCacheCompatible).
+    await writeModelSentinel(dir, { transformers: getResolvedTransformersVersion() })
+  } finally {
+    await release()
+  }
+}
+function configureTransformers(dir: string): void {
+  transformersEnv.localModelPath = dir
+  ;(transformersEnv as typeof transformersEnv & { cacheDir: string }).cacheDir = dir
+}

package/src/hostd/paths.ts CHANGED

@@ -56,6 +56,10 @@ export function keysDir(): string {
   return join(homeRoot(), KEYS_DIR)
 }
+export function modelsDir(): string {
+  return join(homeRoot(), 'models')
+}
 // Throws on any name that could traverse out of registrationsDir() or
 // confuse the filesystem. Caller's responsibility to handle the error;
 // don't catch-and-ignore — an invalid name is a protocol violation.
@@ -82,8 +86,10 @@ export async function ensureDirs(): Promise<void> {
   await mkdir(logDir(), { recursive: true })
   await mkdir(registrationsDir(), { recursive: true })
   await mkdir(keysDir(), { recursive: true })
+  await mkdir(modelsDir(), { recursive: true })
   await chmod(runDir(), 0o700).catch(() => {})
   await chmod(logDir(), 0o700).catch(() => {})
   await chmod(registrationsDir(), 0o700).catch(() => {})
   await chmod(keysDir(), 0o700).catch(() => {})
+  await chmod(modelsDir(), 0o700).catch(() => {})
 }

package/src/hostd/portbroker-manager.ts CHANGED

@@ -67,8 +67,8 @@ export function createPortbrokerManager(opts: PortbrokerManagerOptions = {}): Po
         brokerToken: input.brokerToken,
         onEvent: (event) => {
           input.onEvent(event)
-          if (event.kind === 'port-forward-opened') tailscale.servePort(event.port)
-          else if (event.kind === 'port-forward-closed') tailscale.stopPort(event.port)
+          if (event.kind === 'port-forward-opened') tailscale.servePort(event.hostPort ?? event.port)
+          else if (event.kind === 'port-forward-closed') tailscale.stopPort(event.hostPort ?? event.port)
         },
         onFatalAuthFailure: (reason) => {
           // The broker has already stopped itself. Drop it from the map so a