typeclaw 0.36.7 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +2 -2
  2. package/package.json +3 -2
  3. package/src/agent/index.ts +31 -11
  4. package/src/agent/live-sessions.ts +12 -0
  5. package/src/agent/model-fallback.ts +17 -15
  6. package/src/agent/model-overrides.ts +2 -2
  7. package/src/agent/session-meta.ts +10 -0
  8. package/src/agent/subagents.ts +11 -2
  9. package/src/agent/system-prompt.ts +9 -3
  10. package/src/agent/todo/continuation-policy.ts +6 -3
  11. package/src/agent/todo/continuation-wiring.ts +4 -2
  12. package/src/agent/todo/continuation.ts +3 -3
  13. package/src/agent/tools/todo/index.ts +27 -4
  14. package/src/bundled-plugins/agent-browser/index.ts +33 -108
  15. package/src/bundled-plugins/agent-browser/shim.ts +3 -94
  16. package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
  17. package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
  18. package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
  19. package/src/bundled-plugins/memory/README.md +80 -23
  20. package/src/bundled-plugins/memory/append-tool.ts +74 -53
  21. package/src/bundled-plugins/memory/citation-superset.ts +4 -0
  22. package/src/bundled-plugins/memory/citations.ts +54 -0
  23. package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
  24. package/src/bundled-plugins/memory/dreaming.ts +444 -21
  25. package/src/bundled-plugins/memory/index.ts +544 -400
  26. package/src/bundled-plugins/memory/load-memory.ts +87 -10
  27. package/src/bundled-plugins/memory/load-shards.ts +48 -22
  28. package/src/bundled-plugins/memory/memory-logger.ts +95 -106
  29. package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
  30. package/src/bundled-plugins/memory/parent-link.ts +33 -0
  31. package/src/bundled-plugins/memory/paths.ts +12 -0
  32. package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
  33. package/src/bundled-plugins/memory/references/load-references.ts +212 -0
  34. package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
  35. package/src/bundled-plugins/memory/search-tool.ts +282 -45
  36. package/src/bundled-plugins/memory/stream-events.ts +1 -0
  37. package/src/bundled-plugins/memory/stream-io.ts +28 -3
  38. package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
  39. package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
  40. package/src/bundled-plugins/memory/vector/config.ts +28 -0
  41. package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
  42. package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
  43. package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
  44. package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
  45. package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
  46. package/src/bundled-plugins/memory/vector/passages.ts +125 -0
  47. package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
  48. package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
  49. package/src/bundled-plugins/memory/vector/startup.ts +71 -0
  50. package/src/bundled-plugins/memory/vector/store.ts +203 -0
  51. package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
  52. package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
  53. package/src/channels/router.ts +239 -40
  54. package/src/cli/incomplete-init.ts +57 -0
  55. package/src/cli/init.ts +143 -12
  56. package/src/cli/inspect.ts +11 -5
  57. package/src/cli/model.ts +112 -34
  58. package/src/cli/restart.ts +24 -0
  59. package/src/cli/start.ts +24 -0
  60. package/src/cli/tunnel.ts +53 -8
  61. package/src/config/config.ts +110 -19
  62. package/src/config/index.ts +5 -1
  63. package/src/config/models-mutation.ts +29 -11
  64. package/src/config/providers-mutation.ts +2 -2
  65. package/src/config/providers.ts +146 -12
  66. package/src/container/shared.ts +9 -0
  67. package/src/container/start.ts +87 -4
  68. package/src/cron/consumer.ts +13 -7
  69. package/src/hostd/models.ts +64 -0
  70. package/src/hostd/paths.ts +6 -0
  71. package/src/hostd/portbroker-manager.ts +2 -2
  72. package/src/init/checkpoint.ts +201 -0
  73. package/src/init/dockerfile.ts +164 -51
  74. package/src/init/gitignore.ts +7 -7
  75. package/src/init/index.ts +41 -9
  76. package/src/init/line-auth.ts +50 -21
  77. package/src/init/models-dev.ts +96 -21
  78. package/src/init/oauth-login.ts +3 -3
  79. package/src/init/progress.ts +29 -0
  80. package/src/init/validate-api-key.ts +4 -0
  81. package/src/inspect/index.ts +13 -6
  82. package/src/inspect/item-list.ts +11 -2
  83. package/src/inspect/live-list.ts +65 -0
  84. package/src/inspect/open-item.ts +22 -1
  85. package/src/inspect/session-list.ts +29 -0
  86. package/src/models/embedding-model.ts +114 -0
  87. package/src/models/transformers-version.ts +55 -0
  88. package/src/plugin/types.ts +3 -0
  89. package/src/portbroker/container-server.ts +23 -0
  90. package/src/portbroker/forward-request-bus.ts +35 -0
  91. package/src/portbroker/forward-result-bus.ts +2 -3
  92. package/src/portbroker/hostd-client.ts +182 -36
  93. package/src/portbroker/index.ts +6 -1
  94. package/src/portbroker/protocol.ts +9 -2
  95. package/src/run/channel-session-factory.ts +11 -1
  96. package/src/run/index.ts +41 -7
  97. package/src/server/command-runner.ts +24 -1
  98. package/src/server/index.ts +42 -8
  99. package/src/shared/index.ts +2 -0
  100. package/src/shared/protocol.ts +31 -0
  101. package/src/skills/typeclaw-channels/SKILL.md +4 -4
  102. package/src/skills/typeclaw-config/SKILL.md +2 -2
  103. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  104. package/src/skills/typeclaw-permissions/SKILL.md +3 -3
  105. package/src/skills/typeclaw-skills/SKILL.md +1 -1
  106. package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
  107. package/src/tunnels/providers/cloudflare-quick.ts +65 -7
  108. package/src/tunnels/upstream-probe.ts +25 -0
  109. package/typeclaw.schema.json +156 -67
  110. package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
  111. package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
  112. package/src/portbroker/bind-with-forward.ts +0 -102
@@ -1,4 +1,4 @@
1
- import type { Api, Model } from '@mariozechner/pi-ai'
1
+ import type { KnownApi, Model } from '@mariozechner/pi-ai'
2
2
 
3
3
  // Authentication mechanism a provider supports. `api-key` reads a static key
4
4
  // from .env (the original path); `oauth` runs a browser flow at init time and
@@ -18,16 +18,13 @@ type KnownProvider = {
18
18
  auth: ReadonlyArray<AuthMethod>
19
19
  apiKeyEnv: string | null
20
20
  oauthProviderId: string | null
21
- models: Record<string, Model<Api>>
21
+ models: Record<string, Model<KnownApi>>
22
22
  }
23
23
 
24
- // Curated allowlist of providers + models that are wired into the agent
25
- // runtime. The values here back the Zod enum on every entry in
26
- // `configSchema.models`, so any model the user can put in `typeclaw.json`
27
- // (under any profile name) MUST appear here verbatim. The
28
- // init-time picker may surface additional models from models.dev, but it
29
- // resolves them through this list before scaffolding (anything missing falls
30
- // back to a curated default).
24
+ // Curated provider + model table. Provider ids remain the allowlist for
25
+ // `typeclaw.json` refs, while the model entries are the tested defaults and
26
+ // JSON-schema autocomplete set. The init/model pickers may surface additional
27
+ // models from models.dev as long as the provider prefix is one of these ids.
31
28
  //
32
29
  // Adding a new model: append it to the matching provider's `models` map. Each
33
30
  // model object is the literal `Model<...>` that pi-ai consumes — keep it
@@ -703,6 +700,118 @@ export const KNOWN_PROVIDERS = {
703
700
  },
704
701
  },
705
702
  },
703
+ // Moonshot AI (Kimi) — Open Platform pay-as-you-go API. The platform exposes
704
+ // an OpenAI-compatible surface at api.moonshot.ai/v1 (Bearer auth +
705
+ // /chat/completions shape), so models go through pi-ai's `openai-completions`
706
+ // adapter with a custom baseUrl — same trick as Fireworks, Z.AI, MiniMax, and
707
+ // DeepSeek. api-key only; the platform ships no OAuth flow.
708
+ //
709
+ // Moonshot also offers an Anthropic-compatible route (api.moonshot.ai/anthropic)
710
+ // on the same key, but it rescales temperature (real = requested × 0.6) and
711
+ // would be the FIRST `anthropic-messages` transport pointed at a non-Anthropic
712
+ // baseUrl in this codebase. We deliberately stay on the proven OpenAI-compatible
713
+ // path so behavior matches every other paygo provider.
714
+ //
715
+ // The split with `moonshot-coding` below mirrors `zai` / `zai-coding`: same
716
+ // upstream vendor, two distinct billing surfaces (Open Platform paygo vs the
717
+ // Kimi Code subscription) on two distinct base URLs with two distinct env
718
+ // vars, so a user can hold both keys at once. The Open Platform key does NOT
719
+ // work against the Kimi Code endpoint, and vice versa.
720
+ //
721
+ // Model lineup mirrors the OpenAI-compatible model list on platform.kimi.ai
722
+ // as of 2026-06-14: kimi-k2.7-code (flagship coding model, always-on thinking,
723
+ // text+image), kimi-k2.6 (general flagship, text+image), and kimi-k2.5
724
+ // (general, text+image). All three fold reasoning in via the `thinking`
725
+ // request parameter, so no separate "thinking" model id is needed. The whole
726
+ // legacy kimi-k2 series (kimi-k2-thinking, k2-0905/0711/turbo previews) was
727
+ // officially discontinued on 2026-05-25 and is intentionally omitted, as are
728
+ // the legacy moonshot-v1-* models. Costs are USD per 1M tokens from
729
+ // platform.kimi.ai pricing; Moonshot publishes no cache-write surcharge, so
730
+ // cacheWrite is 0. (pi-ai's `input` array only models text/image — Moonshot's
731
+ // video input on the K2.x models can't be expressed here, so it is omitted.)
732
+ moonshot: {
733
+ id: 'moonshot',
734
+ name: 'Moonshot (Kimi)',
735
+ baseUrl: 'https://api.moonshot.ai/v1',
736
+ auth: ['api-key'],
737
+ apiKeyEnv: 'MOONSHOT_API_KEY',
738
+ oauthProviderId: null,
739
+ models: {
740
+ 'kimi-k2.7-code': {
741
+ id: 'kimi-k2.7-code',
742
+ name: 'Kimi K2.7 Code',
743
+ api: 'openai-completions',
744
+ provider: 'moonshot',
745
+ baseUrl: 'https://api.moonshot.ai/v1',
746
+ reasoning: true,
747
+ input: ['text', 'image'],
748
+ cost: { input: 0.6, output: 2.5, cacheRead: 0.15, cacheWrite: 0 },
749
+ contextWindow: 256000,
750
+ maxTokens: 64000,
751
+ },
752
+ 'kimi-k2.6': {
753
+ id: 'kimi-k2.6',
754
+ name: 'Kimi K2.6',
755
+ api: 'openai-completions',
756
+ provider: 'moonshot',
757
+ baseUrl: 'https://api.moonshot.ai/v1',
758
+ reasoning: true,
759
+ input: ['text', 'image'],
760
+ cost: { input: 0.6, output: 2.5, cacheRead: 0.15, cacheWrite: 0 },
761
+ contextWindow: 256000,
762
+ maxTokens: 64000,
763
+ },
764
+ 'kimi-k2.5': {
765
+ id: 'kimi-k2.5',
766
+ name: 'Kimi K2.5',
767
+ api: 'openai-completions',
768
+ provider: 'moonshot',
769
+ baseUrl: 'https://api.moonshot.ai/v1',
770
+ reasoning: true,
771
+ input: ['text', 'image'],
772
+ cost: { input: 0.6, output: 2.5, cacheRead: 0.15, cacheWrite: 0 },
773
+ contextWindow: 256000,
774
+ maxTokens: 64000,
775
+ },
776
+ },
777
+ },
778
+ // Moonshot AI Kimi Code — the Coding Plan subscription product. Distinct from
779
+ // the Open Platform above: a separate domain (api.kimi.com/coding/v1), a
780
+ // separate subscription key created at kimi.com/code/console, and a separate
781
+ // env var (`MOONSHOT_CODING_API_KEY`) so a user can hold both an Open Platform
782
+ // paygo key and a Coding Plan key without collisions. Kimi Code exposes an
783
+ // OpenAI-compatible route (Bearer auth + /chat/completions) alongside its
784
+ // Anthropic-compatible one; we use the OpenAI-compatible route so it threads
785
+ // through the same `openai-completions` adapter as every other paygo provider.
786
+ //
787
+ // Single model alias: `kimi-for-coding` is a STABLE ALIAS that the Coding Plan
788
+ // backend routes to the latest underlying model (currently the K2.6 family).
789
+ // Version-pinned ids are NOT accepted on this endpoint and fail silently, so
790
+ // the alias is the only id listed. Costs are 0 because the Coding Plan bills a
791
+ // flat subscription quota, not per-token — there is no per-token price to
792
+ // attribute (same convention as the Fireworks Fire Pass router above).
793
+ 'moonshot-coding': {
794
+ id: 'moonshot-coding',
795
+ name: 'Moonshot (Kimi Coding Plan)',
796
+ baseUrl: 'https://api.kimi.com/coding/v1',
797
+ auth: ['api-key'],
798
+ apiKeyEnv: 'MOONSHOT_CODING_API_KEY',
799
+ oauthProviderId: null,
800
+ models: {
801
+ 'kimi-for-coding': {
802
+ id: 'kimi-for-coding',
803
+ name: 'Kimi for Coding',
804
+ api: 'openai-completions',
805
+ provider: 'moonshot-coding',
806
+ baseUrl: 'https://api.kimi.com/coding/v1',
807
+ reasoning: true,
808
+ input: ['text', 'image'],
809
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
810
+ contextWindow: 256000,
811
+ maxTokens: 64000,
812
+ },
813
+ },
814
+ },
706
815
  } as const satisfies Record<string, KnownProvider>
707
816
 
708
817
  export type KnownProviderId = keyof typeof KNOWN_PROVIDERS
@@ -776,6 +885,15 @@ export const KNOWN_PROVIDER_VENDORS = {
776
885
  name: 'DeepSeek',
777
886
  providers: ['deepseek'],
778
887
  },
888
+ moonshot: {
889
+ id: 'moonshot',
890
+ name: 'Moonshot (Kimi)',
891
+ providers: ['moonshot', 'moonshot-coding'],
892
+ variants: {
893
+ moonshot: { label: 'Pay-as-you-go', hint: 'Moonshot Open Platform API billing' },
894
+ 'moonshot-coding': { label: 'Coding Plan', hint: 'Kimi Code subscription' },
895
+ },
896
+ },
779
897
  } as const satisfies Record<string, KnownProviderVendor>
780
898
 
781
899
  export type KnownProviderVendorId = keyof typeof KNOWN_PROVIDER_VENDORS
@@ -820,6 +938,8 @@ export type KnownModelRef = {
820
938
  [P in KnownProviderId]: `${P}/${Extract<keyof (typeof KNOWN_PROVIDERS)[P]['models'], string>}`
821
939
  }[KnownProviderId]
822
940
 
941
/**
 * Branded string for a `provider/model` reference. The brand exists only at
 * the type level; `isModelRef` is the guard that narrows a plain string to it.
 */
export type ModelRef = string & {
  readonly __modelRef: unique symbol
}
942
+
823
943
  export function listKnownModelRefs(): KnownModelRef[] {
824
944
  const refs: string[] = []
825
945
  for (const providerId of Object.keys(KNOWN_PROVIDERS) as KnownProviderId[]) {
@@ -830,19 +950,33 @@ export function listKnownModelRefs(): KnownModelRef[] {
830
950
  return refs as KnownModelRef[]
831
951
  }
832
952
 
953
/**
 * Type guard: true when `value` is exactly one of the refs returned by
 * `listKnownModelRefs()`.
 */
export function isKnownModelRef(value: string): value is KnownModelRef {
  const knownRefs: ReadonlyArray<string> = listKnownModelRefs()
  return knownRefs.includes(value)
}
956
+
957
/**
 * Type guard for a model ref. The value must have the `provider/model` shape
 * (lowercase alphanumeric/dash provider, non-empty model id without
 * whitespace) AND start with a registered provider id followed by `/`.
 */
export function isModelRef(value: string): value is ModelRef {
  const refShape = /^[a-z0-9][a-z0-9-]*\/[^\s/][^\s]*$/
  if (!refShape.test(value)) return false
  return knownProviderForModelRef(value) !== null
}
960
+
833
961
  // The default we hand to scaffolded `typeclaw.json` and the schema's
834
962
  // `model.default`. Lives here (next to the provider table) so adding a model
835
963
  // can't drift from the field default — both come from the same module.
836
964
  export const DEFAULT_MODEL_REF: KnownModelRef = 'openai/gpt-5.4-nano'
837
965
 
838
- export function providerForModelRef(ref: KnownModelRef): KnownProviderId {
966
+ export function providerForModelRef(ref: KnownModelRef | ModelRef | string): KnownProviderId {
839
967
  // KnownModelRef is `${provider}/${modelId}`, but provider IDs themselves can
840
968
  // contain '-' and model IDs can contain '/' (Fireworks). We split on the
841
969
  // first slash that follows a registered provider id.
970
+ const providerId = knownProviderForModelRef(ref)
971
+ if (providerId !== null) return providerId
972
+ throw new Error(`Unknown provider in model ref: ${ref}`)
973
+ }
974
+
975
+ function knownProviderForModelRef(ref: string): KnownProviderId | null {
842
976
  for (const providerId of Object.keys(KNOWN_PROVIDERS) as KnownProviderId[]) {
843
977
  if (ref.startsWith(`${providerId}/`)) return providerId
844
978
  }
845
- throw new Error(`Unknown provider in model ref: ${ref}`)
979
+ return null
846
980
  }
847
981
 
848
982
  // Per-provider default for pi-coding-agent's `thinkingLevel` knob. Returning
@@ -857,7 +991,7 @@ export function providerForModelRef(ref: KnownModelRef): KnownProviderId {
857
991
  //
858
992
  // Anthropic, GLM, and Kimi don't share the padding behavior, so they keep the
859
993
  // SDK default.
860
- export function defaultThinkingLevelForRef(ref: KnownModelRef): 'low' | undefined {
994
+ export function defaultThinkingLevelForRef(ref: KnownModelRef | ModelRef | string): 'low' | undefined {
861
995
  const providerId = providerForModelRef(ref)
862
996
  if (providerId === 'openai' || providerId === 'openai-codex') return 'low'
863
997
  return undefined
@@ -98,6 +98,15 @@ export async function checkDockerAvailable(exec: DockerExec = defaultDockerExec)
98
98
  }
99
99
  }
100
100
 
101
/**
 * Reports whether the buildx CLI plugin is installed, by running
 * `docker buildx version` and checking for exit code 0.
 *
 * `start` uses the answer to choose the build path: with buildx it runs
 * `docker buildx build` against the BuildKit Dockerfile (`--mount=type=cache`
 * plus the `# syntax=` pragma, fast cached rebuilds); without it, a
 * BuildKit-stripped Dockerfile is built with the legacy `docker build`.
 * Either way the agent image builds.
 *
 * @param exec - Docker command runner; defaults to `defaultDockerExec`.
 * @returns `true` when `docker buildx version` exits with code 0.
 */
export async function buildxAvailable(exec: DockerExec = defaultDockerExec): Promise<boolean> {
  const { exitCode } = await exec(['buildx', 'version'])
  return exitCode === 0
}
109
+
101
110
  export function containerNameFromCwd(cwd: string): string {
102
111
  return sanitizeContainerName(basename(resolve(cwd)))
103
112
  }
@@ -3,10 +3,13 @@ import { existsSync } from 'node:fs'
3
3
  import { readFile, writeFile } from 'node:fs/promises'
4
4
  import { isAbsolute, join, resolve } from 'node:path'
5
5
 
6
+ import { agentUsesVector } from '@/bundled-plugins/memory/vector/config'
6
7
  import { expandMountPath, loadConfigSync, withDefaultPlugins, type Config } from '@/config'
7
8
  import { commitGitignoreWithUntracks, untrackTrulyIgnoredFiles } from '@/git/reconcile-ignored'
8
9
  import { commitSystemFile as commitSystemFileShared } from '@/git/system-commit'
9
10
  import { send as sendToDaemon } from '@/hostd/client'
11
+ import { ensureModels } from '@/hostd/models'
12
+ import { homeRoot } from '@/hostd/paths'
10
13
  import type { HttpInfoResult } from '@/hostd/protocol'
11
14
  import { ensureDaemon } from '@/hostd/spawn'
12
15
  import {
@@ -27,6 +30,7 @@ import { hostLocaleIsCjk } from '@/shared/host-locale'
27
30
 
28
31
  import { CONTAINER_PORT, TUI_TOKEN_LABEL, findFreePort, isPortAllocatedError, resolveTuiToken } from './port'
29
32
  import {
33
+ buildxAvailable,
30
34
  classifyRmStderr,
31
35
  cleanupRunCorpse,
32
36
  containerNameFromCwd,
@@ -280,11 +284,31 @@ export async function start({
280
284
  return { ok: false, reason: `dependency install failed: ${deps.reason}` }
281
285
  }
282
286
  await commitSystemFile(cwd, DEPENDENCY_FILES, upgradeCommitMessage ?? 'Update dependencies')
287
+ // Probe buildx up front so the Dockerfile we write matches the builder we
288
+ // will use. buildx present -> emit the BuildKit Dockerfile and build with
289
+ // `docker buildx build` (fast, cache mounts honored). buildx absent -> emit
290
+ // the BuildKit-stripped variant and fall back to legacy `docker build`, so
291
+ // `typeclaw start` still succeeds (just without cross-build apt/bun caches).
292
+ const hasBuildx = await buildxAvailable(exec)
283
293
  // Dockerfile refresh AFTER ensureDeps so the version pin in the FROM
284
294
  // line resolves against the agent's installed node_modules/typeclaw —
285
295
  // ensures the base image's CLI version matches the runtime the
286
296
  // container will actually load.
287
- const dockerfileRefresh = await refreshDockerfile(cwd)
297
+ const dockerfileRefresh = await refreshDockerfile(cwd, { buildKit: hasBuildx })
298
+
299
+ // Provision the embedding model only when THIS agent opts into vector. The
300
+ // container embedder runs with local_files_only, so the model must already
301
+ // be on the host's ~/.typeclaw/models cache before the container boots —
302
+ // otherwise the startup vector index build fails. Kick the download off here
303
+ // (idempotent + file-locked) so it overlaps the docker build, then await it
304
+ // just before `docker run`. A vector-opted-out agent never reaches this, so
305
+ // a host whose containers are all opted out never downloads the ~280 MB
306
+ // model — including every agent under `typeclaw compose`, since each agent's
307
+ // start() makes this decision independently. The `.catch` swallow only keeps
308
+ // an early return between here and the await from logging an unhandled
309
+ // rejection; the real error is surfaced when we await at the run site below.
310
+ const modelsReady = agentUsesVector(cwd) ? ensureModels() : null
311
+ modelsReady?.catch(() => {})
288
312
 
289
313
  if (state.exists) {
290
314
  // Container holds the name but is not running. Without `--rm`, this is
@@ -362,14 +386,32 @@ export async function start({
362
386
 
363
387
  let built = false
364
388
  if (plan.needsBuild) {
365
- const build = await exec(['build', '-t', plan.imageTag, plan.buildContext], { cwd, inheritStdio: true })
366
- if (build.exitCode !== 0) {
389
+ const buildOk = await runImageBuild({
390
+ exec,
391
+ cwd,
392
+ imageTag: plan.imageTag,
393
+ buildContext: plan.buildContext,
394
+ hasBuildx,
395
+ })
396
+ if (!buildOk) {
367
397
  await cleanupHostDaemonRegistration(containerName, hostd)
368
398
  return { ok: false, reason: 'docker build failed' }
369
399
  }
370
400
  built = true
371
401
  }
372
402
 
403
+ if (modelsReady) {
404
+ try {
405
+ await modelsReady
406
+ } catch (error) {
407
+ await cleanupHostDaemonRegistration(containerName, hostd)
408
+ return {
409
+ ok: false,
410
+ reason: `embedding model provisioning failed (memory.vector.enabled): ${error instanceof Error ? error.message : String(error)}`,
411
+ }
412
+ }
413
+ }
414
+
373
415
  let run = await execRunWithConflictRetry(exec, plan.runArgs, cwd, containerName)
374
416
 
375
417
  // TOCTOU: another process may have grabbed the port between our probe and
@@ -623,6 +665,16 @@ export async function planStart({
623
665
  runArgs.push('-v', mount.readOnly ? `${hostPath}:${target}:ro` : `${hostPath}:${target}`)
624
666
  }
625
667
 
668
+ // Shared model cache mount for embeddings. Gated on vector opt-in: a
669
+ // vector-opted-out container has no embedder to feed, and the host never
670
+ // populates ~/.typeclaw/models for it (see ensureModels gating in start()),
671
+ // so mounting an empty cache would only invite a confusing local_files_only
672
+ // miss if something inside the container reached for the model anyway.
673
+ if (agentUsesVector(cwd)) {
674
+ runArgs.push('-v', `${homeRoot()}/models:/opt/models:ro`)
675
+ runArgs.push('-e', 'TYPECLAW_MODEL_CACHE=/opt/models')
676
+ }
677
+
626
678
  runArgs.push(imageTag)
627
679
 
628
680
  return {
@@ -649,11 +701,12 @@ async function resolvePublishHost(exec: DockerExec): Promise<string> {
649
701
  // the cheapest correct signal: the build context for `docker build` is the
650
702
  // Dockerfile itself, so equal contents definitionally produce an equivalent
651
703
  // image.
652
- export async function refreshDockerfile(cwd: string): Promise<{ changed: boolean }> {
704
+ export async function refreshDockerfile(cwd: string, opts: { buildKit?: boolean } = {}): Promise<{ changed: boolean }> {
653
705
  const cfg = await loadTypeclawConfig(cwd)
654
706
  const next = buildDockerfile(cfg.docker.file, {
655
707
  baseImageVersion: resolveBaseImageVersion(cwd),
656
708
  cjkFontsAuto: hostLocaleIsCjk(),
709
+ buildKit: opts.buildKit,
657
710
  })
658
711
  const path = join(cwd, DOCKERFILE)
659
712
  const prev = await readFile(path, 'utf8').catch(() => null)
@@ -662,6 +715,36 @@ export async function refreshDockerfile(cwd: string): Promise<{ changed: boolean
662
715
  return { changed: true }
663
716
  }
664
717
 
718
/**
 * Builds the agent image, falling back from buildx to the legacy builder.
 *
 * When `hasBuildx` is set, `docker buildx build` is tried first. If that build
 * FAILS (e.g. the plugin is installed but there is no usable builder/driver),
 * the Dockerfile is rewritten to its BuildKit-stripped form and the build is
 * retried once with the legacy `docker build`, so the user sees one successful
 * `typeclaw start` instead of a buildx-specific dead end. A genuine Dockerfile
 * error fails both paths, so the retry costs at most one extra attempt before
 * the real error surfaces.
 *
 * @returns `true` when either build attempt exits with code 0.
 */
async function runImageBuild(args: {
  exec: DockerExec
  cwd: string
  imageTag: string
  buildContext: string
  hasBuildx: boolean
}): Promise<boolean> {
  if (args.hasBuildx) {
    // `--load` places the image in the local store so the following
    // `docker run` can find it. Non-default buildx drivers (docker-container,
    // etc.) export ONLY to the build cache without it; the default `docker`
    // driver already implies --load, so passing it always is a safe no-op.
    const viaBuildx = await args.exec(['buildx', 'build', '--load', '-t', args.imageTag, args.buildContext], {
      cwd: args.cwd,
      inheritStdio: true,
    })
    if (viaBuildx.exitCode === 0) return true
    // buildx failed: switch the on-disk Dockerfile to the stripped variant so
    // the legacy builder below can still produce an image on this host.
    await refreshDockerfile(args.cwd, { buildKit: false })
  }
  const viaLegacy = await args.exec(['build', '-t', args.imageTag, args.buildContext], {
    cwd: args.cwd,
    inheritStdio: true,
  })
  return viaLegacy.exitCode === 0
}
747
+
665
748
  export async function refreshGitignore(cwd: string): Promise<void> {
666
749
  const cfg = await loadTypeclawConfig(cwd)
667
750
  await writeFile(join(cwd, GITIGNORE_FILE), buildGitignore(cfg.git.ignore))
@@ -2,7 +2,7 @@ import type { AgentSession } from '@/agent'
2
2
  import { promptWithFallback, resolveFallbackChain } from '@/agent/model-fallback'
3
3
  import type { SessionOrigin } from '@/agent/session-origin'
4
4
  import { getConfig } from '@/config'
5
- import type { KnownModelRef } from '@/config/providers'
5
+ import type { ModelRef } from '@/config/providers'
6
6
  import type { HookBus } from '@/plugin'
7
7
  import type { Stream, Unsubscribe } from '@/stream'
8
8
 
@@ -48,7 +48,7 @@ export type CreateCronConsumerOptions = {
48
48
  // each attempt to the specified model. Factories that don't honor the
49
49
  // override silently lose fallback semantics, so production wiring threads
50
50
  // it through to `createSession({ refOverride })`.
51
- createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>
51
+ createSessionForCron: (job: PromptJob, refOverride?: ModelRef) => Promise<CronSession>
52
52
  // Builds the `CronHandlerContext` for the job and awaits its `handler`.
53
53
  // Wired by `src/run/index.ts` to reuse `runPromptForCommand` /
54
54
  // `runExecForCommand` from the command runner so plugin cron handlers and
@@ -161,7 +161,7 @@ export function createCronConsumer({
161
161
 
162
162
  async function runPrompt(
163
163
  job: PromptJob,
164
- createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>,
164
+ createSessionForCron: (job: PromptJob, refOverride?: ModelRef) => Promise<CronSession>,
165
165
  stream: Stream,
166
166
  logger: CronConsumerLogger,
167
167
  ): Promise<void> {
@@ -198,8 +198,8 @@ async function runPrompt(
198
198
 
199
199
  async function runPromptOnce(
200
200
  job: PromptJob,
201
- refs: KnownModelRef[],
202
- createSessionForCron: (job: PromptJob, refOverride?: KnownModelRef) => Promise<CronSession>,
201
+ refs: ModelRef[],
202
+ createSessionForCron: (job: PromptJob, refOverride?: ModelRef) => Promise<CronSession>,
203
203
  logger: CronConsumerLogger,
204
204
  ): Promise<void> {
205
205
  // Per-attempt lifecycle: every session we create gets full
@@ -227,8 +227,13 @@ async function runPromptOnce(
227
227
  ...(created.origin !== undefined ? { origin: created.origin } : {}),
228
228
  }
229
229
  : undefined
230
+ // Per-turn memory injection for vector agents: the turn-start hook writes
231
+ // the rendered memory block into `retrievalContext.results`, which we
232
+ // append to the prompt text below (vector agents have no system-prompt
233
+ // `# Memory` section). Empty for non-vector agents.
234
+ const retrievalContext = { results: '' }
230
235
  if (created.hooks && turnEvent !== undefined) {
231
- await created.hooks.runSessionTurnStart({ ...turnEvent, userPrompt: job.prompt })
236
+ await created.hooks.runSessionTurnStart({ ...turnEvent, userPrompt: job.prompt, retrievalContext })
232
237
  }
233
238
  // Bridge the CronSession wrapper into the AgentSession surface the
234
239
  // fallback helper expects:
@@ -243,7 +248,8 @@ async function runPromptOnce(
243
248
  // regular method that reads `this._eventListeners`. Destructuring drops
244
249
  // the receiver.
245
250
  const sessionForHelper: AgentSession = {
246
- prompt: (text: string) => created.prompt(text),
251
+ prompt: (text: string) =>
252
+ created.prompt(retrievalContext.results.length > 0 ? `${text}\n\n${retrievalContext.results}` : text),
247
253
  subscribe: created.session?.subscribe.bind(created.session) ?? (() => () => {}),
248
254
  } as unknown as AgentSession
249
255
  return {
@@ -0,0 +1,64 @@
1
+ import { mkdir } from 'node:fs/promises'
2
+ import { join } from 'node:path'
3
+
4
+ import { env as transformersEnv, pipeline } from '@huggingface/transformers'
5
+ import lockfile from 'proper-lockfile'
6
+
7
+ import { EMBEDDING_MODEL_DTYPE, EMBEDDING_MODEL_NAME, writeModelSentinel } from '@/models/embedding-model'
8
+ import { getResolvedTransformersVersion } from '@/models/transformers-version'
9
+
10
+ import { modelsDir } from './paths'
11
+
12
// Model name and dtype are shared with the container embedder through
// @/models/embedding-model: the host downloads what the container later loads
// with local_files_only, so a mismatch makes the container request a file that
// was never fetched.
//
// The dtype is q8 → onnx/model_quantized.onnx (~279 MB). Left unset, dtype
// defaults to 'auto', which resolves to fp32 (onnx/model.onnx, 1.11 GB) on
// CPU/non-WASM devices — 4x the download for no quality gain at this corpus
// size. The gold-set eval that chose e5-base (recall@3 96.9%) was run on this
// q8 variant.
const MODEL_NAME = EMBEDDING_MODEL_NAME
const MODEL_DTYPE = EMBEDDING_MODEL_DTYPE

// Retry policy handed to the directory lock: 60 attempts at a fixed 100 ms
// spacing (no backoff growth, no jitter).
const LOCK_RETRIES = {
  retries: 60,
  factor: 1,
  minTimeout: 100,
  maxTimeout: 100,
  randomize: false,
} as const

// In-process memo of the current provisioning attempt and the directory it
// was started for.
let ensureModelsPromise: Promise<void> | null = null
let ensureModelsPath: string | null = null
25
+
26
/**
 * Ensures the embedding model is present in the host model cache, sharing a
 * single in-flight attempt per models directory within this process.
 *
 * - Repeated calls for the same directory return the same promise.
 * - If `modelsDir()` now resolves to a different directory, the memo is
 *   dropped and a fresh attempt starts for the new directory.
 * - A failed attempt clears the memo so the next call retries, but only if the
 *   memo still holds that attempt. A stale failure for a previous directory
 *   must not discard a newer in-flight attempt.
 *
 * @returns A promise that resolves once the model cache has been provisioned
 *   and rejects with the underlying error when provisioning fails.
 */
export function ensureModels(): Promise<void> {
  const dir = modelsDir()
  if (ensureModelsPath !== dir) {
    ensureModelsPath = dir
    ensureModelsPromise = null
  }
  if (ensureModelsPromise === null) {
    const attempt: Promise<void> = ensureModelsLocked().catch((error: unknown) => {
      // Clear only our own slot; a newer attempt may have replaced it already.
      if (ensureModelsPromise === attempt) ensureModelsPromise = null
      throw error
    })
    ensureModelsPromise = attempt
  }
  return ensureModelsPromise
}
38
+
39
/**
 * Provisions the embedding model into `modelsDir()` while holding a file lock
 * on that directory, then stamps the cache with the transformers version that
 * produced it.
 *
 * NOTE(review): the lock is retried per `LOCK_RETRIES` (60 × 100 ms ≈ 6 s). A
 * second process that arrives while another is still downloading the model
 * will presumably exhaust that budget and reject — confirm this fail-fast
 * behavior is intended.
 *
 * @throws Whatever the lock acquisition, the pipeline load, or the sentinel
 *   write throws; the lock is always released.
 */
async function ensureModelsLocked(): Promise<void> {
  const dir = modelsDir()
  await mkdir(dir, { recursive: true })

  const release = await lockfile.lock(dir, {
    lockfilePath: join(dir, '.lock'),
    realpath: false,
    retries: LOCK_RETRIES,
    stale: 30_000,
  })
  try {
    configureTransformers(dir)
    // Building the pipeline is what fetches the model files into `dir`; the
    // pipeline object itself is not used for inference here.
    const warmup = await pipeline('feature-extraction', MODEL_NAME, { dtype: MODEL_DTYPE })
    try {
      // Release the loaded model so it does not stay resident in this process
      // after provisioning.
      await warmup.dispose()
    } catch {
      // Best-effort: the files are already on disk, so a failed dispose must
      // not turn a successful provisioning into an error.
    }
    // Stamp the cache with the version that produced it, still under the lock,
    // so the container can verify the producer matches its consumer before a
    // local_files_only load (see assertModelCacheCompatible).
    await writeModelSentinel(dir, { transformers: getResolvedTransformersVersion() })
  } finally {
    await release()
  }
}
60
+
61
/**
 * Points the transformers runtime at `dir` for both its local model path and
 * its download cache.
 */
function configureTransformers(dir: string): void {
  Object.assign(transformersEnv, { localModelPath: dir, cacheDir: dir })
}
@@ -56,6 +56,10 @@ export function keysDir(): string {
56
56
  return join(homeRoot(), KEYS_DIR)
57
57
  }
58
58
 
59
/** Path of the `models` directory under the typeclaw home root. */
export function modelsDir(): string {
  const root = homeRoot()
  return join(root, 'models')
}
62
+
59
63
  // Throws on any name that could traverse out of registrationsDir() or
60
64
  // confuse the filesystem. Caller's responsibility to handle the error;
61
65
  // don't catch-and-ignore — an invalid name is a protocol violation.
@@ -82,8 +86,10 @@ export async function ensureDirs(): Promise<void> {
82
86
  await mkdir(logDir(), { recursive: true })
83
87
  await mkdir(registrationsDir(), { recursive: true })
84
88
  await mkdir(keysDir(), { recursive: true })
89
+ await mkdir(modelsDir(), { recursive: true })
85
90
  await chmod(runDir(), 0o700).catch(() => {})
86
91
  await chmod(logDir(), 0o700).catch(() => {})
87
92
  await chmod(registrationsDir(), 0o700).catch(() => {})
88
93
  await chmod(keysDir(), 0o700).catch(() => {})
94
+ await chmod(modelsDir(), 0o700).catch(() => {})
89
95
  }
@@ -67,8 +67,8 @@ export function createPortbrokerManager(opts: PortbrokerManagerOptions = {}): Po
67
67
  brokerToken: input.brokerToken,
68
68
  onEvent: (event) => {
69
69
  input.onEvent(event)
70
- if (event.kind === 'port-forward-opened') tailscale.servePort(event.port)
71
- else if (event.kind === 'port-forward-closed') tailscale.stopPort(event.port)
70
+ if (event.kind === 'port-forward-opened') tailscale.servePort(event.hostPort ?? event.port)
71
+ else if (event.kind === 'port-forward-closed') tailscale.stopPort(event.hostPort ?? event.port)
72
72
  },
73
73
  onFatalAuthFailure: (reason) => {
74
74
  // The broker has already stopped itself. Drop it from the map so a