typeclaw 0.36.7 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +3 -2
- package/src/agent/index.ts +31 -11
- package/src/agent/live-sessions.ts +12 -0
- package/src/agent/model-fallback.ts +17 -15
- package/src/agent/model-overrides.ts +2 -2
- package/src/agent/session-meta.ts +10 -0
- package/src/agent/subagents.ts +11 -2
- package/src/agent/system-prompt.ts +9 -3
- package/src/agent/todo/continuation-policy.ts +6 -3
- package/src/agent/todo/continuation-wiring.ts +4 -2
- package/src/agent/todo/continuation.ts +3 -3
- package/src/agent/tools/todo/index.ts +27 -4
- package/src/bundled-plugins/agent-browser/index.ts +33 -108
- package/src/bundled-plugins/agent-browser/shim.ts +3 -94
- package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
- package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
- package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
- package/src/bundled-plugins/memory/README.md +80 -23
- package/src/bundled-plugins/memory/append-tool.ts +74 -53
- package/src/bundled-plugins/memory/citation-superset.ts +4 -0
- package/src/bundled-plugins/memory/citations.ts +54 -0
- package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
- package/src/bundled-plugins/memory/dreaming.ts +444 -21
- package/src/bundled-plugins/memory/index.ts +544 -400
- package/src/bundled-plugins/memory/load-memory.ts +87 -10
- package/src/bundled-plugins/memory/load-shards.ts +48 -22
- package/src/bundled-plugins/memory/memory-logger.ts +95 -106
- package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
- package/src/bundled-plugins/memory/parent-link.ts +33 -0
- package/src/bundled-plugins/memory/paths.ts +12 -0
- package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
- package/src/bundled-plugins/memory/references/load-references.ts +212 -0
- package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
- package/src/bundled-plugins/memory/search-tool.ts +282 -45
- package/src/bundled-plugins/memory/stream-events.ts +1 -0
- package/src/bundled-plugins/memory/stream-io.ts +28 -3
- package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
- package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
- package/src/bundled-plugins/memory/vector/config.ts +28 -0
- package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
- package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
- package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
- package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
- package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
- package/src/bundled-plugins/memory/vector/passages.ts +125 -0
- package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
- package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
- package/src/bundled-plugins/memory/vector/startup.ts +71 -0
- package/src/bundled-plugins/memory/vector/store.ts +203 -0
- package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
- package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
- package/src/channels/router.ts +239 -40
- package/src/cli/incomplete-init.ts +57 -0
- package/src/cli/init.ts +143 -12
- package/src/cli/inspect.ts +11 -5
- package/src/cli/model.ts +112 -34
- package/src/cli/restart.ts +24 -0
- package/src/cli/start.ts +24 -0
- package/src/cli/tunnel.ts +53 -8
- package/src/config/config.ts +110 -19
- package/src/config/index.ts +5 -1
- package/src/config/models-mutation.ts +29 -11
- package/src/config/providers-mutation.ts +2 -2
- package/src/config/providers.ts +146 -12
- package/src/container/shared.ts +9 -0
- package/src/container/start.ts +87 -4
- package/src/cron/consumer.ts +13 -7
- package/src/hostd/models.ts +64 -0
- package/src/hostd/paths.ts +6 -0
- package/src/hostd/portbroker-manager.ts +2 -2
- package/src/init/checkpoint.ts +201 -0
- package/src/init/dockerfile.ts +164 -51
- package/src/init/gitignore.ts +7 -7
- package/src/init/index.ts +41 -9
- package/src/init/line-auth.ts +50 -21
- package/src/init/models-dev.ts +96 -21
- package/src/init/oauth-login.ts +3 -3
- package/src/init/progress.ts +29 -0
- package/src/init/validate-api-key.ts +4 -0
- package/src/inspect/index.ts +13 -6
- package/src/inspect/item-list.ts +11 -2
- package/src/inspect/live-list.ts +65 -0
- package/src/inspect/open-item.ts +22 -1
- package/src/inspect/session-list.ts +29 -0
- package/src/models/embedding-model.ts +114 -0
- package/src/models/transformers-version.ts +55 -0
- package/src/plugin/types.ts +3 -0
- package/src/portbroker/container-server.ts +23 -0
- package/src/portbroker/forward-request-bus.ts +35 -0
- package/src/portbroker/forward-result-bus.ts +2 -3
- package/src/portbroker/hostd-client.ts +182 -36
- package/src/portbroker/index.ts +6 -1
- package/src/portbroker/protocol.ts +9 -2
- package/src/run/channel-session-factory.ts +11 -1
- package/src/run/index.ts +41 -7
- package/src/server/command-runner.ts +24 -1
- package/src/server/index.ts +42 -8
- package/src/shared/index.ts +2 -0
- package/src/shared/protocol.ts +31 -0
- package/src/skills/typeclaw-channels/SKILL.md +4 -4
- package/src/skills/typeclaw-config/SKILL.md +2 -2
- package/src/skills/typeclaw-memory/SKILL.md +3 -1
- package/src/skills/typeclaw-permissions/SKILL.md +3 -3
- package/src/skills/typeclaw-skills/SKILL.md +1 -1
- package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
- package/src/tunnels/providers/cloudflare-quick.ts +65 -7
- package/src/tunnels/upstream-probe.ts +25 -0
- package/typeclaw.schema.json +156 -67
- package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
- package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
- package/src/portbroker/bind-with-forward.ts +0 -102
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { existsSync } from 'node:fs'
|
|
2
|
+
import { rm } from 'node:fs/promises'
|
|
3
|
+
import { join } from 'node:path'
|
|
4
|
+
|
|
5
|
+
import type { PluginCheckResult } from '@/plugin'
|
|
6
|
+
|
|
7
|
+
import { inspectVectorIndex, type VectorIndexProblem } from './inspect'
|
|
8
|
+
import { collectPassages, findMissingPassages } from './passages'
|
|
9
|
+
import { VectorStore } from './store'
|
|
10
|
+
|
|
11
|
+
// Location of the vector index database, relative to the agent directory.
export const VECTOR_INDEX_REL_PATH = join('memory', '.vectors', 'index.db')

// Doctor check for the vector index of the agent rooted at `agentDir`.
//
// Outcomes, in order of evaluation:
//   - DB file absent            -> 'warning' (no fix offered here)
//   - DB unreadable / corrupt / -> whatever corruptionResult() builds
//     schema missing
//   - otherwise                 -> summarize() over the stored rows compared
//                                  with the passages collected from memory
export async function runVectorIndexDoctor(agentDir: string): Promise<PluginCheckResult> {
  const dbPath = join(agentDir, VECTOR_INDEX_REL_PATH)

  const dbExists = existsSync(dbPath)
  if (!dbExists) {
    return {
      status: 'warning',
      message: 'vector memory is enabled but the index DB is missing; it rebuilds on the next startup',
    }
  }

  const finding = inspectVectorIndex(dbPath)

  // These three kinds mean the DB cannot be inspected row by row, so they are
  // reported as a whole-file problem.
  switch (finding.kind) {
    case 'unreadable':
    case 'corrupt':
    case 'schema-missing':
      return corruptionResult(dbPath, finding)
  }

  const passages = await collectPassages(agentDir)
  const wantedCount = passages.length

  // Stored rows whose id no longer matches any collected passage.
  const wantedIds = new Set<string>()
  for (const passage of passages) wantedIds.add(passage.id)
  const orphans = finding.rowIds.filter((rowId) => !wantedIds.has(rowId))

  // Passages that findMissingPassages() reports as absent from the store.
  const backfillCount = countBackfill(dbPath, passages)

  return summarize(dbPath, {
    rowCount: finding.rowCount,
    orphans,
    modelMismatch: finding.modelMismatch,
    malformed: finding.malformed,
    backfillCount,
    indexedCount: wantedCount - backfillCount,
    wantedCount,
  })
}
|
|
44
|
+
|
|
45
|
+
// Counts how many of `passages` findMissingPassages() reports as missing from
// the store at `dbPath`. The store is opened only for the duration of the call
// and is closed even when the lookup throws.
function countBackfill(dbPath: string, passages: Awaited<ReturnType<typeof collectPassages>>): number {
  const store = VectorStore.open(dbPath)
  try {
    const missing = findMissingPassages(store, passages)
    return missing.length
  } finally {
    store.close()
  }
}
|
|
53
|
+
|
|
54
|
+
// Inputs to summarize(): row ids grouped by problem category plus the counts
// used in the report text.
type Summary = {
  rowCount: number
  orphans: string[]
  modelMismatch: string[]
  malformed: string[]
  backfillCount: number
  indexedCount: number
  wantedCount: number
}

// Turns the per-category findings into a doctor result.
//
// Returns 'ok' when no category has entries. Otherwise returns a 'warning'
// with one detail line per non-empty category; a fix that deletes the affected
// rows is attached only when at least one stored row is repairable (a pending
// backfill alone yields a warning without a fix).
function summarize(dbPath: string, s: Summary): PluginCheckResult {
  const { orphans, modelMismatch, malformed, backfillCount } = s

  // One [count, line] pair per category, in reporting order.
  const categories: Array<[number, string]> = [
    [orphans.length, `${orphans.length} orphaned row(s) for deleted topics/fragments`],
    [modelMismatch.length, `${modelMismatch.length} row(s) from a different embedding model/dims`],
    [malformed.length, `${malformed.length} row(s) with a malformed embedding blob`],
    [backfillCount, `${backfillCount} memory passage(s) need (re)indexing`],
  ]
  const details: string[] = []
  for (const [count, line] of categories) {
    if (count > 0) details.push(line)
  }

  if (details.length === 0) {
    return { status: 'ok', message: `vector index healthy: ${s.indexedCount}/${s.wantedCount} memory passages indexed` }
  }

  const result: PluginCheckResult = {
    status: 'warning',
    message: `vector index has ${details.length} issue(s); ${s.rowCount} row(s) stored`,
    details,
  }

  // The same row id may appear in more than one category; collapsing to a set
  // keeps both the reported count and the deletion list free of duplicates.
  const repairable = Array.from(new Set(orphans.concat(modelMismatch, malformed)))
  if (repairable.length === 0) return result

  result.fix = {
    description: `Delete ${repairable.length} orphaned/incompatible vector row(s); backfill happens on the next startup`,
    apply: async () => {
      const store = VectorStore.open(dbPath)
      try {
        store.deleteMany(repairable)
      } finally {
        store.close()
      }
      return {
        summary: `pruned ${repairable.length} stale vector row(s) from ${VECTOR_INDEX_REL_PATH}`,
        changedPaths: [],
      }
    },
  }
  return result
}
|
|
106
|
+
|
|
107
|
+
// Builds the 'error' doctor result for an index DB that cannot be used as a
// whole (unreadable, corrupt, or missing its schema).
//
// `finding.detail` is a list for the 'corrupt' kind and a single string for
// the other kinds; both are normalized to a list of detail lines.
//
// The attached fix deletes the database so it is rebuilt on the next startup.
// It also removes the SQLite sidecar files that may sit next to the DB
// (`-wal`, `-shm`, `-journal`): leaving a stale write-ahead log or rollback
// journal behind lets SQLite pair it with the freshly created database of the
// same name. NOTE(review): whether VectorStore actually enables WAL is not
// visible from here; removing absent sidecars is a no-op thanks to `force`.
function corruptionResult(dbPath: string, finding: VectorIndexProblem): PluginCheckResult {
  const details = finding.kind === 'corrupt' ? finding.detail : [finding.detail]
  return {
    status: 'error',
    message:
      finding.kind === 'schema-missing'
        ? 'vector index DB has an invalid schema'
        : 'vector index DB is unreadable or corrupted',
    details,
    fix: {
      description: `Delete the corrupted ${VECTOR_INDEX_REL_PATH}; it rebuilds from memory on the next startup`,
      apply: async () => {
        // Sidecars go first and the DB last: if a removal fails midway, what is
        // left is still a DB this check flags again, never an orphaned log
        // waiting to be replayed into a new database.
        const sidecarSuffixes = ['-wal', '-shm', '-journal']
        for (const suffix of sidecarSuffixes) {
          await rm(`${dbPath}${suffix}`, { force: true })
        }
        await rm(dbPath, { force: true })
        return { summary: `deleted corrupted ${VECTOR_INDEX_REL_PATH}`, changedPaths: [] }
      },
    },
  }
}
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
import { join } from 'node:path'
|
|
2
|
+
|
|
3
|
+
// Type-only import: erased at runtime, so it does NOT evaluate
|
|
4
|
+
// @huggingface/transformers (which eagerly `import sharp`s, crashing the
|
|
5
|
+
// container at startup when sharp's linux binary is missing). The runtime
|
|
6
|
+
// values are pulled lazily via `loadTransformers()` below.
|
|
7
|
+
import type { env as TransformersEnvValue, pipeline as TransformersPipeline } from '@huggingface/transformers'
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
assertModelCacheCompatible,
|
|
11
|
+
EMBEDDING_DIMS,
|
|
12
|
+
EMBEDDING_MODEL_DTYPE,
|
|
13
|
+
EMBEDDING_MODEL_ID,
|
|
14
|
+
EMBEDDING_MODEL_NAME,
|
|
15
|
+
} from '@/models/embedding-model'
|
|
16
|
+
import { getResolvedTransformersVersion } from '@/models/transformers-version'
|
|
17
|
+
|
|
18
|
+
import { homeRoot } from '../../../hostd/paths'
|
|
19
|
+
import { type BoundedText, boundEmbeddableText, MAX_MODEL_TOKENS } from './truncation'
|
|
20
|
+
|
|
21
|
+
// Aliases kept so existing imports inside the vector subsystem keep working.
// The source of truth is @/models/embedding-model, which is shared between the
// host and the container.
export const MODEL_NAME = EMBEDDING_MODEL_NAME
export const DIMS = EMBEDDING_DIMS
export const MODEL_DTYPE = EMBEDDING_MODEL_DTYPE
export { EMBEDDING_MODEL_ID }

// Role of a text being embedded; it selects the prefix added before embedding.
export type EmbedType = 'query' | 'passage'

// Number of inputs handed to the extractor per forward pass.
//
// Embedding a whole corpus in one call (a startup index build over thousands
// of shards and fragments) allocates activation tensors for every input at
// once, and that spike gets the container OOM-killed mid-build: the agent
// boots, then dies with nothing but a SIGKILL. Processing in chunks bounds
// peak memory to a single batch regardless of corpus size. The model is loaded
// once and reused for every chunk, so the only cost is running the passes
// sequentially.
const EMBED_BATCH_SIZE = 64

type TransformersEnv = typeof TransformersEnvValue
type FeatureExtractor = Awaited<ReturnType<typeof TransformersPipeline<'feature-extraction'>>>

// The two runtime values this module needs from @huggingface/transformers.
//
// They are obtained through a lazy, memoized dynamic import rather than a
// top-level static import. The memory plugin is always loaded while
// `vector.enabled` defaults to false, so a static import would pull the heavy
// native stack (sharp / onnxruntime) into every container boot, and crash the
// boot when sharp cannot resolve its platform binary. Memoization means the
// module is evaluated at most once.
type TransformersModule = { env: TransformersEnv; pipeline: typeof TransformersPipeline }
|
|
48
|
+
|
|
49
|
+
// Memo for the in-flight or completed transformers module load.
let transformersModulePromise: Promise<TransformersModule> | undefined

// Production importer: dynamically imports the package and keeps only the two
// members this module uses.
const realTransformersImport = async (): Promise<TransformersModule> => {
  const { env, pipeline } = await import('@huggingface/transformers')
  return { env, pipeline }
}

// Swappable importer. It points at the real dynamic import by default; tests
// replace it to drive the module-load layer (for example: fail once, then
// succeed). Bun's mock.module snapshots the mocked namespace at registration
// time, so a failure toggled at runtime cannot be expressed through it, which
// is why this seam exists.
let importTransformers: () => Promise<TransformersModule> = realTransformersImport

// Test hook: installs `importer` (or restores the real one when given
// `undefined`) and drops the memoized module so the next load goes through the
// newly installed importer.
export function __setTransformersImporterForTests(importer: (() => Promise<TransformersModule>) | undefined): void {
  importTransformers = importer != null ? importer : realTransformersImport
  transformersModulePromise = undefined
}
|
|
65
|
+
|
|
66
|
+
// Swappable cache-compatibility check, built the same way as the importer
// seam. The production check asserts that the host-stamped sentinel matches
// this container before the local_files_only load. The embedder's own
// mechanics tests (batching, lazy-load, warm-up) mock transformers and run
// against the default cache path, which has no sentinel, so they install a
// no-op here; the sentinel guard itself is covered in embedding-model.test.ts.
const realModelCacheCheck = (): Promise<void> => {
  const cacheDir = modelCachePath()
  const expected = { transformers: getResolvedTransformersVersion() }
  return assertModelCacheCompatible(cacheDir, expected)
}

let verifyModelCache: () => Promise<void> = realModelCacheCheck

// Test hook: installs `check`, or restores the production check when given
// `undefined`.
export function __setModelCacheCheckForTests(check: (() => Promise<void>) | undefined): void {
  verifyModelCache = check != null ? check : realModelCacheCheck
}
|
|
80
|
+
|
|
81
|
+
// Returns the memoized transformers module, starting the import on first use.
//
// A rejected load is not kept: the memo is reset on failure (the same approach
// getEmbedder takes), so a transient failure of the dynamic import or native
// module load leads to a fresh attempt on the next call. Without the reset,
// every later caller would await the same dead promise and per-turn embedding
// would stay broken for the life of the process.
function loadTransformers(): Promise<TransformersModule> {
  if (transformersModulePromise == null) {
    transformersModulePromise = importTransformers().catch((cause) => {
      transformersModulePromise = undefined
      throw cause
    })
  }
  return transformersModulePromise
}
|
|
92
|
+
|
|
93
|
+
// Wraps a feature-extraction pipeline and turns texts into embedding vectors.
// Instances are created through Embedder.load().
export class Embedder {
  private constructor(private readonly extractor: FeatureExtractor) {}

  // Loads the transformers module, verifies the model cache, configures the
  // library for local-only use, and builds the extractor for MODEL_NAME.
  static async load(): Promise<Embedder> {
    const { env, pipeline } = await loadTransformers()

    // The cache is checked BEFORE the local_files_only load. A transformers
    // version drift between host and container (or a hand-copied cache) would
    // otherwise show up as a cryptic missing-file error or silently load a
    // stale variant; the check fails loudly with a refresh hint instead.
    await verifyModelCache()
    configureTransformers(env)

    const extractor = await pipeline('feature-extraction', MODEL_NAME, { local_files_only: true, dtype: MODEL_DTYPE })
    return new Embedder(extractor)
  }

  // Embeds `texts` as queries or passages and returns one vector per input, in
  // input order. An empty input returns an empty list without touching the
  // extractor or logging anything.
  async embed(texts: string[], type: EmbedType): Promise<Float32Array[]> {
    if (texts.length === 0) return []

    // Every input is cut to the model's token budget before the tokenizer sees
    // it. The tokenizer would otherwise truncate silently at 512 tokens; doing
    // it here makes the cut deterministic and owned by this code, and the
    // leading heading / belief sentence (the load-bearing retrieval signal)
    // always survives because it comes first. The dreaming subagent separately
    // compacts the topic shards that exceed the budget, but queries and stream
    // fragments are never rewritten by dreaming and are not covered by its
    // over_budget table, so the warning below is their only observability path.
    const bounded = texts.map((text) => boundEmbeddableText(text))
    warnIfBounded(bounded, type)

    // The total is logged before any work so that a process dying mid-build
    // still leaves a line saying how much it was embedding. Peak memory no
    // longer scales with this total because of the chunking below, but the
    // count remains the useful breadcrumb for a slow or wedged build.
    logEmbedBatch(texts.length, type)

    const inputs = prefixTexts(
      bounded.map((entry) => entry.text),
      type,
    )
    const total = inputs.length

    // Per-chunk progress uses the same LARGE_EMBED threshold as the up-front
    // line: only the startup index migration runs long enough for "wedged or
    // just slow?" to be a real question. Smaller embeds (queries, per-write
    // upserts) finish in a pass or two and would only spam the logs.
    const reportProgress = total >= LARGE_EMBED

    const embeddings: Float32Array[] = []
    let offset = 0
    while (offset < total) {
      const chunk = inputs.slice(offset, offset + EMBED_BATCH_SIZE)
      const output = await this.extractor(chunk, { pooling: 'mean', normalize: true })
      for (const vector of toEmbeddings(output.data, chunk.length)) {
        embeddings.push(vector)
      }
      if (reportProgress) logEmbedProgress(embeddings.length, total, type)
      offset += EMBED_BATCH_SIZE
    }
    return embeddings
  }
}
|
|
150
|
+
|
|
151
|
+
// Memo for the shared embedder load; null while no load is cached.
let embedderInstance: Promise<Embedder> | null = null

// Returns the shared embedder, starting its load on first use.
//
// A failed load resets the memo, so a transient failure (e.g. the boot warm-up
// racing the host model mount) turns into a retry on the next call instead of
// a cached rejection that poisons every later per-turn embed.
export function getEmbedder(): Promise<Embedder> {
  if (embedderInstance == null) {
    embedderInstance = Embedder.load().catch((cause) => {
      embedderInstance = null
      throw cause
    })
  }
  return embedderInstance
}
|
|
163
|
+
|
|
164
|
+
// Readiness step for boot: loads the lazy embedder eagerly so the first
// per-turn query embed does not pay the ~2-5s ONNX init on the critical path.
// It is called only on the vector-enabled boot path (see src/run/index.ts), so
// boots with vectors disabled still never import the heavy modules.
export async function warmEmbedder(): Promise<void> {
  const ready = getEmbedder()
  await ready
}
|
|
171
|
+
|
|
172
|
+
// Convenience wrapper: embeds `texts` with the shared embedder, loading it on
// first use.
export async function embed(texts: string[], type: EmbedType): Promise<Float32Array[]> {
  const embedder = await getEmbedder()
  return embedder.embed(texts, type)
}
|
|
175
|
+
|
|
176
|
+
// Emits one content-free warning when at least one input was bounded, so that
// truncation shows up in the logs. The dreaming over_budget table covers topic
// shards only, which makes this the sole signal for queries and stream
// fragments. Only counts and the largest token estimate are logged, never the
// text itself, so memory content cannot leak through this line.
function warnIfBounded(results: readonly BoundedText[], type: EmbedType): void {
  let trimmedCount = 0
  let worst = 0
  for (const entry of results) {
    if (!entry.bounded) continue
    trimmedCount += 1
    worst = Math.max(worst, entry.estimatedTokens)
  }
  if (trimmedCount === 0) return

  console.warn(
    `[memory] vector embedding: bounded ${trimmedCount}/${results.length} ${type} input(s) to the ` +
      `${MAX_MODEL_TOKENS}-token model limit (worst ~${worst} est. tokens); their tail is not embedded`,
  )
}
|
|
189
|
+
|
|
190
|
+
// Input count from which an embed is treated as a "large build".
//
// A large embed (a startup index build over thousands of passages) used to be
// the prime suspect when a container booted, logged, then died without an
// error, because the onnxruntime activation tensors triggered the OOM killer.
// Embeds are now chunked at EMBED_BATCH_SIZE, so this is no longer a fatal
// threshold; it only marks where a build is big enough for its duration to be
// worth announcing up front (the count stays the breadcrumb if the build
// wedges or is slow).
const LARGE_EMBED = 256

// Logs the size of an embed before it starts, adding a "large build" note once
// `count` reaches LARGE_EMBED.
function logEmbedBatch(count: number, type: EmbedType): void {
  const line = `[memory] vector embedding: ${count} ${type} input(s) (chunked at ${EMBED_BATCH_SIZE}/pass)`
  const isLarge = count >= LARGE_EMBED
  console.info(isLarge ? `${line} — large build, this may take a while` : line)
}
|
|
206
|
+
|
|
207
|
+
// Logs how many of `total` inputs have been embedded so far, with the
// percentage rounded down.
function logEmbedProgress(done: number, total: number, type: EmbedType): void {
  const ratio = done / total
  const percent = Math.floor(ratio * 100)
  const line = `[memory] vector embedding: ${done}/${total} ${type} input(s) embedded (${percent}%)`
  console.info(line)
}
|
|
211
|
+
|
|
212
|
+
// Points the transformers runtime at the local model cache and turns off
// remote model downloads.
function configureTransformers(env: TransformersEnv): void {
  const cacheDir = modelCachePath()
  env.localModelPath = cacheDir
  env.allowRemoteModels = false
}
|
|
216
|
+
|
|
217
|
+
// Resolves the model cache directory: the TYPECLAW_MODEL_CACHE environment
// variable when it is set to a non-empty value, otherwise `models` under the
// home root.
function modelCachePath(): string {
  const fromEnv = process.env.TYPECLAW_MODEL_CACHE
  // `||` rather than `??` on purpose: an empty string must fall back as well.
  return fromEnv || join(homeRoot(), 'models')
}
|
|
222
|
+
|
|
223
|
+
// Prepends the role marker for `type` ('query: ' or 'passage: ') to every
// text, returning a new array in the same order.
function prefixTexts(texts: string[], type: EmbedType): string[] {
  let marker: string
  if (type === 'query') {
    marker = 'query: '
  } else {
    marker = 'passage: '
  }

  const prefixed: string[] = []
  for (const text of texts) {
    prefixed.push(`${marker}${text}`)
  }
  return prefixed
}
|
|
227
|
+
|
|
228
|
+
// Splits the extractor's flat output into `count` vectors of DIMS values each.
// Throws when the flat length is not exactly `count * DIMS`. Each vector is a
// copy (`slice`), not a view onto the flat buffer.
function toEmbeddings(data: unknown, count: number): Float32Array[] {
  const flat = toFloat32Array(data)
  if (flat.length !== count * DIMS) {
    throw new Error(`unexpected ${MODEL_NAME} embedding size: got ${flat.length}, expected ${count * DIMS}`)
  }

  const vectors: Float32Array[] = []
  for (let row = 0; row < count; row += 1) {
    const begin = row * DIMS
    vectors.push(flat.slice(begin, (row + 1) * DIMS))
  }
  return vectors
}
|
|
236
|
+
|
|
237
|
+
// Coerces extractor output to a Float32Array. A Float32Array is returned
// as-is; any other numeric typed array is copied into a new Float32Array;
// everything else is rejected with an error.
function toFloat32Array(data: unknown): Float32Array {
  if (data instanceof Float32Array) return data
  if (isNumericArrayLike(data)) return Float32Array.from(data)
  throw new Error(`${MODEL_NAME} returned non-numeric embeddings`)
}
|
|
242
|
+
|
|
243
|
+
// Type guard: true for ArrayBuffer views that expose `length` and are not one
// of the BigInt typed arrays. Plain arrays and DataView therefore do not
// qualify.
function isNumericArrayLike(data: unknown): data is ArrayLike<number> {
  return (
    ArrayBuffer.isView(data) &&
    'length' in data &&
    !(data instanceof BigInt64Array || data instanceof BigUint64Array)
  )
}
|