typeclaw 0.36.8 → 0.37.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/README.md +3 -3
  2. package/package.json +3 -2
  3. package/src/agent/index.ts +31 -11
  4. package/src/agent/live-sessions.ts +12 -0
  5. package/src/agent/model-fallback.ts +17 -15
  6. package/src/agent/model-overrides.ts +2 -2
  7. package/src/agent/session-meta.ts +10 -0
  8. package/src/agent/subagents.ts +30 -3
  9. package/src/agent/system-prompt.ts +9 -3
  10. package/src/agent/todo/continuation-policy.ts +6 -3
  11. package/src/agent/todo/continuation-wiring.ts +4 -2
  12. package/src/agent/todo/continuation.ts +3 -3
  13. package/src/agent/tools/todo/index.ts +27 -4
  14. package/src/bundled-plugins/agent-browser/index.ts +33 -108
  15. package/src/bundled-plugins/agent-browser/shim.ts +3 -94
  16. package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
  17. package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
  18. package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
  19. package/src/bundled-plugins/memory/README.md +80 -23
  20. package/src/bundled-plugins/memory/append-tool.ts +74 -53
  21. package/src/bundled-plugins/memory/citation-superset.ts +4 -0
  22. package/src/bundled-plugins/memory/citations.ts +54 -0
  23. package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
  24. package/src/bundled-plugins/memory/dreaming.ts +444 -21
  25. package/src/bundled-plugins/memory/index.ts +544 -400
  26. package/src/bundled-plugins/memory/load-memory.ts +87 -10
  27. package/src/bundled-plugins/memory/load-shards.ts +48 -22
  28. package/src/bundled-plugins/memory/memory-logger.ts +95 -106
  29. package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
  30. package/src/bundled-plugins/memory/parent-link.ts +33 -0
  31. package/src/bundled-plugins/memory/paths.ts +12 -0
  32. package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
  33. package/src/bundled-plugins/memory/references/load-references.ts +212 -0
  34. package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
  35. package/src/bundled-plugins/memory/search-tool.ts +282 -45
  36. package/src/bundled-plugins/memory/stream-events.ts +1 -0
  37. package/src/bundled-plugins/memory/stream-io.ts +28 -3
  38. package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
  39. package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
  40. package/src/bundled-plugins/memory/vector/config.ts +28 -0
  41. package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
  42. package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
  43. package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
  44. package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
  45. package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
  46. package/src/bundled-plugins/memory/vector/passages.ts +125 -0
  47. package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
  48. package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
  49. package/src/bundled-plugins/memory/vector/startup.ts +71 -0
  50. package/src/bundled-plugins/memory/vector/store.ts +203 -0
  51. package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
  52. package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
  53. package/src/channels/router.ts +239 -40
  54. package/src/cli/incomplete-init.ts +57 -0
  55. package/src/cli/init.ts +166 -18
  56. package/src/cli/inspect.ts +11 -5
  57. package/src/cli/model.ts +115 -36
  58. package/src/cli/provider.ts +5 -3
  59. package/src/cli/restart.ts +24 -0
  60. package/src/cli/start.ts +24 -0
  61. package/src/cli/tunnel.ts +53 -8
  62. package/src/config/config.ts +110 -19
  63. package/src/config/index.ts +5 -1
  64. package/src/config/models-mutation.ts +29 -11
  65. package/src/config/providers-mutation.ts +2 -2
  66. package/src/config/providers.ts +146 -12
  67. package/src/container/shared.ts +9 -0
  68. package/src/container/start.ts +87 -4
  69. package/src/cron/consumer.ts +13 -7
  70. package/src/hostd/models.ts +64 -0
  71. package/src/hostd/paths.ts +6 -0
  72. package/src/hostd/portbroker-manager.ts +2 -2
  73. package/src/init/checkpoint.ts +201 -0
  74. package/src/init/dockerfile.ts +121 -34
  75. package/src/init/gitignore.ts +7 -7
  76. package/src/init/index.ts +41 -9
  77. package/src/init/models-dev.ts +96 -21
  78. package/src/init/oauth-login.ts +3 -3
  79. package/src/init/progress.ts +29 -0
  80. package/src/init/validate-api-key.ts +4 -0
  81. package/src/inspect/index.ts +13 -6
  82. package/src/inspect/item-list.ts +11 -2
  83. package/src/inspect/live-list.ts +65 -0
  84. package/src/inspect/open-item.ts +22 -1
  85. package/src/inspect/session-list.ts +29 -0
  86. package/src/models/embedding-model.ts +114 -0
  87. package/src/models/transformers-version.ts +55 -0
  88. package/src/plugin/types.ts +3 -0
  89. package/src/portbroker/container-server.ts +23 -0
  90. package/src/portbroker/forward-request-bus.ts +35 -0
  91. package/src/portbroker/forward-result-bus.ts +2 -3
  92. package/src/portbroker/hostd-client.ts +182 -36
  93. package/src/portbroker/index.ts +6 -1
  94. package/src/portbroker/protocol.ts +9 -2
  95. package/src/run/channel-session-factory.ts +11 -1
  96. package/src/run/index.ts +65 -8
  97. package/src/server/command-runner.ts +24 -1
  98. package/src/server/index.ts +42 -8
  99. package/src/shared/index.ts +2 -0
  100. package/src/shared/protocol.ts +31 -0
  101. package/src/skills/typeclaw-channels/SKILL.md +4 -4
  102. package/src/skills/typeclaw-config/SKILL.md +2 -2
  103. package/src/skills/typeclaw-memory/SKILL.md +3 -1
  104. package/src/skills/typeclaw-permissions/SKILL.md +3 -3
  105. package/src/skills/typeclaw-skills/SKILL.md +1 -1
  106. package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
  107. package/src/tunnels/providers/cloudflare-quick.ts +65 -7
  108. package/src/tunnels/upstream-probe.ts +25 -0
  109. package/typeclaw.schema.json +156 -67
  110. package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
  111. package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
  112. package/src/portbroker/bind-with-forward.ts +0 -102
@@ -1,15 +1,20 @@
1
+ import { writeFile } from 'node:fs/promises'
2
+
1
3
  import { z } from 'zod'
2
4
 
3
5
  import { defineTool } from '@/plugin'
4
6
 
5
- import { loadAllShards, type TopicShard } from './load-shards'
7
+ import { loadAllShards, loadShard, type TopicShard } from './load-shards'
8
+ import { renderReference } from './references/frontmatter'
9
+ import { loadAllReferences, loadReference, type Reference } from './references/load-references'
6
10
  import type { FragmentEvent, LegacyProseEvent, StreamEvent } from './stream-events'
7
11
  import { readAllUndreamedStreamDays, type UndreamedStreamDay } from './stream-io'
8
12
 
9
13
  const DEFAULT_MAX_RESULTS = 10
10
14
  const EXCERPT_CONTEXT_LINES = 3
15
+ const EXCERPT_HEAD_LINES = 7
11
16
 
12
- type TopicMatch = {
17
+ export type TopicMatch = {
13
18
  source: 'topic'
14
19
  shardPath: string
15
20
  slug: string
@@ -18,7 +23,7 @@ type TopicMatch = {
18
23
  fullBody?: string
19
24
  }
20
25
 
21
- type StreamMatch = {
26
+ export type StreamMatch = {
22
27
  source: 'stream'
23
28
  streamPath: string
24
29
  date: string
@@ -28,43 +33,135 @@ type StreamMatch = {
28
33
  fullBody?: string
29
34
  }
30
35
 
31
- type MemorySearchMatch = TopicMatch | StreamMatch
36
+ export type ReferenceMatch = {
37
+ source: 'reference'
38
+ slug: string
39
+ title: string
40
+ excerpt: string
41
+ created: string
42
+ fullBody?: string
43
+ }
32
44
 
33
- type MemorySearchResult = { matches: MemorySearchMatch[]; truncatedAt?: number } | { error: string }
45
+ export type MemorySearchMatch = TopicMatch | StreamMatch | ReferenceMatch
46
+
47
+ export type MemorySearchResult = { matches: MemorySearchMatch[]; truncatedAt?: number } | { error: string }
48
+
49
+ export type Matcher = (haystack: string) => boolean
50
+
51
// Builds the `memory_search` tool. One call either looks up a single entry by
// exact slug (`topic`) or searches topic shards, references and undreamed
// stream days (`query`); passing both, or neither, is rejected.
//
// Search flow: build the phrase/regex matcher, validate `since`/`before`, load
// all three sources in parallel, drop references that are demoted or outside
// the date bounds, run the exact search, and fall back to the ranked word
// search only when the exact search returned no matches. References that end
// up in the result get their access bookkeeping written back.
export function createMemorySearchTool() {
  return defineTool({
    description:
      'Search the agent\'s long-term memory, or look up one topic shard by exact slug. Covers topic shards under memory/topics/ (consolidated facts), references under memory/references/ (verbatim artifacts), and undreamed daily-stream events under memory/streams/ (recent fragments not yet folded into shards). Pass `query` for search OR `topic` for an exact slug lookup, not both. Search is case-insensitive substring by default: tries the whole query as one phrase first, and if that finds nothing, falls back to OR-matching the individual words (ranked by how many words each hit contains) — so a multi-word query still returns results even when no entry contains the exact phrase. asRegex=true treats query as a JavaScript regex (no word fallback). `topic` skips search entirely and returns that one shard (or reference) with its full body — use it to read a topic OR reference whose slug you already have (e.g. a heading shown in injected memory); it resolves the topic shard first and falls back to a reference of the same slug. Returns matches discriminated by `source: "topic" | "reference" | "stream"`, each with line-context excerpts; full=true includes complete bodies (topic lookups always include the full body). Ordering depends on mode: exact-phrase (and regex) results list all topic matches first (alphabetical by slug), then reference matches, then stream matches (newest day first); word-fallback results are ranked by matched-word count, with that same topic-then-reference-then-stream-newest order as the tiebreak within each score band, so a higher-scoring stream match can precede a lower-scoring topic match.',
    parameters: z.object({
      query: z.string().optional(),
      topic: z.string().optional(),
      asRegex: z.boolean().default(false),
      full: z.boolean().default(false),
      maxResults: z.number().int().min(0).default(DEFAULT_MAX_RESULTS),
      since: z.string().optional(),
      before: z.string().optional(),
    }),
    async execute({ query, topic, asRegex, full, maxResults, since, before }, ctx) {
      // True when both are missing or both are present: exactly one is required.
      if ((query === undefined) === (topic === undefined)) {
        return resultToToolResult({ error: 'provide exactly one of `query` or `topic`' })
      }

      if (topic !== undefined) {
        return resultToToolResult(await lookupTopic(ctx.agentDir, topic, ctx.logger))
      }

      const matcherOrError = buildMatcher(query!, asRegex)
      if (typeof matcherOrError === 'string') {
        return resultToToolResult({ error: matcherOrError })
      }

      // Validate the date bounds before touching the disk: a malformed
      // `since`/`before` is a caller error and must not cost a full load of
      // every shard, stream day and reference.
      const dateFilter = parseReferenceDateFilter(since, before)
      if ('error' in dateFilter) return resultToToolResult(dateFilter)

      const [shards, streamDays, allReferences] = await Promise.all([
        loadAllShards(ctx.agentDir, { logger: ctx.logger }),
        readAllUndreamedStreamDays(ctx.agentDir),
        loadAllReferences(ctx.agentDir, { logger: ctx.logger }),
      ])

      // The date bounds (and the demoted flag) only restrict references;
      // shards and stream days are searched unfiltered.
      const references = allReferences.filter((reference) => referenceCandidateAllowed(reference, dateFilter))
      if (shards.length === 0 && streamDays.length === 0 && references.length === 0) {
        return resultToToolResult({ matches: [], truncatedAt: 0 })
      }

      let result = searchAll(shards, streamDays, matcherOrError, { full, maxResults, references })
      if ('matches' in result && result.matches.length === 0) {
        const fallback = tokenFallback(query!, asRegex, shards, streamDays, references, { full, maxResults })
        if (fallback !== null) result = fallback
      }
      if ('matches' in result) await bumpReturnedReferences(allReferences, result.matches)
      return resultToToolResult(result)
    },
  })
}
34
101
 
35
- type Matcher = (haystack: string) => boolean
102
+ export const memorySearchTool = createMemorySearchTool()
103
+
104
+ // Exact slug lookup, so the agent can read a topic OR reference whose slug the
105
+ // per-turn injection already showed it without re-running a fuzzy search for a
106
+ // body the retrieval layer already located. The injected memory block renders
107
+ // both topic and reference entries with a `slug:` line and a single recovery
108
+ // hint (`memory_search({ topic: "<slug>" })`), so this lookup must resolve both:
109
+ // it tries the topic shard first, then falls back to a reference of the same
110
+ // slug. A traversal slug makes the path builder throw inside the loader — caught
111
+ // and returned as a structured error, not a crash. A slug that matches neither
112
+ // returns empty matches, the same shape as a search that hit nothing.
113
+ async function lookupTopic(
114
+ agentDir: string,
115
+ slug: string,
116
+ logger?: { warn(message: string): void },
117
+ ): Promise<MemorySearchResult> {
118
+ const loaderOptions = logger === undefined ? {} : { logger }
119
+ let shard: TopicShard | null
120
+ try {
121
+ shard = await loadShard(agentDir, slug, loaderOptions)
122
+ } catch (err) {
123
+ return { error: `invalid topic slug: ${err instanceof Error ? err.message : String(err)}` }
124
+ }
125
+ if (shard !== null) return { matches: [topicMatchWithFullBody(shard)] }
126
+
127
+ const reference = await loadReference(agentDir, slug, loaderOptions)
128
+ if (reference !== null) {
129
+ const match = referenceMatchWithFullBody(reference)
130
+ // A reference reached via topic-slug lookup is a real access — record it so
131
+ // it advances accessCount/lastAccessed the same as a query hit, otherwise the
132
+ // injected-slug use case this fallback unlocks would still decay as unused.
133
+ await bumpReturnedReferences([reference], [match])
134
+ return { matches: [match] }
135
+ }
36
136
 
37
- export const memorySearchTool = defineTool({
38
- description:
39
- 'Search the agent\'s long-term memory. Covers both topic shards under memory/topics/ (consolidated facts) and undreamed daily-stream events under memory/streams/ (recent fragments not yet folded into shards). Case-insensitive substring by default: tries the whole query as one phrase first, and if that finds nothing, falls back to OR-matching the individual words (ranked by how many words each hit contains) — so a multi-word query still returns results even when no entry contains the exact phrase. asRegex=true treats query as a JavaScript regex (no word fallback). Returns matches discriminated by `source: "topic" | "stream"`, each with line-context excerpts; full=true includes complete bodies. Ordering depends on mode: exact-phrase (and regex) results list all topic matches first (alphabetical by slug), then stream matches (newest day first); word-fallback results are ranked by matched-word count, with that same topic-first/stream-newest order as the tiebreak within each score band, so a higher-scoring stream match can precede a lower-scoring topic match.',
40
- parameters: z.object({
41
- query: z.string(),
42
- asRegex: z.boolean().default(false),
43
- full: z.boolean().default(false),
44
- maxResults: z.number().int().min(0).default(DEFAULT_MAX_RESULTS),
45
- }),
46
- async execute({ query, asRegex, full, maxResults }, ctx) {
47
- const matcherOrError = buildMatcher(query, asRegex)
48
- if (typeof matcherOrError === 'string') {
49
- return resultToToolResult({ error: matcherOrError })
50
- }
137
+ return { matches: [] }
138
+ }
51
139
 
52
- const [shards, streamDays] = await Promise.all([
53
- loadAllShards(ctx.agentDir, { logger: ctx.logger }),
54
- readAllUndreamedStreamDays(ctx.agentDir),
55
- ])
56
- if (shards.length === 0 && streamDays.length === 0) {
57
- return resultToToolResult({ matches: [], truncatedAt: 0 })
58
- }
140
// Converts a shard into a topic match that always carries the complete body;
// the excerpt is produced by `excerpt` from that same body.
function topicMatchWithFullBody(shard: TopicShard): TopicMatch {
  const { path, slug, frontmatter, body } = shard
  const result: TopicMatch = {
    source: 'topic',
    shardPath: path,
    slug,
    heading: frontmatter.heading,
    excerpt: excerpt(body),
    fullBody: body,
  }
  return result
}
59
150
 
60
- const result = searchAll(shards, streamDays, matcherOrError, { full, maxResults })
61
- if ('matches' in result && result.matches.length === 0) {
62
- const fallback = tokenFallback(query, asRegex, shards, streamDays, { full, maxResults })
63
- if (fallback !== null) return resultToToolResult(fallback)
64
- }
65
- return resultToToolResult(result)
66
- },
67
- })
151
// Converts a reference into a reference match that always carries the complete
// body; the excerpt is produced by `excerpt` from that same body.
function referenceMatchWithFullBody(reference: Reference): ReferenceMatch {
  const { slug, frontmatter, body } = reference
  const result: ReferenceMatch = {
    source: 'reference',
    slug,
    title: frontmatter.title,
    excerpt: excerpt(body),
    created: frontmatter.created,
    fullBody: body,
  }
  return result
}
161
+
162
// Head-of-body excerpt: the first EXCERPT_HEAD_LINES lines of the body,
// rejoined with newlines.
function excerpt(body: string): string {
  const headLines = splitBodyLines(body).slice(0, EXCERPT_HEAD_LINES)
  return headLines.join('\n')
}
68
165
 
69
166
  // Phrase-first/token-fallback: the descriptive multi-word queries the
70
167
  // retrieval subagent issues rarely appear verbatim in any body, so a
@@ -81,16 +178,17 @@ function tokenFallback(
81
178
  asRegex: boolean,
82
179
  shards: TopicShard[],
83
180
  streamDays: UndreamedStreamDay[],
181
+ references: Reference[],
84
182
  options: { full: boolean; maxResults: number },
85
183
  ): MemorySearchResult | null {
86
184
  if (asRegex) return null
87
185
  const tokens = distinctTokens(query)
88
186
  if (tokens.length === 0) return null
89
187
  if (tokens.length === 1 && tokens[0] === query.trim().toLowerCase()) return null
90
- return searchAllRanked(shards, streamDays, tokens, options)
188
+ return searchAllRanked(shards, streamDays, tokens, { ...options, references })
91
189
  }
92
190
 
93
- function distinctTokens(query: string): string[] {
191
+ export function distinctTokens(query: string): string[] {
94
192
  return [
95
193
  ...new Set(
96
194
  query
@@ -101,7 +199,7 @@ function distinctTokens(query: string): string[] {
101
199
  ]
102
200
  }
103
201
 
104
- function buildMatcher(query: string, asRegex: boolean): Matcher | string {
202
+ export function buildMatcher(query: string, asRegex: boolean): Matcher | string {
105
203
  if (asRegex) {
106
204
  try {
107
205
  const regex = new RegExp(query, 'i')
@@ -122,11 +220,11 @@ function buildMatcher(query: string, asRegex: boolean): Matcher | string {
122
220
  // before topic matches when `maxResults` is exhausted. The agent reading
123
221
  // results in this order sees long-term consolidated truth before recent
124
222
  // ephemeral fragments, which mirrors the injection-side rendering order.
125
- function searchAll(
223
+ export function searchAll(
126
224
  shards: TopicShard[],
127
225
  streamDays: UndreamedStreamDay[],
128
226
  matcher: Matcher,
129
- options: { full: boolean; maxResults: number },
227
+ options: { full: boolean; maxResults: number; references?: Reference[] },
130
228
  ): MemorySearchResult {
131
229
  const matches: MemorySearchMatch[] = []
132
230
  let truncatedAt: number | undefined
@@ -146,6 +244,12 @@ function searchAll(
146
244
  if (!push(match)) return { matches, truncatedAt: truncatedAt! }
147
245
  }
148
246
 
247
+ for (const reference of options.references ?? []) {
248
+ const match = matchReference(reference, matcher, options.full)
249
+ if (match === null) continue
250
+ if (!push(match)) return { matches, truncatedAt: truncatedAt! }
251
+ }
252
+
149
253
  for (let i = streamDays.length - 1; i >= 0; i--) {
150
254
  const day = streamDays[i]!
151
255
  for (const event of day.events) {
@@ -165,19 +269,32 @@ function searchAll(
165
269
  // natural enumeration order (topics first in loadAllShards order, then stream
166
270
  // days newest-first), so the established ordering contract holds within each
167
271
  // score band. maxResults truncation is applied last, after ranking.
168
- function searchAllRanked(
272
+ //
273
+ // `tokenMatchMode` defaults to 'substring' (the tool-path contract: `memory_search`
274
+ // is a deliberate agent query). The hybrid keyword lane opts into 'ascii-boundary'
275
+ // because its query is a whole user prompt, where unanchored substrings let short
276
+ // tokens ('in', 'do', 'ci') match inside unrelated words and over-score verbose
277
+ // shards. Both the match predicate and the score use the SAME per-token matchers,
278
+ // so a shard cannot rank on a hit the matcher wouldn't have counted.
279
+ export function searchAllRanked(
169
280
  shards: TopicShard[],
170
281
  streamDays: UndreamedStreamDay[],
171
282
  tokens: string[],
172
- options: { full: boolean; maxResults: number },
283
+ options: {
284
+ full: boolean
285
+ maxResults: number
286
+ references?: Reference[]
287
+ tokenMatchMode?: 'substring' | 'ascii-boundary'
288
+ },
173
289
  ): MemorySearchResult {
290
+ const tokenMatchers = tokens.map((t) => buildTokenMatcher(t, options.tokenMatchMode ?? 'substring'))
174
291
  const anyToken: Matcher = (haystack) => {
175
292
  const lower = haystack.toLowerCase()
176
- return tokens.some((t) => lower.includes(t))
293
+ return tokenMatchers.some((matches) => matches(lower))
177
294
  }
178
295
  const scoreOf = (text: string): number => {
179
296
  const lower = text.toLowerCase()
180
- return tokens.reduce((n, t) => (lower.includes(t) ? n + 1 : n), 0)
297
+ return tokenMatchers.reduce((n, matches) => (matches(lower) ? n + 1 : n), 0)
181
298
  }
182
299
 
183
300
  const scored: Array<{ match: MemorySearchMatch; score: number; order: number }> = []
@@ -189,6 +306,12 @@ function searchAllRanked(
189
306
  scored.push({ match, score: scoreOf(shardSearchText(shard)), order: order++ })
190
307
  }
191
308
 
309
+ for (const reference of options.references ?? []) {
310
+ const match = matchReference(reference, anyToken, options.full)
311
+ if (match === null) continue
312
+ scored.push({ match, score: scoreOf(referenceSearchText(reference)), order: order++ })
313
+ }
314
+
192
315
  for (let i = streamDays.length - 1; i >= 0; i--) {
193
316
  const day = streamDays[i]!
194
317
  for (const event of day.events) {
@@ -206,6 +329,27 @@ function searchAllRanked(
206
329
  return { matches: scored.map((s) => s.match) }
207
330
  }
208
331
 
332
+ // A per-token predicate over an ALREADY-lowercased haystack. 'substring' is plain
333
+ // `includes`. 'ascii-boundary' anchors ASCII tokens between alnum boundaries
334
+ // (NOT `\b`, which is unreliable for CJK) so 'in'/'do' stop matching inside
335
+ // 'reload'/'docker'; a token containing any non-ASCII char (e.g. '홍길동') has no
336
+ // reliable ASCII boundary and falls back to substring.
337
// Returns the predicate used to test one token against an already-lowercased
// haystack. Plain `includes` is used in 'substring' mode and for any token
// containing a non-ASCII character; otherwise the token is regex-escaped and
// must sit between non-[a-z0-9] characters (or the string edges).
function buildTokenMatcher(token: string, mode: 'substring' | 'ascii-boundary'): (lowerHaystack: string) => boolean {
  const useSubstring = mode === 'substring' || hasNonAscii(token)
  if (useSubstring) {
    return (haystack) => haystack.includes(token)
  }

  const literal = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const anchored = new RegExp(`(?:^|[^a-z0-9])${literal}(?=$|[^a-z0-9])`)
  return (haystack) => anchored.test(haystack)
}
345
+
346
// True when any UTF-16 code unit of `text` lies above 0x7f.
export function hasNonAscii(text: string): boolean {
  // No `u` flag: the class is tested per code unit, matching a charCodeAt scan.
  return /[^\x00-\x7f]/.test(text)
}
352
+
209
353
// Text a shard is scored against: slug, heading, every tag (if any) and the
// body, one per line.
function shardSearchText(shard: TopicShard): string {
  const { slug, frontmatter, body } = shard
  const tags = frontmatter.tags ?? []
  return [slug, frontmatter.heading, ...tags, body].join('\n')
}
@@ -216,6 +360,10 @@ function eventSearchText(event: StreamEvent): string {
216
360
  return ''
217
361
  }
218
362
 
363
// Text a reference is scored against: slug, title, every tag and the body,
// one per line.
function referenceSearchText(reference: Reference): string {
  const { slug, frontmatter, body } = reference
  const parts = [slug, frontmatter.title, ...frontmatter.tags]
  parts.push(body)
  return parts.join('\n')
}
366
+
219
367
  function matchShard(shard: TopicShard, matcher: Matcher, full: boolean): TopicMatch | null {
220
368
  const bodyLines = splitBodyLines(shard.body)
221
369
  const firstBodyLineIndex = bodyLines.findIndex((line) => matcher(line))
@@ -239,6 +387,30 @@ function matchShard(shard: TopicShard, matcher: Matcher, full: boolean): TopicMa
239
387
  return match
240
388
  }
241
389
 
390
// Tests one reference against `matcher`. A hit on the slug, the title, any tag
// or any body line counts. The excerpt is the context around the first matching
// body line, or — when only metadata matched — the matching metadata value.
// `fullBody` is attached only when `full` is set. Returns null on no hit.
function matchReference(reference: Reference, matcher: Matcher, full: boolean): ReferenceMatch | null {
  const { slug, frontmatter, body } = reference
  const lines = splitBodyLines(body)
  const hitLine = lines.findIndex((line) => matcher(line))
  const bodyHit = hitLine !== -1

  const metadataHit =
    matcher(slug) || matcher(frontmatter.title) || frontmatter.tags.some((tag) => matcher(tag))
  if (!metadataHit && !bodyHit) return null

  const excerptText = bodyHit ? excerptForLine(lines, hitLine) : fallbackReferenceExcerpt(reference, matcher)
  const result: ReferenceMatch = {
    source: 'reference',
    slug,
    title: frontmatter.title,
    excerpt: excerptText,
    created: frontmatter.created,
  }
  if (full) result.fullBody = body
  return result
}
413
+
242
414
  // Stream-event matcher. `fragment` events expose `topic` + `body` for search;
243
415
  // `legacy_prose` exposes `text` (no id, no topic). `watermark` events carry
244
416
  // no human content and are skipped — they only mark dreaming progress.
@@ -315,15 +487,80 @@ function fallbackExcerpt(shard: TopicShard, matcher: Matcher): string {
315
487
  return matchedTag ?? shard.frontmatter.heading
316
488
  }
317
489
 
490
// Excerpt for a reference whose body did not match: the first of title, slug,
// or tag that satisfies `matcher`, defaulting to the title.
function fallbackReferenceExcerpt(reference: Reference, matcher: Matcher): string {
  const { slug, frontmatter } = reference
  const { title, tags } = frontmatter
  if (matcher(title)) return title
  if (matcher(slug)) return slug
  for (const tag of tags) {
    if (matcher(tag)) return tag
  }
  return title
}
496
+
497
+ type ReferenceDateFilter = { since?: Date; before?: Date }
498
+
499
// Parses the optional `since` / `before` bounds. An omitted bound stays
// undefined; the first bound that fails to parse short-circuits with its
// error message (`since` is checked before `before`).
function parseReferenceDateFilter(
  since: string | undefined,
  before: string | undefined,
): ReferenceDateFilter | { error: string } {
  let sinceDate: Date | undefined
  if (since !== undefined) {
    const parsed = parseDateParam('since', since)
    if (typeof parsed === 'string') return { error: parsed }
    sinceDate = parsed
  }

  let beforeDate: Date | undefined
  if (before !== undefined) {
    const parsed = parseDateParam('before', before)
    if (typeof parsed === 'string') return { error: parsed }
    beforeDate = parsed
  }

  return { since: sinceDate, before: beforeDate }
}
509
+
510
// Parses one date parameter with the `Date` constructor. Returns the Date on
// success, or an error message naming the parameter when the value does not
// parse to a valid time.
function parseDateParam(name: string, value: string): Date | string {
  const parsed = new Date(value)
  const isValid = !Number.isNaN(parsed.getTime())
  return isValid ? parsed : `invalid ${name}: expected ISO 8601 datetime string`
}
515
+
516
// Decides whether a reference may take part in a search.
//
// - Demoted references are always excluded.
// - With no date bound, every other reference is allowed.
// - With a bound, `created` must satisfy `since <= created < before`.
//
// A `created` value that does not parse is excluded whenever a bound is
// active: comparing an Invalid Date is always false, so without the explicit
// NaN check such a reference would slip past both bounds.
function referenceCandidateAllowed(reference: Reference, filter: ReferenceDateFilter): boolean {
  if (reference.frontmatter.demoted) return false

  const { since, before } = filter
  if (since === undefined && before === undefined) return true

  const createdMs = new Date(reference.frontmatter.created).getTime()
  if (Number.isNaN(createdMs)) return false
  if (since !== undefined && createdMs < since.getTime()) return false
  if (before !== undefined && createdMs >= before.getTime()) return false
  return true
}
523
+
524
// Records an access for every reference that appears in `matches`: the file is
// rewritten with `lastAccessed` set to now and `accessCount` incremented. The
// writes run concurrently; the call resolves when all of them have finished and
// rejects if any of them fails.
async function bumpReturnedReferences(references: Reference[], matches: MemorySearchMatch[]): Promise<void> {
  const returnedSlugs = new Set<string>()
  for (const match of matches) {
    if (match.source === 'reference') returnedSlugs.add(match.slug)
  }
  if (returnedSlugs.size === 0) return

  const writes: Promise<void>[] = []
  for (const reference of references) {
    if (!returnedSlugs.has(reference.slug)) continue
    const bumped = {
      ...reference.frontmatter,
      lastAccessed: new Date().toISOString(),
      accessCount: reference.frontmatter.accessCount + 1,
    }
    writes.push(writeFile(reference.path, renderReference(bumped, reference.body), 'utf8'))
  }
  await Promise.all(writes)
}
546
+
318
547
// Excerpt centred on `matchIndex`: up to EXCERPT_CONTEXT_LINES lines before and
// after it, clamped to the bounds of `lines`, joined with newlines.
function excerptForLine(lines: string[], matchIndex: number): string {
  const from = Math.max(matchIndex - EXCERPT_CONTEXT_LINES, 0)
  const to = Math.min(matchIndex + EXCERPT_CONTEXT_LINES + 1, lines.length)
  const context = lines.slice(from, to)
  return context.join('\n')
}
323
552
 
553
+ const EMPTY_RESULT_GUIDANCE =
554
+ 'No matching memory. This is the authoritative result — memory_search already covers topic shards, references, and undreamed stream events. Do not fall back to grep/find/bash or manually reading memory/topics, memory/references, memory/streams, or sessions; accept that no relevant memory exists and proceed.'
555
+
556
+ // The empty-set note rides in the LLM-facing `text` ONLY. `details` stays the
557
+ // pure struct: `keywordLane` reads `searchAll` directly (never this layer) and
558
+ // the structured tests assert on `details`, so both must see no `note`.
324
559
  function resultToToolResult(result: MemorySearchResult) {
560
+ const isEmpty = 'matches' in result && result.matches.length === 0
561
+ const text = isEmpty ? JSON.stringify({ ...result, note: EMPTY_RESULT_GUIDANCE }) : JSON.stringify(result)
325
562
  return {
326
- content: [{ type: 'text' as const, text: JSON.stringify(result) }],
563
+ content: [{ type: 'text' as const, text }],
327
564
  details: result,
328
565
  }
329
566
  }
@@ -39,6 +39,7 @@ export const fragmentEventSchema = z
39
39
  entry: z.string(),
40
40
  topic: z.string(),
41
41
  body: z.string(),
42
+ references: z.array(z.string()).optional(),
42
43
  })
43
44
  .passthrough()
44
45
 
@@ -1,13 +1,18 @@
1
1
  import { readFile, appendFile, readdir, stat, writeFile, rename } from 'node:fs/promises'
2
- import { join } from 'node:path'
2
+ import { basename, join } from 'node:path'
3
3
 
4
4
  import { getDreamedIds, loadDreamingState } from './dreaming-state'
5
5
  import { streamsDir } from './paths'
6
- import { parseEventLine, type StreamEvent } from './stream-events'
6
+ import { parseEventLine, type FragmentEvent, type StreamEvent } from './stream-events'
7
7
 
8
8
  const STREAM_FILE_PATTERN = /^\d{4}-\d{2}-\d{2}\.jsonl$/
9
9
  const STREAM_DATE_FROM_FILENAME = /^(\d{4}-\d{2}-\d{2})\.jsonl$/
10
10
 
11
+ export type FragmentsAppendedContext = {
12
+ path: string
13
+ date: string | null
14
+ }
15
+
11
16
  // Per-file event cache. `(mtimeMs, ctimeMs, size)` is the invalidation key,
12
17
  // mirroring `load-shards.ts`'s shard cache. The three writers in this module
13
18
  // — `appendEvents` (memory-logger appends), `writeEventsAtomic` (dreaming
@@ -104,10 +109,30 @@ export function __resetStreamFileCacheForTests(): void {
104
109
  streamFileCache.clear()
105
110
  }
106
111
 
107
- export async function appendEvents(path: string, events: readonly StreamEvent[]): Promise<void> {
112
// Appends `events` to the stream file at `path`, one JSON document per line.
// After a successful append, `onFragmentsAppended` (when given) is called with
// just the `fragment` events plus the file path and the date parsed from its
// name. A hook failure never fails the append: it is passed to `onHookError`
// when one is supplied and otherwise dropped.
export async function appendEvents(
  path: string,
  events: readonly StreamEvent[],
  onFragmentsAppended?: (fragments: FragmentEvent[], context: FragmentsAppendedContext) => Promise<void>,
  onHookError?: (err: unknown) => void,
): Promise<void> {
  if (events.length === 0) return

  let payload = ''
  for (const event of events) {
    payload += `${JSON.stringify(event)}\n`
  }
  await appendFile(path, payload, 'utf-8')

  if (onFragmentsAppended === undefined) return
  const appendedFragments = events.filter((event): event is FragmentEvent => event.type === 'fragment')
  if (appendedFragments.length === 0) return

  const hookContext: FragmentsAppendedContext = { path, date: streamDateFromPath(path) }
  try {
    await onFragmentsAppended(appendedFragments, hookContext)
  } catch (hookFailure) {
    onHookError?.(hookFailure)
  }
}
133
+
134
// Extracts the YYYY-MM-DD date from a stream file name, or null when the base
// name does not match STREAM_DATE_FROM_FILENAME.
function streamDateFromPath(path: string): string | null {
  const parsedName = STREAM_DATE_FROM_FILENAME.exec(basename(path))
  if (parsedName === null) return null
  return parsedName[1] ?? null
}
112
137
 
113
138
  export async function writeEventsAtomic(path: string, events: readonly StreamEvent[]): Promise<void> {
@@ -0,0 +1,40 @@
1
+ import type { TopicShard } from './load-shards'
2
+
3
+ export type InjectedShardState = Map<string, string>
4
+
5
+ export type DirectShardPartition = {
6
+ full: TopicShard[]
7
+ unchanged: TopicShard[]
8
+ }
9
+
10
+ // Preserves the "nothing the agent always had vanishes on an off-topic turn"
11
+ // guarantee by AVAILABILITY, not literal presence: an unchanged shard is still
12
+ // named (heading + slug) and its body is recoverable via memory_search, while a
13
+ // changed shard always re-injects in full so the agent never reads a stale body.
14
+ // `state` is the session-scoped record the caller owns and clears on session.end.
15
// Splits `shards` by whether their body hash equals the one recorded in `state`
// under the same slug. Matching shards go to `unchanged`; all others go to
// `full`, and `state` is updated in place with their new hash.
export function partitionDirectShards(shards: TopicShard[], state: InjectedShardState): DirectShardPartition {
  const partition: DirectShardPartition = { full: [], unchanged: [] }
  for (const shard of shards) {
    const fingerprint = hashBody(shard.body)
    if (state.get(shard.slug) === fingerprint) {
      partition.unchanged.push(shard)
      continue
    }
    state.set(shard.slug, fingerprint)
    partition.full.push(shard)
  }
  return partition
}
29
+
30
+ // FNV-1a over the body. A hash collision only suppresses a body the agent can
31
+ // still re-fetch by slug, so collision-tolerance buys a cheap one-string-per-slug
32
+ // state map instead of retaining full bodies per session.
33
// 32-bit FNV-1a style hash of `body`, folded over its UTF-16 code units and
// returned as an unpadded lowercase hex string.
function hashBody(body: string): string {
  const OFFSET_BASIS = 0x811c9dc5
  const PRIME = 0x01000193

  let acc = OFFSET_BASIS
  let index = 0
  while (index < body.length) {
    // XOR the code unit in, then multiply with 32-bit wraparound.
    acc = Math.imul(acc ^ body.charCodeAt(index), PRIME)
    index += 1
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
  return (acc >>> 0).toString(16)
}
@@ -0,0 +1,19 @@
1
+ import { mkdir, writeFile, rename } from 'node:fs/promises'
2
+ import { join } from 'node:path'
3
+
4
/**
 * Persists a session's retrieval cache at
 * memory/.retrieval-cache/<sessionId>.md under `agentDir`.
 *
 * The cache directory is created if missing. The content is first written to
 * a sibling `<sessionId>.md.tmp` file and then renamed onto the final path
 * (tmp+rename pattern for crash safety).
 */
export async function writeRetrievalCache(agentDir: string, sessionId: string, content: string): Promise<void> {
  const directory = join(agentDir, 'memory', '.retrieval-cache')
  const finalPath = join(directory, `${sessionId}.md`)
  const stagingPath = `${finalPath}.tmp`

  await mkdir(directory, { recursive: true })

  // Stage the full content first; only a complete file is renamed into place.
  await writeFile(stagingPath, content, 'utf8')
  await rename(stagingPath, finalPath)
}
@@ -0,0 +1,28 @@
1
+ import { z } from 'zod'
2
+
3
+ import { loadPluginConfigsSync } from '@/config'
4
+
5
+ export const vectorConfigSchema = z
6
+ .object({
7
+ enabled: z.boolean().default(false),
8
+ })
9
+ .default({ enabled: false })
10
+
11
+ export type VectorConfig = z.infer<typeof vectorConfigSchema>
12
+
13
+ // Fails closed to `false`: a memory block we can't parse is treated as opted
14
+ // out. Shared by the host-side download gate and the runtime's per-turn vs
15
+ // system-prompt memory-injection decision, so both read the flag identically.
16
// Reads `memory.vector.enabled` from an untyped memory config block. Anything
// that is not an object, or whose `vector` entry fails schema validation,
// yields false.
export function vectorEnabledFromMemoryConfig(memory: unknown): boolean {
  const isObject = typeof memory === 'object' && memory !== null
  if (!isObject) return false

  const { vector } = memory as Record<string, unknown>
  const result = vectorConfigSchema.safeParse(vector)
  return result.success ? result.data.enabled : false
}
21
+
22
// Reports whether the agent rooted at `cwd` has vector memory enabled. Any
// error while loading or reading the plugin configs is treated as "disabled".
export function agentUsesVector(cwd: string): boolean {
  try {
    const { memory } = loadPluginConfigsSync(cwd)
    return vectorEnabledFromMemoryConfig(memory)
  } catch {
    return false
  }
}