typeclaw 0.36.8 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +3 -2
- package/src/agent/index.ts +31 -11
- package/src/agent/live-sessions.ts +12 -0
- package/src/agent/model-fallback.ts +17 -15
- package/src/agent/model-overrides.ts +2 -2
- package/src/agent/session-meta.ts +10 -0
- package/src/agent/subagents.ts +11 -2
- package/src/agent/system-prompt.ts +9 -3
- package/src/agent/todo/continuation-policy.ts +6 -3
- package/src/agent/todo/continuation-wiring.ts +4 -2
- package/src/agent/todo/continuation.ts +3 -3
- package/src/agent/tools/todo/index.ts +27 -4
- package/src/bundled-plugins/agent-browser/index.ts +33 -108
- package/src/bundled-plugins/agent-browser/shim.ts +3 -94
- package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +8 -33
- package/src/bundled-plugins/doc-render/skills/typeclaw-render-pdf/SKILL.md +2 -2
- package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +7 -1
- package/src/bundled-plugins/memory/README.md +80 -23
- package/src/bundled-plugins/memory/append-tool.ts +74 -53
- package/src/bundled-plugins/memory/citation-superset.ts +4 -0
- package/src/bundled-plugins/memory/citations.ts +54 -0
- package/src/bundled-plugins/memory/dreaming-metrics.ts +30 -0
- package/src/bundled-plugins/memory/dreaming.ts +444 -21
- package/src/bundled-plugins/memory/index.ts +544 -400
- package/src/bundled-plugins/memory/load-memory.ts +87 -10
- package/src/bundled-plugins/memory/load-shards.ts +48 -22
- package/src/bundled-plugins/memory/memory-logger.ts +95 -106
- package/src/bundled-plugins/memory/memory-retrieval.ts +3 -3
- package/src/bundled-plugins/memory/parent-link.ts +33 -0
- package/src/bundled-plugins/memory/paths.ts +12 -0
- package/src/bundled-plugins/memory/references/frontmatter.ts +197 -0
- package/src/bundled-plugins/memory/references/load-references.ts +212 -0
- package/src/bundled-plugins/memory/references/store-reference-tool.ts +59 -0
- package/src/bundled-plugins/memory/search-tool.ts +282 -45
- package/src/bundled-plugins/memory/stream-events.ts +1 -0
- package/src/bundled-plugins/memory/stream-io.ts +28 -3
- package/src/bundled-plugins/memory/turn-dedup.ts +40 -0
- package/src/bundled-plugins/memory/vector/cache-write.ts +19 -0
- package/src/bundled-plugins/memory/vector/config.ts +28 -0
- package/src/bundled-plugins/memory/vector/doctor.ts +124 -0
- package/src/bundled-plugins/memory/vector/embedder.ts +246 -0
- package/src/bundled-plugins/memory/vector/hybrid.ts +439 -0
- package/src/bundled-plugins/memory/vector/index-on-write.ts +34 -0
- package/src/bundled-plugins/memory/vector/inspect.ts +111 -0
- package/src/bundled-plugins/memory/vector/passages.ts +125 -0
- package/src/bundled-plugins/memory/vector/reference-index-on-write.ts +50 -0
- package/src/bundled-plugins/memory/vector/relevance-gate.ts +93 -0
- package/src/bundled-plugins/memory/vector/startup.ts +71 -0
- package/src/bundled-plugins/memory/vector/store.ts +203 -0
- package/src/bundled-plugins/memory/vector/truncation.ts +124 -0
- package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +2 -0
- package/src/channels/router.ts +239 -40
- package/src/cli/incomplete-init.ts +57 -0
- package/src/cli/init.ts +143 -12
- package/src/cli/inspect.ts +11 -5
- package/src/cli/model.ts +112 -34
- package/src/cli/restart.ts +24 -0
- package/src/cli/start.ts +24 -0
- package/src/cli/tunnel.ts +53 -8
- package/src/config/config.ts +110 -19
- package/src/config/index.ts +5 -1
- package/src/config/models-mutation.ts +29 -11
- package/src/config/providers-mutation.ts +2 -2
- package/src/config/providers.ts +146 -12
- package/src/container/shared.ts +9 -0
- package/src/container/start.ts +87 -4
- package/src/cron/consumer.ts +13 -7
- package/src/hostd/models.ts +64 -0
- package/src/hostd/paths.ts +6 -0
- package/src/hostd/portbroker-manager.ts +2 -2
- package/src/init/checkpoint.ts +201 -0
- package/src/init/dockerfile.ts +121 -34
- package/src/init/gitignore.ts +7 -7
- package/src/init/index.ts +41 -9
- package/src/init/models-dev.ts +96 -21
- package/src/init/oauth-login.ts +3 -3
- package/src/init/progress.ts +29 -0
- package/src/init/validate-api-key.ts +4 -0
- package/src/inspect/index.ts +13 -6
- package/src/inspect/item-list.ts +11 -2
- package/src/inspect/live-list.ts +65 -0
- package/src/inspect/open-item.ts +22 -1
- package/src/inspect/session-list.ts +29 -0
- package/src/models/embedding-model.ts +114 -0
- package/src/models/transformers-version.ts +55 -0
- package/src/plugin/types.ts +3 -0
- package/src/portbroker/container-server.ts +23 -0
- package/src/portbroker/forward-request-bus.ts +35 -0
- package/src/portbroker/forward-result-bus.ts +2 -3
- package/src/portbroker/hostd-client.ts +182 -36
- package/src/portbroker/index.ts +6 -1
- package/src/portbroker/protocol.ts +9 -2
- package/src/run/channel-session-factory.ts +11 -1
- package/src/run/index.ts +41 -7
- package/src/server/command-runner.ts +24 -1
- package/src/server/index.ts +42 -8
- package/src/shared/index.ts +2 -0
- package/src/shared/protocol.ts +31 -0
- package/src/skills/typeclaw-channels/SKILL.md +4 -4
- package/src/skills/typeclaw-config/SKILL.md +2 -2
- package/src/skills/typeclaw-memory/SKILL.md +3 -1
- package/src/skills/typeclaw-permissions/SKILL.md +3 -3
- package/src/skills/typeclaw-skills/SKILL.md +1 -1
- package/src/skills/typeclaw-tunnels/SKILL.md +22 -1
- package/src/tunnels/providers/cloudflare-quick.ts +65 -7
- package/src/tunnels/upstream-probe.ts +25 -0
- package/typeclaw.schema.json +156 -67
- package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +0 -170
- package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +0 -421
- package/src/portbroker/bind-with-forward.ts +0 -102
|
@@ -1,15 +1,20 @@
|
|
|
1
|
+
import { writeFile } from 'node:fs/promises'
|
|
2
|
+
|
|
1
3
|
import { z } from 'zod'
|
|
2
4
|
|
|
3
5
|
import { defineTool } from '@/plugin'
|
|
4
6
|
|
|
5
|
-
import { loadAllShards, type TopicShard } from './load-shards'
|
|
7
|
+
import { loadAllShards, loadShard, type TopicShard } from './load-shards'
|
|
8
|
+
import { renderReference } from './references/frontmatter'
|
|
9
|
+
import { loadAllReferences, loadReference, type Reference } from './references/load-references'
|
|
6
10
|
import type { FragmentEvent, LegacyProseEvent, StreamEvent } from './stream-events'
|
|
7
11
|
import { readAllUndreamedStreamDays, type UndreamedStreamDay } from './stream-io'
|
|
8
12
|
|
|
9
13
|
const DEFAULT_MAX_RESULTS = 10
|
|
10
14
|
const EXCERPT_CONTEXT_LINES = 3
|
|
15
|
+
const EXCERPT_HEAD_LINES = 7
|
|
11
16
|
|
|
12
|
-
type TopicMatch = {
|
|
17
|
+
export type TopicMatch = {
|
|
13
18
|
source: 'topic'
|
|
14
19
|
shardPath: string
|
|
15
20
|
slug: string
|
|
@@ -18,7 +23,7 @@ type TopicMatch = {
|
|
|
18
23
|
fullBody?: string
|
|
19
24
|
}
|
|
20
25
|
|
|
21
|
-
type StreamMatch = {
|
|
26
|
+
export type StreamMatch = {
|
|
22
27
|
source: 'stream'
|
|
23
28
|
streamPath: string
|
|
24
29
|
date: string
|
|
@@ -28,43 +33,135 @@ type StreamMatch = {
|
|
|
28
33
|
fullBody?: string
|
|
29
34
|
}
|
|
30
35
|
|
|
31
|
-
type
|
|
36
|
+
/** A search hit that came from a reference (the tool's `source: "reference"` variant). */
export type ReferenceMatch = {
  source: 'reference'
  slug: string
  title: string
  /** Line-context excerpt around the hit, or the matched title/slug/tag when the body did not match. */
  excerpt: string
  /** The reference's `created` frontmatter value, passed through unchanged. */
  created: string
  /** Complete body; only set when the caller asked for full bodies or did an exact-slug lookup. */
  fullBody?: string
}

/** Any single hit returned by memory search, discriminated by `source`. */
export type MemorySearchMatch = TopicMatch | StreamMatch | ReferenceMatch

/**
 * Outcome of a memory search: either the matches (with `truncatedAt` set when
 * the result list was cut off) or a structured error message.
 */
export type MemorySearchResult = { matches: MemorySearchMatch[]; truncatedAt?: number } | { error: string }

/** Predicate deciding whether a piece of text counts as a hit. */
export type Matcher = (haystack: string) => boolean
|
|
50
|
+
|
|
51
|
+
/**
 * Builds the `memory_search` tool definition.
 *
 * The tool runs in one of two modes, chosen by which parameter is supplied:
 * - `topic`: exact-slug lookup via `lookupTopic` (no search, no date filter).
 * - `query`: phrase/regex search over topic shards, references and undreamed
 *   stream days, with a word-level fallback when the phrase finds nothing.
 *
 * `since` / `before` only restrict which references are searchable; demoted
 * references are always excluded from search. Every reference that ends up in
 * the returned matches has its access bookkeeping bumped.
 *
 * @returns The tool object produced by `defineTool`.
 */
export function createMemorySearchTool() {
  return defineTool({
    description:
      'Search the agent\'s long-term memory, or look up one topic shard by exact slug. Covers topic shards under memory/topics/ (consolidated facts), references under memory/references/ (verbatim artifacts), and undreamed daily-stream events under memory/streams/ (recent fragments not yet folded into shards). Pass `query` for search OR `topic` for an exact slug lookup, not both. Search is case-insensitive substring by default: tries the whole query as one phrase first, and if that finds nothing, falls back to OR-matching the individual words (ranked by how many words each hit contains) — so a multi-word query still returns results even when no entry contains the exact phrase. asRegex=true treats query as a JavaScript regex (no word fallback). `topic` skips search entirely and returns that one shard (or reference) with its full body — use it to read a topic OR reference whose slug you already have (e.g. a heading shown in injected memory); it resolves the topic shard first and falls back to a reference of the same slug. Returns matches discriminated by `source: "topic" | "reference" | "stream"`, each with line-context excerpts; full=true includes complete bodies (topic lookups always include the full body). Ordering depends on mode: exact-phrase (and regex) results list all topic matches first (alphabetical by slug), then reference matches, then stream matches (newest day first); word-fallback results are ranked by matched-word count, with that same topic-then-reference-then-stream-newest order as the tiebreak within each score band, so a higher-scoring stream match can precede a lower-scoring topic match.',
    parameters: z.object({
      query: z.string().optional(),
      topic: z.string().optional(),
      asRegex: z.boolean().default(false),
      full: z.boolean().default(false),
      maxResults: z.number().int().min(0).default(DEFAULT_MAX_RESULTS),
      since: z.string().optional(),
      before: z.string().optional(),
    }),
    async execute({ query, topic, asRegex, full, maxResults, since, before }, ctx) {
      // Exactly one of `query` / `topic` must be present. Branching on each
      // value directly (instead of comparing the two undefined-checks) lets the
      // compiler narrow `query` to `string` below without non-null assertions.
      if (topic !== undefined) {
        if (query !== undefined) {
          return resultToToolResult({ error: 'provide exactly one of `query` or `topic`' })
        }
        return resultToToolResult(await lookupTopic(ctx.agentDir, topic, ctx.logger))
      }
      if (query === undefined) {
        return resultToToolResult({ error: 'provide exactly one of `query` or `topic`' })
      }

      const matcherOrError = buildMatcher(query, asRegex)
      if (typeof matcherOrError === 'string') {
        return resultToToolResult({ error: matcherOrError })
      }

      // Validate the date bounds BEFORE touching disk: parsing is pure, so a
      // malformed `since`/`before` should fail fast rather than after loading
      // every shard, stream day and reference.
      const dateFilter = parseReferenceDateFilter(since, before)
      if ('error' in dateFilter) return resultToToolResult(dateFilter)

      const [shards, streamDays, allReferences] = await Promise.all([
        loadAllShards(ctx.agentDir, { logger: ctx.logger }),
        readAllUndreamedStreamDays(ctx.agentDir),
        loadAllReferences(ctx.agentDir, { logger: ctx.logger }),
      ])

      const references = allReferences.filter((reference) => referenceCandidateAllowed(reference, dateFilter))
      if (shards.length === 0 && streamDays.length === 0 && references.length === 0) {
        return resultToToolResult({ matches: [], truncatedAt: 0 })
      }

      // Phrase (or regex) pass first; only when it finds nothing do we try the
      // ranked word-level fallback, which may decline by returning null.
      let result = searchAll(shards, streamDays, matcherOrError, { full, maxResults, references })
      if ('matches' in result && result.matches.length === 0) {
        const fallback = tokenFallback(query, asRegex, shards, streamDays, references, { full, maxResults })
        if (fallback !== null) result = fallback
      }

      // Record the access on every reference that is actually being returned.
      if ('matches' in result) await bumpReturnedReferences(allReferences, result.matches)
      return resultToToolResult(result)
    },
  })
}
|
|
34
101
|
|
|
35
|
-
|
|
102
|
+
export const memorySearchTool = createMemorySearchTool()
|
|
103
|
+
|
|
104
|
+
// Exact slug lookup, so the agent can read a topic OR reference whose slug the
// per-turn injection already showed it without re-running a fuzzy search for a
// body the retrieval layer already located. The injected memory block renders
// both topic and reference entries with a `slug:` line and a single recovery
// hint (`memory_search({ topic: "<slug>" })`), so this lookup must resolve both:
// it tries the topic shard first, then falls back to a reference of the same
// slug. A loader that throws (e.g. the path builder rejecting a traversal slug)
// is caught and returned as a structured error, not a crash — for BOTH loaders.
// A slug that matches neither returns empty matches, the same shape as a search
// that hit nothing.
/**
 * Resolves one slug to its topic shard, or failing that to a reference.
 *
 * @param agentDir Agent directory handed to the loaders.
 * @param slug Exact slug to resolve.
 * @param logger Optional warning sink forwarded to the loaders.
 * @returns A single full-body match, empty matches when nothing has that slug,
 *   or `{ error }` when a loader rejects the slug.
 */
async function lookupTopic(
  agentDir: string,
  slug: string,
  logger?: { warn(message: string): void },
): Promise<MemorySearchResult> {
  const loaderOptions = logger === undefined ? {} : { logger }
  const invalidSlug = (err: unknown): MemorySearchResult => ({
    error: `invalid topic slug: ${err instanceof Error ? err.message : String(err)}`,
  })

  let shard: TopicShard | null
  try {
    shard = await loadShard(agentDir, slug, loaderOptions)
  } catch (err) {
    return invalidSlug(err)
  }
  if (shard !== null) return { matches: [topicMatchWithFullBody(shard)] }

  // The reference loader resolves its own path from the slug, so it gets the
  // same guard: if it rejects a slug the shard loader let through, the caller
  // still receives a structured error instead of an unhandled rejection.
  let reference: Reference | null
  try {
    reference = await loadReference(agentDir, slug, loaderOptions)
  } catch (err) {
    return invalidSlug(err)
  }
  if (reference === null) return { matches: [] }

  const match = referenceMatchWithFullBody(reference)
  // A reference reached via topic-slug lookup is a real access — record it so
  // it advances accessCount/lastAccessed the same as a query hit, otherwise the
  // injected-slug use case this fallback unlocks would still decay as unused.
  await bumpReturnedReferences([reference], [match])
  return { matches: [match] }
}
|
|
51
139
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
140
|
+
/**
 * Turns a loaded shard into a topic match for an exact-slug lookup: the excerpt
 * is the head of the body and the full body is always attached.
 */
function topicMatchWithFullBody(shard: TopicShard): TopicMatch {
  const { path: shardPath, slug, frontmatter, body } = shard
  const match: TopicMatch = {
    source: 'topic',
    shardPath,
    slug,
    heading: frontmatter.heading,
    excerpt: excerpt(body),
    fullBody: body,
  }
  return match
}
|
|
59
150
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
151
|
+
/**
 * Turns a loaded reference into a reference match for an exact-slug lookup: the
 * excerpt is the head of the body and the full body is always attached.
 */
function referenceMatchWithFullBody(reference: Reference): ReferenceMatch {
  const { slug, frontmatter, body } = reference
  const match: ReferenceMatch = {
    source: 'reference',
    slug,
    title: frontmatter.title,
    excerpt: excerpt(body),
    created: frontmatter.created,
    fullBody: body,
  }
  return match
}
|
|
161
|
+
|
|
162
|
+
/** Head-of-body excerpt: the first EXCERPT_HEAD_LINES lines, re-joined with newlines. */
function excerpt(body: string): string {
  const headLines = splitBodyLines(body).slice(0, EXCERPT_HEAD_LINES)
  return headLines.join('\n')
}
|
|
68
165
|
|
|
69
166
|
// Phrase-first/token-fallback: the descriptive multi-word queries the
|
|
70
167
|
// retrieval subagent issues rarely appear verbatim in any body, so a
|
|
@@ -81,16 +178,17 @@ function tokenFallback(
|
|
|
81
178
|
asRegex: boolean,
|
|
82
179
|
shards: TopicShard[],
|
|
83
180
|
streamDays: UndreamedStreamDay[],
|
|
181
|
+
references: Reference[],
|
|
84
182
|
options: { full: boolean; maxResults: number },
|
|
85
183
|
): MemorySearchResult | null {
|
|
86
184
|
if (asRegex) return null
|
|
87
185
|
const tokens = distinctTokens(query)
|
|
88
186
|
if (tokens.length === 0) return null
|
|
89
187
|
if (tokens.length === 1 && tokens[0] === query.trim().toLowerCase()) return null
|
|
90
|
-
return searchAllRanked(shards, streamDays, tokens, options)
|
|
188
|
+
return searchAllRanked(shards, streamDays, tokens, { ...options, references })
|
|
91
189
|
}
|
|
92
190
|
|
|
93
|
-
function distinctTokens(query: string): string[] {
|
|
191
|
+
export function distinctTokens(query: string): string[] {
|
|
94
192
|
return [
|
|
95
193
|
...new Set(
|
|
96
194
|
query
|
|
@@ -101,7 +199,7 @@ function distinctTokens(query: string): string[] {
|
|
|
101
199
|
]
|
|
102
200
|
}
|
|
103
201
|
|
|
104
|
-
function buildMatcher(query: string, asRegex: boolean): Matcher | string {
|
|
202
|
+
export function buildMatcher(query: string, asRegex: boolean): Matcher | string {
|
|
105
203
|
if (asRegex) {
|
|
106
204
|
try {
|
|
107
205
|
const regex = new RegExp(query, 'i')
|
|
@@ -122,11 +220,11 @@ function buildMatcher(query: string, asRegex: boolean): Matcher | string {
|
|
|
122
220
|
// before topic matches when `maxResults` is exhausted. The agent reading
|
|
123
221
|
// results in this order sees long-term consolidated truth before recent
|
|
124
222
|
// ephemeral fragments, which mirrors the injection-side rendering order.
|
|
125
|
-
function searchAll(
|
|
223
|
+
export function searchAll(
|
|
126
224
|
shards: TopicShard[],
|
|
127
225
|
streamDays: UndreamedStreamDay[],
|
|
128
226
|
matcher: Matcher,
|
|
129
|
-
options: { full: boolean; maxResults: number },
|
|
227
|
+
options: { full: boolean; maxResults: number; references?: Reference[] },
|
|
130
228
|
): MemorySearchResult {
|
|
131
229
|
const matches: MemorySearchMatch[] = []
|
|
132
230
|
let truncatedAt: number | undefined
|
|
@@ -146,6 +244,12 @@ function searchAll(
|
|
|
146
244
|
if (!push(match)) return { matches, truncatedAt: truncatedAt! }
|
|
147
245
|
}
|
|
148
246
|
|
|
247
|
+
for (const reference of options.references ?? []) {
|
|
248
|
+
const match = matchReference(reference, matcher, options.full)
|
|
249
|
+
if (match === null) continue
|
|
250
|
+
if (!push(match)) return { matches, truncatedAt: truncatedAt! }
|
|
251
|
+
}
|
|
252
|
+
|
|
149
253
|
for (let i = streamDays.length - 1; i >= 0; i--) {
|
|
150
254
|
const day = streamDays[i]!
|
|
151
255
|
for (const event of day.events) {
|
|
@@ -165,19 +269,32 @@ function searchAll(
|
|
|
165
269
|
// natural enumeration order (topics first in loadAllShards order, then stream
|
|
166
270
|
// days newest-first), so the established ordering contract holds within each
|
|
167
271
|
// score band. maxResults truncation is applied last, after ranking.
|
|
168
|
-
|
|
272
|
+
//
|
|
273
|
+
// `tokenMatchMode` defaults to 'substring' (the tool-path contract: `memory_search`
|
|
274
|
+
// is a deliberate agent query). The hybrid keyword lane opts into 'ascii-boundary'
|
|
275
|
+
// because its query is a whole user prompt, where unanchored substrings let short
|
|
276
|
+
// tokens ('in', 'do', 'ci') match inside unrelated words and over-score verbose
|
|
277
|
+
// shards. Both the match predicate and the score use the SAME per-token matchers,
|
|
278
|
+
// so a shard cannot rank on a hit the matcher wouldn't have counted.
|
|
279
|
+
export function searchAllRanked(
|
|
169
280
|
shards: TopicShard[],
|
|
170
281
|
streamDays: UndreamedStreamDay[],
|
|
171
282
|
tokens: string[],
|
|
172
|
-
options: {
|
|
283
|
+
options: {
|
|
284
|
+
full: boolean
|
|
285
|
+
maxResults: number
|
|
286
|
+
references?: Reference[]
|
|
287
|
+
tokenMatchMode?: 'substring' | 'ascii-boundary'
|
|
288
|
+
},
|
|
173
289
|
): MemorySearchResult {
|
|
290
|
+
const tokenMatchers = tokens.map((t) => buildTokenMatcher(t, options.tokenMatchMode ?? 'substring'))
|
|
174
291
|
const anyToken: Matcher = (haystack) => {
|
|
175
292
|
const lower = haystack.toLowerCase()
|
|
176
|
-
return
|
|
293
|
+
return tokenMatchers.some((matches) => matches(lower))
|
|
177
294
|
}
|
|
178
295
|
const scoreOf = (text: string): number => {
|
|
179
296
|
const lower = text.toLowerCase()
|
|
180
|
-
return
|
|
297
|
+
return tokenMatchers.reduce((n, matches) => (matches(lower) ? n + 1 : n), 0)
|
|
181
298
|
}
|
|
182
299
|
|
|
183
300
|
const scored: Array<{ match: MemorySearchMatch; score: number; order: number }> = []
|
|
@@ -189,6 +306,12 @@ function searchAllRanked(
|
|
|
189
306
|
scored.push({ match, score: scoreOf(shardSearchText(shard)), order: order++ })
|
|
190
307
|
}
|
|
191
308
|
|
|
309
|
+
for (const reference of options.references ?? []) {
|
|
310
|
+
const match = matchReference(reference, anyToken, options.full)
|
|
311
|
+
if (match === null) continue
|
|
312
|
+
scored.push({ match, score: scoreOf(referenceSearchText(reference)), order: order++ })
|
|
313
|
+
}
|
|
314
|
+
|
|
192
315
|
for (let i = streamDays.length - 1; i >= 0; i--) {
|
|
193
316
|
const day = streamDays[i]!
|
|
194
317
|
for (const event of day.events) {
|
|
@@ -206,6 +329,27 @@ function searchAllRanked(
|
|
|
206
329
|
return { matches: scored.map((s) => s.match) }
|
|
207
330
|
}
|
|
208
331
|
|
|
332
|
+
// Builds the predicate for ONE token. The haystack handed to the predicate must
// already be lowercased — nothing here folds case. In 'substring' mode the
// predicate is a plain `includes`. In 'ascii-boundary' mode an ASCII token only
// counts when it sits between non-alphanumeric characters (or the string edges),
// so 'in'/'do' no longer hit inside 'reload'/'docker'. `\b` is deliberately not
// used because it is unreliable for CJK; a token with any non-ASCII character
// (e.g. '홍길동') has no dependable ASCII boundary and uses substring matching.
function buildTokenMatcher(token: string, mode: 'substring' | 'ascii-boundary'): (lowerHaystack: string) => boolean {
  const bySubstring = (lower: string): boolean => lower.includes(token)
  if (mode === 'substring') return bySubstring
  if (hasNonAscii(token)) return bySubstring

  // Escape regex metacharacters so the token is matched literally.
  const literal = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const anchored = new RegExp(`(?:^|[^a-z0-9])${literal}(?=$|[^a-z0-9])`)
  return (lower) => anchored.test(lower)
}
|
|
345
|
+
|
|
346
|
+
/**
 * Reports whether `text` contains any UTF-16 code unit above 0x7f, i.e. any
 * character outside the 7-bit ASCII range. The empty string is pure ASCII.
 */
export function hasNonAscii(text: string): boolean {
  // No `u` flag: the class is tested per code unit, exactly like a charCodeAt scan.
  return /[^\x00-\x7f]/.test(text)
}
|
|
352
|
+
|
|
209
353
|
/**
 * Flattens everything searchable about a shard into one newline-separated
 * string: slug, heading, each tag (if any), then the body.
 */
function shardSearchText(shard: TopicShard): string {
  const { slug, frontmatter, body } = shard
  const tags = frontmatter.tags ?? []
  const parts = [slug, frontmatter.heading, ...tags, body]
  return parts.join('\n')
}
|
|
@@ -216,6 +360,10 @@ function eventSearchText(event: StreamEvent): string {
|
|
|
216
360
|
return ''
|
|
217
361
|
}
|
|
218
362
|
|
|
363
|
+
/**
 * Flattens everything searchable about a reference into one newline-separated
 * string: slug, title, each tag, then the body.
 */
function referenceSearchText(reference: Reference): string {
  const { slug, frontmatter, body } = reference
  const parts = [slug, frontmatter.title, ...frontmatter.tags, body]
  return parts.join('\n')
}
|
|
366
|
+
|
|
219
367
|
function matchShard(shard: TopicShard, matcher: Matcher, full: boolean): TopicMatch | null {
|
|
220
368
|
const bodyLines = splitBodyLines(shard.body)
|
|
221
369
|
const firstBodyLineIndex = bodyLines.findIndex((line) => matcher(line))
|
|
@@ -239,6 +387,30 @@ function matchShard(shard: TopicShard, matcher: Matcher, full: boolean): TopicMa
|
|
|
239
387
|
return match
|
|
240
388
|
}
|
|
241
389
|
|
|
390
|
+
/**
 * Tests one reference against `matcher`. A reference is a hit when its slug,
 * title, any tag, or any body line matches.
 *
 * @param reference Reference to test.
 * @param matcher Predicate applied to the slug, title, each tag and each body line.
 * @param full When true the complete body is attached as `fullBody`.
 * @returns The match, or null when nothing about the reference matched. The
 *   excerpt is the context around the first matching body line; when only
 *   metadata matched it is the matched title/slug/tag instead.
 */
function matchReference(reference: Reference, matcher: Matcher, full: boolean): ReferenceMatch | null {
  const { slug, frontmatter, body } = reference
  const lines = splitBodyLines(body)
  const hitLine = lines.findIndex((line) => matcher(line))
  const bodyHit = hitLine !== -1

  const metadataHit = matcher(slug) || matcher(frontmatter.title) || frontmatter.tags.some((tag) => matcher(tag))
  if (!metadataHit && !bodyHit) return null

  const excerptText = bodyHit ? excerptForLine(lines, hitLine) : fallbackReferenceExcerpt(reference, matcher)
  const result: ReferenceMatch = {
    source: 'reference',
    slug,
    title: frontmatter.title,
    excerpt: excerptText,
    created: frontmatter.created,
  }
  if (full) {
    result.fullBody = body
  }
  return result
}
|
|
413
|
+
|
|
242
414
|
// Stream-event matcher. `fragment` events expose `topic` + `body` for search;
|
|
243
415
|
// `legacy_prose` exposes `text` (no id, no topic). `watermark` events carry
|
|
244
416
|
// no human content and are skipped — they only mark dreaming progress.
|
|
@@ -315,15 +487,80 @@ function fallbackExcerpt(shard: TopicShard, matcher: Matcher): string {
|
|
|
315
487
|
return matchedTag ?? shard.frontmatter.heading
|
|
316
488
|
}
|
|
317
489
|
|
|
490
|
+
/**
 * Picks the excerpt for a reference whose body did not match: the first of
 * title, slug, then tags (in order) that satisfies `matcher`, defaulting to the
 * title when none does.
 */
function fallbackReferenceExcerpt(reference: Reference, matcher: Matcher): string {
  const { title, tags } = reference.frontmatter
  const candidates = [title, reference.slug, ...tags]
  const firstHit = candidates.find((candidate) => matcher(candidate))
  return firstHit ?? title
}
|
|
496
|
+
|
|
497
|
+
/** Parsed `since` / `before` bounds; an absent bound is left undefined. */
type ReferenceDateFilter = { since?: Date; before?: Date }

/**
 * Parses the optional `since` / `before` strings into dates.
 *
 * `since` is checked first, so when both are malformed the error names `since`.
 *
 * @returns The parsed bounds, or `{ error }` carrying the message for the first
 *   bound that failed to parse.
 */
function parseReferenceDateFilter(
  since: string | undefined,
  before: string | undefined,
): ReferenceDateFilter | { error: string } {
  let sinceDate: Date | undefined
  if (since !== undefined) {
    const parsed = parseDateParam('since', since)
    if (typeof parsed === 'string') return { error: parsed }
    sinceDate = parsed
  }

  let beforeDate: Date | undefined
  if (before !== undefined) {
    const parsed = parseDateParam('before', before)
    if (typeof parsed === 'string') return { error: parsed }
    beforeDate = parsed
  }

  return { since: sinceDate, before: beforeDate }
}
|
|
509
|
+
|
|
510
|
+
/**
 * Parses one date parameter with the `Date` constructor.
 *
 * @param name Parameter name, used only in the error message.
 * @param value Raw string supplied by the caller.
 * @returns The parsed `Date`, or an error message string when the value does
 *   not parse to a valid time.
 */
function parseDateParam(name: string, value: string): Date | string {
  const parsed = new Date(value)
  const isValid = !Number.isNaN(parsed.getTime())
  return isValid ? parsed : `invalid ${name}: expected ISO 8601 datetime string`
}
|
|
515
|
+
|
|
516
|
+
/**
 * Decides whether a reference may take part in a search.
 *
 * Demoted references are always rejected. Otherwise the reference's `created`
 * date must fall in the half-open window [since, before): `since` is inclusive,
 * `before` is exclusive, and a missing bound does not restrict.
 */
function referenceCandidateAllowed(reference: Reference, filter: ReferenceDateFilter): boolean {
  const { demoted, created } = reference.frontmatter
  if (demoted) return false

  const createdAt = new Date(created)
  const beforeWindow = filter.since !== undefined && createdAt < filter.since
  const afterWindow = filter.before !== undefined && createdAt >= filter.before
  return !(beforeWindow || afterWindow)
}
|
|
523
|
+
|
|
524
|
+
/**
 * Records an access on every reference that appears in `matches`.
 *
 * For each reference whose slug is among the returned reference matches, the
 * file at `reference.path` is rewritten with `lastAccessed` set to the current
 * time and `accessCount` incremented by one; the body is written back
 * unchanged. All rewrites are started together and awaited as a group, so the
 * returned promise rejects if any write fails.
 *
 * @param references Loaded references to pick from.
 * @param matches Matches being returned to the caller; non-reference matches are ignored.
 */
async function bumpReturnedReferences(references: Reference[], matches: MemorySearchMatch[]): Promise<void> {
  const returnedSlugs = new Set<string>()
  for (const match of matches) {
    if (match.source === 'reference') returnedSlugs.add(match.slug)
  }
  if (returnedSlugs.size === 0) return

  const writes: Promise<void>[] = []
  for (const reference of references) {
    if (!returnedSlugs.has(reference.slug)) continue
    const bumpedFrontmatter = {
      ...reference.frontmatter,
      lastAccessed: new Date().toISOString(),
      accessCount: reference.frontmatter.accessCount + 1,
    }
    writes.push(writeFile(reference.path, renderReference(bumpedFrontmatter, reference.body), 'utf8'))
  }
  await Promise.all(writes)
}
|
|
546
|
+
|
|
318
547
|
/**
 * Returns the matched line together with up to EXCERPT_CONTEXT_LINES lines of
 * context on each side, clamped to the bounds of `lines` and joined with
 * newlines.
 */
function excerptForLine(lines: string[], matchIndex: number): string {
  const firstWanted = matchIndex - EXCERPT_CONTEXT_LINES
  const endWanted = matchIndex + EXCERPT_CONTEXT_LINES + 1
  const from = Math.max(0, firstWanted)
  const to = Math.min(lines.length, endWanted)
  const window = lines.slice(from, to)
  return window.join('\n')
}
|
|
323
552
|
|
|
553
|
+
const EMPTY_RESULT_GUIDANCE =
  'No matching memory. This is the authoritative result — memory_search already covers topic shards, references, and undreamed stream events. Do not fall back to grep/find/bash or manually reading memory/topics, memory/references, memory/streams, or sessions; accept that no relevant memory exists and proceed.'

// Wraps a search result in the tool-result envelope. The guidance note for an
// empty match list is added to the LLM-facing `text` ONLY; `details` is always
// the untouched result struct. `keywordLane` reads `searchAll` directly (never
// this layer) and the structured tests assert on `details`, so neither may see
// a `note`.
function resultToToolResult(result: MemorySearchResult) {
  const noMatches = 'matches' in result && result.matches.length === 0
  const payload = noMatches ? { ...result, note: EMPTY_RESULT_GUIDANCE } : result
  return {
    content: [{ type: 'text' as const, text: JSON.stringify(payload) }],
    details: result,
  }
}
|
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
import { readFile, appendFile, readdir, stat, writeFile, rename } from 'node:fs/promises'
|
|
2
|
-
import { join } from 'node:path'
|
|
2
|
+
import { basename, join } from 'node:path'
|
|
3
3
|
|
|
4
4
|
import { getDreamedIds, loadDreamingState } from './dreaming-state'
|
|
5
5
|
import { streamsDir } from './paths'
|
|
6
|
-
import { parseEventLine, type StreamEvent } from './stream-events'
|
|
6
|
+
import { parseEventLine, type FragmentEvent, type StreamEvent } from './stream-events'
|
|
7
7
|
|
|
8
8
|
const STREAM_FILE_PATTERN = /^\d{4}-\d{2}-\d{2}\.jsonl$/
|
|
9
9
|
const STREAM_DATE_FROM_FILENAME = /^(\d{4}-\d{2}-\d{2})\.jsonl$/
|
|
10
10
|
|
|
11
|
+
/** Describes the stream file that fragments were just appended to; passed to the append hook. */
export type FragmentsAppendedContext = {
  /** Path of the stream file that was appended to. */
  path: string
  /** `YYYY-MM-DD` date taken from the file name, or null when the name does not have that shape. */
  date: string | null
}
|
|
15
|
+
|
|
11
16
|
// Per-file event cache. `(mtimeMs, ctimeMs, size)` is the invalidation key,
|
|
12
17
|
// mirroring `load-shards.ts`'s shard cache. The three writers in this module
|
|
13
18
|
// — `appendEvents` (memory-logger appends), `writeEventsAtomic` (dreaming
|
|
@@ -104,10 +109,30 @@ export function __resetStreamFileCacheForTests(): void {
|
|
|
104
109
|
streamFileCache.clear()
|
|
105
110
|
}
|
|
106
111
|
|
|
107
|
-
export async function appendEvents(
|
|
112
|
+
/**
 * Appends events to a stream file as JSON lines, then optionally notifies a
 * hook about the fragment events that were written.
 *
 * @param path Stream file to append to.
 * @param events Events to serialize, one JSON document per line; an empty list is a no-op.
 * @param onFragmentsAppended Optional hook, called after the append succeeds
 *   with only the `fragment` events. It is skipped when the batch has none.
 * @param onHookError Optional sink for an error thrown by the hook. A hook
 *   failure never rejects this function; without a sink it is dropped.
 */
export async function appendEvents(
  path: string,
  events: readonly StreamEvent[],
  onFragmentsAppended?: (fragments: FragmentEvent[], context: FragmentsAppendedContext) => Promise<void>,
  onHookError?: (err: unknown) => void,
): Promise<void> {
  if (events.length === 0) return

  let payload = ''
  for (const event of events) {
    payload += `${JSON.stringify(event)}\n`
  }
  await appendFile(path, payload, 'utf-8')

  if (onFragmentsAppended === undefined) return
  const fragments = events.filter((event): event is FragmentEvent => event.type === 'fragment')
  if (fragments.length === 0) return

  // The events are already on disk at this point, so the hook is best-effort:
  // its failure is reported to the sink instead of failing the append.
  const context: FragmentsAppendedContext = { path, date: streamDateFromPath(path) }
  try {
    await onFragmentsAppended(fragments, context)
  } catch (err) {
    onHookError?.(err)
  }
}
|
|
133
|
+
|
|
134
|
+
// Extracts the `YYYY-MM-DD` portion from a stream file path by matching its
// base name against STREAM_DATE_FROM_FILENAME. Returns null when the base
// name does not match.
function streamDateFromPath(path: string): string | null {
  const match = STREAM_DATE_FROM_FILENAME.exec(basename(path))
  if (match === null) return null
  return match[1] ?? null
}
|
|
112
137
|
|
|
113
138
|
export async function writeEventsAtomic(path: string, events: readonly StreamEvent[]): Promise<void> {
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type { TopicShard } from './load-shards'
|
|
2
|
+
|
|
3
|
+
export type InjectedShardState = Map<string, string>
|
|
4
|
+
|
|
5
|
+
export type DirectShardPartition = {
|
|
6
|
+
full: TopicShard[]
|
|
7
|
+
unchanged: TopicShard[]
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
// Splits `shards` into those whose body must be injected in full and those
// whose body is unchanged since it was last recorded in `state`.
//
// This keeps the "nothing the agent always had vanishes on an off-topic turn"
// guarantee by AVAILABILITY rather than literal presence: an unchanged shard
// is still named (heading + slug) and its body stays recoverable via
// memory_search, while a changed shard is always re-injected in full so the
// agent never reads a stale body.
//
// `state` is the session-scoped slug -> body-hash record. The caller owns it
// and clears it on session.end; this function updates it in place for every
// shard it places in `full`.
export function partitionDirectShards(shards: TopicShard[], state: InjectedShardState): DirectShardPartition {
  const partition: DirectShardPartition = { full: [], unchanged: [] }
  for (const shard of shards) {
    const digest = hashBody(shard.body)
    if (state.get(shard.slug) !== digest) {
      // New slug or changed body: remember the new digest and inject in full.
      state.set(shard.slug, digest)
      partition.full.push(shard)
      continue
    }
    partition.unchanged.push(shard)
  }
  return partition
}
|
|
29
|
+
|
|
30
|
+
// 32-bit FNV-1a over the body's UTF-16 code units, rendered as unpadded
// lowercase hex. Collisions are tolerated on purpose: a collision only
// suppresses a body the agent can still re-fetch by slug, and in exchange the
// session state holds one short string per slug instead of full bodies.
function hashBody(body: string): string {
  const FNV_OFFSET_BASIS = 0x811c9dc5
  const FNV_PRIME = 0x01000193
  let acc = FNV_OFFSET_BASIS
  let index = 0
  while (index < body.length) {
    // XOR in the code unit first, then multiply (the "1a" ordering).
    // Math.imul keeps the product in 32-bit integer range.
    acc = Math.imul(acc ^ body.charCodeAt(index), FNV_PRIME)
    index += 1
  }
  // `>>> 0` reinterprets the signed 32-bit accumulator as unsigned.
  return (acc >>> 0).toString(16)
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { mkdir, writeFile, rename } from 'node:fs/promises'
|
|
2
|
+
import { join } from 'node:path'
|
|
3
|
+
|
|
4
|
+
/**
 * Persists a session's retrieval cache to
 * `<agentDir>/memory/.retrieval-cache/<sessionId>.md`.
 *
 * The cache directory is created on demand. The content is first written to a
 * sibling `<sessionId>.md.tmp` file and then renamed over the final path
 * (tmp+rename pattern for crash safety).
 *
 * @param agentDir - Agent root directory that contains `memory/`.
 * @param sessionId - Session identifier used as the cache file's base name.
 * @param content - Full text to store in the cache file.
 */
export async function writeRetrievalCache(agentDir: string, sessionId: string, content: string): Promise<void> {
  const targetDir = join(agentDir, 'memory', '.retrieval-cache')
  const finalPath = join(targetDir, `${sessionId}.md`)
  const stagingPath = `${finalPath}.tmp`

  await mkdir(targetDir, { recursive: true })

  // Stage the full content first, then swap it into place with one rename.
  await writeFile(stagingPath, content, 'utf8')
  await rename(stagingPath, finalPath)
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
|
|
3
|
+
import { loadPluginConfigsSync } from '@/config'
|
|
4
|
+
|
|
5
|
+
export const vectorConfigSchema = z
|
|
6
|
+
.object({
|
|
7
|
+
enabled: z.boolean().default(false),
|
|
8
|
+
})
|
|
9
|
+
.default({ enabled: false })
|
|
10
|
+
|
|
11
|
+
export type VectorConfig = z.infer<typeof vectorConfigSchema>
|
|
12
|
+
|
|
13
|
+
// Reads the `vector.enabled` flag out of a raw memory config block.
// Fails closed to `false`: a non-object block, or a `vector` entry the schema
// rejects, is treated as opted out. Shared by the host-side download gate and
// the runtime's per-turn vs system-prompt memory-injection decision, so both
// read the flag identically.
export function vectorEnabledFromMemoryConfig(memory: unknown): boolean {
  if (memory === null || typeof memory !== 'object') return false
  const { vector } = memory as Record<string, unknown>
  const outcome = vectorConfigSchema.safeParse(vector)
  if (!outcome.success) return false
  return outcome.data.enabled
}
|
|
21
|
+
|
|
22
|
+
// Reports whether the plugin config loaded for `cwd` has vector memory
// enabled. Any error while loading or reading the config yields `false`.
export function agentUsesVector(cwd: string): boolean {
  try {
    const { memory } = loadPluginConfigsSync(cwd)
    return vectorEnabledFromMemoryConfig(memory)
  } catch {
    return false
  }
}
|