@framers/agentos 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/memory/retrieval/typed-network/TypedNetworkObserver.d.ts +36 -7
- package/dist/memory/retrieval/typed-network/TypedNetworkObserver.d.ts.map +1 -1
- package/dist/memory/retrieval/typed-network/TypedNetworkObserver.js +108 -22
- package/dist/memory/retrieval/typed-network/TypedNetworkObserver.js.map +1 -1
- package/dist/memory/retrieval/typed-network/prompts/extraction-schema.d.ts +55 -23
- package/dist/memory/retrieval/typed-network/prompts/extraction-schema.d.ts.map +1 -1
- package/dist/memory/retrieval/typed-network/prompts/extraction-schema.js +49 -10
- package/dist/memory/retrieval/typed-network/prompts/extraction-schema.js.map +1 -1
- package/package.json +1 -1
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* @file TypedNetworkObserver.ts
|
|
3
3
|
* @description LLM-driven extractor that turns a conversation block
|
|
4
4
|
* into 0+ {@link TypedFact}s. Wraps the 6-step extraction prompt and
|
|
5
|
-
* the zod
|
|
5
|
+
* the tolerant zod parsing of the LLM's structured-output response.
|
|
6
6
|
*
|
|
7
7
|
* Production wiring: a typical caller constructs the observer once per
|
|
8
8
|
* pipeline (re-using the same `gpt-5-mini` adapter), then invokes
|
|
@@ -10,6 +10,30 @@
|
|
|
10
10
|
* are then upserted into a {@link TypedNetworkStore} and embedded by
|
|
11
11
|
* the host's {@link IEmbeddingManager}.
|
|
12
12
|
*
|
|
13
|
+
* **Tolerance design (Phase 4c smoke fix):** the parser accepts the
|
|
14
|
+
* common deviations gpt-5-mini emits at scale, rather than throwing on
|
|
15
|
+
* any deviation:
|
|
16
|
+
*
|
|
17
|
+
* 1. **Code-fence stripping**: triple-backtick fences (with or without
|
|
18
|
+
* language tag) are removed before JSON parse.
|
|
19
|
+
* 2. **Top-level array auto-wrap**: a bare `[fact, fact]` is wrapped
|
|
20
|
+
* as `{facts: [...]}` before schema validation.
|
|
21
|
+
* 3. **Per-fact tolerance**: facts are validated one at a time via
|
|
22
|
+
* `TypedExtractionFactSchema.safeParse`. Bad facts are dropped
|
|
23
|
+
* silently; good facts in the same response are kept.
|
|
24
|
+
* 4. **Schema-level defaults**: `temporal`, `participants`,
|
|
25
|
+
* `reasoning_markers`, and `entities` default to sensible empties
|
|
26
|
+
* when the LLM omits them. `bank` is uppercase-coerced. See
|
|
27
|
+
* {@link TypedExtractionFactSchema} for the full tolerance surface.
|
|
28
|
+
* 5. **Retry-on-outer-failure**: if the outer (whole-response) parse
|
|
29
|
+
* fails (invalid JSON, primitive value, neither array nor object
|
|
30
|
+
* with `facts`), the extractor retries once with the validation
|
|
31
|
+
* error appended to the user prompt. Implements spec section 6's
|
|
32
|
+
* retry path that was specified but never shipped.
|
|
33
|
+
*
|
|
34
|
+
* The extract method NEVER throws on extractable input; persistent
|
|
35
|
+
* outer failure returns `[]` so the caller can continue ingest.
|
|
36
|
+
*
|
|
13
37
|
* @module @framers/agentos/memory/retrieval/typed-network/TypedNetworkObserver
|
|
14
38
|
*/
|
|
15
39
|
import type { TypedFact } from './types.js';
|
|
@@ -49,18 +73,23 @@ export declare class TypedNetworkObserver {
|
|
|
49
73
|
private readonly temperature;
|
|
50
74
|
constructor(options: TypedNetworkObserverOptions);
|
|
51
75
|
/**
|
|
52
|
-
* Extract typed facts from a conversation block.
|
|
53
|
-
*
|
|
54
|
-
* IDs of the form
|
|
55
|
-
*
|
|
76
|
+
* Extract typed facts from a conversation block.
|
|
77
|
+
*
|
|
78
|
+
* Resulting facts have stable IDs of the form
|
|
79
|
+
* `<sessionId>-fact-<index>`, where `<index>` is the sequential
|
|
80
|
+
* POST-DROP position, so IDs remain contiguous after drops in the
|
|
81
|
+
* returned array.
|
|
82
|
+
*
|
|
83
|
+
* **Never throws on extractable input.** Catastrophic outer parse
|
|
84
|
+
* failures (invalid JSON, primitive value, missing facts key) get
|
|
85
|
+
* one retry; persistent failure returns `[]`. Bad individual facts
|
|
86
|
+
* are dropped silently via per-fact `safeParse`.
|
|
56
87
|
*
|
|
57
88
|
* @param sessionText - Full conversation text. Will be wrapped in
|
|
58
89
|
* the user prompt's delimiters automatically.
|
|
59
90
|
* @param sessionId - Stable identifier used to namespace the
|
|
60
91
|
* resulting fact IDs.
|
|
61
92
|
* @returns Array of {@link TypedFact}s, possibly empty.
|
|
62
|
-
* @throws ZodError if the LLM output fails schema validation.
|
|
63
|
-
* @throws SyntaxError if the LLM output is not valid JSON.
|
|
64
93
|
*/
|
|
65
94
|
extract(sessionText: string, sessionId: string): Promise<TypedFact[]>;
|
|
66
95
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TypedNetworkObserver.d.ts","sourceRoot":"","sources":["../../../../src/memory/retrieval/typed-network/TypedNetworkObserver.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"TypedNetworkObserver.d.ts","sourceRoot":"","sources":["../../../../src/memory/retrieval/typed-network/TypedNetworkObserver.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAOH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAE5C;;;;;;GAMG;AACH,MAAM,WAAW,mBAAmB;IAClC,MAAM,CAAC,IAAI,EAAE;QACX,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,2BAA2B;IAC1C,2DAA2D;IAC3D,GAAG,EAAE,mBAAmB,CAAC;IACzB,4GAA4G;IAC5G,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,2DAA2D;IAC3D,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AASD;;;GAGG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAsB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;gBAEzB,OAAO,EAAE,2BAA2B;IAMhD;;;;;;;;;;;;;;;;;;OAkBG;IACG,OAAO,CAAC,WAAW,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;CA6D5E"}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* @file TypedNetworkObserver.ts
|
|
3
3
|
* @description LLM-driven extractor that turns a conversation block
|
|
4
4
|
* into 0+ {@link TypedFact}s. Wraps the 6-step extraction prompt and
|
|
5
|
-
* the zod
|
|
5
|
+
* the tolerant zod parsing of the LLM's structured-output response.
|
|
6
6
|
*
|
|
7
7
|
* Production wiring: a typical caller constructs the observer once per
|
|
8
8
|
* pipeline (re-using the same `gpt-5-mini` adapter), then invokes
|
|
@@ -10,10 +10,40 @@
|
|
|
10
10
|
* are then upserted into a {@link TypedNetworkStore} and embedded by
|
|
11
11
|
* the host's {@link IEmbeddingManager}.
|
|
12
12
|
*
|
|
13
|
+
* **Tolerance design (Phase 4c smoke fix):** the parser accepts the
|
|
14
|
+
* common deviations gpt-5-mini emits at scale, rather than throwing on
|
|
15
|
+
* any deviation:
|
|
16
|
+
*
|
|
17
|
+
* 1. **Code-fence stripping**: triple-backtick fences (with or without
|
|
18
|
+
* language tag) are removed before JSON parse.
|
|
19
|
+
* 2. **Top-level array auto-wrap**: a bare `[fact, fact]` is wrapped
|
|
20
|
+
* as `{facts: [...]}` before schema validation.
|
|
21
|
+
* 3. **Per-fact tolerance**: facts are validated one at a time via
|
|
22
|
+
* `TypedExtractionFactSchema.safeParse`. Bad facts are dropped
|
|
23
|
+
* silently; good facts in the same response are kept.
|
|
24
|
+
* 4. **Schema-level defaults**: `temporal`, `participants`,
|
|
25
|
+
* `reasoning_markers`, and `entities` default to sensible empties
|
|
26
|
+
* when the LLM omits them. `bank` is uppercase-coerced. See
|
|
27
|
+
* {@link TypedExtractionFactSchema} for the full tolerance surface.
|
|
28
|
+
* 5. **Retry-on-outer-failure**: if the outer (whole-response) parse
|
|
29
|
+
* fails (invalid JSON, primitive value, neither array nor object
|
|
30
|
+
* with `facts`), the extractor retries once with the validation
|
|
31
|
+
* error appended to the user prompt. Implements spec section 6's
|
|
32
|
+
* retry path that was specified but never shipped.
|
|
33
|
+
*
|
|
34
|
+
* The extract method NEVER throws on extractable input; persistent
|
|
35
|
+
* outer failure returns `[]` so the caller can continue ingest.
|
|
36
|
+
*
|
|
13
37
|
* @module @framers/agentos/memory/retrieval/typed-network/TypedNetworkObserver
|
|
14
38
|
*/
|
|
15
|
-
import {
|
|
39
|
+
import { TypedExtractionFactSchema } from './prompts/extraction-schema.js';
|
|
16
40
|
import { TYPED_EXTRACTION_SYSTEM_PROMPT, buildExtractionUserPrompt, } from './prompts/extraction-prompt.js';
|
|
41
|
+
/**
|
|
42
|
+
* Maximum total LLM invocations per `extract` call. The first attempt
|
|
43
|
+
* uses the base prompt; the second appends the validation error from
|
|
44
|
+
* the first attempt for the model to self-correct against.
|
|
45
|
+
*/
|
|
46
|
+
const MAX_ATTEMPTS = 2;
|
|
17
47
|
/**
|
|
18
48
|
* The 6-step extractor. Stateless aside from its constructor options;
|
|
19
49
|
* safe to share across concurrent extractions.
|
|
@@ -25,33 +55,88 @@ export class TypedNetworkObserver {
|
|
|
25
55
|
this.temperature = options.temperature ?? 0;
|
|
26
56
|
}
|
|
27
57
|
/**
|
|
28
|
-
* Extract typed facts from a conversation block.
|
|
29
|
-
*
|
|
30
|
-
* IDs of the form
|
|
31
|
-
*
|
|
58
|
+
* Extract typed facts from a conversation block.
|
|
59
|
+
*
|
|
60
|
+
* Resulting facts have stable IDs of the form
|
|
61
|
+
* `<sessionId>-fact-<index>`, where `<index>` is the sequential
|
|
62
|
+
* POST-DROP position, so IDs remain contiguous after drops in the
|
|
63
|
+
* returned array.
|
|
64
|
+
*
|
|
65
|
+
* **Never throws on extractable input.** Catastrophic outer parse
|
|
66
|
+
* failures (invalid JSON, primitive value, missing facts key) get
|
|
67
|
+
* one retry; persistent failure returns `[]`. Bad individual facts
|
|
68
|
+
* are dropped silently via per-fact `safeParse`.
|
|
32
69
|
*
|
|
33
70
|
* @param sessionText - Full conversation text. Will be wrapped in
|
|
34
71
|
* the user prompt's delimiters automatically.
|
|
35
72
|
* @param sessionId - Stable identifier used to namespace the
|
|
36
73
|
* resulting fact IDs.
|
|
37
74
|
* @returns Array of {@link TypedFact}s, possibly empty.
|
|
38
|
-
* @throws ZodError if the LLM output fails schema validation.
|
|
39
|
-
* @throws SyntaxError if the LLM output is not valid JSON.
|
|
40
75
|
*/
|
|
41
76
|
async extract(sessionText, sessionId) {
|
|
42
|
-
const
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
77
|
+
const baseUserPrompt = buildExtractionUserPrompt(sessionText);
|
|
78
|
+
let lastValidationError = null;
|
|
79
|
+
for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt += 1) {
|
|
80
|
+
// First attempt uses the bare prompt; retry appends the
|
|
81
|
+
// validation error so the model can self-correct.
|
|
82
|
+
const userPrompt = lastValidationError === null
|
|
83
|
+
? baseUserPrompt
|
|
84
|
+
: `${baseUserPrompt}\n\nThe previous response failed validation: ${lastValidationError}\nReturn JSON matching the schema strictly. Do not add commentary.`;
|
|
85
|
+
const raw = await this.llm.invoke({
|
|
86
|
+
system: TYPED_EXTRACTION_SYSTEM_PROMPT,
|
|
87
|
+
user: userPrompt,
|
|
88
|
+
maxTokens: this.maxTokens,
|
|
89
|
+
temperature: this.temperature,
|
|
90
|
+
});
|
|
91
|
+
const stripped = stripCodeFence(raw);
|
|
92
|
+
// Parse JSON. SyntaxError captures bad-JSON outer failures into
|
|
93
|
+
// the retry path.
|
|
94
|
+
let json;
|
|
95
|
+
try {
|
|
96
|
+
json = JSON.parse(stripped);
|
|
97
|
+
}
|
|
98
|
+
catch (err) {
|
|
99
|
+
lastValidationError = err instanceof Error ? err.message : String(err);
|
|
100
|
+
continue;
|
|
101
|
+
}
|
|
102
|
+
// Auto-wrap top-level array. gpt-5-mini frequently emits a bare
|
|
103
|
+
// facts array instead of `{facts: [...]}`; this recovers the
|
|
104
|
+
// most common deviation.
|
|
105
|
+
const container = Array.isArray(json) ? { facts: json } : json;
|
|
106
|
+
// Outer-shape validation. We accept any object with a `facts`
|
|
107
|
+
// array; per-fact validation runs in `extractFactsFromContainer`.
|
|
108
|
+
if (typeof container !== 'object' ||
|
|
109
|
+
container === null ||
|
|
110
|
+
!('facts' in container) ||
|
|
111
|
+
!Array.isArray(container.facts)) {
|
|
112
|
+
lastValidationError =
|
|
113
|
+
'expected JSON object with a "facts" array; got unexpected outer shape';
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
return extractFactsFromContainer(container.facts, sessionId);
|
|
117
|
+
}
|
|
118
|
+
// Every attempt (up to MAX_ATTEMPTS) failed at the outer layer; return empty rather
|
|
119
|
+
// than throwing so the caller can continue ingest. The caller is
|
|
120
|
+
// responsible for downstream "no typed facts in this session"
|
|
121
|
+
// semantics.
|
|
122
|
+
return [];
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
/**
|
|
126
|
+
* Run per-fact tolerance over a candidate array. Returns only the
|
|
127
|
+
* facts that pass {@link TypedExtractionFactSchema} validation;
|
|
128
|
+
* silently drops the rest. IDs are sequential post-drop indices to
|
|
129
|
+
* keep the output array contiguously addressable.
|
|
130
|
+
*/
|
|
131
|
+
function extractFactsFromContainer(candidates, sessionId) {
|
|
132
|
+
const facts = [];
|
|
133
|
+
for (const candidate of candidates) {
|
|
134
|
+
const result = TypedExtractionFactSchema.safeParse(candidate);
|
|
135
|
+
if (!result.success)
|
|
136
|
+
continue;
|
|
137
|
+
const f = result.data;
|
|
138
|
+
facts.push({
|
|
139
|
+
id: `${sessionId}-fact-${facts.length}`,
|
|
55
140
|
bank: f.bank,
|
|
56
141
|
text: f.text,
|
|
57
142
|
embedding: [],
|
|
@@ -60,8 +145,9 @@ export class TypedNetworkObserver {
|
|
|
60
145
|
reasoningMarkers: f.reasoning_markers,
|
|
61
146
|
entities: f.entities,
|
|
62
147
|
confidence: f.confidence,
|
|
63
|
-
})
|
|
148
|
+
});
|
|
64
149
|
}
|
|
150
|
+
return facts;
|
|
65
151
|
}
|
|
66
152
|
/**
|
|
67
153
|
* Strip leading/trailing markdown code fences. Tolerates triple-backtick
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TypedNetworkObserver.js","sourceRoot":"","sources":["../../../../src/memory/retrieval/typed-network/TypedNetworkObserver.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"TypedNetworkObserver.js","sourceRoot":"","sources":["../../../../src/memory/retrieval/typed-network/TypedNetworkObserver.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,yBAAyB,EAAE,MAAM,gCAAgC,CAAC;AAC3E,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,GAC1B,MAAM,gCAAgC,CAAC;AA+BxC;;;;GAIG;AACH,MAAM,YAAY,GAAG,CAAC,CAAC;AAEvB;;;GAGG;AACH,MAAM,OAAO,oBAAoB;IAK/B,YAAY,OAAoC;QAC9C,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;QACvB,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC;QAC3C,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IAC9C,CAAC;IAED;;;;;;;;;;;;;;;;;;OAkBG;IACH,KAAK,CAAC,OAAO,CAAC,WAAmB,EAAE,SAAiB;QAClD,MAAM,cAAc,GAAG,yBAAyB,CAAC,WAAW,CAAC,CAAC;QAC9D,IAAI,mBAAmB,GAAkB,IAAI,CAAC;QAE9C,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,YAAY,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;YAC3D,wDAAwD;YACxD,kDAAkD;YAClD,MAAM,UAAU,GACd,mBAAmB,KAAK,IAAI;gBAC1B,CAAC,CAAC,cAAc;gBAChB,CAAC,CAAC,GAAG,cAAc,gDAAgD,mBAAmB,oEAAoE,CAAC;YAE/J,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC;gBAChC,MAAM,EAAE,8BAA8B;gBACtC,IAAI,EAAE,UAAU;gBAChB,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,WAAW,EAAE,IAAI,CAAC,WAAW;aAC9B,CAAC,CAAC;YAEH,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;YAErC,gEAAgE;YAChE,kBAAkB;YAClB,IAAI,IAAa,CAAC;YAClB,IAAI,CAAC;gBACH,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YAC9B,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,mBAAmB,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACvE,SAAS;YACX,CAAC;YAED,gEAAgE;YAChE,6DAA6D;YAC7D,yBAAyB;YACzB,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YAE/D,8DAA8D;YAC9D,kEAAkE;YAClE,IACE,OAAO,SAAS,KAAK,QAAQ;gBAC7B,SAAS,KAAK,IAAI;gBAClB,CAAC,CAAC,OAAO,IAAI,SAAS,CAAC;gBACvB,CAAC,KAAK,CAAC,OAAO,CAAE,SAAgC,CAAC,KAAK,CAAC,EACvD,CAAC;gBACD,mBAAmB;oBACjB,uEAAuE,CAAC;gBAC1E,SAAS;YACX,CAAC;YAED,OAAO,yBAAyB,CAC7B,SAAkC,CAAC,KAAK,EACzC,SAAS,CACV,CAAC;QACJ,CAAC;QAED,+DAA+D;QAC/D,iEAAiE;QACjE,8DAA8D;QAC9D,aAAa;QACb,OAAO,EAAE,CAAC;IACZ,CAAC;CAC
F;AAED;;;;;GAKG;AACH,SAAS,yBAAyB,CAChC,UAAqB,EACrB,SAAiB;IAEjB,MAAM,KAAK,GAAgB,EAAE,CAAC;IAC9B,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,MAAM,GAAG,yBAAyB,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QAC9D,IAAI,CAAC,MAAM,CAAC,OAAO;YAAE,SAAS;QAC9B,MAAM,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC;YACT,EAAE,EAAE,GAAG,SAAS,SAAS,KAAK,CAAC,MAAM,EAAE;YACvC,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,SAAS,EAAE,EAAE;YACb,QAAQ,EAAE,CAAC,CAAC,QAAQ;YACpB,YAAY,EAAE,CAAC,CAAC,YAAY;YAC5B,gBAAgB,EAAE,CAAC,CAAC,iBAAiB;YACrC,QAAQ,EAAE,CAAC,CAAC,QAAQ;YACpB,UAAU,EAAE,CAAC,CAAC,UAAU;SACzB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;GAKG;AACH,SAAS,cAAc,CAAC,CAAS;IAC/B,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACzB,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAC/C,sFAAsF;IACtF,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,EAAE,CAAC,CAAC;IAClE,OAAO,WAAW,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;AAC/C,CAAC"}
|
|
@@ -8,59 +8,91 @@
|
|
|
8
8
|
* schema's snake_case shape to the camelCase TypedFact at construction
|
|
9
9
|
* time.
|
|
10
10
|
*
|
|
11
|
+
* **Tolerance design (Phase 4c smoke fix):** the schema accepts the
|
|
12
|
+
* common deviations gpt-5-mini emits at scale, rather than throwing on
|
|
13
|
+
* any deviation:
|
|
14
|
+
*
|
|
15
|
+
* - `bank` is preprocessed to uppercase before enum validation. The
|
|
16
|
+
* prompt asks for uppercase; if the model emits lowercase, the
|
|
17
|
+
* coercion recovers the fact instead of dropping it.
|
|
18
|
+
* - `temporal.mention` is optional and defaults to empty string. The
|
|
19
|
+
* model sometimes omits it when it cannot infer a mention timestamp.
|
|
20
|
+
* Downstream {@link rankByTemporalOverlap} already handles empty
|
|
21
|
+
* mentions gracefully (falls back to interval endpoints).
|
|
22
|
+
* - `temporal` itself defaults to `{mention: ''}`. The model sometimes
|
|
23
|
+
* omits the temporal block entirely on non-temporal facts.
|
|
24
|
+
* - `participants`, `reasoning_markers`, `entities` default to `[]`.
|
|
25
|
+
* The model frequently emits the fact without these keys when no
|
|
26
|
+
* participants/entities/markers apply.
|
|
27
|
+
*
|
|
28
|
+
* Per-fact failures (text below minimum length, bank not a valid enum value
|
|
29
|
+
* after uppercase coercion, confidence outside [0, 1]) still cause the
|
|
30
|
+
* INDIVIDUAL fact to drop. The {@link TypedNetworkObserver} validates
|
|
31
|
+
* facts one by one (`safeParse` per fact) and keeps the valid ones.
|
|
32
|
+
*
|
|
11
33
|
* @module @framers/agentos/memory/retrieval/typed-network/prompts/extraction-schema
|
|
12
34
|
*/
|
|
13
35
|
import { z } from 'zod';
|
|
14
36
|
/**
|
|
15
37
|
* Schema for one extracted fact, matching the LLM's expected output.
|
|
16
|
-
*
|
|
17
|
-
*
|
|
38
|
+
*
|
|
39
|
+
* Defaults applied when the LLM omits fields:
|
|
40
|
+
* - `temporal`: `{mention: ''}`; `temporal.mention`: `''` (downstream tolerates empty mention)
|
|
41
|
+
* - `participants`: `[]` (each participant's `role`: `''`)
|
|
42
|
+
* - `reasoning_markers`: `[]`
|
|
43
|
+
* - `entities`: `[]`
|
|
44
|
+
* - `confidence`: `1.0`
|
|
45
|
+
*
|
|
46
|
+
* `bank` is uppercase-coerced before enum validation so a lowercase
|
|
47
|
+
* model output (e.g. `'world'`) passes as `'WORLD'`.
|
|
18
48
|
*/
|
|
19
49
|
export declare const TypedExtractionFactSchema: z.ZodObject<{
|
|
20
50
|
text: z.ZodString;
|
|
21
|
-
bank: z.ZodEnum<{
|
|
51
|
+
bank: z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodEnum<{
|
|
22
52
|
WORLD: "WORLD";
|
|
23
53
|
EXPERIENCE: "EXPERIENCE";
|
|
24
54
|
OPINION: "OPINION";
|
|
25
55
|
OBSERVATION: "OBSERVATION";
|
|
26
|
-
}
|
|
27
|
-
temporal: z.ZodObject<{
|
|
56
|
+
}>>;
|
|
57
|
+
temporal: z.ZodDefault<z.ZodObject<{
|
|
28
58
|
start: z.ZodOptional<z.ZodString>;
|
|
29
59
|
end: z.ZodOptional<z.ZodString>;
|
|
30
|
-
mention: z.ZodString
|
|
31
|
-
}, z.core.$strip>;
|
|
32
|
-
participants: z.ZodArray<z.ZodObject<{
|
|
33
|
-
name: z.ZodString;
|
|
34
|
-
role: z.ZodString;
|
|
60
|
+
mention: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
35
61
|
}, z.core.$strip>>;
|
|
36
|
-
|
|
37
|
-
|
|
62
|
+
participants: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
63
|
+
name: z.ZodString;
|
|
64
|
+
role: z.ZodDefault<z.ZodString>;
|
|
65
|
+
}, z.core.$strip>>>;
|
|
66
|
+
reasoning_markers: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
67
|
+
entities: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
38
68
|
confidence: z.ZodDefault<z.ZodNumber>;
|
|
39
69
|
}, z.core.$strip>;
|
|
40
70
|
/**
|
|
41
71
|
* Top-level schema. Wraps the fact array under a `facts` key so the
|
|
42
|
-
* LLM has a stable structural anchor to emit against.
|
|
72
|
+
* LLM has a stable structural anchor to emit against. The
|
|
73
|
+
* {@link TypedNetworkObserver} additionally tolerates a top-level
|
|
74
|
+
* array (no `facts` key) by auto-wrapping it before this schema runs.
|
|
43
75
|
*/
|
|
44
76
|
export declare const TypedExtractionSchema: z.ZodObject<{
|
|
45
77
|
facts: z.ZodArray<z.ZodObject<{
|
|
46
78
|
text: z.ZodString;
|
|
47
|
-
bank: z.ZodEnum<{
|
|
79
|
+
bank: z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodEnum<{
|
|
48
80
|
WORLD: "WORLD";
|
|
49
81
|
EXPERIENCE: "EXPERIENCE";
|
|
50
82
|
OPINION: "OPINION";
|
|
51
83
|
OBSERVATION: "OBSERVATION";
|
|
52
|
-
}
|
|
53
|
-
temporal: z.ZodObject<{
|
|
84
|
+
}>>;
|
|
85
|
+
temporal: z.ZodDefault<z.ZodObject<{
|
|
54
86
|
start: z.ZodOptional<z.ZodString>;
|
|
55
87
|
end: z.ZodOptional<z.ZodString>;
|
|
56
|
-
mention: z.ZodString
|
|
57
|
-
}, z.core.$strip>;
|
|
58
|
-
participants: z.ZodArray<z.ZodObject<{
|
|
59
|
-
name: z.ZodString;
|
|
60
|
-
role: z.ZodString;
|
|
88
|
+
mention: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
61
89
|
}, z.core.$strip>>;
|
|
62
|
-
|
|
63
|
-
|
|
90
|
+
participants: z.ZodDefault<z.ZodArray<z.ZodObject<{
|
|
91
|
+
name: z.ZodString;
|
|
92
|
+
role: z.ZodDefault<z.ZodString>;
|
|
93
|
+
}, z.core.$strip>>>;
|
|
94
|
+
reasoning_markers: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
95
|
+
entities: z.ZodDefault<z.ZodArray<z.ZodString>>;
|
|
64
96
|
confidence: z.ZodDefault<z.ZodNumber>;
|
|
65
97
|
}, z.core.$strip>>;
|
|
66
98
|
}, z.core.$strip>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extraction-schema.d.ts","sourceRoot":"","sources":["../../../../../src/memory/retrieval/typed-network/prompts/extraction-schema.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"extraction-schema.d.ts","sourceRoot":"","sources":["../../../../../src/memory/retrieval/typed-network/prompts/extraction-schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;;;;;;;;;iBAwBpC,CAAC;AAEH;;;;;GAKG;AACH,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;iBAEhC,CAAC;AAEH,mEAAmE;AACnE,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,qBAAqB,CAAC,CAAC;AAC1E,qEAAqE;AACrE,MAAM,MAAM,mBAAmB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,yBAAyB,CAAC,CAAC"}
|
|
@@ -8,30 +8,69 @@
|
|
|
8
8
|
* schema's snake_case shape to the camelCase TypedFact at construction
|
|
9
9
|
* time.
|
|
10
10
|
*
|
|
11
|
+
* **Tolerance design (Phase 4c smoke fix):** the schema accepts the
|
|
12
|
+
* common deviations gpt-5-mini emits at scale, rather than throwing on
|
|
13
|
+
* any deviation:
|
|
14
|
+
*
|
|
15
|
+
* - `bank` is preprocessed to uppercase before enum validation. The
|
|
16
|
+
* prompt asks for uppercase; if the model emits lowercase, the
|
|
17
|
+
* coercion recovers the fact instead of dropping it.
|
|
18
|
+
* - `temporal.mention` is optional and defaults to empty string. The
|
|
19
|
+
* model sometimes omits it when it cannot infer a mention timestamp.
|
|
20
|
+
* Downstream {@link rankByTemporalOverlap} already handles empty
|
|
21
|
+
* mentions gracefully (falls back to interval endpoints).
|
|
22
|
+
* - `temporal` itself defaults to `{mention: ''}`. The model sometimes
|
|
23
|
+
* omits the temporal block entirely on non-temporal facts.
|
|
24
|
+
* - `participants`, `reasoning_markers`, `entities` default to `[]`.
|
|
25
|
+
* The model frequently emits the fact without these keys when no
|
|
26
|
+
* participants/entities/markers apply.
|
|
27
|
+
*
|
|
28
|
+
* Per-fact failures (text below minimum length, bank not a valid enum value
|
|
29
|
+
* after uppercase coercion, confidence outside [0, 1]) still cause the
|
|
30
|
+
* INDIVIDUAL fact to drop. The {@link TypedNetworkObserver} validates
|
|
31
|
+
* facts one by one (`safeParse` per fact) and keeps the valid ones.
|
|
32
|
+
*
|
|
11
33
|
* @module @framers/agentos/memory/retrieval/typed-network/prompts/extraction-schema
|
|
12
34
|
*/
|
|
13
35
|
import { z } from 'zod';
|
|
14
36
|
/**
|
|
15
37
|
* Schema for one extracted fact, matching the LLM's expected output.
|
|
16
|
-
*
|
|
17
|
-
*
|
|
38
|
+
*
|
|
39
|
+
* Defaults applied when the LLM omits fields:
|
|
40
|
+
* - `temporal`: `{mention: ''}`; `temporal.mention`: `''` (downstream tolerates empty mention)
|
|
41
|
+
* - `participants`: `[]` (each participant's `role`: `''`)
|
|
42
|
+
* - `reasoning_markers`: `[]`
|
|
43
|
+
* - `entities`: `[]`
|
|
44
|
+
* - `confidence`: `1.0`
|
|
45
|
+
*
|
|
46
|
+
* `bank` is uppercase-coerced before enum validation so a lowercase
|
|
47
|
+
* model output (e.g. `'world'`) passes as `'WORLD'`.
|
|
18
48
|
*/
|
|
19
49
|
export const TypedExtractionFactSchema = z.object({
|
|
20
50
|
text: z.string().min(1),
|
|
21
|
-
bank: z.enum(['WORLD', 'EXPERIENCE', 'OPINION', 'OBSERVATION']),
|
|
22
|
-
temporal: z
|
|
51
|
+
bank: z.preprocess((v) => (typeof v === 'string' ? v.toUpperCase() : v), z.enum(['WORLD', 'EXPERIENCE', 'OPINION', 'OBSERVATION'])),
|
|
52
|
+
temporal: z
|
|
53
|
+
.object({
|
|
23
54
|
start: z.string().optional(),
|
|
24
55
|
end: z.string().optional(),
|
|
25
|
-
mention: z.string(),
|
|
26
|
-
})
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
56
|
+
mention: z.string().optional().default(''),
|
|
57
|
+
})
|
|
58
|
+
.default({ mention: '' }),
|
|
59
|
+
participants: z
|
|
60
|
+
.array(z.object({
|
|
61
|
+
name: z.string(),
|
|
62
|
+
role: z.string().default(''),
|
|
63
|
+
}))
|
|
64
|
+
.default([]),
|
|
65
|
+
reasoning_markers: z.array(z.string()).default([]),
|
|
66
|
+
entities: z.array(z.string()).default([]),
|
|
30
67
|
confidence: z.number().min(0).max(1).default(1.0),
|
|
31
68
|
});
|
|
32
69
|
/**
|
|
33
70
|
* Top-level schema. Wraps the fact array under a `facts` key so the
|
|
34
|
-
* LLM has a stable structural anchor to emit against.
|
|
71
|
+
* LLM has a stable structural anchor to emit against. The
|
|
72
|
+
* {@link TypedNetworkObserver} additionally tolerates a top-level
|
|
73
|
+
* array (no `facts` key) by auto-wrapping it before this schema runs.
|
|
35
74
|
*/
|
|
36
75
|
export const TypedExtractionSchema = z.object({
|
|
37
76
|
facts: z.array(TypedExtractionFactSchema),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extraction-schema.js","sourceRoot":"","sources":["../../../../../src/memory/retrieval/typed-network/prompts/extraction-schema.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"extraction-schema.js","sourceRoot":"","sources":["../../../../../src/memory/retrieval/typed-network/prompts/extraction-schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;IAChD,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACvB,IAAI,EAAE,CAAC,CAAC,UAAU,CAChB,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EACpD,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE,aAAa,CAAC,CAAC,CAC1D;IACD,QAAQ,EAAE,CAAC;SACR,MAAM,CAAC;QACN,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC5B,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;QAC1B,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;KAC3C,CAAC;SACD,OAAO,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAC3B,YAAY,EAAE,CAAC;SACZ,KAAK,CACJ,CAAC,CAAC,MAAM,CAAC;QACP,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;QAChB,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;KAC7B,CAAC,CACH;SACA,OAAO,CAAC,EAAE,CAAC;IACd,iBAAiB,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;IAClD,QAAQ,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC;IACzC,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC;CAClD,CAAC,CAAC;AAEH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5C,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,yBAAyB,CAAC;CAC1C,CAAC,CAAC"}
|