@lov3kaizen/agentsea-structured 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +330 -0
- package/dist/index.d.ts +783 -0
- package/dist/index.js +3210 -0
- package/dist/index.js.map +1 -0
- package/package.json +77 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 lovekaizen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
# @lov3kaizen/agentsea-structured
|
|
2
|
+
|
|
3
|
+
A TypeScript-native structured output framework that guarantees LLM responses match your Zod schemas. It is similar to Python's Instructor and Outlines libraries, but built specifically for TypeScript.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Zod Schema Integration**: Define expected output shapes using Zod schemas
|
|
8
|
+
- **Multiple Extraction Modes**: JSON mode, tool/function calling, prompt engineering, and hybrid approaches
|
|
9
|
+
- **Provider Adapters**: Built-in support for OpenAI, Anthropic, and Google
|
|
10
|
+
- **Streaming Support**: Get partial results as they stream in
|
|
11
|
+
- **Automatic Retries**: Smart retry with fix hints when validation fails
|
|
12
|
+
- **Schema-Aware Prompting**: Automatically generates schema prompts in multiple formats
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pnpm add @lov3kaizen/agentsea-structured zod
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
```typescript
|
|
23
|
+
import { z } from 'zod';
|
|
24
|
+
import OpenAI from 'openai';
|
|
25
|
+
import {
|
|
26
|
+
createStructuredClient,
|
|
27
|
+
createOpenAIAdapter,
|
|
28
|
+
} from '@lov3kaizen/agentsea-structured';
|
|
29
|
+
|
|
30
|
+
// Define your schema
|
|
31
|
+
const UserSchema = z.object({
|
|
32
|
+
name: z.string().describe('User full name'),
|
|
33
|
+
email: z.string().email().describe('User email address'),
|
|
34
|
+
age: z.number().int().positive().describe('User age'),
|
|
35
|
+
interests: z.array(z.string()).describe('User interests'),
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
// Create provider adapter
|
|
39
|
+
const openai = new OpenAI();
|
|
40
|
+
const adapter = createOpenAIAdapter(openai);
|
|
41
|
+
|
|
42
|
+
// Create structured client
|
|
43
|
+
const client = createStructuredClient(adapter, {
|
|
44
|
+
defaultMode: 'json',
|
|
45
|
+
enableFixHints: true,
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
// Extract structured data
|
|
49
|
+
const result = await client.extract({
|
|
50
|
+
model: 'gpt-4o',
|
|
51
|
+
messages: [
|
|
52
|
+
{
|
|
53
|
+
role: 'user',
|
|
54
|
+
content:
|
|
55
|
+
'Extract user info: John Doe, john@example.com, 30 years old, likes coding and gaming',
|
|
56
|
+
},
|
|
57
|
+
],
|
|
58
|
+
response_format: UserSchema,
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
if (result.success) {
|
|
62
|
+
console.log(result.data);
|
|
63
|
+
// { name: 'John Doe', email: 'john@example.com', age: 30, interests: ['coding', 'gaming'] }
|
|
64
|
+
}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Extraction Modes
|
|
68
|
+
|
|
69
|
+
### JSON Mode (Default)
|
|
70
|
+
|
|
71
|
+
Uses native JSON mode when available (OpenAI, Google):
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
const result = await client.extract({
|
|
75
|
+
model: 'gpt-4o',
|
|
76
|
+
messages: [...],
|
|
77
|
+
response_format: schema,
|
|
78
|
+
mode: 'json',
|
|
79
|
+
});
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Tool Mode
|
|
83
|
+
|
|
84
|
+
Uses function/tool calling for extraction:
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
const result = await client.extract({
|
|
88
|
+
model: 'gpt-4o',
|
|
89
|
+
messages: [...],
|
|
90
|
+
response_format: schema,
|
|
91
|
+
mode: 'tool',
|
|
92
|
+
});
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Prompt Mode
|
|
96
|
+
|
|
97
|
+
Uses prompt engineering for providers without native JSON support:
|
|
98
|
+
|
|
99
|
+
```typescript
|
|
100
|
+
const result = await client.extract({
|
|
101
|
+
model: 'claude-3-5-sonnet-20241022',
|
|
102
|
+
messages: [...],
|
|
103
|
+
response_format: schema,
|
|
104
|
+
mode: 'prompt',
|
|
105
|
+
});
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Hybrid Mode
|
|
109
|
+
|
|
110
|
+
Automatically falls back from one mode to the next, following the order given in `fallbackOrder`:
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
const result = await client.extract({
|
|
114
|
+
model: 'gpt-4o',
|
|
115
|
+
messages: [...],
|
|
116
|
+
response_format: schema,
|
|
117
|
+
mode: {
|
|
118
|
+
mode: 'hybrid',
|
|
119
|
+
fallbackOrder: ['json', 'tool', 'prompt'],
|
|
120
|
+
},
|
|
121
|
+
});
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Streaming
|
|
125
|
+
|
|
126
|
+
Get partial results as they stream in:
|
|
127
|
+
|
|
128
|
+
```typescript
|
|
129
|
+
const stream = await client.extractStream(
|
|
130
|
+
{
|
|
131
|
+
model: 'gpt-4o',
|
|
132
|
+
messages: [...],
|
|
133
|
+
response_format: schema,
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
yieldPartials: true,
|
|
137
|
+
minFieldsBeforeYield: 2,
|
|
138
|
+
onFieldComplete: (path, value) => {
|
|
139
|
+
console.log(`Field ${path} completed:`, value);
|
|
140
|
+
},
|
|
141
|
+
}
|
|
142
|
+
);
|
|
143
|
+
|
|
144
|
+
// Iterate over partial results
|
|
145
|
+
for await (const partial of stream.partials()) {
|
|
146
|
+
console.log('Partial:', partial);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// Get final result
|
|
150
|
+
const final = await stream.final();
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Provider Adapters
|
|
154
|
+
|
|
155
|
+
### OpenAI
|
|
156
|
+
|
|
157
|
+
```typescript
|
|
158
|
+
import OpenAI from 'openai';
|
|
159
|
+
import { createOpenAIAdapter } from '@lov3kaizen/agentsea-structured';
|
|
160
|
+
|
|
161
|
+
const openai = new OpenAI();
|
|
162
|
+
const adapter = createOpenAIAdapter(openai);
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Anthropic
|
|
166
|
+
|
|
167
|
+
```typescript
|
|
168
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
169
|
+
import { createAnthropicAdapter } from '@lov3kaizen/agentsea-structured';
|
|
170
|
+
|
|
171
|
+
const anthropic = new Anthropic();
|
|
172
|
+
const adapter = createAnthropicAdapter(anthropic);
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Google
|
|
176
|
+
|
|
177
|
+
```typescript
|
|
178
|
+
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
179
|
+
import { createGoogleAdapter } from '@lov3kaizen/agentsea-structured';
|
|
180
|
+
|
|
181
|
+
const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY);
|
|
182
|
+
const adapter = createGoogleAdapter(genAI);
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Retry Configuration
|
|
186
|
+
|
|
187
|
+
```typescript
|
|
188
|
+
const client = createStructuredClient(adapter, {
|
|
189
|
+
defaultRetry: {
|
|
190
|
+
maxAttempts: 3,
|
|
191
|
+
retryOn: ['validation_error', 'parse_error'],
|
|
192
|
+
backoffMultiplier: 1.5,
|
|
193
|
+
initialDelay: 1000,
|
|
194
|
+
maxDelay: 10000,
|
|
195
|
+
},
|
|
196
|
+
enableFixHints: true, // Add validation hints to retry prompts
|
|
197
|
+
});
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Schema Utilities
|
|
201
|
+
|
|
202
|
+
### Schema to Prompt
|
|
203
|
+
|
|
204
|
+
Convert Zod schemas to prompt-friendly representations:
|
|
205
|
+
|
|
206
|
+
```typescript
|
|
207
|
+
import { schemaToPrompt } from '@lov3kaizen/agentsea-structured';
|
|
208
|
+
|
|
209
|
+
const prompt = schemaToPrompt(UserSchema, {
|
|
210
|
+
format: 'natural', // 'json-schema' | 'typescript' | 'natural' | 'examples'
|
|
211
|
+
includeConstraints: true,
|
|
212
|
+
includeExamples: true,
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
console.log(prompt.text);
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
### Schema Validation
|
|
219
|
+
|
|
220
|
+
```typescript
|
|
221
|
+
import {
|
|
222
|
+
validateSchema,
|
|
223
|
+
getValidationHints,
|
|
224
|
+
} from '@lov3kaizen/agentsea-structured';
|
|
225
|
+
|
|
226
|
+
const result = validateSchema(UserSchema, data);
|
|
227
|
+
if (!result.success) {
|
|
228
|
+
const hints = getValidationHints(UserSchema, data);
|
|
229
|
+
console.log('Hints for fixing:', hints);
|
|
230
|
+
}
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## AgentSea Integration
|
|
234
|
+
|
|
235
|
+
Use with the AgentSea framework:
|
|
236
|
+
|
|
237
|
+
```typescript
|
|
238
|
+
import {
|
|
239
|
+
createStructuredProvider,
|
|
240
|
+
Extractors,
|
|
241
|
+
} from '@lov3kaizen/agentsea-structured';
|
|
242
|
+
|
|
243
|
+
const provider = createStructuredProvider({
|
|
244
|
+
defaultModel: 'gpt-4o',
|
|
245
|
+
enableFixHints: true,
|
|
246
|
+
});
|
|
247
|
+
|
|
248
|
+
provider.registerProvider('openai', {
|
|
249
|
+
provider: 'openai',
|
|
250
|
+
client: new OpenAI(),
|
|
251
|
+
});
|
|
252
|
+
|
|
253
|
+
// Use built-in extractors
|
|
254
|
+
const sentimentSchema = Extractors.sentiment();
|
|
255
|
+
const result = await provider.extract(
|
|
256
|
+
sentimentSchema,
|
|
257
|
+
'I love this product! The quality is amazing.',
|
|
258
|
+
);
|
|
259
|
+
|
|
260
|
+
// Create reusable typed extractors
|
|
261
|
+
const userExtractor = provider.createExtractor(UserSchema, {
|
|
262
|
+
model: 'gpt-4o',
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
const user = await userExtractor.extract('John Doe, john@example.com, 30');
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### Built-in Extractors
|
|
269
|
+
|
|
270
|
+
```typescript
|
|
271
|
+
import { Extractors } from '@lov3kaizen/agentsea-structured';
|
|
272
|
+
|
|
273
|
+
// List extraction
|
|
274
|
+
const listSchema = Extractors.list(z.string(), { minItems: 1, maxItems: 10 });
|
|
275
|
+
|
|
276
|
+
// Entity extraction
|
|
277
|
+
const entitySchema = Extractors.entity({
|
|
278
|
+
name: z.string(),
|
|
279
|
+
type: z.string(),
|
|
280
|
+
});
|
|
281
|
+
|
|
282
|
+
// Classification
|
|
283
|
+
const classifySchema = Extractors.classification(['spam', 'not_spam'], {
|
|
284
|
+
confidence: true,
|
|
285
|
+
});
|
|
286
|
+
|
|
287
|
+
// Sentiment analysis
|
|
288
|
+
const sentimentSchema = Extractors.sentiment();
|
|
289
|
+
|
|
290
|
+
// Summary extraction
|
|
291
|
+
const summarySchema = Extractors.summary({ maxLength: 500 });
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
## API Reference
|
|
295
|
+
|
|
296
|
+
### StructuredClient
|
|
297
|
+
|
|
298
|
+
Main client for structured extraction.
|
|
299
|
+
|
|
300
|
+
```typescript
|
|
301
|
+
const client = createStructuredClient(adapter, config);
|
|
302
|
+
|
|
303
|
+
// Extract structured data
|
|
304
|
+
const result = await client.extract(options);
|
|
305
|
+
|
|
306
|
+
// Extract with streaming
|
|
307
|
+
const stream = await client.extractStream(options, streamingOptions);
|
|
308
|
+
|
|
309
|
+
// Check mode support
|
|
310
|
+
const supports = client.supportsMode('json', 'gpt-4o');
|
|
311
|
+
|
|
312
|
+
// Get provider capabilities
|
|
313
|
+
const caps = client.getProviderCapabilities('gpt-4o');
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### Events
|
|
317
|
+
|
|
318
|
+
```typescript
|
|
319
|
+
client.on('extraction:start', ({ requestId, mode }) => { ... });
|
|
320
|
+
client.on('extraction:attempt', ({ requestId, attempt, mode }) => { ... });
|
|
321
|
+
client.on('extraction:success', ({ requestId, data, attempts }) => { ... });
|
|
322
|
+
client.on('extraction:error', ({ requestId, error, attempt }) => { ... });
|
|
323
|
+
client.on('extraction:retry', ({ requestId, attempt, reason, hints }) => { ... });
|
|
324
|
+
client.on('validation:failed', ({ requestId, errors }) => { ... });
|
|
325
|
+
client.on('mode:switch', ({ requestId, from, to }) => { ... });
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
## License
|
|
329
|
+
|
|
330
|
+
MIT
|