@minded-ai/mindedjs 2.0.14 → 2.0.15-beta-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,15 @@ import { JsonOutputParser } from '@langchain/core/output_parsers';
5
5
  import { SystemMessage } from '@langchain/core/messages';
6
6
  import { BaseLanguageModel } from '@langchain/core/language_models/base';
7
7
 
8
+ // Shape of an LLM that exposes withStructuredOutput, followed by a runtime type guard that detects it
9
+ interface StructuredOutputLLM extends BaseLanguageModel {
10
+ withStructuredOutput<T extends z.ZodType>(schema: T): BaseLanguageModel;
11
+ }
12
+
13
+ function supportsStructuredOutput(llm: BaseLanguageModel): llm is StructuredOutputLLM {
14
+ return 'withStructuredOutput' in llm && typeof (llm as any).withStructuredOutput === 'function';
15
+ }
16
+
8
17
  // Type definitions for classifier configuration
9
18
  export interface ClassDefinition {
10
19
  name: string;
@@ -14,26 +23,35 @@ export interface ClassDefinition {
14
23
  export interface ClassifierConfig {
15
24
  classes: ClassDefinition[];
16
25
  systemPrompt?: string;
17
- outputFormat?: 'json' | 'text';
18
- includeReason?: boolean;
19
- defaultClass?: string;
20
- defaultReason?: string;
26
+ defaultClass: string;
27
+ defaultReason: string;
21
28
  }
22
29
 
23
30
  export interface ClassificationResult {
24
31
  class: string;
25
- reason?: string;
26
- confidence?: number;
32
+ reason: string;
33
+ confidence: number;
27
34
  [key: string]: any; // Allow additional fields
28
35
  }
29
36
 
30
- // Default configuration
31
- const DEFAULT_CONFIG: Partial<ClassifierConfig> = {
32
- outputFormat: 'json',
33
- includeReason: true,
34
- defaultClass: 'unknown',
35
- defaultReason: 'Unable to determine classification',
36
- };
37
+ /**
38
+ * Create a dynamic Zod schema for classification result based on valid classes
39
+ */
40
+ function createClassificationSchema(config: ClassifierConfig): z.ZodSchema {
41
+ const validClassNames = config.classes.map((c) => c.name);
42
+
43
+ // Build a Zod enum of the valid class names (the tuple cast assumes config.classes is non-empty)
44
+ const classEnum = z.enum(validClassNames as [string, ...string[]]);
45
+
46
+ // Build the schema - always include all fields
47
+ const schemaShape: Record<string, z.ZodTypeAny> = {
48
+ class: classEnum.describe('The selected classification category'),
49
+ reason: z.string().describe('Explanation for the classification'),
50
+ confidence: z.number().min(0).max(1).describe('Confidence score between 0 and 1'),
51
+ };
52
+
53
+ return z.object(schemaShape);
54
+ }
37
55
 
38
56
  /**
39
57
  * Generic classifier utility that can be used standalone
@@ -43,58 +61,90 @@ const DEFAULT_CONFIG: Partial<ClassifierConfig> = {
43
61
  * @returns The classification result
44
62
  */
45
63
  export async function classify(content: string, config: ClassifierConfig, llm: BaseLanguageModel): Promise<ClassificationResult> {
46
- const mergedConfig = { ...DEFAULT_CONFIG, ...config };
64
+ const MAX_RETRIES = 3;
65
+ let lastError: Error | null = null;
47
66
 
48
- try {
49
- // Build the classification prompt
50
- const classesDescription = mergedConfig.classes.map((c) => `${c.name}: ${c.description}`).join('\n');
51
-
52
- const basePrompt =
53
- mergedConfig.systemPrompt || 'You are a classifier. Your task is to classify the given content into one of the following categories:';
54
-
55
- let prompt = `${basePrompt}\n\n${classesDescription}\n\n`;
56
-
57
- if (mergedConfig.outputFormat === 'json') {
58
- prompt += `You should output the result in the following JSON format: {
59
- "class": "<selected class name>",
60
- ${mergedConfig.includeReason ? '"reason": "<explanation for the classification>",' : ''}
61
- "confidence": <confidence score between 0 and 1>
62
- }.\nReturn JSON and nothing more.`;
63
- } else {
64
- prompt += 'Return only the class name.';
65
- }
67
+ // Check if LLM supports structured output upfront
68
+ if (!supportsStructuredOutput(llm)) {
69
+ throw new Error('LLM does not support structured output, which is required for classification');
70
+ }
66
71
 
67
- prompt += `\n\nContent to classify:\n${content}`;
68
-
69
- // Make the classification request
70
- if (mergedConfig.outputFormat === 'json') {
71
- const parser = new JsonOutputParser();
72
- const result = await llm.pipe(parser).invoke([new SystemMessage(prompt)]);
73
- return result as ClassificationResult;
74
- } else {
75
- const result = await llm.invoke([new SystemMessage(prompt)]);
76
- const classText = typeof result.content === 'string' ? result.content.trim() : '';
77
- return { class: classText };
78
- }
79
- } catch (err) {
80
- logger.error({ message: 'Classification failed, using default', err });
72
+ for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
73
+ try {
74
+ // Build the classification prompt
75
+ const classesDescription = config.classes.map((c) => `${c.name}: ${c.description}`).join('\n');
81
76
 
82
- // Return default classification on error
83
- return {
84
- class: mergedConfig.defaultClass || 'unknown',
85
- reason: mergedConfig.defaultReason || (err as Error).message,
86
- confidence: 0,
87
- };
77
+ const basePrompt =
78
+ config.systemPrompt || 'You are a classifier. Your task is to classify the given content into one of the following categories:';
79
+
80
+ let prompt = `${basePrompt}\n\n${classesDescription}\n\n`;
81
+
82
+ // Add retry feedback if this is not the first attempt
83
+ if (attempt > 1) {
84
+ const validClassNames = config.classes.map((c) => c.name);
85
+ prompt += `\n⚠️ IMPORTANT: Your previous classification attempt failed. `;
86
+ prompt += `You MUST select ONLY from these valid classes:\n${validClassNames.join(', ')}\n\n`;
87
+ }
88
+
89
+ // Create dynamic schema based on valid classes
90
+ const classificationSchema = createClassificationSchema(config);
91
+
92
+ // Add content to classify to the prompt
93
+ prompt += `\n\nContent to classify:\n${content}`;
94
+
95
+ // Use structured output for guaranteed compliance
96
+ const structuredLLM = llm.withStructuredOutput(classificationSchema);
97
+ const messages = [new SystemMessage(prompt)];
98
+ const result = (await structuredLLM.invoke(messages)) as ClassificationResult;
99
+
100
+ // The result is already validated by withStructuredOutput
101
+ logger.debug({
102
+ message: 'Classification successful with structured output',
103
+ attempt,
104
+ class: result.class,
105
+ });
106
+
107
+ return result;
108
+ } catch (err) {
109
+ lastError = err as Error;
110
+ logger.warn({
111
+ message: `Classification attempt ${attempt} failed`,
112
+ attempt,
113
+ maxRetries: MAX_RETRIES,
114
+ error: lastError.message,
115
+ });
116
+
117
+ // If this is not the last attempt, continue to retry
118
+ if (attempt < MAX_RETRIES) {
119
+ continue;
120
+ }
121
+ }
88
122
  }
123
+
124
+ // All retries exhausted - fallback to default
125
+ logger.warn({
126
+ message: 'Classification failed after max retries, using default class',
127
+ maxRetries: MAX_RETRIES,
128
+ lastError: lastError?.message,
129
+ defaultClass: config.defaultClass,
130
+ defaultReason: config.defaultReason,
131
+ });
132
+
133
+ // Return default classification
134
+ return {
135
+ class: config.defaultClass,
136
+ reason: config.defaultReason,
137
+ confidence: 0,
138
+ };
89
139
  }
90
140
 
91
141
  /**
92
142
  * Create a classifier from a simple class list
93
143
  * @param classes Array of class names or [name, description] tuples
94
- * @param options Additional configuration options
144
+ * @param options Configuration options (defaultClass and defaultReason are required; systemPrompt is optional)
95
145
  * @returns A configured classify function
96
146
  */
97
- export function createClassifier(classes: string[] | [string, string][], options?: Partial<ClassifierConfig>) {
147
+ export function createClassifier(classes: string[] | [string, string][], options: Omit<ClassifierConfig, 'classes'>) {
98
148
  const classDefinitions: ClassDefinition[] = classes.map((c) => {
99
149
  if (typeof c === 'string') {
100
150
  return { name: c, description: '' };
@@ -127,20 +177,18 @@ export const schema = z.object({
127
177
  .optional()
128
178
  .describe('Classes to classify into. Can be strings, [name, description] tuples, or objects with name and description'),
129
179
  systemPrompt: z.string().optional().describe('Custom system prompt for classification'),
130
- includeReason: z.boolean().optional().default(true).describe('Whether to include reasoning in the response'),
131
- outputFormat: z.enum(['json', 'text']).optional().default('json').describe('Output format for the classification'),
132
- defaultClass: z.string().optional().describe('Default class to use if classification fails'),
133
- defaultReason: z.string().optional().describe('Default reason to use if classification fails'),
180
+ defaultClass: z.string().describe('Default class to use if classification fails'),
181
+ defaultReason: z.string().describe('Default reason to use if classification fails'),
134
182
  });
135
183
 
136
184
  const classifierTool: Tool<typeof schema, any> = {
137
185
  name: 'minded-classifier',
138
186
  description:
139
- 'Classify content into predefined categories using AI. Supports custom classes, system prompts, and various output formats. Can be configured with default fallback values.',
187
+ 'Classify content into predefined categories using AI. Supports custom classes and system prompts. Uses structured output for guaranteed schema compliance. Requires default fallback values.',
140
188
  input: schema,
141
189
  isGlobal: false,
142
190
  execute: async ({ input, state, agent }) => {
143
- const { content, classes, systemPrompt, includeReason, outputFormat, defaultClass, defaultReason } = input;
191
+ const { content, classes, systemPrompt, defaultClass, defaultReason } = input;
144
192
 
145
193
  logger.info({
146
194
  message: 'Classifying content',
@@ -175,8 +223,6 @@ const classifierTool: Tool<typeof schema, any> = {
175
223
  const config: ClassifierConfig = {
176
224
  classes: classDefinitions,
177
225
  systemPrompt,
178
- includeReason,
179
- outputFormat,
180
226
  defaultClass,
181
227
  defaultReason,
182
228
  };
@@ -220,46 +266,50 @@ export async function multiClassify(
220
266
  config: ClassifierConfig & { maxClasses?: number },
221
267
  llm: BaseLanguageModel,
222
268
  ): Promise<ClassificationResult[]> {
223
- const mergedConfig = { ...DEFAULT_CONFIG, ...config };
224
- const maxClasses = mergedConfig.maxClasses || 3;
269
+ const maxClasses = config.maxClasses || 3;
270
+
271
+ // Check if LLM supports structured output
272
+ if (!supportsStructuredOutput(llm)) {
273
+ throw new Error('LLM does not support structured output, which is required for multi-classification');
274
+ }
225
275
 
226
276
  try {
227
- const classesDescription = mergedConfig.classes.map((c) => `${c.name}: ${c.description}`).join('\n');
277
+ const classesDescription = config.classes.map((c) => `${c.name}: ${c.description}`).join('\n');
278
+ const validClassNames = config.classes.map((c) => c.name);
228
279
 
229
- const basePrompt =
230
- mergedConfig.systemPrompt || 'You are a multi-label classifier. Select all applicable categories for the given content:';
280
+ const basePrompt = config.systemPrompt || 'You are a multi-label classifier. Select all applicable categories for the given content:';
231
281
 
232
282
  const prompt = `${basePrompt}\n\n${classesDescription}\n\n
233
- You should output the result as a JSON array of up to ${maxClasses} classifications, ordered by relevance:
234
- [
235
- {
236
- "class": "<class name>",
237
- ${mergedConfig.includeReason ? '"reason": "<explanation>",' : ''}
238
- "confidence": <confidence score between 0 and 1>
239
- },
240
- ...
241
- ]
242
- Return JSON and nothing more.
283
+ Select up to ${maxClasses} classifications, ordered by relevance.
243
284
 
244
285
  Content to classify:
245
286
  ${content}`;
246
287
 
247
- const parser = new JsonOutputParser();
248
- const result = await llm.pipe(parser).invoke([new SystemMessage(prompt)]);
288
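+ // Create schema for an array of 1 to maxClasses classifications (NOTE(review): root-level array schema; confirm the LLM provider accepts a non-object root for structured output)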
289
+ const classEnum = z.enum(validClassNames as [string, ...string[]]);
290
+ const multiClassSchema = z
291
+ .array(
292
+ z.object({
293
+ class: classEnum.describe('The selected classification category'),
294
+ reason: z.string().describe('Explanation for the classification'),
295
+ confidence: z.number().min(0).max(1).describe('Confidence score between 0 and 1'),
296
+ }),
297
+ )
298
+ .min(1)
299
+ .max(maxClasses);
249
300
 
250
- if (Array.isArray(result)) {
251
- return result as ClassificationResult[];
252
- }
301
+ const structuredLLM = llm.withStructuredOutput(multiClassSchema);
302
+ const messages = [new SystemMessage(prompt)];
303
+ const result = (await structuredLLM.invoke(messages)) as ClassificationResult[];
253
304
 
254
- // If single result returned, wrap in array
255
- return [result as ClassificationResult];
305
+ return result;
256
306
  } catch (err) {
257
307
  logger.error({ message: 'Multi-classification failed', err });
258
308
 
259
309
  return [
260
310
  {
261
- class: mergedConfig.defaultClass || 'unknown',
262
- reason: mergedConfig.defaultReason || (err as Error).message,
311
+ class: config.defaultClass,
312
+ reason: config.defaultReason,
263
313
  confidence: 0,
264
314
  },
265
315
  ];