@minded-ai/mindedjs 3.0.8-beta.12 → 3.1.9-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dist/cli/index.js +2 -9
  2. package/dist/cli/index.js.map +1 -1
  3. package/dist/cli/runCommand.d.ts +1 -1
  4. package/dist/cli/runCommand.d.ts.map +1 -1
  5. package/dist/cli/runCommand.js +31 -23
  6. package/dist/cli/runCommand.js.map +1 -1
  7. package/dist/index.d.ts +2 -1
  8. package/dist/index.d.ts.map +1 -1
  9. package/dist/index.js +6 -3
  10. package/dist/index.js.map +1 -1
  11. package/dist/internalTools/documentExtraction/documentExtraction.d.ts +112 -102
  12. package/dist/internalTools/documentExtraction/documentExtraction.d.ts.map +1 -1
  13. package/dist/internalTools/documentExtraction/documentExtraction.js +146 -705
  14. package/dist/internalTools/documentExtraction/documentExtraction.js.map +1 -1
  15. package/dist/internalTools/documentExtraction/extractStructuredData.d.ts +57 -0
  16. package/dist/internalTools/documentExtraction/extractStructuredData.d.ts.map +1 -0
  17. package/dist/internalTools/documentExtraction/extractStructuredData.js +121 -0
  18. package/dist/internalTools/documentExtraction/extractStructuredData.js.map +1 -0
  19. package/dist/internalTools/documentExtraction/parseDocumentLocal.d.ts +16 -0
  20. package/dist/internalTools/documentExtraction/parseDocumentLocal.d.ts.map +1 -0
  21. package/dist/internalTools/documentExtraction/parseDocumentLocal.js +547 -0
  22. package/dist/internalTools/documentExtraction/parseDocumentLocal.js.map +1 -0
  23. package/dist/internalTools/documentExtraction/parseDocumentManaged.d.ts +13 -0
  24. package/dist/internalTools/documentExtraction/parseDocumentManaged.d.ts.map +1 -0
  25. package/dist/internalTools/documentExtraction/parseDocumentManaged.js +150 -0
  26. package/dist/internalTools/documentExtraction/parseDocumentManaged.js.map +1 -0
  27. package/dist/nodes/addAppToolNode.d.ts.map +1 -1
  28. package/dist/nodes/addAppToolNode.js +20 -1
  29. package/dist/nodes/addAppToolNode.js.map +1 -1
  30. package/dist/toolsLibrary/classifier.d.ts +2 -2
  31. package/dist/toolsLibrary/parseDocument.d.ts +11 -10
  32. package/dist/toolsLibrary/parseDocument.d.ts.map +1 -1
  33. package/dist/toolsLibrary/parseDocument.js +33 -189
  34. package/dist/toolsLibrary/parseDocument.js.map +1 -1
  35. package/dist/toolsLibrary/withBrowserSession.d.ts.map +1 -1
  36. package/dist/toolsLibrary/withBrowserSession.js +70 -2
  37. package/dist/toolsLibrary/withBrowserSession.js.map +1 -1
  38. package/dist/types/Flows.types.d.ts +1 -0
  39. package/dist/types/Flows.types.d.ts.map +1 -1
  40. package/dist/types/Flows.types.js.map +1 -1
  41. package/dist/utils/schemaUtils.js +1 -1
  42. package/dist/utils/schemaUtils.js.map +1 -1
  43. package/docs/tooling/document-processing.md +235 -174
  44. package/package.json +2 -1
  45. package/src/cli/index.ts +2 -10
  46. package/src/cli/runCommand.ts +31 -25
  47. package/src/index.ts +2 -1
  48. package/src/internalTools/documentExtraction/documentExtraction.ts +184 -767
  49. package/src/internalTools/documentExtraction/extractStructuredData.ts +140 -0
  50. package/src/internalTools/documentExtraction/parseDocumentLocal.ts +660 -0
  51. package/src/internalTools/documentExtraction/parseDocumentManaged.ts +152 -0
  52. package/src/nodes/addAppToolNode.ts +30 -7
  53. package/src/toolsLibrary/parseDocument.ts +38 -206
  54. package/src/toolsLibrary/withBrowserSession.ts +89 -4
  55. package/src/types/Flows.types.ts +1 -0
  56. package/src/utils/schemaUtils.ts +1 -1
@@ -0,0 +1,140 @@
1
+ import { ZodType } from 'zod';
2
+ import { logger } from '../../utils/logger';
3
+ import { BaseLanguageModel } from '@langchain/core/language_models/base';
4
+
5
/**
 * Extract structured or free-form information from text content using an LLM.
 *
 * A fixed system instruction plus a user message (the optional `prompt`, followed by
 * `content`) is sent to the model:
 * - With `schema`: the model is wrapped with `withStructuredOutput(schema)` and the
 *   wrapped model's result is returned unchanged.
 * - Without `schema`: the reply text is collected (string content as-is, an array of
 *   content blocks joined with newlines, anything else via `String(...)`) and handed to
 *   `parseJSONSafe`, which returns decoded JSON when it can and the raw text otherwise.
 *
 * Both paths emit a debug log entry tagged with `sessionId` that includes the extracted data.
 *
 * @param options - Extraction options
 * @param options.content - The content to extract information from
 * @param options.llm - Language model instance used for the extraction
 * @param options.schema - Optional Zod schema defining the structure of data to extract
 * @param options.prompt - Optional instruction prepended to the user message
 * @param options.sessionId - Session identifier included in the debug log entries
 *
 * @returns Promise resolving to:
 * - Structured data of type T if `schema` is provided
 * - Otherwise the model's reply: decoded JSON when the reply is valid JSON, else the reply text
 *
 * @throws {Error} If `llm` is not provided
 * @throws {Error} With a message prefixed by `LLM extraction failed: ` when anything fails
 *   during extraction, including an LLM that lacks `withStructuredOutput` while a schema is
 *   given. The underlying error is attached as the `cause` property.
 *
 * @example
 * ```typescript
 * import { extractStructuredDataFromString } from '@minded-ai/mindedjs';
 * import { z } from 'zod';
 *
 * // Extract structured data with schema
 * const result1 = await extractStructuredDataFromString({
 *   content: 'Invoice #12345\nTotal: $500.00\nDate: 2024-01-15',
 *   llm: agent.llm,
 *   sessionId: 'session-123',
 *   schema: z.object({
 *     invoiceNumber: z.string(),
 *     totalAmount: z.number(),
 *     date: z.string(),
 *   }),
 * });
 * // result1: {"invoiceNumber": "12345", "totalAmount": 500, "date": "2024-01-15"}
 *
 * // Extract without a schema, guided by a custom prompt. If the model replies with
 * // valid JSON the decoded value is returned; otherwise the reply text is returned.
 * const result2 = await extractStructuredDataFromString({
 *   content: 'Invoice #12345\nTotal: $500.00\nDate: 2024-01-15',
 *   llm: agent.llm,
 *   sessionId: 'session-123',
 *   prompt: 'Extract the invoice number, total amount, and date. Return as an object with keys: invoiceNumber, totalAmount, date',
 * });
 * // result2 (when the reply is JSON): {"invoiceNumber": "12345", "totalAmount": 500, "date": "2024-01-15"}
 * ```
 */
export async function extractStructuredDataFromString<T extends Record<string, any>>({
  content,
  llm,
  schema,
  prompt,
  sessionId,
}: {
  content: string;
  llm: BaseLanguageModel;
  schema?: ZodType<T>;
  prompt?: string;
  sessionId: string;
}): Promise<T | string> {
  if (!llm) {
    throw new Error(
      'LLM instance is required for structured data extraction. Please provide an LLM when calling extractStructuredDataFromString.',
    );
  }

  const messages = [
    {
      role: 'system',
      content:
        'You are an expert data-extraction assistant. ' +
        'Extract the requested information from the provided document content. ' +
        'If you cannot find a value for a required field, use "N/A" or a descriptive placeholder. ' +
        'Be accurate and thorough in your extraction. ' +
        'The user prompt may include information needed to extract structured data from the document.',
    },
    {
      role: 'user',
      // The caller's prompt (if any) goes first so it frames the document that follows.
      content: (prompt ? prompt + ' ' : '') + `Please analyze and extract information from this document:\n\n${content}`,
    },
  ];

  try {
    if (schema) {
      // withStructuredOutput is optional on the base model type, so check before calling.
      if (!llm.withStructuredOutput) {
        throw new Error('The provided LLM does not support structured output. Please use a compatible LLM instance.');
      }
      const structuredLlm = llm.withStructuredOutput<T>(schema);

      const result = await structuredLlm.invoke(messages);

      logger.debug({
        msg: '[DocumentProcessor] Structured data extraction completed',
        sessionId,
        extractedData: JSON.stringify(result),
      });

      return result;
    }

    // Without schema, return the LLM's text response
    const response = await llm.invoke(messages);

    // Normalise the reply to plain text: content may be a string or an array of content blocks.
    let textContent: string;
    if (typeof response.content === 'string') {
      textContent = response.content;
    } else if (Array.isArray(response.content) && response.content.length > 0) {
      // Blocks without a `text` field contribute an empty string.
      textContent = response.content.map((block: any) => (typeof block === 'string' ? block : block.text || '')).join('\n');
    } else {
      textContent = String(response.content);
    }

    // Try to parse the response in case it is a valid JSON object.
    const result = parseJSONSafe(textContent);

    logger.debug({
      msg: '[DocumentProcessor] Unstructured data extraction completed',
      sessionId,
      extractedData: textContent,
    });

    return result;
  } catch (err) {
    const wrapped = new Error(`LLM extraction failed: ${err instanceof Error ? err.message : String(err)}`);
    // Keep the underlying error (and its stack trace) reachable for debugging. Defined as a
    // non-enumerable property, mirroring how the native `cause` option behaves, so the
    // message and enumerable shape of the thrown error stay exactly as before.
    Object.defineProperty(wrapped, 'cause', {
      value: err,
      writable: true,
      configurable: true,
      enumerable: false,
    });
    throw wrapped;
  }
}
133
+
134
/**
 * Best-effort JSON decoding of a text reply.
 *
 * Returns the decoded value when `str` is valid JSON whose top level is an object,
 * an array, or a string. In every other case the input is returned unchanged:
 * - `str` is not valid JSON, or
 * - `str` decodes to a number, boolean, or `null` (e.g. `12345`, `true`, `null`).
 *
 * The second case keeps a bare numeric or keyword reply from being turned into a
 * non-string, non-object value, which the caller's declared return type does not allow.
 *
 * The return type is `any` because the decoded shape is unvalidated and defined by
 * whoever consumes it.
 *
 * @param str - Raw text to decode.
 * @returns The decoded object, array, or string; otherwise `str` itself.
 */
function parseJSONSafe(str: string): any {
  let parsed: unknown;
  try {
    parsed = JSON.parse(str);
  } catch {
    // Not JSON at all: hand back the raw text.
    return str;
  }

  if (typeof parsed === 'string') {
    return parsed;
  }

  // typeof null is 'object', so null must be excluded explicitly.
  return typeof parsed === 'object' && parsed !== null ? parsed : str;
}