@minded-ai/mindedjs 3.1.45 → 3.1.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/toolsLibrary/browserSessionPersistence.d.ts.map +1 -1
- package/dist/toolsLibrary/browserSessionPersistence.js +27 -8
- package/dist/toolsLibrary/browserSessionPersistence.js.map +1 -1
- package/docs/api/knowledge.md +73 -14
- package/docs/platform/knowledge-bases.md +14 -2
- package/docs/tooling/document-processing.md +16 -2
- package/package.json +15 -6
- package/src/toolsLibrary/browserSessionPersistence.ts +60 -12
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browserSessionPersistence.d.ts","sourceRoot":"","sources":["../../src/toolsLibrary/browserSessionPersistence.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"browserSessionPersistence.d.ts","sourceRoot":"","sources":["../../src/toolsLibrary/browserSessionPersistence.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAOlD,UAAU,MAAM;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,QAAQ,CAAC,EAAE,QAAQ,GAAG,KAAK,GAAG,MAAM,CAAC;CACtC;AAED,UAAU,iBAAiB;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC9B;AAgCD;;GAEG;AACH,eAAO,MAAM,yBAAyB,QAAa,OAAO,CAAC;IACzD,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC;IACzB,YAAY,EAAE,iBAAiB,EAAE,GAAG,IAAI,CAAC;CAC1C,CAoBA,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,wBAAwB,GACnC,SAAS,cAAc,EACvB,MAAM,IAAI,EACV,WAAW,MAAM,KAChB,OAAO,CAAC,IAAI,CAkKd,CAAC"}
|
|
@@ -4,11 +4,30 @@ exports.saveSessionDataViaSocket = exports.fetchSessionDataViaSocket = void 0;
|
|
|
4
4
|
const logger_1 = require("../utils/logger");
|
|
5
5
|
const storage_1 = require("./storage");
|
|
6
6
|
const BROWSER_SESSION_KEY = 'browser-session';
|
|
7
|
+
const STORAGE_OPERATION_TIMEOUT_MS = 3000;
|
|
8
|
+
const withTimeout = async (promise, timeoutMs, operation) => {
|
|
9
|
+
let timeout = null;
|
|
10
|
+
try {
|
|
11
|
+
return await Promise.race([
|
|
12
|
+
promise,
|
|
13
|
+
new Promise((_, reject) => {
|
|
14
|
+
timeout = setTimeout(() => {
|
|
15
|
+
reject(new Error(`${operation} timed out after ${timeoutMs}ms`));
|
|
16
|
+
}, timeoutMs);
|
|
17
|
+
}),
|
|
18
|
+
]);
|
|
19
|
+
}
|
|
20
|
+
finally {
|
|
21
|
+
if (timeout) {
|
|
22
|
+
clearTimeout(timeout);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
};
|
|
7
26
|
/**
|
|
8
27
|
* Extract localStorage from the current page
|
|
9
28
|
*/
|
|
10
29
|
const extractLocalStorageFromPage = async (page) => {
|
|
11
|
-
return await page.evaluate(() => Object.fromEntries(Object.entries(localStorage)));
|
|
30
|
+
return await withTimeout(page.evaluate(() => Object.fromEntries(Object.entries(localStorage))), STORAGE_OPERATION_TIMEOUT_MS, 'Extract localStorage from current page');
|
|
12
31
|
};
|
|
13
32
|
/**
|
|
14
33
|
* Fetch both cookies and localStorage
|
|
@@ -39,7 +58,7 @@ exports.fetchSessionDataViaSocket = fetchSessionDataViaSocket;
|
|
|
39
58
|
*/
|
|
40
59
|
const saveSessionDataViaSocket = async (context, page, sessionId) => {
|
|
41
60
|
try {
|
|
42
|
-
const cookies = await context.cookies();
|
|
61
|
+
const cookies = await withTimeout(context.cookies(), STORAGE_OPERATION_TIMEOUT_MS, 'Extract browser cookies');
|
|
43
62
|
const data = {};
|
|
44
63
|
if (cookies && cookies.length > 0) {
|
|
45
64
|
data.cookies = cookies;
|
|
@@ -47,10 +66,10 @@ const saveSessionDataViaSocket = async (context, page, sessionId) => {
|
|
|
47
66
|
// Extract localStorage for ALL origins using CDP
|
|
48
67
|
let client;
|
|
49
68
|
try {
|
|
50
|
-
client = await context.newCDPSession(page);
|
|
69
|
+
client = await withTimeout(context.newCDPSession(page), STORAGE_OPERATION_TIMEOUT_MS, 'Create CDP session for browser session persistence');
|
|
51
70
|
// Enable Target domain to track all frames and their origins
|
|
52
|
-
await client.send('Target.setDiscoverTargets', { discover: true });
|
|
53
|
-
const response = await client.send('Target.getTargets');
|
|
71
|
+
await withTimeout(client.send('Target.setDiscoverTargets', { discover: true }), STORAGE_OPERATION_TIMEOUT_MS, 'Enable CDP target discovery');
|
|
72
|
+
const response = await withTimeout(client.send('Target.getTargets'), STORAGE_OPERATION_TIMEOUT_MS, 'Get CDP targets for localStorage persistence');
|
|
54
73
|
const targetInfos = response.targetInfos;
|
|
55
74
|
// Extract unique origins from all page-type targets
|
|
56
75
|
const origins = new Set();
|
|
@@ -78,12 +97,12 @@ const saveSessionDataViaSocket = async (context, page, sessionId) => {
|
|
|
78
97
|
// Extract localStorage from all origins in parallel
|
|
79
98
|
const extractionPromises = Array.from(origins).map(async (origin) => {
|
|
80
99
|
try {
|
|
81
|
-
const storageResponse = await client.send('DOMStorage.getDOMStorageItems', {
|
|
100
|
+
const storageResponse = await withTimeout(client.send('DOMStorage.getDOMStorageItems', {
|
|
82
101
|
storageId: {
|
|
83
102
|
securityOrigin: origin,
|
|
84
103
|
isLocalStorage: true,
|
|
85
104
|
},
|
|
86
|
-
});
|
|
105
|
+
}), STORAGE_OPERATION_TIMEOUT_MS, `Extract localStorage for origin ${origin}`);
|
|
87
106
|
const { entries } = storageResponse;
|
|
88
107
|
if (entries && entries.length > 0) {
|
|
89
108
|
const data = Object.fromEntries(entries);
|
|
@@ -133,7 +152,7 @@ const saveSessionDataViaSocket = async (context, page, sessionId) => {
|
|
|
133
152
|
}
|
|
134
153
|
finally {
|
|
135
154
|
if (client) {
|
|
136
|
-
await client.detach().catch(() => { });
|
|
155
|
+
await withTimeout(client.detach(), STORAGE_OPERATION_TIMEOUT_MS, 'Detach CDP session after browser session persistence').catch(() => { });
|
|
137
156
|
}
|
|
138
157
|
}
|
|
139
158
|
if (Object.keys(data).length === 0) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browserSessionPersistence.js","sourceRoot":"","sources":["../../src/toolsLibrary/browserSessionPersistence.ts"],"names":[],"mappings":";;;AACA,4CAAyC;AACzC,uCAAmD;AAEnD,MAAM,mBAAmB,GAAG,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"browserSessionPersistence.js","sourceRoot":"","sources":["../../src/toolsLibrary/browserSessionPersistence.ts"],"names":[],"mappings":";;;AACA,4CAAyC;AACzC,uCAAmD;AAEnD,MAAM,mBAAmB,GAAG,iBAAiB,CAAC;AAC9C,MAAM,4BAA4B,GAAG,IAAI,CAAC;AAkB1C,MAAM,WAAW,GAAG,KAAK,EAAK,OAAmB,EAAE,SAAiB,EAAE,SAAiB,EAAc,EAAE;IACrG,IAAI,OAAO,GAA0B,IAAI,CAAC;IAE1C,IAAI,CAAC;QACH,OAAO,MAAM,OAAO,CAAC,IAAI,CAAC;YACxB,OAAO;YACP,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE;gBAC/B,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE;oBACxB,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,SAAS,oBAAoB,SAAS,IAAI,CAAC,CAAC,CAAC;gBACnE,CAAC,EAAE,SAAS,CAAC,CAAC;YAChB,CAAC,CAAC;SACH,CAAC,CAAC;IACL,CAAC;YAAS,CAAC;QACT,IAAI,OAAO,EAAE,CAAC;YACZ,YAAY,CAAC,OAAO,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;AACH,CAAC,CAAC;AAEF;;GAEG;AACH,MAAM,2BAA2B,GAAG,KAAK,EAAE,IAAU,EAAmC,EAAE;IACxF,OAAO,MAAM,WAAW,CACtB,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,CAAC,EACrE,4BAA4B,EAC5B,wCAAwC,CACzC,CAAC;AACJ,CAAC,CAAC;AAEF;;GAEG;AACI,MAAM,yBAAyB,GAAG,KAAK,IAG3C,EAAE;;IACH,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,IAAA,oBAAU,EAAC;YAC/B,OAAO,EAAE,EAAE,GAAG,EAAE,mBAAmB,EAAE;SACtC,CAAC,CAAC;QAEH,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;QAC/C,CAAC;QAED,qCAAqC;QACrC,MAAM,YAAY,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,OAAO,GAAG,CAAA,MAAA,YAAY,CAAC,IAAI,0CAAE,OAAO,KAAI,IAAI,CAAC;QACnD,MAAM,YAAY,GAAG,CAAA,MAAA,YAAY,CAAC,IAAI,0CAAE,YAAY,KAAI,IAAI,CAAC;QAE7D,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;IACnC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,eAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,8BAA8B,EAAE,KAAK,EAAE,CAAC,CAAC;QAChE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IAC/C,CAAC;AACH,CAAC,CAAC;AAvBW,QAAA,yBAAyB,6BAuBpC;AAEF;;GAEG;AACI,MAAM,wBAAwB,GAAG,KAAK,EAC3C,OAAuB,EACvB,IAAU,EACV,SAAiB,EACF,EAAE;IACjB,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,WAAW,CAC/B,OAAO,CAAC,OAAO,EAAE,EACjB,4BAA4B,EAC5B,yBAAyB,CAC1B,CAAC;QACF,MAAM,IAAI,GAA4B,EAAE,CAAC;QAEzC,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EA
AE,CAAC;YAClC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACzB,CAAC;QAED,iDAAiD;QACjD,IAAI,MAAM,CAAC;QACX,IAAI,CAAC;YACH,MAAM,GAAG,MAAM,WAAW,CACxB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,EAC3B,4BAA4B,EAC5B,oDAAoD,CACrD,CAAC;YAEF,6DAA6D;YAC7D,MAAM,WAAW,CACf,MAAM,CAAC,IAAI,CAAC,2BAA2B,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,EAC5D,4BAA4B,EAC5B,6BAA6B,CAC9B,CAAC;YAWF,MAAM,QAAQ,GAAG,MAAM,WAAW,CAChC,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,EAChC,4BAA4B,EAC5B,8CAA8C,CAC/C,CAAC;YACF,MAAM,WAAW,GAAI,QAA0C,CAAC,WAAW,CAAC;YAE5E,oDAAoD;YACpD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;YAClC,KAAK,MAAM,MAAM,IAAI,WAAW,IAAI,EAAE,EAAE,CAAC;gBACvC,IAAI,MAAM,CAAC,IAAI,KAAK,MAAM,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;oBACvD,IAAI,CAAC;wBACH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;wBAChC,IAAI,GAAG,CAAC,QAAQ,KAAK,OAAO,IAAI,GAAG,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;4BAC1D,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;wBAC1B,CAAC;oBACH,CAAC;oBAAC,WAAM,CAAC;wBACP,sBAAsB;oBACxB,CAAC;gBACH,CAAC;YACH,CAAC;YAED,mCAAmC;YACnC,IAAI,CAAC;gBACH,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,MAAM,CAAC;gBACjD,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;YAC7B,CAAC;YAAC,WAAM,CAAC;gBACP,mCAAmC;YACrC,CAAC;YAED,oDAAoD;YACpD,MAAM,kBAAkB,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;gBAClE,IAAI,CAAC;oBAKH,MAAM,eAAe,GAAG,MAAM,WAAW,CACvC,MAAO,CAAC,IAAI,CAAC,+BAA+B,EAAE;wBAC5C,SAAS,EAAE;4BACT,cAAc,EAAE,MAAM;4BACtB,cAAc,EAAE,IAAI;yBACrB;qBACF,CAAC,EACF,4BAA4B,EAC5B,mCAAmC,MAAM,EAAE,CAC5C,CAAC;oBACF,MAAM,EAAE,OAAO,EAAE,GAAG,eAAqC,CAAC;oBAE1D,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAClC,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;wBAEzC,+EAA+E;wBAC/E,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;wBAC7C,IAAI,QAAQ,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;4BAChC,eAAM,CAAC,IAAI,CAAC;gCACV,OAAO,EAAE,8CAA8C;gCACvD,MAAM;gCACN,MAAM,EAAE,CAAC,QAAQ,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;6BAC5C,CAAC,CAAC;4BACH,OAAO,IAAI,CAAC;wBACd,CAAC;wBAED,OAAO;4BACL,GAAG,EAAE,MAAM;4BACX,IAAI;yBACL,CAAC;o
BACJ,CAAC;gBACH,CAAC;gBAAC,OAAO,GAAG,EAAE,CAAC;oBACb,eAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,2CAA2C,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;gBAC5F,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC,CAAC,CAAC;YAEH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;YACtD,MAAM,oBAAoB,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,CAAwB,CAAC;YAE5E,IAAI,oBAAoB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpC,IAAI,CAAC,YAAY,GAAG,oBAAoB,CAAC;YAC3C,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,yDAAyD;YACzD,eAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,sEAAsE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YAC7G,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBAC9B,MAAM,gBAAgB,GAAG,MAAM,2BAA2B,CAAC,IAAI,CAAC,CAAC;gBAEjE,IAAI,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC7C,IAAI,CAAC,YAAY,GAAG,CAAC;4BACnB,GAAG,EAAE,UAAU;4BACf,IAAI,EAAE,gBAAgB;yBACvB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAAC,OAAO,WAAW,EAAE,CAAC;gBACrB,eAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,oDAAoD,EAAE,KAAK,EAAE,WAAW,EAAE,CAAC,CAAC;YACrG,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,WAAW,CACf,MAAM,CAAC,MAAM,EAAE,EACf,4BAA4B,EAC5B,sDAAsD,CACvD,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QAED,IAAI,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnC,OAAO;QACT,CAAC;QAED,gCAAgC;QAChC,IAAI,CAAC,GAAG,GAAG,mBAAmB,CAAC;QAE/B,qDAAqD;QACrD,4BAA4B;QAC5B,MAAM,WAAW,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;QACtC,IAAI,CAAC;YACH,MAAM,IAAA,oBAAU,EAAC,SAAS,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;QACjD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,eAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,6BAA6B,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC;QAC5E,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,eAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,6BAA6B,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5E,CAAC;AACH,CAAC,CAAC;AAtKW,QAAA,wBAAwB,4BAsKnC"}
|
package/docs/api/knowledge.md
CHANGED
|
@@ -84,6 +84,35 @@ Use labels to filter documents during retrieval (see [Knowledge Base RAG API](..
|
|
|
84
84
|
|
|
85
85
|
---
|
|
86
86
|
|
|
87
|
+
## Custom Metadata (customMeta1–4)
|
|
88
|
+
|
|
89
|
+
Documents also support up to **four free-form custom metadata fields** for attaching arbitrary string data.
|
|
90
|
+
|
|
91
|
+
- **`customMeta1`** through **`customMeta4`**: Any string value, no format restriction
|
|
92
|
+
|
|
93
|
+
```json
|
|
94
|
+
{
|
|
95
|
+
"customMeta1": "DOC-98765",
|
|
96
|
+
"customMeta2": "John Smith",
|
|
97
|
+
"customMeta3": "v2.1",
|
|
98
|
+
"customMeta4": "legal"
|
|
99
|
+
}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Custom Metadata Requirements
|
|
103
|
+
|
|
104
|
+
- **Format**: Free-form string, any value is accepted
|
|
105
|
+
- **Max length**: 500 characters per field
|
|
106
|
+
- **Not filterable**: Custom metadata cannot be used as query filters (unlike `label1`/`label2`)
|
|
107
|
+
|
|
108
|
+
### How Custom Metadata is Returned
|
|
109
|
+
|
|
110
|
+
Custom metadata fields are returned in **every retrieved chunk** alongside the document's text. Consumers can read them from `result.metadata.minded_internal_meta1` through `result.metadata.minded_internal_meta4` in raw retrieval responses.
|
|
111
|
+
|
|
112
|
+
> **Tip**: Use custom metadata to attach source identifiers, version numbers, author names, or any context you want to surface alongside retrieved chunks — without the strict format requirements of filter labels.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
87
116
|
## Endpoints
|
|
88
117
|
|
|
89
118
|
| Method | Endpoint | Description |
|
|
@@ -116,6 +145,10 @@ Generate a pre-signed URL to upload a new document.
|
|
|
116
145
|
| `fileName` | string | Yes | File name with extension (e.g., `policy.pdf`). If `.zip`, triggers batch import. |
|
|
117
146
|
| `label1` | string | No | Primary label in `namespace:value` format (e.g., `dept:support`) |
|
|
118
147
|
| `label2` | string | No | Secondary label in `namespace:value` format (e.g., `lang:en`) |
|
|
148
|
+
| `customMeta1` | string | No | Free-form custom metadata field 1 (max 500 chars) |
|
|
149
|
+
| `customMeta2` | string | No | Free-form custom metadata field 2 (max 500 chars) |
|
|
150
|
+
| `customMeta3` | string | No | Free-form custom metadata field 3 (max 500 chars) |
|
|
151
|
+
| `customMeta4` | string | No | Free-form custom metadata field 4 (max 500 chars) |
|
|
119
152
|
| `customDocumentId` | string | No | Your external identifier for this document |
|
|
120
153
|
|
|
121
154
|
### Request Example
|
|
@@ -128,6 +161,8 @@ Generate a pre-signed URL to upload a new document.
|
|
|
128
161
|
"fileName": "refund-policy.pdf",
|
|
129
162
|
"label1": "dept:support",
|
|
130
163
|
"label2": "lang:en",
|
|
164
|
+
"customMeta1": "DOC-98765",
|
|
165
|
+
"customMeta2": "John Smith",
|
|
131
166
|
"customDocumentId": "DOC-12345"
|
|
132
167
|
}
|
|
133
168
|
```
|
|
@@ -310,6 +345,10 @@ Generate a pre-signed URL to replace an existing document's content.
|
|
|
310
345
|
| `fileName` | string | Yes | New file name |
|
|
311
346
|
| `label1` | string | No | Updated primary label in `namespace:value` format |
|
|
312
347
|
| `label2` | string | No | Updated secondary label in `namespace:value` format |
|
|
348
|
+
| `customMeta1` | string | No | Free-form custom metadata field 1 (max 500 chars) |
|
|
349
|
+
| `customMeta2` | string | No | Free-form custom metadata field 2 (max 500 chars) |
|
|
350
|
+
| `customMeta3` | string | No | Free-form custom metadata field 3 (max 500 chars) |
|
|
351
|
+
| `customMeta4` | string | No | Free-form custom metadata field 4 (max 500 chars) |
|
|
313
352
|
|
|
314
353
|
> **Note**: Provide either `s3Key` or `customDocumentId`, not both.
|
|
315
354
|
|
|
@@ -323,7 +362,9 @@ Generate a pre-signed URL to replace an existing document's content.
|
|
|
323
362
|
"customDocumentId": "DOC-12345",
|
|
324
363
|
"fileName": "refund-policy-v2.pdf",
|
|
325
364
|
"label1": "dept:support",
|
|
326
|
-
"label2": "lang:en"
|
|
365
|
+
"label2": "lang:en",
|
|
366
|
+
"customMeta1": "DOC-98765",
|
|
367
|
+
"customMeta2": "John Smith"
|
|
327
368
|
}
|
|
328
369
|
```
|
|
329
370
|
|
|
@@ -384,7 +425,7 @@ Finalize document update and trigger reprocessing.
|
|
|
384
425
|
PATCH /knowledge/documents/metadata
|
|
385
426
|
```
|
|
386
427
|
|
|
387
|
-
Update document labels without re-uploading the file.
|
|
428
|
+
Update document labels and/or custom metadata without re-uploading the file.
|
|
388
429
|
|
|
389
430
|
### Request Body
|
|
390
431
|
|
|
@@ -397,6 +438,10 @@ Update document labels without re-uploading the file.
|
|
|
397
438
|
| `customDocumentId` | string | * | Your external document ID. Required if `s3Key` not provided. |
|
|
398
439
|
| `label1` | string | No | Updated primary label in `namespace:value` format |
|
|
399
440
|
| `label2` | string | No | Updated secondary label in `namespace:value` format |
|
|
441
|
+
| `customMeta1` | string\|null | No | Updated free-form custom metadata field 1 (max 500 chars). Pass `null` to clear. |
|
|
442
|
+
| `customMeta2` | string\|null | No | Updated free-form custom metadata field 2 (max 500 chars). Pass `null` to clear. |
|
|
443
|
+
| `customMeta3` | string\|null | No | Updated free-form custom metadata field 3 (max 500 chars). Pass `null` to clear. |
|
|
444
|
+
| `customMeta4` | string\|null | No | Updated free-form custom metadata field 4 (max 500 chars). Pass `null` to clear. |
|
|
400
445
|
|
|
401
446
|
### Request Example
|
|
402
447
|
|
|
@@ -407,7 +452,9 @@ Update document labels without re-uploading the file.
|
|
|
407
452
|
"knowledgeBaseId": "kb-abc123",
|
|
408
453
|
"s3Key": "agents/<agentId>/production/<kbId>/refund-policy.pdf",
|
|
409
454
|
"label1": "dept:support",
|
|
410
|
-
"label2": "status:reviewed"
|
|
455
|
+
"label2": "status:reviewed",
|
|
456
|
+
"customMeta1": "DOC-98765",
|
|
457
|
+
"customMeta2": "John Smith"
|
|
411
458
|
}
|
|
412
459
|
```
|
|
413
460
|
|
|
@@ -419,6 +466,10 @@ Update document labels without re-uploading the file.
|
|
|
419
466
|
"documentId": "document-uuid",
|
|
420
467
|
"label1": "dept:support",
|
|
421
468
|
"label2": "status:reviewed",
|
|
469
|
+
"customMeta1": "DOC-98765",
|
|
470
|
+
"customMeta2": "John Smith",
|
|
471
|
+
"customMeta3": null,
|
|
472
|
+
"customMeta4": null,
|
|
422
473
|
"updatedAt": "2026-01-16T14:00:00.000Z"
|
|
423
474
|
}
|
|
424
475
|
```
|
|
@@ -460,6 +511,10 @@ GET /knowledge/documents?agentId=your-agent-id&environment=production&knowledgeB
|
|
|
460
511
|
"fileSize": 204800,
|
|
461
512
|
"label1": "dept:support",
|
|
462
513
|
"label2": "lang:en",
|
|
514
|
+
"customMeta1": "DOC-98765",
|
|
515
|
+
"customMeta2": "John Smith",
|
|
516
|
+
"customMeta3": null,
|
|
517
|
+
"customMeta4": null,
|
|
463
518
|
"createdAt": "2026-01-15T12:00:00.000Z",
|
|
464
519
|
"updatedAt": "2026-01-16T14:00:00.000Z"
|
|
465
520
|
}
|
|
@@ -467,17 +522,21 @@ GET /knowledge/documents?agentId=your-agent-id&environment=production&knowledgeB
|
|
|
467
522
|
|
|
468
523
|
### Response Fields
|
|
469
524
|
|
|
470
|
-
| Field | Type
|
|
471
|
-
| ------------------ |
|
|
472
|
-
| `s3Key` | string
|
|
473
|
-
| `documentId` | string
|
|
474
|
-
| `status` | string
|
|
475
|
-
| `contentType` | string
|
|
476
|
-
| `fileSize` | number
|
|
477
|
-
| `label1` | string
|
|
478
|
-
| `label2` | string
|
|
479
|
-
| `
|
|
480
|
-
| `
|
|
525
|
+
| Field | Type | Description |
|
|
526
|
+
| ------------------ | ------------- | -------------------------------------------------------- |
|
|
527
|
+
| `s3Key` | string | Document path; use for subsequent operations |
|
|
528
|
+
| `documentId` | string | Document identifier (your `customDocumentId` if provided during upload, otherwise a system-generated UUID) |
|
|
529
|
+
| `status` | string | Processing status (see [Status Values](#document-status-values)) |
|
|
530
|
+
| `contentType` | string | MIME type of the document (e.g., `application/pdf`) |
|
|
531
|
+
| `fileSize` | number | File size in bytes |
|
|
532
|
+
| `label1` | string\|null | Primary label in `namespace:value` format |
|
|
533
|
+
| `label2` | string\|null | Secondary label in `namespace:value` format |
|
|
534
|
+
| `customMeta1` | string\|null | Free-form custom metadata field 1 |
|
|
535
|
+
| `customMeta2` | string\|null | Free-form custom metadata field 2 |
|
|
536
|
+
| `customMeta3` | string\|null | Free-form custom metadata field 3 |
|
|
537
|
+
| `customMeta4` | string\|null | Free-form custom metadata field 4 |
|
|
538
|
+
| `createdAt` | string | ISO 8601 creation timestamp |
|
|
539
|
+
| `updatedAt` | string | ISO 8601 last update timestamp |
|
|
481
540
|
|
|
482
541
|
---
|
|
483
542
|
|
|
@@ -27,7 +27,7 @@ Deleting a knowledge base removes it from the agent and triggers cleanup of its
|
|
|
27
27
|
|
|
28
28
|
## Upload and manage documents (UI)
|
|
29
29
|
|
|
30
|
-
From the knowledge base view, you can upload documents and set labels for filtering.
|
|
30
|
+
From the knowledge base view, you can upload documents, set labels for filtering, and add custom metadata for context enrichment.
|
|
31
31
|
|
|
32
32
|
### Document Labels
|
|
33
33
|
|
|
@@ -35,9 +35,21 @@ Each document supports two optional labels in `namespace:value` format:
|
|
|
35
35
|
- **label1**: Primary classification (e.g., `dept:support`, `topic:billing`)
|
|
36
36
|
- **label2**: Secondary classification (e.g., `lang:en`, `tier:premium`)
|
|
37
37
|
|
|
38
|
+
Labels can be used to **filter documents during retrieval** — only chunks from matching documents are returned.
|
|
39
|
+
|
|
40
|
+
### Custom Metadata
|
|
41
|
+
|
|
42
|
+
Each document also supports up to **four free-form custom metadata fields** (Custom Metadata 1–4). Unlike labels, these:
|
|
43
|
+
- Accept any string value (no namespace format required)
|
|
44
|
+
- Are limited to **500 characters** each
|
|
45
|
+
- Are **not filterable** — they cannot be used to restrict retrieval results
|
|
46
|
+
- Are **returned alongside every retrieved chunk** in the raw retrieval response
|
|
47
|
+
|
|
48
|
+
Use custom metadata to attach source identifiers, version numbers, author names, or other context you want available at retrieval time.
|
|
49
|
+
|
|
38
50
|
Typical flow:
|
|
39
51
|
|
|
40
|
-
- Upload a document (and optionally
|
|
52
|
+
- Upload a document (and optionally set labels and/or custom metadata)
|
|
41
53
|
- Wait for ingestion/sync to complete
|
|
42
54
|
- Use the platform “Query” / “Generate” actions to test retrieval and citations
|
|
43
55
|
|
|
@@ -24,6 +24,17 @@ DOCUMENT_PROCESSING_MODE=local
|
|
|
24
24
|
LLAMA_CLOUD_API_KEY=your_llama_cloud_api_key
|
|
25
25
|
```
|
|
26
26
|
|
|
27
|
+
## Document Quality
|
|
28
|
+
|
|
29
|
+
When using managed mode, you can control the quality of document parsing via the `quality` option. This maps to `DocumentQuality` exported from `@minded-ai/mindedjs`:
|
|
30
|
+
|
|
31
|
+
| Value | Credit cost | Description |
|
|
32
|
+
| ------------ | ----------- | ------------------------------------------------- |
|
|
33
|
+
| `'advanced'` | 3x | **Default.** High-quality OCR and parsing. Handles complex layouts, diagrams, images, scanned documents, and most document types with high accuracy. Recommended for production use. |
|
|
34
|
+
| `'standard'` | 1x | Faster processing, lower cost. Suitable for text-based documents. |
|
|
35
|
+
|
|
36
|
+
The `quality` option is available on both the `parseDocument` and `parseDocumentAndExtractStructuredData` functions, as well as on the `minded-parse-documents` flow node. It has no effect in local processing mode.
|
|
37
|
+
|
|
27
38
|
## Using in Flows
|
|
28
39
|
|
|
29
40
|
Document processing includes built-in AI extraction - use the node's `prompt` and `outputSchema` properties to specify what data to extract. No additional extraction tool is needed.
|
|
@@ -32,6 +43,7 @@ Document processing includes built-in AI extraction - use the node's `prompt` an
|
|
|
32
43
|
|
|
33
44
|
- `parameters.documentSource` (string or array, required): URL or file path to a single document, or array of URLs/file paths to process multiple documents. When an array is provided, documents are parsed and concatenated with double newlines.
|
|
34
45
|
- `parameters.returnStructuredOutput` (boolean, optional, default: `false`): Set to `true` to enable AI-powered extraction, `false` for raw text only. When `true`, requires either `prompt` or `outputSchema` (or both)
|
|
46
|
+
- `quality` (`'standard'` | `'advanced'`, optional, default: `'advanced'`): Controls OCR/parsing quality in managed mode. `'advanced'` (default) provides high-quality extraction supporting complex layouts, diagrams, and images at 3x the credit cost of `'standard'`. Use `'standard'` for simple, clean text-based documents where speed and cost matter. Has no effect in local processing mode.
|
|
35
47
|
- `prompt` (string, optional): Instructions for AI-powered extraction. Ignored when `returnStructuredOutput` is `false`
|
|
36
48
|
- `outputSchema` (schema object, optional): Define the structure of extracted data for structured extraction. Ignored when `returnStructuredOutput` is `false`
|
|
37
49
|
|
|
@@ -148,7 +160,8 @@ The SDK provides three main functions for document processing:
|
|
|
148
160
|
outputSchema?: ZodType<T>, // Optional: Zod schema for structured extraction
|
|
149
161
|
outputSchemaPrompt?: string, // Optional: Instructions for extraction
|
|
150
162
|
processingMode?: DocumentProcessingMode, // Optional: Processing mode (default: DocumentProcessingMode.MANAGED)
|
|
151
|
-
llamaCloudApiKey?: string
|
|
163
|
+
llamaCloudApiKey?: string, // Optional: API key for local mode
|
|
164
|
+
quality?: DocumentQuality, // Optional: 'standard' | 'advanced' (default: 'advanced'). Managed mode only.
|
|
152
165
|
}) => Promise<{
|
|
153
166
|
rawContent?: string, // Concatenated content when multiple documents provided
|
|
154
167
|
structuredContent?: T | string // Extracted from concatenated content when multiple documents provided
|
|
@@ -162,7 +175,8 @@ The SDK provides three main functions for document processing:
|
|
|
162
175
|
documentSource: string, // Required: URL or file path
|
|
163
176
|
sessionId: string, // Required: Session identifier
|
|
164
177
|
processingMode?: DocumentProcessingMode, // Optional: Processing mode (default: DocumentProcessingMode.MANAGED)
|
|
165
|
-
llamaCloudApiKey?: string
|
|
178
|
+
llamaCloudApiKey?: string, // Optional: API key for local mode
|
|
179
|
+
quality?: DocumentQuality, // Optional: 'standard' | 'advanced' (default: 'advanced'). Managed mode only.
|
|
166
180
|
}) => Promise<{
|
|
167
181
|
rawContent?: string,
|
|
168
182
|
metadata?: { fileSize?: number, fileType: string, processingTime: number, contentLength: number }
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@minded-ai/mindedjs",
|
|
3
|
-
"version": "3.1.
|
|
3
|
+
"version": "3.1.46",
|
|
4
4
|
"description": "MindedJS is a TypeScript library for building agents.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -34,18 +34,18 @@
|
|
|
34
34
|
"@types/chai": "^4.3.11",
|
|
35
35
|
"@types/ejs": "^3.1.5",
|
|
36
36
|
"@types/lodash": "^4.17.21",
|
|
37
|
-
"@types/mocha": "^10.0.
|
|
37
|
+
"@types/mocha": "^10.0.10",
|
|
38
38
|
"@types/node": "^20.11.19",
|
|
39
|
-
"@types/sinon": "^
|
|
39
|
+
"@types/sinon": "^21.0.0",
|
|
40
40
|
"@types/ws": "^8.18.1",
|
|
41
41
|
"chai": "^4.3.10",
|
|
42
42
|
"dotenv": "^16.4.5",
|
|
43
43
|
"eslint": "^9.27.0",
|
|
44
44
|
"globals": "^16.2.0",
|
|
45
|
-
"mocha": "^
|
|
45
|
+
"mocha": "^11.7.5",
|
|
46
46
|
"nodemon": "^3.1.10",
|
|
47
47
|
"prettier": "^3.7.4",
|
|
48
|
-
"sinon": "^
|
|
48
|
+
"sinon": "^21.0.2",
|
|
49
49
|
"ts-node": "^10.9.2",
|
|
50
50
|
"typedoc": "^0.28.5",
|
|
51
51
|
"typescript": "^5.3.3",
|
|
@@ -73,7 +73,16 @@
|
|
|
73
73
|
"zod": "^3.25.76",
|
|
74
74
|
"zod-to-json-schema": "^3.24.6"
|
|
75
75
|
},
|
|
76
|
+
"overrides": {
|
|
77
|
+
"mocha": {
|
|
78
|
+
"diff": "^8.0.3",
|
|
79
|
+
"serialize-javascript": "^7.0.4"
|
|
80
|
+
},
|
|
81
|
+
"sinon": {
|
|
82
|
+
"diff": "^8.0.3"
|
|
83
|
+
}
|
|
84
|
+
},
|
|
76
85
|
"peerDependencies": {
|
|
77
86
|
"playwright": "^1.55.0"
|
|
78
87
|
}
|
|
79
|
-
}
|
|
88
|
+
}
|
|
@@ -3,6 +3,7 @@ import { logger } from '../utils/logger';
|
|
|
3
3
|
import { saveRecord, getRecords } from './storage';
|
|
4
4
|
|
|
5
5
|
const BROWSER_SESSION_KEY = 'browser-session';
|
|
6
|
+
const STORAGE_OPERATION_TIMEOUT_MS = 3000;
|
|
6
7
|
|
|
7
8
|
interface Cookie {
|
|
8
9
|
name: string;
|
|
@@ -20,11 +21,34 @@ interface LocalStorageEntry {
|
|
|
20
21
|
data: Record<string, string>;
|
|
21
22
|
}
|
|
22
23
|
|
|
24
|
+
const withTimeout = async <T>(promise: Promise<T>, timeoutMs: number, operation: string): Promise<T> => {
|
|
25
|
+
let timeout: NodeJS.Timeout | null = null;
|
|
26
|
+
|
|
27
|
+
try {
|
|
28
|
+
return await Promise.race([
|
|
29
|
+
promise,
|
|
30
|
+
new Promise<never>((_, reject) => {
|
|
31
|
+
timeout = setTimeout(() => {
|
|
32
|
+
reject(new Error(`${operation} timed out after ${timeoutMs}ms`));
|
|
33
|
+
}, timeoutMs);
|
|
34
|
+
}),
|
|
35
|
+
]);
|
|
36
|
+
} finally {
|
|
37
|
+
if (timeout) {
|
|
38
|
+
clearTimeout(timeout);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
};
|
|
42
|
+
|
|
23
43
|
/**
|
|
24
44
|
* Extract localStorage from the current page
|
|
25
45
|
*/
|
|
26
46
|
const extractLocalStorageFromPage = async (page: Page): Promise<Record<string, string>> => {
|
|
27
|
-
return await
|
|
47
|
+
return await withTimeout(
|
|
48
|
+
page.evaluate(() => Object.fromEntries(Object.entries(localStorage))),
|
|
49
|
+
STORAGE_OPERATION_TIMEOUT_MS,
|
|
50
|
+
'Extract localStorage from current page',
|
|
51
|
+
);
|
|
28
52
|
};
|
|
29
53
|
|
|
30
54
|
/**
|
|
@@ -64,7 +88,11 @@ export const saveSessionDataViaSocket = async (
|
|
|
64
88
|
sessionId: string,
|
|
65
89
|
): Promise<void> => {
|
|
66
90
|
try {
|
|
67
|
-
const cookies = await
|
|
91
|
+
const cookies = await withTimeout(
|
|
92
|
+
context.cookies(),
|
|
93
|
+
STORAGE_OPERATION_TIMEOUT_MS,
|
|
94
|
+
'Extract browser cookies',
|
|
95
|
+
);
|
|
68
96
|
const data: Record<string, unknown> = {};
|
|
69
97
|
|
|
70
98
|
if (cookies && cookies.length > 0) {
|
|
@@ -74,10 +102,18 @@ export const saveSessionDataViaSocket = async (
|
|
|
74
102
|
// Extract localStorage for ALL origins using CDP
|
|
75
103
|
let client;
|
|
76
104
|
try {
|
|
77
|
-
client = await
|
|
105
|
+
client = await withTimeout(
|
|
106
|
+
context.newCDPSession(page),
|
|
107
|
+
STORAGE_OPERATION_TIMEOUT_MS,
|
|
108
|
+
'Create CDP session for browser session persistence',
|
|
109
|
+
);
|
|
78
110
|
|
|
79
111
|
// Enable Target domain to track all frames and their origins
|
|
80
|
-
await
|
|
112
|
+
await withTimeout(
|
|
113
|
+
client.send('Target.setDiscoverTargets', { discover: true }),
|
|
114
|
+
STORAGE_OPERATION_TIMEOUT_MS,
|
|
115
|
+
'Enable CDP target discovery',
|
|
116
|
+
);
|
|
81
117
|
|
|
82
118
|
// Get all targets (frames, workers, etc.)
|
|
83
119
|
type TargetInfo = {
|
|
@@ -88,7 +124,11 @@ export const saveSessionDataViaSocket = async (
|
|
|
88
124
|
attached: boolean;
|
|
89
125
|
};
|
|
90
126
|
|
|
91
|
-
const response = await
|
|
127
|
+
const response = await withTimeout(
|
|
128
|
+
client.send('Target.getTargets'),
|
|
129
|
+
STORAGE_OPERATION_TIMEOUT_MS,
|
|
130
|
+
'Get CDP targets for localStorage persistence',
|
|
131
|
+
);
|
|
92
132
|
const targetInfos = (response as { targetInfos: TargetInfo[] }).targetInfos;
|
|
93
133
|
|
|
94
134
|
// Extract unique origins from all page-type targets
|
|
@@ -121,12 +161,16 @@ export const saveSessionDataViaSocket = async (
|
|
|
121
161
|
entries: Array<[string, string]>;
|
|
122
162
|
};
|
|
123
163
|
|
|
124
|
-
const storageResponse = await
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
164
|
+
const storageResponse = await withTimeout(
|
|
165
|
+
client!.send('DOMStorage.getDOMStorageItems', {
|
|
166
|
+
storageId: {
|
|
167
|
+
securityOrigin: origin,
|
|
168
|
+
isLocalStorage: true,
|
|
169
|
+
},
|
|
170
|
+
}),
|
|
171
|
+
STORAGE_OPERATION_TIMEOUT_MS,
|
|
172
|
+
`Extract localStorage for origin ${origin}`,
|
|
173
|
+
);
|
|
130
174
|
const { entries } = storageResponse as DOMStorageResponse;
|
|
131
175
|
|
|
132
176
|
if (entries && entries.length > 0) {
|
|
@@ -178,7 +222,11 @@ export const saveSessionDataViaSocket = async (
|
|
|
178
222
|
}
|
|
179
223
|
} finally {
|
|
180
224
|
if (client) {
|
|
181
|
-
await
|
|
225
|
+
await withTimeout(
|
|
226
|
+
client.detach(),
|
|
227
|
+
STORAGE_OPERATION_TIMEOUT_MS,
|
|
228
|
+
'Detach CDP session after browser session persistence',
|
|
229
|
+
).catch(() => { });
|
|
182
230
|
}
|
|
183
231
|
}
|
|
184
232
|
|