@illuma-ai/agents 1.4.0-alpha.0 → 1.4.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/main.cjs +20 -10
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/tools/fileSearch/formatter.cjs +95 -0
- package/dist/cjs/tools/fileSearch/formatter.cjs.map +1 -0
- package/dist/cjs/tools/fileSearch/ragClient.cjs +104 -0
- package/dist/cjs/tools/fileSearch/ragClient.cjs.map +1 -0
- package/dist/cjs/tools/fileSearch/schema.cjs +18 -0
- package/dist/cjs/tools/fileSearch/schema.cjs.map +1 -0
- package/dist/cjs/tools/fileSearch/tool.cjs +155 -0
- package/dist/cjs/tools/fileSearch/tool.cjs.map +1 -0
- package/dist/esm/main.mjs +4 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/tools/fileSearch/formatter.mjs +92 -0
- package/dist/esm/tools/fileSearch/formatter.mjs.map +1 -0
- package/dist/esm/tools/fileSearch/ragClient.mjs +100 -0
- package/dist/esm/tools/fileSearch/ragClient.mjs.map +1 -0
- package/dist/esm/tools/fileSearch/schema.mjs +15 -0
- package/dist/esm/tools/fileSearch/schema.mjs.map +1 -0
- package/dist/esm/tools/fileSearch/tool.mjs +152 -0
- package/dist/esm/tools/fileSearch/tool.mjs.map +1 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/providers/index.d.ts +0 -1
- package/dist/types/tools/fileSearch/formatter.d.ts +25 -0
- package/dist/types/tools/fileSearch/index.d.ts +5 -0
- package/dist/types/tools/fileSearch/ragClient.d.ts +32 -0
- package/dist/types/tools/fileSearch/schema.d.ts +13 -0
- package/dist/types/tools/fileSearch/tool.d.ts +18 -0
- package/dist/types/tools/fileSearch/types.d.ts +139 -0
- package/package.json +1 -6
- package/src/index.ts +1 -0
- package/src/providers/index.ts +4 -1
- package/src/tools/fileSearch/__tests__/tool.test.ts +251 -0
- package/src/tools/fileSearch/formatter.ts +131 -0
- package/src/tools/fileSearch/index.ts +23 -0
- package/src/tools/fileSearch/ragClient.ts +141 -0
- package/src/tools/fileSearch/schema.ts +19 -0
- package/src/tools/fileSearch/tool.ts +207 -0
- package/src/tools/fileSearch/types.ts +147 -0
- package/dist/cjs/providers/composite/CompositeCapabilityProvider.cjs +0 -80
- package/dist/cjs/providers/composite/CompositeCapabilityProvider.cjs.map +0 -1
- package/dist/esm/providers/composite/CompositeCapabilityProvider.mjs +0 -78
- package/dist/esm/providers/composite/CompositeCapabilityProvider.mjs.map +0 -1
- package/dist/types/providers/composite/CompositeCapabilityProvider.d.ts +0 -22
- package/dist/types/providers/composite/index.d.ts +0 -1
- package/src/providers/__tests__/CompositeCapabilityProvider.test.ts +0 -93
- package/src/providers/composite/CompositeCapabilityProvider.ts +0 -112
- package/src/providers/composite/index.ts +0 -1
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the file_search library tool.
|
|
3
|
+
*
|
|
4
|
+
* The real RAG backend is mocked through the `RagClient` interface so
|
|
5
|
+
* these tests verify the tool's own logic: target_files filtering,
|
|
6
|
+
* bounded-concurrency querying, per-file error isolation, formatter
|
|
7
|
+
* handoff, and empty/no-file edge cases.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { createFileSearchTool } from '../tool';
|
|
11
|
+
import { plainTextFormatter, createCitationAnchorFormatter } from '../formatter';
|
|
12
|
+
import type { RagClient, RagQueryParams, RagChunk, FileSearchFile } from '../types';
|
|
13
|
+
|
|
14
|
+
// Build a mock RagClient that records every query it receives and returns
|
|
15
|
+
// a deterministic chunk set per file.
|
|
16
|
+
function makeRagClient(opts: {
|
|
17
|
+
chunksByFile?: Record<string, RagChunk[]>;
|
|
18
|
+
failFileIds?: Set<string>;
|
|
19
|
+
hang?: boolean;
|
|
20
|
+
} = {}) {
|
|
21
|
+
const calls: RagQueryParams[] = [];
|
|
22
|
+
const client: RagClient = {
|
|
23
|
+
async query(params) {
|
|
24
|
+
calls.push({ ...params });
|
|
25
|
+
if (opts.hang) {
|
|
26
|
+
// Never resolves — used to test timeout handling.
|
|
27
|
+
await new Promise(() => {});
|
|
28
|
+
}
|
|
29
|
+
if (opts.failFileIds?.has(params.file_id)) {
|
|
30
|
+
throw new Error(`simulated failure for ${params.file_id}`);
|
|
31
|
+
}
|
|
32
|
+
return opts.chunksByFile?.[params.file_id] ?? [];
|
|
33
|
+
},
|
|
34
|
+
};
|
|
35
|
+
return { client, calls };
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Shared fixture corpus: alpha is flagged as uploaded with the current
// message (`isCurrentMessage: true`); beta and gamma carry no flag.
const files: FileSearchFile[] = [
  { file_id: 'f-alpha', filename: 'alpha.pdf', isCurrentMessage: true },
  { file_id: 'f-beta', filename: 'beta-report.docx' },
  { file_id: 'f-gamma', filename: 'gamma_notes.txt' },
];
|
|
43
|
+
|
|
44
|
+
// Each test seeds the tool factory with a mock RagClient (makeRagClient)
// and asserts on the tool's rendered output and/or the queries recorded
// by the mock.
describe('createFileSearchTool', () => {
  it('returns a no-files message when factory was seeded with zero files', async () => {
    const { client } = makeRagClient();
    const t = createFileSearchTool({ ragClient: client, files: [] });
    const result = await t.invoke({ query: 'anything' });
    // Dual-format tools return [string, artifact?]; LangChain surfaces the
    // string directly in some invocation paths — handle both shapes.
    const text = Array.isArray(result) ? result[0] : result;
    expect(String(text)).toMatch(/no files to search/i);
  });

  it('queries every file when target_files is omitted', async () => {
    const { client, calls } = makeRagClient({
      chunksByFile: {
        'f-alpha': [chunk('f-alpha', 'alpha page 1', 0.1)],
        'f-beta': [chunk('f-beta', 'beta text', 0.2)],
        'f-gamma': [chunk('f-gamma', 'gamma note', 0.3)],
      },
    });
    const t = createFileSearchTool({ ragClient: client, files });
    await t.invoke({ query: 'test' });
    // Sorted so the assertion is independent of query dispatch order.
    expect(calls.map((c) => c.file_id).sort()).toEqual([
      'f-alpha',
      'f-beta',
      'f-gamma',
    ]);
  });

  it('filters files by target_files substring (case-insensitive)', async () => {
    const { client, calls } = makeRagClient({
      chunksByFile: { 'f-beta': [chunk('f-beta', 'beta', 0.2)] },
    });
    const t = createFileSearchTool({ ragClient: client, files });
    // Uppercase partial name must still match 'beta-report.docx'.
    await t.invoke({ query: 'q', target_files: ['BETA-REPORT'] });
    expect(calls.map((c) => c.file_id)).toEqual(['f-beta']);
  });

  it('falls back to all files when target_files matches nothing', async () => {
    const { client, calls } = makeRagClient({
      chunksByFile: {
        'f-alpha': [chunk('f-alpha', 'a', 0.1)],
        'f-beta': [chunk('f-beta', 'b', 0.2)],
        'f-gamma': [chunk('f-gamma', 'c', 0.3)],
      },
    });
    const warn = jest.fn();
    const t = createFileSearchTool({
      ragClient: client,
      files,
      logger: { debug: jest.fn(), info: jest.fn(), warn, error: jest.fn() },
    });
    await t.invoke({ query: 'q', target_files: ['no-such-file'] });
    // All three files searched, and the fallback is logged at warn level.
    expect(calls.length).toBe(3);
    expect(warn).toHaveBeenCalled();
  });

  it('isolates per-file failures so one bad file does not fail the call', async () => {
    const { client, calls } = makeRagClient({
      chunksByFile: {
        'f-alpha': [chunk('f-alpha', 'alpha good', 0.1)],
        'f-gamma': [chunk('f-gamma', 'gamma good', 0.2)],
      },
      failFileIds: new Set(['f-beta']),
    });
    const onFileError = jest.fn();
    const t = createFileSearchTool({
      ragClient: client,
      files,
      callbacks: { onFileError },
      logger: silentLogger(),
    });
    const result = await t.invoke({ query: 'q' });
    const text = Array.isArray(result) ? result[0] : result;
    // Healthy files still contribute results...
    expect(String(text)).toMatch(/alpha good/);
    expect(String(text)).toMatch(/gamma good/);
    // ...the failing file was still attempted, and the error was surfaced
    // through the callback rather than thrown.
    expect(calls.length).toBe(3);
    expect(onFileError).toHaveBeenCalledWith(
      expect.objectContaining({ file_id: 'f-beta' }),
      expect.any(Error),
    );
  });

  it('forwards entity_id, scope, and authHeaders on every query', async () => {
    const { client, calls } = makeRagClient({
      chunksByFile: { 'f-alpha': [chunk('f-alpha', 'x', 0.1)] },
    });
    const t = createFileSearchTool({
      ragClient: client,
      files: [files[0]],
      entity_id: 'tenant-42',
      scope: 'user:alice',
      getAuthHeaders: () => ({ Authorization: 'Bearer TOKEN' }),
    });
    await t.invoke({ query: 'q' });
    expect(calls[0]).toEqual(
      expect.objectContaining({
        file_id: 'f-alpha',
        query: 'q',
        entity_id: 'tenant-42',
        scope: 'user:alice',
        authHeaders: { Authorization: 'Bearer TOKEN' },
      }),
    );
  });

  it('prioritizes current-turn files even when stale files have closer distances', async () => {
    const { client } = makeRagClient({
      chunksByFile: {
        // Note the inversion: the stale file has the BETTER (smaller)
        // distance, so ordering by distance alone would put it first.
        'f-alpha': [chunk('f-alpha', 'current-turn', 0.5)],
        'f-beta': [chunk('f-beta', 'older-turn', 0.1)],
      },
    });
    const t = createFileSearchTool({
      ragClient: client,
      files: [files[0], files[1]], // alpha=current, beta=not
    });
    const result = await t.invoke({ query: 'q' });
    const text = Array.isArray(result) ? String(result[0]) : String(result);
    const currentIdx = text.indexOf('current-turn');
    const olderIdx = text.indexOf('older-turn');
    expect(currentIdx).toBeGreaterThanOrEqual(0);
    expect(olderIdx).toBeGreaterThan(currentIdx);
  });

  it('uses plainTextFormatter by default (no citation anchors)', async () => {
    const { client } = makeRagClient({
      chunksByFile: { 'f-alpha': [chunk('f-alpha', 'hello', 0.1)] },
    });
    const t = createFileSearchTool({
      ragClient: client,
      files: [files[0]],
    });
    const result = await t.invoke({ query: 'q' });
    const text = Array.isArray(result) ? String(result[0]) : String(result);
    // The anchor is emitted as the literal text `\ue202turn0file…`
    // (backslash + 'ue202'), hence the doubled backslash in the regex.
    expect(text).not.toMatch(/\\ue202turn0file/);
    expect(text).toMatch(/File: alpha\.pdf/);
  });

  it('uses citation anchors when createCitationAnchorFormatter is supplied', async () => {
    const { client } = makeRagClient({
      chunksByFile: { 'f-alpha': [chunk('f-alpha', 'hello', 0.1)] },
    });
    let offset = 0;
    const formatter = createCitationAnchorFormatter({
      getSourceOffset: () => offset,
      advanceSourceOffset: (by) => {
        offset += by;
      },
    });
    const t = createFileSearchTool({
      ragClient: client,
      files: [files[0]],
      formatter,
    });
    const first = await t.invoke({ query: 'q' });
    const text1 = Array.isArray(first) ? String(first[0]) : String(first);
    expect(text1).toMatch(/Source 0/);
    expect(text1).toMatch(/\\ue202turn0file0/);

    // Second call in the same turn: offset should have advanced.
    const second = await t.invoke({ query: 'q2' });
    const text2 = Array.isArray(second) ? String(second[0]) : String(second);
    expect(text2).toMatch(/Source 1/);
    expect(text2).toMatch(/\\ue202turn0file1/);
  });

  it('extracts 1-indexed page numbers from metadata.page (rag_api 0-indexed)', async () => {
    const { client } = makeRagClient({
      chunksByFile: {
        'f-alpha': [
          {
            file_id: 'f-alpha',
            page_content: 'page two content',
            distance: 0.1,
            metadata: { page: 1 }, // rag_api 0-indexed → display = 2
          },
        ],
      },
    });
    const t = createFileSearchTool({
      ragClient: client,
      files: [files[0]],
    });
    const result = await t.invoke({ query: 'q' });
    const text = Array.isArray(result) ? String(result[0]) : String(result);
    expect(text).toMatch(/Page: 2/);
  });
});
|
|
232
|
+
|
|
233
|
+
// ── helpers ──────────────────────────────────────────────────────────────
|
|
234
|
+
|
|
235
|
+
function chunk(
|
|
236
|
+
file_id: string,
|
|
237
|
+
text: string,
|
|
238
|
+
distance: number,
|
|
239
|
+
metadata?: Record<string, unknown>,
|
|
240
|
+
): RagChunk {
|
|
241
|
+
return { file_id, page_content: text, distance, metadata };
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
function silentLogger() {
|
|
245
|
+
return {
|
|
246
|
+
debug: jest.fn(),
|
|
247
|
+
info: jest.fn(),
|
|
248
|
+
warn: jest.fn(),
|
|
249
|
+
error: jest.fn(),
|
|
250
|
+
};
|
|
251
|
+
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Default result formatters.
|
|
3
|
+
*
|
|
4
|
+
* - `plainTextFormatter`: CLI / A2A / generic output. No citation anchors.
|
|
5
|
+
* - `citationAnchorFormatter`: ranger-style `\ue202turn0fileN` anchors with
|
|
6
|
+
* a monotonic `sourceOffset` so multi-call turns stay globally unique.
|
|
7
|
+
*
|
|
8
|
+
* Runtimes can supply their own `FileSearchResultFormatter` to override.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type {
|
|
12
|
+
FileSearchResultFormatter,
|
|
13
|
+
FileSearchFile,
|
|
14
|
+
RagChunk,
|
|
15
|
+
} from './types';
|
|
16
|
+
|
|
17
|
+
type AnnotatedChunk = RagChunk & {
|
|
18
|
+
filename: string;
|
|
19
|
+
isCurrentMessage: boolean;
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
export const plainTextFormatter: FileSearchResultFormatter = {
|
|
23
|
+
format(chunks, { files: _files }) {
|
|
24
|
+
if (chunks.length === 0) {
|
|
25
|
+
return { message: 'No relevant results found in the available files.' };
|
|
26
|
+
}
|
|
27
|
+
const body = chunks
|
|
28
|
+
.map((c) => {
|
|
29
|
+
const page = getPage(c);
|
|
30
|
+
const rel = (1 - c.distance).toFixed(4);
|
|
31
|
+
return (
|
|
32
|
+
`File: ${c.filename}` +
|
|
33
|
+
(page != null ? `\nPage: ${page}` : '') +
|
|
34
|
+
`\nRelevance: ${rel}\nContent: ${c.page_content}\n`
|
|
35
|
+
);
|
|
36
|
+
})
|
|
37
|
+
.join('\n---\n');
|
|
38
|
+
|
|
39
|
+
const sources = chunks.map((c) => ({
|
|
40
|
+
type: 'file' as const,
|
|
41
|
+
fileId: c.file_id,
|
|
42
|
+
content: c.page_content,
|
|
43
|
+
fileName: c.filename,
|
|
44
|
+
relevance: 1 - c.distance,
|
|
45
|
+
pages: getPage(c) != null ? [getPage(c) as number] : [],
|
|
46
|
+
}));
|
|
47
|
+
|
|
48
|
+
return { message: body, artifact: { file_search: { sources } } };
|
|
49
|
+
},
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
/** Options for `createCitationAnchorFormatter`. */
export interface CitationAnchorFormatterOptions {
  /** Tool name used in the `file_search` artifact wrapper. Defaults to `'file_search'`. */
  toolName?: string;
  /**
   * Monotonic counter for source indices within a turn. Pass the SAME
   * function to the formatter across multiple calls in the same turn so
   * anchors stay globally unique.
   */
  getSourceOffset?: () => number;
  /** Called after formatting to advance the offset. */
  advanceSourceOffset?: (by: number) => void;
}
|
|
64
|
+
|
|
65
|
+
export function createCitationAnchorFormatter(
|
|
66
|
+
opts: CitationAnchorFormatterOptions = {},
|
|
67
|
+
): FileSearchResultFormatter {
|
|
68
|
+
const toolName = opts.toolName ?? 'file_search';
|
|
69
|
+
const getOffset = opts.getSourceOffset ?? (() => 0);
|
|
70
|
+
const advance = opts.advanceSourceOffset ?? (() => {});
|
|
71
|
+
|
|
72
|
+
return {
|
|
73
|
+
format(chunks) {
|
|
74
|
+
if (chunks.length === 0) {
|
|
75
|
+
return {
|
|
76
|
+
message:
|
|
77
|
+
'No results found or errors occurred while searching the files.',
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
const base = getOffset();
|
|
81
|
+
const body = chunks
|
|
82
|
+
.map((c, i) => {
|
|
83
|
+
const globalIndex = base + i;
|
|
84
|
+
const page = getPage(c);
|
|
85
|
+
const rel = (1 - c.distance).toFixed(4);
|
|
86
|
+
return (
|
|
87
|
+
`[Source ${globalIndex}] File: ${c.filename} | Anchor: \\ue202turn0file${globalIndex}` +
|
|
88
|
+
(page != null ? ` | Page: ${page}` : '') +
|
|
89
|
+
` | Relevance: ${rel}\nContent: ${c.page_content}\n` +
|
|
90
|
+
`↑ Cite this source using: \\ue202turn0file${globalIndex}`
|
|
91
|
+
);
|
|
92
|
+
})
|
|
93
|
+
.join('\n---\n');
|
|
94
|
+
|
|
95
|
+
const sources = chunks.map((c) => ({
|
|
96
|
+
type: 'file' as const,
|
|
97
|
+
fileId: c.file_id,
|
|
98
|
+
content: c.page_content,
|
|
99
|
+
fileName: c.filename,
|
|
100
|
+
relevance: 1 - c.distance,
|
|
101
|
+
pages: getPage(c) != null ? [getPage(c) as number] : [],
|
|
102
|
+
pageRelevance:
|
|
103
|
+
getPage(c) != null
|
|
104
|
+
? { [getPage(c) as number]: 1 - c.distance }
|
|
105
|
+
: {},
|
|
106
|
+
}));
|
|
107
|
+
|
|
108
|
+
advance(chunks.length);
|
|
109
|
+
return {
|
|
110
|
+
message: body,
|
|
111
|
+
artifact: { [toolName]: { sources, fileCitations: true } },
|
|
112
|
+
};
|
|
113
|
+
},
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/** Extract a 1-indexed page number from the chunk metadata, or null. */
|
|
118
|
+
function getPage(chunk: AnnotatedChunk | RagChunk): number | null {
|
|
119
|
+
const raw =
|
|
120
|
+
(chunk.metadata?.page as unknown) ??
|
|
121
|
+
(chunk.metadata?.page_number as unknown) ??
|
|
122
|
+
null;
|
|
123
|
+
if (raw == null) return null;
|
|
124
|
+
const parsed = typeof raw === 'number' ? raw : parseInt(String(raw), 10);
|
|
125
|
+
if (Number.isNaN(parsed) || parsed < 0) return null;
|
|
126
|
+
// rag_api stores 0-indexed; display is 1-indexed
|
|
127
|
+
return parsed + 1;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Re-exported so consumers of the formatter module never need to reach
// into './types' directly for formatter-related types.
export type { FileSearchResultFormatter, FileSearchFile, RagChunk };
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
// Tool factory and its canonical registered name.
export { createFileSearchTool, FileSearchToolName } from './tool';
// Default HTTP-backed RagClient plus its base-URL resolution helpers.
export {
  HttpRagClient,
  getRagBaseUrl,
  RAG_API_URL_ENV,
  type HttpRagClientOptions,
} from './ragClient';
// Built-in result formatters.
export {
  plainTextFormatter,
  createCitationAnchorFormatter,
  type CitationAnchorFormatterOptions,
} from './formatter';
// Zod schema (and inferred type) for the tool's input arguments.
export { fileSearchInputSchema, type FileSearchInput } from './schema';
// Public type surface of the fileSearch tool.
export type {
  FileSearchFile,
  RagChunk,
  RagClient,
  RagQueryParams,
  FileSearchResultFormatter,
  FileSearchToolCallbacks,
  FileSearchToolConfig,
  FileSearchToolLogger,
} from './types';
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Default HTTP RAG client. Posts to `${baseUrl}/query` with the shape
|
|
3
|
+
* rag_api expects (`{ file_id, query, k, entity_id? }`). Runtimes that
|
|
4
|
+
* use a different vector backend implement their own `RagClient`.
|
|
5
|
+
*
|
|
6
|
+
* Auth is runtime-provided per call (via `authHeaders` on the params) so
|
|
7
|
+
* short-lived tokens can be minted per request without the client
|
|
8
|
+
* caching stale credentials.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import fetch from 'node-fetch';
|
|
12
|
+
import { getEnvironmentVariable } from '@langchain/core/utils/env';
|
|
13
|
+
import type {
|
|
14
|
+
RagClient,
|
|
15
|
+
RagQueryParams,
|
|
16
|
+
RagChunk,
|
|
17
|
+
FileSearchToolLogger,
|
|
18
|
+
} from './types';
|
|
19
|
+
|
|
20
|
+
export const RAG_API_URL_ENV = 'RAG_API_URL';
|
|
21
|
+
|
|
22
|
+
/** Resolve base URL at call time so env-var changes propagate. */
|
|
23
|
+
export function getRagBaseUrl(override?: string): string {
|
|
24
|
+
const url =
|
|
25
|
+
override ??
|
|
26
|
+
getEnvironmentVariable(RAG_API_URL_ENV) ??
|
|
27
|
+
'';
|
|
28
|
+
if (!url) {
|
|
29
|
+
throw new Error(
|
|
30
|
+
`file_search: ${RAG_API_URL_ENV} is not configured. ` +
|
|
31
|
+
`Set the env var or pass baseUrl to HttpRagClient.`,
|
|
32
|
+
);
|
|
33
|
+
}
|
|
34
|
+
return url.replace(/\/$/, '');
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/** Construction options for `HttpRagClient`. */
export interface HttpRagClientOptions {
  /** Base URL of the RAG service (no trailing slash). Falls back to env. */
  baseUrl?: string;
  /** Default headers sent on every request (e.g., a static API key). */
  defaultHeaders?: Record<string, string>;
  /** Default timeout if params don't override. Default 15_000. */
  defaultTimeoutMs?: number;
  /** Optional structured logger for per-request debug/warn diagnostics. */
  logger?: FileSearchToolLogger;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Expected rag_api response shape: `[[{ page_content, metadata }, distance], ...]`
 * — an array of [doc, score] tuples. Normalized into `RagChunk[]` by
 * `HttpRagClient.normalize`, which also drops malformed rows.
 */
type RagApiResponse = Array<
  [
    {
      page_content: string;
      metadata?: Record<string, unknown>;
    },
    number,
  ]
>;
|
|
60
|
+
|
|
61
|
+
export class HttpRagClient implements RagClient {
  // Explicit constructor baseUrl; when absent, getRagBaseUrl falls back
  // to the RAG_API_URL env var (re-read on every query).
  private readonly baseUrlOverride?: string;
  private readonly defaultHeaders: Record<string, string>;
  private readonly defaultTimeoutMs: number;
  private readonly logger?: FileSearchToolLogger;

  constructor(opts: HttpRagClientOptions = {}) {
    this.baseUrlOverride = opts.baseUrl;
    this.defaultHeaders = opts.defaultHeaders ?? {};
    this.defaultTimeoutMs = opts.defaultTimeoutMs ?? 15_000;
    this.logger = opts.logger;
  }

  /**
   * POST `${baseUrl}/query` and normalize the tuple response.
   *
   * Header precedence on key collision: per-call `authHeaders` override
   * constructor `defaultHeaders`, which override Content-Type.
   * The request is aborted after `timeoutMs` (params value, else the
   * constructor default) when AbortController exists in the runtime;
   * runtimes without it get no timeout.
   *
   * Throws on non-2xx responses (status plus first 200 chars of body)
   * and propagates fetch abort/network errors to the caller.
   */
  async query(params: RagQueryParams): Promise<RagChunk[]> {
    const baseUrl = getRagBaseUrl(this.baseUrlOverride);
    const url = `${baseUrl}/query`;

    const body: Record<string, unknown> = {
      file_id: params.file_id,
      query: params.query,
      k: params.k ?? 10,
    };
    // entity_id / scope are only sent when truthy, so rag_api never sees
    // empty-string tenancy fields.
    if (params.entity_id) body.entity_id = params.entity_id;
    if (params.scope) body.scope = params.scope;

    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      ...this.defaultHeaders,
      ...(params.authHeaders ?? {}),
    };

    const timeoutMs = params.timeoutMs ?? this.defaultTimeoutMs;
    // Guarded for runtimes lacking AbortController; without it, no timer
    // is armed and the request can run unbounded.
    const controller =
      typeof AbortController !== 'undefined' ? new AbortController() : null;
    const timer = controller
      ? setTimeout(() => controller.abort(), timeoutMs)
      : null;

    this.logger?.debug('[file_search] RAG query', {
      url,
      file_id: params.file_id,
      k: body.k,
    });

    try {
      const res = await fetch(url, {
        method: 'POST',
        headers,
        body: JSON.stringify(body),
        // Cast bridges node-fetch's RequestInit signal typing; presumably
        // matches the installed node-fetch version — TODO confirm.
        signal: controller?.signal as unknown as undefined,
      });
      if (!res.ok) {
        // Best-effort body read for diagnostics; ignore read failures.
        const text = await res.text().catch(() => '');
        throw new Error(
          `RAG query failed: ${res.status} ${res.statusText} — ${text.slice(0, 200)}`,
        );
      }
      const json = (await res.json()) as RagApiResponse;
      return this.normalize(params.file_id, json);
    } finally {
      // Always disarm the abort timer, on success and failure alike.
      if (timer) clearTimeout(timer);
    }
  }

  /** Convert rag_api's tuple format into the library's normalized shape. */
  private normalize(file_id: string, resp: RagApiResponse): RagChunk[] {
    if (!Array.isArray(resp)) {
      this.logger?.warn('[file_search] RAG response not an array', { resp });
      return [];
    }
    return resp
      // Drop anything that is not a [doc, score] pair.
      .filter((row) => Array.isArray(row) && row.length === 2)
      .map(([doc, distance]) => ({
        // Prefer the file_id embedded in doc metadata; fall back to the
        // file_id this query was issued for.
        file_id:
          (doc?.metadata?.file_id as string | undefined) ?? file_id,
        page_content: doc?.page_content ?? '',
        // Non-numeric scores are coerced to 1 (worst relevance).
        distance: typeof distance === 'number' ? distance : 1,
        metadata: doc?.metadata,
      }));
  }
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
// Zod schema for the tool's arguments. The `.describe()` strings are
// runtime behavior — they become the parameter documentation the LLM
// sees — so they must not be edited casually.
export const fileSearchInputSchema = z.object({
  query: z
    .string()
    .describe(
      "A natural language query to search for relevant information in the files. Be SPECIFIC and TARGETED — use keywords for the specific section or topic you need. For comprehensive tasks (summaries, overviews), call this tool multiple times with different targeted queries (e.g., 'introduction', 'methodology', 'results', 'conclusions') rather than one broad query.",
    ),
  target_files: z
    .array(z.string())
    .optional()
    .describe(
      'Optional list of filenames (or partial names) to limit the search to. When provided, only files whose name contains one of these strings will be searched. Use this to avoid searching irrelevant files. Omit to search all available files.',
    ),
});

// Static type inferred from the schema for validated tool input.
export type FileSearchInput = z.infer<typeof fileSearchInputSchema>;

// Canonical registered name of the tool.
export const FileSearchToolName = 'file_search';
|