@theia/ai-ide 1.70.0-next.21 → 1.70.0-next.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,7 +17,7 @@ import { ToolInvocationContext, ToolProvider, ToolRequest } from '@theia/ai-core
17
17
  import { CancellationToken, Disposable, PreferenceService, URI, Path } from '@theia/core';
18
18
  import { inject, injectable } from '@theia/core/shared/inversify';
19
19
  import { FileService } from '@theia/filesystem/lib/browser/file-service';
20
- import { FileStat } from '@theia/filesystem/lib/common/files';
20
+ import { FileStat, FileOperationError, FileOperationResult } from '@theia/filesystem/lib/common/files';
21
21
  import { WorkspaceService } from '@theia/workspace/lib/browser';
22
22
  import {
23
23
  FILE_CONTENT_FUNCTION_ID, GET_FILE_DIAGNOSTICS_ID,
@@ -26,7 +26,7 @@ import {
26
26
  } from '../common/workspace-functions';
27
27
  import ignore from 'ignore';
28
28
  import { Minimatch } from 'minimatch';
29
- import { CONSIDER_GITIGNORE_PREF, USER_EXCLUDE_PATTERN_PREF } from '../common/workspace-preferences';
29
+ import { CONSIDER_GITIGNORE_PREF, FILE_CONTENT_MAX_SIZE_KB_PREF, USER_EXCLUDE_PATTERN_PREF } from '../common/workspace-preferences';
30
30
  import { MonacoWorkspace } from '@theia/monaco/lib/browser/monaco-workspace';
31
31
  import { MonacoTextModelService } from '@theia/monaco/lib/browser/monaco-text-model-service';
32
32
  import { ProblemManager } from '@theia/markers/lib/browser';
@@ -280,7 +280,12 @@ export class FileContentFunction implements ToolProvider {
280
280
  'If the file is currently open in an editor with unsaved changes, returns the editor\'s current content (not the saved file on disk). ' +
281
281
  'Binary files may not be readable and will return an error. ' +
282
282
  'Use this tool to read file contents before making any edits with replacement functions. ' +
283
- 'Do NOT use this for files you haven\'t located yet - use findFilesByPattern or searchInWorkspace first.',
283
+ 'Do NOT use this for files you haven\'t located yet - use findFilesByPattern or searchInWorkspace first. ' +
284
+ 'Files exceeding the configured size limit will return an error. ' +
285
+ 'It is recommended to read the whole file by not providing offset or limit parameters, ' +
286
+ 'unless you expect it to be very large. ' +
287
+ 'If the size limit is hit, do NOT attempt to read the full file in chunks using offset and limit — ' +
288
+ 'this wastes context window. Use searchInWorkspace to find the specific content you need instead.',
284
289
  parameters: {
285
290
  type: 'object',
286
291
  properties: {
@@ -288,20 +293,30 @@ export class FileContentFunction implements ToolProvider {
288
293
  type: 'string',
289
294
  description: 'The relative path to the target file within the workspace (e.g., "src/index.ts", "package.json"). ' +
290
295
  'Must be relative to the workspace root. Absolute paths and paths outside the workspace will result in an error.',
296
+ },
297
+ offset: {
298
+ type: 'number',
299
+ description: 'Zero-based line offset to start reading from (default: 0). ' +
300
+ 'Use together with limit to page through large files.'
301
+ },
302
+ limit: {
303
+ type: 'number',
304
+ description: 'Maximum number of lines to return. Defaults to the rest of the file.'
291
305
  }
292
306
  },
293
307
  required: ['file']
294
308
  },
295
309
  handler: (arg_string: string, ctx?: ToolInvocationContext) => {
296
- const file = this.parseArg(arg_string);
297
- return this.getFileContent(file, ctx?.cancellationToken);
310
+ const { file, offset, limit } = this.parseArg(arg_string);
311
+ return this.getFileContent(file, ctx?.cancellationToken, offset, limit);
298
312
  },
299
313
  providerName: undefined,
300
314
  getArgumentsShortLabel: (args: string): { label: string; hasMore: boolean } | undefined => {
301
315
  try {
302
316
  const parsed = JSON.parse(args);
303
317
  if (parsed && typeof parsed === 'object' && 'file' in parsed) {
304
- return { label: String(parsed.file), hasMore: false };
318
+ const hasMore = 'offset' in parsed || 'limit' in parsed;
319
+ return { label: String(parsed.file), hasMore };
305
320
  }
306
321
  } catch {
307
322
  // ignore parse errors
@@ -320,16 +335,26 @@ export class FileContentFunction implements ToolProvider {
320
335
  @inject(MonacoWorkspace)
321
336
  protected readonly monacoWorkspace: MonacoWorkspace;
322
337
 
323
- private parseArg(arg_string: string): string {
338
+ @inject(PreferenceService)
339
+ protected readonly preferences: PreferenceService;
340
+
341
/**
 * Decodes the JSON-encoded tool arguments into the fields this tool understands.
 *
 * The values are passed through exactly as they appear in the JSON payload; no type
 * or range checking happens here.
 *
 * @param arg_string JSON text of the tool call arguments.
 * @returns The `file`, `offset` and `limit` properties of the parsed payload
 *          (each `undefined` when absent).
 * @throws SyntaxError if `arg_string` is not valid JSON.
 */
private parseArg(arg_string: string): { file: string; offset?: number; limit?: number } {
    const { file, offset, limit } = JSON.parse(arg_string);
    return { file, offset, limit };
}
327
345
 
328
- private async getFileContent(file: string, cancellationToken?: CancellationToken): Promise<string> {
346
+ private async getFileContent(file: string, cancellationToken?: CancellationToken, offset?: number, limit?: number): Promise<string> {
329
347
  if (cancellationToken?.isCancellationRequested) {
330
348
  return JSON.stringify({ error: 'Operation cancelled by user' });
331
349
  }
332
350
 
351
+ if (offset !== undefined && (!Number.isInteger(offset) || offset < 0)) {
352
+ return JSON.stringify({ error: 'offset must be a non-negative integer.' });
353
+ }
354
+ if (limit !== undefined && (!Number.isInteger(limit) || limit <= 0)) {
355
+ return JSON.stringify({ error: 'limit must be a positive integer.' });
356
+ }
357
+
333
358
  let targetUri: URI | undefined;
334
359
  try {
335
360
  const workspaceRoot = await this.workspaceScope.getWorkspaceRoot();
@@ -339,22 +364,175 @@ export class FileContentFunction implements ToolProvider {
339
364
  return JSON.stringify({ error: error.message });
340
365
  }
341
366
 
342
- try {
343
- if (cancellationToken?.isCancellationRequested) {
344
- return JSON.stringify({ error: 'Operation cancelled by user' });
367
+ if (cancellationToken?.isCancellationRequested) {
368
+ return JSON.stringify({ error: 'Operation cancelled by user' });
369
+ }
370
+
371
+ const openEditorValue = this.monacoWorkspace.getTextDocument(targetUri.toString())?.getText();
372
+ const maxSizeKB = this.preferences.get<number>(FILE_CONTENT_MAX_SIZE_KB_PREF, 256);
373
+ const isEditorOpen = openEditorValue !== undefined;
374
+ const isPaginated = offset !== undefined || limit !== undefined;
375
+
376
+ if (isEditorOpen) {
377
+ return this.handleEditorContent(openEditorValue!, maxSizeKB, offset, limit);
378
+ } else if (isPaginated) {
379
+ return this.readStreamedSlice(targetUri, maxSizeKB, offset, limit);
380
+ } else {
381
+ return this.handleFullDiskRead(targetUri, maxSizeKB);
382
+ }
383
+ }
384
+
385
/**
 * Produces the tool result for a file whose content comes from an open editor.
 *
 * Without `offset` and `limit` the whole text is returned unchanged, provided it does
 * not exceed `maxSizeKB`. With either parameter set, the text is split into lines, the
 * requested range is extracted, and the range (not the whole document) is checked
 * against `maxSizeKB`. A paginated result is prefixed with a header stating the
 * 1-based line range and the total number of lines.
 *
 * @param content Current text of the editor document.
 * @param maxSizeKB Maximum size, in kilobytes, of the returned content.
 * @param offset Zero-based index of the first line to return (defaults to 0).
 * @param limit Maximum number of lines to return (defaults to the rest of the document).
 * @returns The content or slice, or a JSON-encoded error when the size limit is exceeded.
 */
private handleEditorContent(content: string, maxSizeKB: number, offset?: number, limit?: number): string {
    if (offset === undefined && limit === undefined) {
        const sizeKB = this.sizeInKB(content);
        if (sizeKB > maxSizeKB) {
            return this.buildFileSizeLimitError(sizeKB, maxSizeKB);
        }
        return content;
    }

    const lines = content.split('\n');
    // A trailing newline terminates the last line; it does not start another one.
    // Without this, "a\nb\n" would be reported as 3 lines here but as 2 lines by
    // readStreamedSlice, so the same file would paginate differently depending on
    // whether it happens to be open in an editor.
    if (lines[lines.length - 1] === '') {
        lines.pop();
    }

    const startOffset = offset ?? 0;
    const sliced = limit !== undefined ? lines.slice(startOffset, startOffset + limit) : lines.slice(startOffset);
    const result = sliced.join('\n');
    const resultSizeKB = this.sizeInKB(result);
    if (resultSizeKB > maxSizeKB) {
        return this.buildSliceSizeLimitError(resultSizeKB, maxSizeKB);
    }
    // The header uses 1-based, inclusive line numbers.
    const startLine = startOffset + 1;
    const endLine = startOffset + sliced.length;
    const header = `[Lines ${startLine}\u2013${endLine} of ${lines.length} total. Use offset and limit to read other ranges.]`;
    return `${header}\n${result}`;
}
346
407
 
347
- const openEditorValue = this.monacoWorkspace.getTextDocument(targetUri.toString())?.getText();
348
- if (openEditorValue !== undefined) {
349
- return openEditorValue;
408
/**
 * Reads an entire file from disk, refusing files larger than `maxSizeKB`.
 *
 * The size reported by the file stat is checked first so that oversized files are
 * rejected before their content is read. If the stat carries no size, the read is
 * delegated to {@link readStreamedSlice}. After reading, the size of the decoded
 * content is checked once more.
 *
 * @param targetUri URI of the file to read.
 * @param maxSizeKB Maximum size, in kilobytes, of the returned content.
 * @returns The file content, or a JSON-encoded error (size limit exceeded or `File not found`).
 */
private async handleFullDiskRead(targetUri: URI, maxSizeKB: number): Promise<string> {
    try {
        const { size } = await this.fileService.resolve(targetUri);
        if (size === undefined) {
            // Size is unknown from stat; fall back to the streaming reader,
            // which applies the size check after reading.
            return this.readStreamedSlice(targetUri, maxSizeKB);
        }

        const reportedKB = Math.round(size / 1024);
        if (reportedKB > maxSizeKB) {
            return this.buildFileSizeLimitError(reportedKB, maxSizeKB);
        }

        const { value: text } = await this.fileService.read(targetUri);
        const actualKB = this.sizeInKB(text);
        return actualKB > maxSizeKB
            ? this.buildFileSizeLimitError(actualKB, maxSizeKB)
            : text;
    } catch (error) {
        const rejectedAsTooLarge = error instanceof FileOperationError && (
            error.fileOperationResult === FileOperationResult.FILE_TOO_LARGE ||
            error.fileOperationResult === FileOperationResult.FILE_EXCEEDS_MEMORY_LIMIT
        );
        if (rejectedAsTooLarge) {
            // The file service refused the read on size grounds; the actual size is not known here.
            return this.buildFileSizeLimitError(undefined, maxSizeKB);
        }
        // Every other failure is reported to the caller as a missing file.
        return JSON.stringify({ error: 'File not found' });
    }
}
438
+
439
/**
 * Streams a file from disk and returns the requested line range.
 *
 * Lines are split on `\n`. Every line of the file is counted so the header can report
 * the total, but only lines inside `[startLine, startLine + limit)` are retained.
 * The size of the retained slice is tracked while streaming: as soon as it exceeds
 * `maxSizeKB` the lines collected so far are released and no further lines are kept,
 * so an oversized request cannot accumulate the whole file in memory. Byte counting
 * continues so that the error still reports the full size of the requested range.
 *
 * NOTE: a single line is still buffered in full until its terminating newline (or the
 * end of the stream) arrives.
 *
 * @param targetUri URI of the file to read.
 * @param maxSizeKB Maximum size, in kilobytes, of the returned slice.
 * @param startLine Zero-based index of the first line to return (defaults to 0).
 * @param limit Maximum number of lines to return (defaults to the rest of the file).
 * @returns The slice (prefixed with a range header when `startLine` or `limit` was
 *          given), or a JSON-encoded error. The returned promise never rejects.
 */
private async readStreamedSlice(
    targetUri: URI, maxSizeKB: number, startLine?: number, limit?: number
): Promise<string> {
    const isPaginated = startLine !== undefined || limit !== undefined;
    const effectiveStartLine = startLine ?? 0;
    const endLineExclusive = limit === undefined ? Number.POSITIVE_INFINITY : effectiveStartLine + limit;

    let streamValue: Awaited<ReturnType<typeof this.fileService.readStream>>['value'];
    try {
        // Bypass the files.maxFileSizeMB preference: this path only retains the requested
        // slice, and that slice is checked against maxSizeKB below.
        streamValue = (await this.fileService.readStream(targetUri, { limits: { size: Number.MAX_SAFE_INTEGER } })).value;
    } catch (e) {
        if (e instanceof FileOperationError &&
            (e.fileOperationResult === FileOperationResult.FILE_TOO_LARGE ||
                e.fileOperationResult === FileOperationResult.FILE_EXCEEDS_MEMORY_LIMIT)) {
            return this.buildFileSizeLimitError(undefined, maxSizeKB);
        }
        return JSON.stringify({ error: 'File not found' });
    }

    return new Promise<string>(resolve => {
        let pending = '';       // text after the last newline seen so far (an incomplete line)
        let lineIndex = 0;      // zero-based index of the next line to be consumed
        let sliceLines: string[] = [];
        let sliceLineCount = 0; // number of in-range lines seen, retained or not
        let sliceBytes = 0;     // UTF-8 size of the in-range lines joined with '\n'
        let overLimit = false;

        const sliceSizeKB = (): number => Math.round(sliceBytes / 1024);

        const consumeLine = (line: string): void => {
            if (lineIndex >= effectiveStartLine && lineIndex < endLineExclusive) {
                // Every line after the first contributes one extra byte for the '\n' separator.
                sliceBytes += Buffer.byteLength(line, 'utf8') + (sliceLineCount > 0 ? 1 : 0);
                sliceLineCount++;
                if (!overLimit) {
                    sliceLines.push(line);
                    if (sliceSizeKB() > maxSizeKB) {
                        // The request is going to fail; release what was collected and stop retaining.
                        overLimit = true;
                        sliceLines = [];
                    }
                }
            }
            lineIndex++;
        };

        streamValue.on('data', (chunk: string) => {
            // `pending` never contains a newline, so only the new chunk needs to be split.
            const parts = chunk.split('\n');
            parts[0] = pending + parts[0];
            pending = parts.pop()!;
            for (const line of parts) {
                consumeLine(line);
            }
        });

        streamValue.on('end', () => {
            // A non-empty remainder is a final line without a terminating newline.
            if (pending.length > 0) {
                consumeLine(pending);
            }
            if (overLimit) {
                const resultSizeKB = sliceSizeKB();
                resolve(isPaginated
                    ? this.buildSliceSizeLimitError(resultSizeKB, maxSizeKB)
                    : this.buildFileSizeLimitError(resultSizeKB, maxSizeKB));
                return;
            }
            const result = sliceLines.join('\n');
            if (isPaginated) {
                const header =
                    `[Lines ${effectiveStartLine + 1}\u2013${effectiveStartLine + sliceLines.length} of ${lineIndex} total. ` +
                    'Use offset and limit to read other ranges.]';
                resolve(`${header}\n${result}`);
            } else {
                resolve(result);
            }
        });

        streamValue.on('error', () => resolve(JSON.stringify({ error: 'File not found' })));
    });
}
510
+
511
/**
 * Computes the UTF-8 encoded size of a string in kilobytes.
 *
 * @param content Text to measure.
 * @returns The byte length divided by 1024, rounded to the nearest integer.
 */
private sizeInKB(content: string): number {
    const byteCount = Buffer.byteLength(content, 'utf8');
    return Math.round(byteCount / 1024);
}
514
+
515
/**
 * Builds the JSON-encoded error returned when a whole file exceeds the size limit.
 *
 * @param sizeKB Size of the file in kilobytes, or `undefined` when it is not known.
 *               When given, it is mentioned in the message and included as `sizeKB`.
 * @param maxSizeKB The configured limit in kilobytes; always included as `maxSizeKB`.
 * @returns A JSON string with the properties `error`, `maxSizeKB` and, if known, `sizeKB`.
 */
private buildFileSizeLimitError(sizeKB: number | undefined, maxSizeKB: number): string {
    const sizeInfo = sizeKB === undefined ? '' : ` (${sizeKB}KB)`;
    const error = `File exceeds the configured ${maxSizeKB}KB size limit${sizeInfo}. ` +
        'Use the \'offset\' (0-based) and \'limit\' parameters to read specific line ranges, or use searchInWorkspace to find specific content.';
    // JSON.stringify omits properties whose value is undefined, so `sizeKB`
    // appears in the output only when a size was supplied.
    return JSON.stringify({ error, maxSizeKB, sizeKB });
}
527
+
528
/**
 * Builds the JSON-encoded error returned when a requested line range exceeds the size limit.
 *
 * @param resultSizeKB Size of the requested range in kilobytes.
 * @param maxSizeKB The configured limit in kilobytes.
 * @returns A JSON string with the properties `error`, `resultSizeKB` and `maxSizeKB`.
 */
private buildSliceSizeLimitError(resultSizeKB: number, maxSizeKB: number): string {
    const error =
        `Requested range exceeds the configured ${maxSizeKB}KB size limit (${resultSizeKB}KB). ` +
        'Use a smaller limit to read fewer lines at a time.';
    return JSON.stringify({ error, resultSizeKB, maxSizeKB });
}
358
536
  }
359
537
 
360
538
  @injectable()
@@ -23,6 +23,7 @@ export const PROMPT_TEMPLATE_WORKSPACE_DIRECTORIES_PREF = 'ai-features.promptTem
23
23
  export const PROMPT_TEMPLATE_ADDITIONAL_EXTENSIONS_PREF = 'ai-features.promptTemplates.TemplateExtensions';
24
24
  export const PROMPT_TEMPLATE_WORKSPACE_FILES_PREF = 'ai-features.promptTemplates.WorkspaceTemplateFiles';
25
25
  export const TASK_CONTEXT_STORAGE_DIRECTORY_PREF = 'ai-features.promptTemplates.taskContextStorageDirectory';
26
+ export const FILE_CONTENT_MAX_SIZE_KB_PREF = 'ai-features.workspaceFunctions.fileContentMaxSizeKB';
26
27
 
27
28
  const CONFLICT_RESOLUTION_DESCRIPTION = 'When templates with the same ID (filename) exist in multiple locations, conflicts are resolved by priority: specific template files \
28
29
  (highest) > workspace directories > global directories (lowest).';
@@ -91,6 +92,16 @@ export const WorkspacePreferencesSchema: PreferenceSchema = {
91
92
  ' If set to empty value, generated task contexts will be stored in memory rather than on disk.'
92
93
  ),
93
94
  default: '.prompts/task-contexts'
95
+ },
96
+ [FILE_CONTENT_MAX_SIZE_KB_PREF]: {
97
+ type: 'number',
98
+ title: nls.localize('theia/ai/workspace/fileContentMaxSizeKB/title', 'File Content Max Size (KB)'),
99
+ description: nls.localize('theia/ai/workspace/fileContentMaxSizeKB/description',
100
+ 'Maximum size in kilobytes of the content returned by the getFileContent tool. ' +
101
+ 'When reading a full file (no offset/limit), files exceeding this limit return an error. ' +
102
+ 'When using offset and limit, only the requested range is checked against this limit.'),
103
+ default: 256,
104
+ minimum: 1
94
105
  }
95
106
  }
96
107
  };