@cloudwarriors-ai/rlm 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/dist/application/handlers/llm-query-handler.d.ts +67 -0
  2. package/dist/application/handlers/llm-query-handler.d.ts.map +1 -0
  3. package/dist/application/handlers/llm-query-handler.js +169 -0
  4. package/dist/application/handlers/llm-query-handler.js.map +1 -0
  5. package/dist/application/query-handler.d.ts +23 -2
  6. package/dist/application/query-handler.d.ts.map +1 -1
  7. package/dist/application/query-handler.js +215 -112
  8. package/dist/application/query-handler.js.map +1 -1
  9. package/dist/cli/index.js +0 -0
  10. package/dist/domain/constants.d.ts +124 -0
  11. package/dist/domain/constants.d.ts.map +1 -0
  12. package/dist/domain/constants.js +148 -0
  13. package/dist/domain/constants.js.map +1 -0
  14. package/dist/domain/errors/index.d.ts +1 -0
  15. package/dist/domain/errors/index.d.ts.map +1 -1
  16. package/dist/domain/errors/index.js +2 -0
  17. package/dist/domain/errors/index.js.map +1 -1
  18. package/dist/domain/errors/token-budget-error.d.ts +47 -0
  19. package/dist/domain/errors/token-budget-error.d.ts.map +1 -0
  20. package/dist/domain/errors/token-budget-error.js +41 -0
  21. package/dist/domain/errors/token-budget-error.js.map +1 -0
  22. package/dist/domain/interfaces/code-executor.d.ts +32 -2
  23. package/dist/domain/interfaces/code-executor.d.ts.map +1 -1
  24. package/dist/domain/interfaces/event-emitter.d.ts +55 -1
  25. package/dist/domain/interfaces/event-emitter.d.ts.map +1 -1
  26. package/dist/domain/interfaces/llm-provider.d.ts +4 -0
  27. package/dist/domain/interfaces/llm-provider.d.ts.map +1 -1
  28. package/dist/domain/services/cost-calculator.d.ts.map +1 -1
  29. package/dist/domain/services/cost-calculator.js +9 -4
  30. package/dist/domain/services/cost-calculator.js.map +1 -1
  31. package/dist/domain/types/config.d.ts +17 -0
  32. package/dist/domain/types/config.d.ts.map +1 -1
  33. package/dist/domain/types/config.js +41 -0
  34. package/dist/domain/types/config.js.map +1 -1
  35. package/dist/domain/types/index-schema.d.ts +206 -0
  36. package/dist/domain/types/index-schema.d.ts.map +1 -0
  37. package/dist/domain/types/index-schema.js +41 -0
  38. package/dist/domain/types/index-schema.js.map +1 -0
  39. package/dist/domain/types/index.d.ts +2 -0
  40. package/dist/domain/types/index.d.ts.map +1 -1
  41. package/dist/domain/types/index.js +4 -0
  42. package/dist/domain/types/index.js.map +1 -1
  43. package/dist/domain/utils/timer.d.ts +34 -0
  44. package/dist/domain/utils/timer.d.ts.map +1 -0
  45. package/dist/domain/utils/timer.js +39 -0
  46. package/dist/domain/utils/timer.js.map +1 -0
  47. package/dist/factory/create-rlm.d.ts.map +1 -1
  48. package/dist/factory/create-rlm.js +1 -0
  49. package/dist/factory/create-rlm.js.map +1 -1
  50. package/dist/infrastructure/llm/openrouter-provider.d.ts +1 -0
  51. package/dist/infrastructure/llm/openrouter-provider.d.ts.map +1 -1
  52. package/dist/infrastructure/llm/openrouter-provider.js +30 -9
  53. package/dist/infrastructure/llm/openrouter-provider.js.map +1 -1
  54. package/dist/infrastructure/llm/prompts/index.d.ts +1 -1
  55. package/dist/infrastructure/llm/prompts/index.d.ts.map +1 -1
  56. package/dist/infrastructure/llm/prompts/index.js +1 -1
  57. package/dist/infrastructure/llm/prompts/index.js.map +1 -1
  58. package/dist/infrastructure/llm/prompts/system-prompt.d.ts +14 -1
  59. package/dist/infrastructure/llm/prompts/system-prompt.d.ts.map +1 -1
  60. package/dist/infrastructure/llm/prompts/system-prompt.js +186 -52
  61. package/dist/infrastructure/llm/prompts/system-prompt.js.map +1 -1
  62. package/dist/infrastructure/logging/debug-logger.d.ts +29 -0
  63. package/dist/infrastructure/logging/debug-logger.d.ts.map +1 -0
  64. package/dist/infrastructure/logging/debug-logger.js +35 -0
  65. package/dist/infrastructure/logging/debug-logger.js.map +1 -0
  66. package/dist/infrastructure/sandbox/prelude/rlm_prelude.py +637 -41
  67. package/dist/infrastructure/sandbox/process-manager.d.ts +1 -0
  68. package/dist/infrastructure/sandbox/process-manager.d.ts.map +1 -1
  69. package/dist/infrastructure/sandbox/process-manager.js +19 -6
  70. package/dist/infrastructure/sandbox/process-manager.js.map +1 -1
  71. package/dist/infrastructure/sandbox/python-executor.d.ts +6 -2
  72. package/dist/infrastructure/sandbox/python-executor.d.ts.map +1 -1
  73. package/dist/infrastructure/sandbox/python-executor.js +138 -5
  74. package/dist/infrastructure/sandbox/python-executor.js.map +1 -1
  75. package/package.json +2 -1
  76. package/src/infrastructure/sandbox/prelude/rlm_prelude.py +637 -41
@@ -90,6 +90,65 @@ def llm_query(query: str, *context_vars: str) -> str:
90
90
  return response.get("result", "")
91
91
 
92
92
 
93
def llm_query_batch(queries: list[tuple[str, str]]) -> list[str]:
    """
    Make multiple LLM queries in PARALLEL for significant speedup.

    Per MIT RLM paper: Sequential sub-calls are a runtime bottleneck.
    Use this instead of multiple llm_query() calls when queries are independent.

    Args:
        queries: List of (query, context) tuples. Each context is a string —
            either literal content or the name of a stored context variable.

    Returns:
        List of responses in the same order as queries.

    Raises:
        RuntimeError: If the batch as a whole fails, or if any individual
            query in the batch returns an error.

    Example:
        # Instead of this (sequential, slow):
        results = []
        for chunk in chunks:
            result = llm_query("Analyze this", chunk)
            results.append(result)

        # Do this (parallel, fast):
        queries = [("Analyze this", chunk) for chunk in chunks]
        results = llm_query_batch(queries)
    """
    if not queries:
        return []

    # Build batch payload
    batch = []
    for query, context in queries:
        # Resolve context if it's a variable name. Only str contexts can be
        # dict keys: an unhashable context (e.g. a list of strings, which the
        # payload branch below explicitly supports) would raise TypeError on
        # the membership test, so guard with isinstance first.
        if isinstance(context, str) and context in _context_store:
            context = _context_store[context]
        batch.append({
            "query": query,
            "context": [context] if isinstance(context, str) else list(context)
        })

    # Send batch command
    _send_command("llm_query_batch", {"queries": batch})

    # Wait for batch response
    response = _wait_for_response()

    if "error" in response:
        raise RuntimeError(f"Batch query failed: {response['error']}")

    results = response.get("results", [])

    # Check for individual errors
    final_results = []
    for i, r in enumerate(results):
        if isinstance(r, dict) and "error" in r:
            raise RuntimeError(f"Query {i} failed: {r['error']}")
        final_results.append(r)

    return final_results
150
+
151
+
93
152
  def set_result(result: str) -> None:
94
153
  """
95
154
  Set the final result of the RLM execution.
@@ -105,6 +164,30 @@ def set_result(result: str) -> None:
105
164
  _send_command("set_result", {"result": _result})
106
165
 
107
166
 
167
def set_result_final(result: str, confidence: float = 1.0) -> None:
    """
    Set the result AND signal that the answer is complete.

    Prefer this over set_result() when a definitive answer has been found
    and no further processing is needed.

    Args:
        result: The final answer (coerced to str).
        confidence: How confident (0.0-1.0) that this answer is complete.

    Example:
        # Found the specific file requested
        set_result_final("The main entry point is src/index.ts", confidence=1.0)
    """
    global _result
    _result = str(result)
    payload = {
        "result": _result,
        "confidence": confidence,
    }
    _send_command("set_result_final", payload)
189
+
190
+
108
191
  def set_variable(name: str, value: str) -> None:
109
192
  """
110
193
  Store a variable for use in subsequent code or queries.
@@ -213,6 +296,500 @@ def count_tokens(text: str) -> int:
213
296
  return len(text) // 4
214
297
 
215
298
 
299
# Unit index cache for lazy structure extraction:
# maps unit id -> metadata dict (populated by get_structure() and its
# _extract_* helpers; cleared on every get_structure() call).
_unit_index: dict[str, dict] = {}
301
+
302
+
303
def _extract_codebase_structure(content: str) -> dict:
    """Extract structure from codebase format (=== FILE: ... ===)."""
    import re
    marker = re.compile(r'^=== FILE: (.+?) ===$', re.MULTILINE)
    headers = list(marker.finditer(content))

    units = []
    for idx, header in enumerate(headers):
        path = header.group(1)
        body_start = header.end()
        body_end = headers[idx + 1].start() if idx + 1 < len(headers) else len(content)
        body = content[body_start:body_end].strip()

        uid = f"file:{path}"
        info = {
            'id': uid,
            'type': 'file',
            'path': path,
            'start': header.start(),
            'end': body_end,
            'size': len(body),
            'tokens': count_tokens(body),
        }
        units.append(info)
        # Cache exact content offsets so get_unit() can slice lazily later
        _unit_index[uid] = {**info, 'content_start': body_start, 'content_end': body_end}

    # Group file names by containing directory ('.' for root-level files)
    directories = {}
    for info in units:
        segments = info['path'].split('/')
        parent = '/'.join(segments[:-1]) if len(segments) > 1 else '.'
        directories.setdefault(parent, []).append(segments[-1])

    return {
        'type': 'codebase',
        'total_files': len(units),
        'total_tokens': sum(u['tokens'] for u in units),
        'directories': directories,
        'units': [{'id': u['id'], 'path': u['path'], 'tokens': u['tokens']} for u in units],
    }
346
+
347
+
348
def _extract_markdown_structure(content: str) -> dict:
    """Extract structure from markdown (# headings)."""
    import re
    heading_re = re.compile(r'^(#{1,6})\s+(.+)$', re.MULTILINE)
    headings = list(heading_re.finditer(content))

    units = []
    for idx, heading in enumerate(headings):
        depth = len(heading.group(1))
        title = heading.group(2).strip()
        begin = heading.start()
        finish = headings[idx + 1].start() if idx + 1 < len(headings) else len(content)
        section = content[begin:finish].strip()

        # Index plus truncated title keeps ids unique even for repeated headings
        uid = f"section:{idx}:{title[:50]}"
        info = {
            'id': uid,
            'type': 'section',
            'level': depth,
            'title': title,
            'start': begin,
            'end': finish,
            'size': len(section),
            'tokens': count_tokens(section),
        }
        units.append(info)
        _unit_index[uid] = {**info, 'content_start': begin, 'content_end': finish}

    return {
        'type': 'markdown',
        'total_sections': len(units),
        'total_tokens': sum(u['tokens'] for u in units),
        'units': [{'id': u['id'], 'level': u['level'], 'title': u['title'], 'tokens': u['tokens']} for u in units],
    }
383
+
384
+
385
def _extract_generic_structure(content: str) -> dict:
    """Extract structure by splitting into equal chunks."""
    chunk_size = 50000  # ~12.5K tokens per chunk
    total = len(content)

    units = []
    for index, offset in enumerate(range(0, total, chunk_size)):
        limit = min(offset + chunk_size, total)
        piece = content[offset:limit]

        uid = f"chunk:{index}"
        info = {
            'id': uid,
            'type': 'chunk',
            'index': index,
            'start': offset,
            'end': limit,
            'size': len(piece),
            'tokens': count_tokens(piece),
        }
        units.append(info)
        _unit_index[uid] = {**info, 'content_start': offset, 'content_end': limit}

    return {
        'type': 'generic',
        'total_chunks': len(units),
        'total_tokens': sum(u['tokens'] for u in units),
        'units': [{'id': u['id'], 'index': u['index'], 'tokens': u['tokens']} for u in units],
    }
412
+
413
+
414
def get_structure(content: str = None) -> dict:
    """
    Lazily extract a structural summary of the content.

    Detects the content format and dispatches to the matching extractor:
    codebase ('=== FILE: ... ===' markers), markdown ('#' heading at the
    start), or generic fixed-size chunks. Rebuilds the module-level unit
    index as a side effect.

    Args:
        content: Content to analyze (defaults to 'context' variable)

    Returns:
        Structure dict with type, units summary, and metadata

    Example:
        structure = get_structure()
        print(f"Found {structure['total_files']} files")
        for unit in structure['units']:
            print(f"  {unit['path']}: {unit['tokens']} tokens")
    """
    if content is None:
        content = _context_store.get('context', '')

    # Reset the cached index before rebuilding it for this content
    _unit_index.clear()

    # Detect type and extract structure markers
    if '=== FILE:' in content:
        return _extract_codebase_structure(content)
    if content.lstrip().startswith('#'):
        return _extract_markdown_structure(content)
    return _extract_generic_structure(content)
443
+
444
+
445
def get_unit(unit_id: str, content: str = None) -> str:
    """
    Fetch a specific unit's content by its ID.

    Args:
        unit_id: The unit ID reported by get_structure()
        content: Content to extract from (defaults to 'context' variable)

    Returns:
        The stripped content of the requested unit.

    Raises:
        ValueError: If unit_id is not present in the unit index.

    Example:
        structure = get_structure()
        file_content = get_unit(structure['units'][0]['id'])
    """
    if content is None:
        content = _context_store.get('context', '')

    # Build the index lazily if it has not been populated yet
    if not _unit_index:
        get_structure(content)

    try:
        unit = _unit_index[unit_id]
    except KeyError:
        raise ValueError(f"Unit not found: {unit_id}. Call get_structure() first.") from None

    return content[unit['content_start']:unit['content_end']].strip()
472
+
473
+
474
def list_units(pattern: str = None, content: str = None) -> list[dict]:
    """
    List indexed units, optionally filtered by a pattern.

    Args:
        pattern: Optional substring to match against each unit's id or path
        content: Content to analyze (defaults to 'context' variable)

    Returns:
        List of unit info dicts with id, type, tokens, path, and title
        (path/title are None for unit types that lack them).

    Example:
        # List all Python files
        py_files = list_units('.py')
        for f in py_files:
            print(f"{f['id']}: {f['tokens']} tokens")
    """
    if content is None:
        content = _context_store.get('context', '')

    # Build the index lazily if it has not been populated yet
    if not _unit_index:
        get_structure(content)

    matched = []
    for uid, info in _unit_index.items():
        # Skip units matching neither id nor path when a pattern is given
        if pattern is not None and pattern not in uid and pattern not in info.get('path', ''):
            continue
        matched.append({
            'id': uid,
            'type': info['type'],
            'tokens': info['tokens'],
            'path': info.get('path'),
            'title': info.get('title'),
        })

    return matched
510
+
511
+
512
def get_directory_tree(content: str = None) -> dict:
    """
    Build a nested directory tree from codebase-formatted content.

    Parses '=== FILE: path ===' markers and groups files by their
    directory components. Files at the repository root are collected
    under the special '_root' key.

    Args:
        content: Content to analyze (defaults to 'context' variable)

    Returns:
        Nested dict where every node has the shape
        {'files': [name, ...], 'dirs': {subdir_name: node, ...}}:
        {
            'module1': {
                'files': ['main.py', 'utils.py'],
                'dirs': {
                    'subdir1': {'files': [...], 'dirs': {...}},
                }
            },
            ...
        }

    Example:
        tree = get_directory_tree()
        for module, data in tree.items():
            print(f"Module: {module}")
            print(f"  Files: {data['files']}")
            for subdir in data['dirs']:
                print(f"  Subdir: {subdir}")
    """
    import re

    if content is None:
        content = _context_store.get('context', '')

    header_re = re.compile(r'^=== FILE: (.+?) ===$', re.MULTILINE)

    def _new_node() -> dict:
        """Empty tree node: no files, no subdirectories."""
        return {'files': [], 'dirs': {}}

    tree = {}
    for match in header_re.finditer(content):
        segments = match.group(1).split('/')

        if len(segments) == 1:
            # Root-level file: grouped under the synthetic '_root' entry
            tree.setdefault('_root', _new_node())['files'].append(segments[0])
            continue

        # Walk (creating as needed) down to the file's parent directory,
        # then record the file name there.
        node = tree.setdefault(segments[0], _new_node())
        for segment in segments[1:-1]:
            node = node['dirs'].setdefault(segment, _new_node())
        node['files'].append(segments[-1])

    return tree
593
+
594
+
595
def get_files_in_directory(directory: str, content: str = None) -> list[dict]:
    """
    Get all files in a specific directory with metadata.

    Args:
        directory: Directory path to list (e.g., "rapture/elastic").
            Use "." for files at the repository root, matching the
            'directories' keys returned by get_structure().
        content: Content to analyze (defaults to 'context' variable)

    Returns:
        List of file metadata dicts, sorted by file name:
        [{'id': 'file:path', 'path': 'path', 'name': 'file.py', 'tokens': 123, 'size': 456}, ...]

    Example:
        files = get_files_in_directory("rapture/api")
        for f in files:
            print(f"{f['name']}: {f['tokens']} tokens")
    """
    if content is None:
        content = _context_store.get('context', '')

    # Ensure structure has been extracted
    if not _unit_index:
        get_structure(content)

    # Normalize directory path (remove trailing slash); treat '' as root
    directory = directory.rstrip('/') or '.'

    files = []
    for unit_id, info in _unit_index.items():
        if info['type'] != 'file':
            continue

        file_path = info.get('path', '')
        parts = file_path.split('/')
        # Root-level files map to '.' so they match get_structure()'s
        # 'directories' keys. Previously their computed dir was '', which
        # never equalled a normalized directory, so root files were
        # unreachable through this function.
        file_dir = '/'.join(parts[:-1]) if len(parts) > 1 else '.'
        file_name = parts[-1]

        # Match exact directory (not subdirectories)
        if file_dir == directory:
            files.append({
                'id': unit_id,
                'path': file_path,
                'name': file_name,
                'tokens': info.get('tokens', 0),
                'size': info.get('size', 0),
            })

    return sorted(files, key=lambda f: f['name'])
642
+
643
+
644
def get_directory_content(directory: str, max_tokens: int = 100000, content: str = None) -> tuple[str, list[str]]:
    """
    Get combined content of files in a directory up to a token limit.

    Useful for feeding a directory's contents to llm_query().

    Args:
        directory: Directory path to get content from
        max_tokens: Maximum total tokens to fetch (default: 100000)
        content: Content to extract from (defaults to 'context' variable)

    Returns:
        Tuple of (combined_content, unfetched_file_ids)

    Example:
        text, remaining = get_directory_content("rapture/elastic", max_tokens=50000)
        analysis = llm_query("Analyze this directory", text)
        if remaining:
            # Fetch the leftover files by id with get_units_safe()
            text2, _ = get_units_safe(remaining, max_tokens=50000)
    """
    if content is None:
        content = _context_store.get('context', '')

    # Collect the unit ids of every file directly inside the directory,
    # then delegate token-budgeted fetching to get_units_safe().
    wanted = [entry['id'] for entry in get_files_in_directory(directory, content)]
    return get_units_safe(wanted, max_tokens=max_tokens, content=content)
674
+
675
+
676
def get_all_directories(content: str = None) -> list[str]:
    """
    Get a flat, sorted list of all directories in the codebase.

    Args:
        content: Content to analyze (defaults to 'context' variable)

    Returns:
        Sorted list of directory paths, including every intermediate
        parent (e.g., ["rapture", "rapture/api", "rapture/elastic", ...]).

    Example:
        dirs = get_all_directories()
        for d in dirs:
            print(f"Directory: {d}")
            files = get_files_in_directory(d)
            print(f"  Files: {len(files)}")
    """
    if content is None:
        content = _context_store.get('context', '')

    # Build the index lazily if it has not been populated yet
    if not _unit_index:
        get_structure(content)

    found = set()
    for info in _unit_index.values():
        if info['type'] != 'file':
            continue

        segments = info.get('path', '').split('/')
        # Every proper prefix of the path is a directory
        for depth in range(1, len(segments)):
            found.add('/'.join(segments[:depth]))

    return sorted(found)
715
+
716
+
717
def get_module_directories(module: str, content: str = None) -> list[str]:
    """
    Get all directories within a specific top-level module.

    Args:
        module: Module name (e.g., "rapture", "rapture-frontend")
        content: Content to analyze (defaults to 'context' variable)

    Returns:
        List of directory paths: the module itself plus everything below it.

    Example:
        dirs = get_module_directories("rapture")
        # Returns: ["rapture", "rapture/api", "rapture/elastic", ...]
    """
    # Prefix check uses 'module/' so sibling modules sharing the same
    # leading characters (e.g. "rapture-frontend") are excluded.
    prefix = module + '/'
    return [path for path in get_all_directories(content)
            if path == module or path.startswith(prefix)]
734
+
735
+
736
def get_units_safe(unit_ids: list[str], max_tokens: int = 50000, content: str = None) -> tuple[str, list[str]]:
    """
    Fetch units up to a token budget.

    This is the recommended way to fetch multiple units while staying within
    the llm_query() capacity limit. Units that would exceed the budget (and
    unknown ids) are returned so the caller can fetch them in a later batch.

    Args:
        unit_ids: List of unit IDs to fetch
        max_tokens: Maximum total tokens to fetch (default: 50000 = ~200K chars)
        content: Content to extract from (defaults to 'context' variable)

    Returns:
        Tuple of (combined_content, unfetched_unit_ids)

    Example:
        structure = get_structure()
        all_ids = [u['id'] for u in structure['units']]

        # Fetch in batches that fit in llm_query()
        fetched, remaining = get_units_safe(all_ids, max_tokens=100000)
        result1 = llm_query("Analyze these files", fetched)

        if remaining:
            fetched2, remaining2 = get_units_safe(remaining, max_tokens=100000)
            result2 = llm_query("Analyze these files", fetched2)
    """
    if content is None:
        content = _context_store.get('context', '')

    # Build the index lazily if it has not been populated yet
    if not _unit_index:
        get_structure(content)

    pieces = []
    used_tokens = 0
    leftover = []

    for uid in unit_ids:
        info = _unit_index.get(uid)
        if info is None:
            # Unknown id — hand it back rather than failing the whole batch
            leftover.append(uid)
            continue

        cost = info['tokens']
        if used_tokens + cost > max_tokens:
            leftover.append(uid)
            continue

        # Slice from 'start' (header marker included) so the combined text
        # can itself be re-parsed by get_structure()
        pieces.append(content[info['start']:info['content_end']].strip())
        used_tokens += cost

    return '\n\n'.join(pieces), leftover
791
+
792
+
216
793
  # Safe modules that can be imported (defined first so _restricted_import can use it)
217
794
  _SAFE_MODULES = {
218
795
  'json': __import__('json'),
@@ -305,11 +882,24 @@ _SAFE_BUILTINS = {
305
882
 
306
883
  # RLM functions
307
884
  'llm_query': llm_query,
885
+ 'llm_query_batch': llm_query_batch,
308
886
  'set_result': set_result,
887
+ 'set_result_final': set_result_final,
309
888
  'set_variable': set_variable,
310
889
  'chunk_text': chunk_text,
311
890
  'filter_lines': filter_lines,
312
891
  'count_tokens': count_tokens,
892
+ # Structure extraction (lazy loading)
893
+ 'get_structure': get_structure,
894
+ 'get_unit': get_unit,
895
+ 'list_units': list_units,
896
+ 'get_units_safe': get_units_safe,
897
+ # Directory-aware functions (for hierarchical indexing)
898
+ 'get_directory_tree': get_directory_tree,
899
+ 'get_files_in_directory': get_files_in_directory,
900
+ 'get_directory_content': get_directory_content,
901
+ 'get_all_directories': get_all_directories,
902
+ 'get_module_directories': get_module_directories,
313
903
  }
314
904
 
315
905
 
@@ -318,53 +908,59 @@ def main():
318
908
  # Read initial context
319
909
  try:
320
910
  init_line = sys.stdin.readline().strip()
321
- if init_line.startswith("__INIT__:"):
911
+
912
+ # Handle large payloads via temp file (avoids stdin buffer issues)
913
+ if init_line.startswith("__INIT_FILE__:"):
914
+ temp_file = init_line[len("__INIT_FILE__:"):]
915
+ with open(temp_file, 'r', encoding='utf-8') as f:
916
+ init_data = json.load(f)
917
+ elif init_line.startswith("__INIT__:"):
322
918
  init_data = json.loads(init_line[len("__INIT__:"):])
919
+ else:
920
+ print("__ERROR__:Invalid initialization", flush=True)
921
+ return 1
323
922
 
324
- # Load context variables
325
- for name, value in init_data.get("context", {}).items():
326
- _context_store[name] = value
923
+ # Load context variables
924
+ for name, value in init_data.get("context", {}).items():
925
+ _context_store[name] = value
327
926
 
328
- # Get the code to execute
329
- code = init_data.get("code", "")
927
+ # Get the code to execute
928
+ code = init_data.get("code", "")
330
929
 
331
- if not code:
332
- print("__ERROR__:No code provided", flush=True)
333
- return 1
930
+ if not code:
931
+ print("__ERROR__:No code provided", flush=True)
932
+ return 1
334
933
 
335
- # Create restricted execution environment
336
- exec_globals = {
337
- '__builtins__': _SAFE_BUILTINS,
338
- '__name__': '__main__',
339
- '__doc__': None,
934
+ # Create restricted execution environment
935
+ exec_globals = {
936
+ '__builtins__': _SAFE_BUILTINS,
937
+ '__name__': '__main__',
938
+ '__doc__': None,
939
+ }
940
+
941
+ # Add context variables to globals
942
+ exec_globals.update(_context_store)
943
+
944
+ # Execute the code
945
+ try:
946
+ exec(code, exec_globals)
947
+
948
+ # Check if result was set
949
+ if _result is None:
950
+ print("__WARNING__:No result was set. Call set_result() with your answer.", flush=True)
951
+
952
+ print("__DONE__", flush=True)
953
+ return 0
954
+
955
+ except Exception as e:
956
+ import traceback
957
+ tb = traceback.format_exc()
958
+ error_info = {
959
+ "error": str(e),
960
+ "type": type(e).__name__,
961
+ "traceback": tb
340
962
  }
341
-
342
- # Add context variables to globals
343
- exec_globals.update(_context_store)
344
-
345
- # Execute the code
346
- try:
347
- exec(code, exec_globals)
348
-
349
- # Check if result was set
350
- if _result is None:
351
- print("__WARNING__:No result was set. Call set_result() with your answer.", flush=True)
352
-
353
- print("__DONE__", flush=True)
354
- return 0
355
-
356
- except Exception as e:
357
- import traceback
358
- tb = traceback.format_exc()
359
- error_info = {
360
- "error": str(e),
361
- "type": type(e).__name__,
362
- "traceback": tb
363
- }
364
- print(f"__ERROR__:{json.dumps(error_info)}", flush=True)
365
- return 1
366
- else:
367
- print("__ERROR__:Invalid initialization", flush=True)
963
+ print(f"__ERROR__:{json.dumps(error_info)}", flush=True)
368
964
  return 1
369
965
 
370
966
  except Exception as e: