@cloudwarriors-ai/rlm 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/application/handlers/llm-query-handler.d.ts +67 -0
- package/dist/application/handlers/llm-query-handler.d.ts.map +1 -0
- package/dist/application/handlers/llm-query-handler.js +169 -0
- package/dist/application/handlers/llm-query-handler.js.map +1 -0
- package/dist/application/query-handler.d.ts +23 -2
- package/dist/application/query-handler.d.ts.map +1 -1
- package/dist/application/query-handler.js +215 -112
- package/dist/application/query-handler.js.map +1 -1
- package/dist/cli/index.js +0 -0
- package/dist/domain/constants.d.ts +124 -0
- package/dist/domain/constants.d.ts.map +1 -0
- package/dist/domain/constants.js +148 -0
- package/dist/domain/constants.js.map +1 -0
- package/dist/domain/errors/index.d.ts +1 -0
- package/dist/domain/errors/index.d.ts.map +1 -1
- package/dist/domain/errors/index.js +2 -0
- package/dist/domain/errors/index.js.map +1 -1
- package/dist/domain/errors/token-budget-error.d.ts +47 -0
- package/dist/domain/errors/token-budget-error.d.ts.map +1 -0
- package/dist/domain/errors/token-budget-error.js +41 -0
- package/dist/domain/errors/token-budget-error.js.map +1 -0
- package/dist/domain/interfaces/code-executor.d.ts +32 -2
- package/dist/domain/interfaces/code-executor.d.ts.map +1 -1
- package/dist/domain/interfaces/event-emitter.d.ts +55 -1
- package/dist/domain/interfaces/event-emitter.d.ts.map +1 -1
- package/dist/domain/interfaces/llm-provider.d.ts +4 -0
- package/dist/domain/interfaces/llm-provider.d.ts.map +1 -1
- package/dist/domain/services/cost-calculator.d.ts.map +1 -1
- package/dist/domain/services/cost-calculator.js +9 -4
- package/dist/domain/services/cost-calculator.js.map +1 -1
- package/dist/domain/types/config.d.ts +17 -0
- package/dist/domain/types/config.d.ts.map +1 -1
- package/dist/domain/types/config.js +41 -0
- package/dist/domain/types/config.js.map +1 -1
- package/dist/domain/types/index-schema.d.ts +206 -0
- package/dist/domain/types/index-schema.d.ts.map +1 -0
- package/dist/domain/types/index-schema.js +41 -0
- package/dist/domain/types/index-schema.js.map +1 -0
- package/dist/domain/types/index.d.ts +2 -0
- package/dist/domain/types/index.d.ts.map +1 -1
- package/dist/domain/types/index.js +4 -0
- package/dist/domain/types/index.js.map +1 -1
- package/dist/domain/utils/timer.d.ts +34 -0
- package/dist/domain/utils/timer.d.ts.map +1 -0
- package/dist/domain/utils/timer.js +39 -0
- package/dist/domain/utils/timer.js.map +1 -0
- package/dist/factory/create-rlm.d.ts.map +1 -1
- package/dist/factory/create-rlm.js +1 -0
- package/dist/factory/create-rlm.js.map +1 -1
- package/dist/infrastructure/llm/openrouter-provider.d.ts +1 -0
- package/dist/infrastructure/llm/openrouter-provider.d.ts.map +1 -1
- package/dist/infrastructure/llm/openrouter-provider.js +30 -9
- package/dist/infrastructure/llm/openrouter-provider.js.map +1 -1
- package/dist/infrastructure/llm/prompts/index.d.ts +1 -1
- package/dist/infrastructure/llm/prompts/index.d.ts.map +1 -1
- package/dist/infrastructure/llm/prompts/index.js +1 -1
- package/dist/infrastructure/llm/prompts/index.js.map +1 -1
- package/dist/infrastructure/llm/prompts/system-prompt.d.ts +14 -1
- package/dist/infrastructure/llm/prompts/system-prompt.d.ts.map +1 -1
- package/dist/infrastructure/llm/prompts/system-prompt.js +186 -52
- package/dist/infrastructure/llm/prompts/system-prompt.js.map +1 -1
- package/dist/infrastructure/logging/debug-logger.d.ts +29 -0
- package/dist/infrastructure/logging/debug-logger.d.ts.map +1 -0
- package/dist/infrastructure/logging/debug-logger.js +35 -0
- package/dist/infrastructure/logging/debug-logger.js.map +1 -0
- package/dist/infrastructure/sandbox/prelude/rlm_prelude.py +637 -41
- package/dist/infrastructure/sandbox/process-manager.d.ts +1 -0
- package/dist/infrastructure/sandbox/process-manager.d.ts.map +1 -1
- package/dist/infrastructure/sandbox/process-manager.js +19 -6
- package/dist/infrastructure/sandbox/process-manager.js.map +1 -1
- package/dist/infrastructure/sandbox/python-executor.d.ts +6 -2
- package/dist/infrastructure/sandbox/python-executor.d.ts.map +1 -1
- package/dist/infrastructure/sandbox/python-executor.js +138 -5
- package/dist/infrastructure/sandbox/python-executor.js.map +1 -1
- package/package.json +2 -1
- package/src/infrastructure/sandbox/prelude/rlm_prelude.py +637 -41
|
@@ -90,6 +90,65 @@ def llm_query(query: str, *context_vars: str) -> str:
|
|
|
90
90
|
return response.get("result", "")
|
|
91
91
|
|
|
92
92
|
|
|
93
|
+
def llm_query_batch(queries: list[tuple[str, str]]) -> list[str]:
    """
    Run several independent LLM queries in PARALLEL for significant speedup.

    Per the MIT RLM paper, sequential sub-calls are a runtime bottleneck;
    prefer this over a loop of llm_query() calls whenever the queries do
    not depend on each other's results.

    Args:
        queries: List of (query, context) tuples. Each context is either a
            literal string or the name of a previously stored variable.

    Returns:
        List of responses, in the same order as the input queries.

    Raises:
        RuntimeError: If the batch as a whole fails, or if any individual
            query reports an error.

    Example:
        # Instead of a sequential loop of llm_query() calls:
        queries = [("Analyze this", chunk) for chunk in chunks]
        results = llm_query_batch(queries)
    """
    if not queries:
        return []

    # Build the batch payload, resolving stored variable names to values.
    payload = []
    for prompt, ctx in queries:
        if ctx in _context_store:
            ctx = _context_store[ctx]
        payload.append({
            "query": prompt,
            "context": [ctx] if isinstance(ctx, str) else list(ctx),
        })

    # Hand the whole batch to the host process and block on its reply.
    _send_command("llm_query_batch", {"queries": payload})
    response = _wait_for_response()

    if "error" in response:
        raise RuntimeError(f"Batch query failed: {response['error']}")

    raw_results = response.get("results", [])

    # Surface any per-query failure before returning anything.
    collected = []
    for i, r in enumerate(raw_results):
        if isinstance(r, dict) and "error" in r:
            raise RuntimeError(f"Query {i} failed: {r['error']}")
        collected.append(r)

    return collected
|
|
150
|
+
|
|
151
|
+
|
|
93
152
|
def set_result(result: str) -> None:
|
|
94
153
|
"""
|
|
95
154
|
Set the final result of the RLM execution.
|
|
@@ -105,6 +164,30 @@ def set_result(result: str) -> None:
|
|
|
105
164
|
_send_command("set_result", {"result": _result})
|
|
106
165
|
|
|
107
166
|
|
|
167
|
+
def set_result_final(result: str, confidence: float = 1.0) -> None:
    """
    Set result AND signal that the answer is complete.

    Use this when you have found a definitive answer and no further
    processing is needed. This is more efficient than set_result()
    when you're confident the answer is complete.

    Args:
        result: The final answer (coerced to str).
        confidence: How confident (0.0-1.0) that this is complete.
            Values outside [0.0, 1.0] are clamped into range.

    Example:
        # Found the specific file requested
        set_result_final("The main entry point is src/index.ts", confidence=1.0)
    """
    global _result
    _result = str(result)
    # Keep the reported confidence inside the documented [0.0, 1.0] range
    # so downstream consumers never see an out-of-range value.
    confidence = min(1.0, max(0.0, float(confidence)))
    _send_command("set_result_final", {
        "result": _result,
        "confidence": confidence
    })
|
|
189
|
+
|
|
190
|
+
|
|
108
191
|
def set_variable(name: str, value: str) -> None:
|
|
109
192
|
"""
|
|
110
193
|
Store a variable for use in subsequent code or queries.
|
|
@@ -213,6 +296,500 @@ def count_tokens(text: str) -> int:
|
|
|
213
296
|
return len(text) // 4
|
|
214
297
|
|
|
215
298
|
|
|
299
|
+
# Cache of unit metadata keyed by unit id (includes 'content_start'/'content_end'
# offsets into the raw content). Populated by get_structure() — which clears it
# before re-extracting — and read by get_unit(), list_units(), get_units_safe()
# and the directory helpers for lazy content extraction.
_unit_index: dict[str, dict] = {}
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _extract_codebase_structure(content: str) -> dict:
    """Extract structure from codebase format (=== FILE: ... ===)."""
    import re

    marker = re.compile(r'^=== FILE: (.+?) ===$', re.MULTILINE)
    headers = list(marker.finditer(content))

    units = []
    for i, header in enumerate(headers):
        path = header.group(1)
        body_start = header.end()
        # A file's body runs until the next header (or the end of content).
        body_end = headers[i + 1].start() if i + 1 < len(headers) else len(content)
        body = content[body_start:body_end].strip()

        uid = f"file:{path}"
        info = {
            'id': uid,
            'type': 'file',
            'path': path,
            'start': header.start(),
            'end': body_end,
            'size': len(body),
            'tokens': count_tokens(body),
        }
        units.append(info)
        _unit_index[uid] = {**info, 'content_start': body_start, 'content_end': body_end}

    # Group file names by containing directory ('.' for root-level files).
    directories: dict[str, list[str]] = {}
    for info in units:
        segments = info['path'].split('/')
        parent = '/'.join(segments[:-1]) if len(segments) > 1 else '.'
        directories.setdefault(parent, []).append(segments[-1])

    return {
        'type': 'codebase',
        'total_files': len(units),
        'total_tokens': sum(u['tokens'] for u in units),
        'directories': directories,
        'units': [{'id': u['id'], 'path': u['path'], 'tokens': u['tokens']} for u in units],
    }
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _extract_markdown_structure(content: str) -> dict:
    """Extract structure from markdown (# headings)."""
    import re

    # NOTE(review): this also matches '#' lines inside fenced code blocks —
    # confirm that is acceptable for the intended inputs.
    heading = re.compile(r'^(#{1,6})\s+(.+)$', re.MULTILINE)
    found = list(heading.finditer(content))

    units = []
    for i, m in enumerate(found):
        start = m.start()
        # A section runs from its heading to the next heading (any level).
        end = found[i + 1].start() if i + 1 < len(found) else len(content)
        title = m.group(2).strip()
        section = content[start:end].strip()

        uid = f"section:{i}:{title[:50]}"
        info = {
            'id': uid,
            'type': 'section',
            'level': len(m.group(1)),
            'title': title,
            'start': start,
            'end': end,
            'size': len(section),
            'tokens': count_tokens(section),
        }
        units.append(info)
        _unit_index[uid] = {**info, 'content_start': start, 'content_end': end}

    return {
        'type': 'markdown',
        'total_sections': len(units),
        'total_tokens': sum(u['tokens'] for u in units),
        'units': [{'id': u['id'], 'level': u['level'], 'title': u['title'], 'tokens': u['tokens']} for u in units],
    }
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _extract_generic_structure(content: str) -> dict:
    """Extract structure by splitting into equal chunks."""
    chunk_size = 50000  # ~12.5K tokens per chunk at ~4 chars/token

    units = []
    for index, offset in enumerate(range(0, len(content), chunk_size)):
        piece = content[offset:offset + chunk_size]
        stop = min(offset + chunk_size, len(content))

        uid = f"chunk:{index}"
        info = {
            'id': uid,
            'type': 'chunk',
            'index': index,
            'start': offset,
            'end': stop,
            'size': len(piece),
            'tokens': count_tokens(piece),
        }
        units.append(info)
        _unit_index[uid] = {**info, 'content_start': offset, 'content_end': stop}

    return {
        'type': 'generic',
        'total_chunks': len(units),
        'total_tokens': sum(u['tokens'] for u in units),
        'units': [{'id': u['id'], 'index': u['index'], 'tokens': u['tokens']} for u in units],
    }
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def get_structure(content: str = None) -> dict:
    """
    Lazily extract a structural index from content.

    Args:
        content: Content to analyze (defaults to the 'context' variable).

    Returns:
        Structure dict with type, a per-unit summary, and metadata.

    Example:
        structure = get_structure()
        print(f"Found {structure['total_files']} files")
        for unit in structure['units']:
            print(f"  {unit['path']}: {unit['tokens']} tokens")
    """
    if content is None:
        content = _context_store.get('context', '')

    # Drop any previously extracted index before rebuilding it.
    _unit_index.clear()

    # Dispatch on the most specific format marker first.
    if '=== FILE:' in content:
        return _extract_codebase_structure(content)
    if content.lstrip().startswith('#'):
        return _extract_markdown_structure(content)
    return _extract_generic_structure(content)
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def get_unit(unit_id: str, content: str = None) -> str:
    """
    Fetch the content of a single unit by its ID.

    Args:
        unit_id: A unit ID as returned by get_structure().
        content: Content to extract from (defaults to the 'context' variable).

    Returns:
        The stripped content of the requested unit.

    Raises:
        ValueError: If the unit ID is not present in the index.

    Example:
        structure = get_structure()
        file_content = get_unit(structure['units'][0]['id'])
    """
    if content is None:
        content = _context_store.get('context', '')

    # Build the index lazily on first use.
    if not _unit_index:
        get_structure(content)

    if unit_id not in _unit_index:
        raise ValueError(f"Unit not found: {unit_id}. Call get_structure() first.")

    entry = _unit_index[unit_id]
    return content[entry['content_start']:entry['content_end']].strip()
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def list_units(pattern: str = None, content: str = None) -> list[dict]:
    """
    List indexed units, optionally filtered by a substring pattern.

    Args:
        pattern: Optional substring matched against each unit's id or path.
        content: Content to analyze (defaults to the 'context' variable).

    Returns:
        List of dicts with id, type, tokens, path, and title.

    Example:
        # List all Python files
        py_files = list_units('.py')
        for f in py_files:
            print(f"{f['id']}: {f['tokens']} tokens")
    """
    if content is None:
        content = _context_store.get('context', '')

    # Build the index lazily on first use.
    if not _unit_index:
        get_structure(content)

    def _matches(uid: str, info: dict) -> bool:
        # No pattern means "include everything".
        return pattern is None or pattern in uid or pattern in info.get('path', '')

    return [
        {
            'id': uid,
            'type': info['type'],
            'tokens': info['tokens'],
            'path': info.get('path'),
            'title': info.get('title'),
        }
        for uid, info in _unit_index.items()
        if _matches(uid, info)
    ]
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def get_directory_tree(content: str = None) -> dict:
    """
    Build a hierarchical directory tree from codebase content.

    Parses '=== FILE: path ===' markers and nests files under their
    directories. Files at the repository root land under the synthetic
    '_root' key.

    Args:
        content: Content to analyze (defaults to the 'context' variable).

    Returns:
        Nested dict, e.g.:
        {
            'module1': {
                'files': ['main.py', 'utils.py'],
                'dirs': {'subdir1': {'files': [...], 'dirs': {...}}},
            },
            ...
        }

    Example:
        tree = get_directory_tree()
        for module, data in tree.items():
            print(f"Module: {module}")
            print(f"  Files: {data['files']}")
            for subdir in data['dirs']:
                print(f"  Subdir: {subdir}")
    """
    import re

    if content is None:
        content = _context_store.get('context', '')

    marker = re.compile(r'^=== FILE: (.+?) ===$', re.MULTILINE)
    tree: dict = {}

    for match in marker.finditer(content):
        segments = match.group(1).split('/')

        if len(segments) == 1:
            # Root-level file: keep it under the synthetic '_root' node.
            node = tree.setdefault('_root', {'files': [], 'dirs': {}})
            node['files'].append(segments[0])
            continue

        # Walk (and create) the directory chain, then attach the file name.
        node = tree.setdefault(segments[0], {'files': [], 'dirs': {}})
        for segment in segments[1:-1]:
            node.setdefault('files', [])
            node = node.setdefault('dirs', {}).setdefault(
                segment, {'files': [], 'dirs': {}})
        node.setdefault('files', []).append(segments[-1])
        node.setdefault('dirs', {})

    return tree
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def get_files_in_directory(directory: str, content: str = None) -> list[dict]:
    """
    List the files directly inside a directory, with metadata.

    Args:
        directory: Directory path to list (e.g., "rapture/elastic").
        content: Content to analyze (defaults to the 'context' variable).

    Returns:
        List of file metadata dicts sorted by file name:
        [{'id': 'file:path', 'path': 'path', 'name': 'file.py', 'tokens': 123, 'size': 456}, ...]

    Example:
        files = get_files_in_directory("rapture/api")
        for f in files:
            print(f"{f['name']}: {f['tokens']} tokens")
    """
    if content is None:
        content = _context_store.get('context', '')

    # Build the index lazily on first use.
    if not _unit_index:
        get_structure(content)

    # Normalize the requested path (drop any trailing slash).
    directory = directory.rstrip('/')

    results = []
    for uid, info in _unit_index.items():
        if info['type'] != 'file':
            continue

        path = info.get('path', '')
        parent, _, name = path.rpartition('/')

        # Only direct children count — subdirectories are excluded.
        if parent == directory:
            results.append({
                'id': uid,
                'path': path,
                'name': name,
                'tokens': info.get('tokens', 0),
                'size': info.get('size', 0),
            })

    results.sort(key=lambda entry: entry['name'])
    return results
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
def get_directory_content(directory: str, max_tokens: int = 100000, content: str = None) -> tuple[str, list[str]]:
    """
    Get combined content of files in a directory up to token limit.

    This is useful for feeding a directory's contents to llm_query.

    Args:
        directory: Directory path to get content from
        max_tokens: Maximum total tokens to fetch (default: 100000)
        content: Content to extract from (defaults to 'context' variable)

    Returns:
        Tuple of (combined_content, unfetched_file_ids)

    Example:
        content, remaining = get_directory_content("rapture/elastic", max_tokens=50000)
        analysis = llm_query("Analyze this directory", content)
        if remaining:
            # Fetch the files that did not fit in the first batch
            content2, remaining = get_units_safe(remaining, max_tokens=50000)
            # ... handle remaining files
    """
    if content is None:
        content = _context_store.get('context', '')

    # Resolve the directory to its file unit IDs, then delegate the
    # token-budgeted fetch to get_units_safe().
    files = get_files_in_directory(directory, content)
    file_ids = [f['id'] for f in files]

    return get_units_safe(file_ids, max_tokens=max_tokens, content=content)
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def get_all_directories(content: str = None) -> list[str]:
    """
    Collect every directory path present in the codebase, flat and sorted.

    Args:
        content: Content to analyze (defaults to the 'context' variable).

    Returns:
        Sorted list of directory paths
        (e.g., ["rapture", "rapture/api", "rapture/elastic", ...]).

    Example:
        dirs = get_all_directories()
        for d in dirs:
            print(f"Directory: {d}")
            files = get_files_in_directory(d)
            print(f"  Files: {len(files)}")
    """
    if content is None:
        content = _context_store.get('context', '')

    # Build the index lazily on first use.
    if not _unit_index:
        get_structure(content)

    seen: set = set()
    for info in _unit_index.values():
        if info['type'] != 'file':
            continue

        segments = info.get('path', '').split('/')
        # Record every ancestor directory of this file.
        for depth in range(1, len(segments)):
            seen.add('/'.join(segments[:depth]))

    return sorted(seen)
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def get_module_directories(module: str, content: str = None) -> list[str]:
    """
    List every directory that belongs to one top-level module.

    Args:
        module: Module name (e.g., "rapture", "rapture-frontend").
        content: Content to analyze (defaults to the 'context' variable).

    Returns:
        List of directory paths within the module.

    Example:
        dirs = get_module_directories("rapture")
        # Returns: ["rapture", "rapture/api", "rapture/elastic", ...]
    """
    # The module itself plus anything strictly under "<module>/".
    prefix = module + '/'
    return [d for d in get_all_directories(content)
            if d == module or d.startswith(prefix)]
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
def get_units_safe(unit_ids: list[str], max_tokens: int = 50000, content: str = None) -> tuple[str, list[str]]:
    """
    Fetch units greedily until a token budget is exhausted.

    This is the recommended way to fetch multiple units while staying within
    the llm_query() capacity limit. Units that do not fit (or are unknown)
    are returned so the caller can fetch them in a later pass.

    NOTE(review): a single unit whose token count alone exceeds max_tokens
    will always come back in the unfetched list — callers looping on the
    remainder should guard against that; confirm this is intended.

    Args:
        unit_ids: List of unit IDs to fetch.
        max_tokens: Maximum total tokens to fetch (default: 50000 = ~200K chars).
        content: Content to extract from (defaults to the 'context' variable).

    Returns:
        Tuple of (combined_content, unfetched_unit_ids).

    Example:
        structure = get_structure()
        all_ids = [u['id'] for u in structure['units']]

        # Fetch in batches that fit in llm_query()
        fetched, remaining = get_units_safe(all_ids, max_tokens=100000)
        result1 = llm_query("Analyze these files", fetched)

        if remaining:
            fetched2, remaining2 = get_units_safe(remaining, max_tokens=100000)
            result2 = llm_query("Analyze these files", fetched2)
    """
    if content is None:
        content = _context_store.get('context', '')

    # Build the index lazily on first use.
    if not _unit_index:
        get_structure(content)

    pieces = []
    spent = 0
    skipped = []

    for uid in unit_ids:
        info = _unit_index.get(uid)
        if info is None:
            # Unknown IDs are passed back rather than raising.
            skipped.append(uid)
            continue

        cost = info['tokens']
        if spent + cost > max_tokens:
            skipped.append(uid)
            continue

        # Slice from the unit's header marker so get_structure() still
        # works when run on the combined result.
        pieces.append(content[info['start']:info['content_end']].strip())
        spent += cost

    return '\n\n'.join(pieces), skipped
|
|
791
|
+
|
|
792
|
+
|
|
216
793
|
# Safe modules that can be imported (defined first so _restricted_import can use it)
|
|
217
794
|
_SAFE_MODULES = {
|
|
218
795
|
'json': __import__('json'),
|
|
@@ -305,11 +882,24 @@ _SAFE_BUILTINS = {
|
|
|
305
882
|
|
|
306
883
|
# RLM functions
|
|
307
884
|
'llm_query': llm_query,
|
|
885
|
+
'llm_query_batch': llm_query_batch,
|
|
308
886
|
'set_result': set_result,
|
|
887
|
+
'set_result_final': set_result_final,
|
|
309
888
|
'set_variable': set_variable,
|
|
310
889
|
'chunk_text': chunk_text,
|
|
311
890
|
'filter_lines': filter_lines,
|
|
312
891
|
'count_tokens': count_tokens,
|
|
892
|
+
# Structure extraction (lazy loading)
|
|
893
|
+
'get_structure': get_structure,
|
|
894
|
+
'get_unit': get_unit,
|
|
895
|
+
'list_units': list_units,
|
|
896
|
+
'get_units_safe': get_units_safe,
|
|
897
|
+
# Directory-aware functions (for hierarchical indexing)
|
|
898
|
+
'get_directory_tree': get_directory_tree,
|
|
899
|
+
'get_files_in_directory': get_files_in_directory,
|
|
900
|
+
'get_directory_content': get_directory_content,
|
|
901
|
+
'get_all_directories': get_all_directories,
|
|
902
|
+
'get_module_directories': get_module_directories,
|
|
313
903
|
}
|
|
314
904
|
|
|
315
905
|
|
|
@@ -318,53 +908,59 @@ def main():
|
|
|
318
908
|
# Read initial context
|
|
319
909
|
try:
|
|
320
910
|
init_line = sys.stdin.readline().strip()
|
|
321
|
-
|
|
911
|
+
|
|
912
|
+
# Handle large payloads via temp file (avoids stdin buffer issues)
|
|
913
|
+
if init_line.startswith("__INIT_FILE__:"):
|
|
914
|
+
temp_file = init_line[len("__INIT_FILE__:"):]
|
|
915
|
+
with open(temp_file, 'r', encoding='utf-8') as f:
|
|
916
|
+
init_data = json.load(f)
|
|
917
|
+
elif init_line.startswith("__INIT__:"):
|
|
322
918
|
init_data = json.loads(init_line[len("__INIT__:"):])
|
|
919
|
+
else:
|
|
920
|
+
print("__ERROR__:Invalid initialization", flush=True)
|
|
921
|
+
return 1
|
|
323
922
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
923
|
+
# Load context variables
|
|
924
|
+
for name, value in init_data.get("context", {}).items():
|
|
925
|
+
_context_store[name] = value
|
|
327
926
|
|
|
328
|
-
|
|
329
|
-
|
|
927
|
+
# Get the code to execute
|
|
928
|
+
code = init_data.get("code", "")
|
|
330
929
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
930
|
+
if not code:
|
|
931
|
+
print("__ERROR__:No code provided", flush=True)
|
|
932
|
+
return 1
|
|
334
933
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
934
|
+
# Create restricted execution environment
|
|
935
|
+
exec_globals = {
|
|
936
|
+
'__builtins__': _SAFE_BUILTINS,
|
|
937
|
+
'__name__': '__main__',
|
|
938
|
+
'__doc__': None,
|
|
939
|
+
}
|
|
940
|
+
|
|
941
|
+
# Add context variables to globals
|
|
942
|
+
exec_globals.update(_context_store)
|
|
943
|
+
|
|
944
|
+
# Execute the code
|
|
945
|
+
try:
|
|
946
|
+
exec(code, exec_globals)
|
|
947
|
+
|
|
948
|
+
# Check if result was set
|
|
949
|
+
if _result is None:
|
|
950
|
+
print("__WARNING__:No result was set. Call set_result() with your answer.", flush=True)
|
|
951
|
+
|
|
952
|
+
print("__DONE__", flush=True)
|
|
953
|
+
return 0
|
|
954
|
+
|
|
955
|
+
except Exception as e:
|
|
956
|
+
import traceback
|
|
957
|
+
tb = traceback.format_exc()
|
|
958
|
+
error_info = {
|
|
959
|
+
"error": str(e),
|
|
960
|
+
"type": type(e).__name__,
|
|
961
|
+
"traceback": tb
|
|
340
962
|
}
|
|
341
|
-
|
|
342
|
-
# Add context variables to globals
|
|
343
|
-
exec_globals.update(_context_store)
|
|
344
|
-
|
|
345
|
-
# Execute the code
|
|
346
|
-
try:
|
|
347
|
-
exec(code, exec_globals)
|
|
348
|
-
|
|
349
|
-
# Check if result was set
|
|
350
|
-
if _result is None:
|
|
351
|
-
print("__WARNING__:No result was set. Call set_result() with your answer.", flush=True)
|
|
352
|
-
|
|
353
|
-
print("__DONE__", flush=True)
|
|
354
|
-
return 0
|
|
355
|
-
|
|
356
|
-
except Exception as e:
|
|
357
|
-
import traceback
|
|
358
|
-
tb = traceback.format_exc()
|
|
359
|
-
error_info = {
|
|
360
|
-
"error": str(e),
|
|
361
|
-
"type": type(e).__name__,
|
|
362
|
-
"traceback": tb
|
|
363
|
-
}
|
|
364
|
-
print(f"__ERROR__:{json.dumps(error_info)}", flush=True)
|
|
365
|
-
return 1
|
|
366
|
-
else:
|
|
367
|
-
print("__ERROR__:Invalid initialization", flush=True)
|
|
963
|
+
print(f"__ERROR__:{json.dumps(error_info)}", flush=True)
|
|
368
964
|
return 1
|
|
369
965
|
|
|
370
966
|
except Exception as e:
|