groknroll-2.0.0-py3-none-any.whl

This diff shows the content of publicly available package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. groknroll/__init__.py +36 -0
  2. groknroll/__main__.py +9 -0
  3. groknroll/agents/__init__.py +18 -0
  4. groknroll/agents/agent_manager.py +187 -0
  5. groknroll/agents/base_agent.py +118 -0
  6. groknroll/agents/build_agent.py +231 -0
  7. groknroll/agents/plan_agent.py +215 -0
  8. groknroll/cli/__init__.py +7 -0
  9. groknroll/cli/enhanced_cli.py +372 -0
  10. groknroll/cli/large_codebase_cli.py +413 -0
  11. groknroll/cli/main.py +331 -0
  12. groknroll/cli/rlm_commands.py +258 -0
  13. groknroll/clients/__init__.py +63 -0
  14. groknroll/clients/anthropic.py +112 -0
  15. groknroll/clients/azure_openai.py +142 -0
  16. groknroll/clients/base_lm.py +33 -0
  17. groknroll/clients/gemini.py +162 -0
  18. groknroll/clients/litellm.py +105 -0
  19. groknroll/clients/openai.py +129 -0
  20. groknroll/clients/portkey.py +94 -0
  21. groknroll/core/__init__.py +9 -0
  22. groknroll/core/agent.py +339 -0
  23. groknroll/core/comms_utils.py +264 -0
  24. groknroll/core/context.py +251 -0
  25. groknroll/core/exceptions.py +181 -0
  26. groknroll/core/large_codebase.py +564 -0
  27. groknroll/core/lm_handler.py +206 -0
  28. groknroll/core/rlm.py +446 -0
  29. groknroll/core/rlm_codebase.py +448 -0
  30. groknroll/core/rlm_integration.py +256 -0
  31. groknroll/core/types.py +276 -0
  32. groknroll/environments/__init__.py +34 -0
  33. groknroll/environments/base_env.py +182 -0
  34. groknroll/environments/constants.py +32 -0
  35. groknroll/environments/docker_repl.py +336 -0
  36. groknroll/environments/local_repl.py +388 -0
  37. groknroll/environments/modal_repl.py +502 -0
  38. groknroll/environments/prime_repl.py +588 -0
  39. groknroll/logger/__init__.py +4 -0
  40. groknroll/logger/rlm_logger.py +63 -0
  41. groknroll/logger/verbose.py +393 -0
  42. groknroll/operations/__init__.py +15 -0
  43. groknroll/operations/bash_ops.py +447 -0
  44. groknroll/operations/file_ops.py +473 -0
  45. groknroll/operations/git_ops.py +620 -0
  46. groknroll/oracle/__init__.py +11 -0
  47. groknroll/oracle/codebase_indexer.py +238 -0
  48. groknroll/oracle/oracle_agent.py +278 -0
  49. groknroll/setup.py +34 -0
  50. groknroll/storage/__init__.py +14 -0
  51. groknroll/storage/database.py +272 -0
  52. groknroll/storage/models.py +128 -0
  53. groknroll/utils/__init__.py +0 -0
  54. groknroll/utils/parsing.py +168 -0
  55. groknroll/utils/prompts.py +146 -0
  56. groknroll/utils/rlm_utils.py +19 -0
  57. groknroll-2.0.0.dist-info/METADATA +246 -0
  58. groknroll-2.0.0.dist-info/RECORD +62 -0
  59. groknroll-2.0.0.dist-info/WHEEL +5 -0
  60. groknroll-2.0.0.dist-info/entry_points.txt +3 -0
  61. groknroll-2.0.0.dist-info/licenses/LICENSE +21 -0
  62. groknroll-2.0.0.dist-info/top_level.txt +1 -0
groknroll/core/large_codebase.py
@@ -0,0 +1,564 @@
+"""
+Large Codebase Handler
+
+Specialized tools for working with massive codebases without context rot.
+Uses RLM's unlimited context + intelligent chunking + hierarchical indexing.
+"""
+
+from pathlib import Path
+from typing import Dict, List, Any, Optional, Set
+from dataclasses import dataclass
+import hashlib
+import json
+import os
+
+
+@dataclass
+class CodeChunk:
+    """Represents a chunk of the codebase"""
+    id: str
+    path: Path
+    language: str
+    size: int  # lines of code
+    dependencies: List[str]  # Other chunk IDs this depends on
+    summary: Optional[str] = None
+    last_analyzed: Optional[float] = None
+    hash: Optional[str] = None
+
+
+@dataclass
+class CodebaseMap:
+    """Hierarchical map of the codebase"""
+    root: Path
+    chunks: Dict[str, CodeChunk]
+    modules: Dict[str, List[str]]  # module name -> chunk IDs
+    dependency_graph: Dict[str, Set[str]]  # chunk ID -> dependencies
+    total_files: int
+    total_lines: int
+
+
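The two dataclasses above are the index's data model: chunks are keyed by ID, grouped by module (directory), and linked through the dependency graph. A minimal sketch with illustrative values (the path and ID are hypothetical, not output of the package):

```python
from pathlib import Path

# Hypothetical chunk for one source file; the ID format mirrors
# _generate_chunk_id below (md5 of the project-relative path, 16 hex chars).
chunk = CodeChunk(
    id="3a7bd3e2360a3d29",
    path=Path("/repo/src/app.py"),
    language="python",
    size=120,                 # lines of code
    dependencies=[],          # filled in by _build_dependency_graph
)

cmap = CodebaseMap(
    root=Path("/repo"),
    chunks={chunk.id: chunk},
    modules={"src": [chunk.id]},   # module (directory) -> chunk IDs
    dependency_graph={},
    total_files=1,
    total_lines=120,
)
```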
+class LargeCodebaseHandler:
+    """
+    Handle massive codebases without context rot
+
+    Strategy:
+    1. Hierarchical chunking (directory-based)
+    2. Dependency tracking
+    3. Incremental analysis (only changed files)
+    4. Smart context selection (only relevant chunks)
+    5. RLM handles unlimited context when needed
+    """
+
+    def __init__(self, project_path: Path, db):
+        """
+        Initialize large codebase handler
+
+        Args:
+            project_path: Project root
+            db: Database instance
+        """
+        self.project_path = project_path.resolve()
+        self.db = db
+        # Derive the cache dir from the resolved path so relative project
+        # paths behave consistently.
+        self.cache_dir = self.project_path / ".groknroll" / "cache"
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+        # Load or create codebase map
+        self.map_file = self.cache_dir / "codebase_map.json"
+        self.codebase_map = self._load_or_create_map()
+
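A minimal end-to-end usage sketch. Since `__init__` only stores `db`, a placeholder suffices when you just want indexing (an assumption for illustration; a real caller would pass the package's Database instance):

```python
from pathlib import Path

handler = LargeCodebaseHandler(Path("."), db=None)  # db unused for pure indexing

cmap = handler.chunk_codebase()
print(f"Indexed {cmap.total_files} files, {cmap.total_lines} lines")

overview = handler.get_codebase_overview()
for mod in overview["top_modules"]:
    print(mod["module"], mod["lines"])
```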
+    def chunk_codebase(self, max_chunk_size: int = 1000) -> CodebaseMap:
+        """
+        Chunk codebase into manageable pieces
+
+        Args:
+            max_chunk_size: Max lines per chunk (currently unused; each file
+                becomes a single chunk regardless of size)
+
+        Returns:
+            CodebaseMap
+        """
+        chunks = {}
+        modules = {}
+        total_files = 0
+        total_lines = 0
+
+        # Walk directory tree
+        for root, dirs, files in os.walk(self.project_path):
+            # Skip common ignore directories (in-place, so os.walk prunes them)
+            dirs[:] = [d for d in dirs if d not in {
+                '.git', 'node_modules', '.venv', 'venv', '__pycache__',
+                'build', 'dist', '.next', 'target', '.groknroll'
+            }]
+
+            root_path = Path(root)
+            module_name = root_path.relative_to(self.project_path).as_posix()
+
+            for file in files:
+                if self._should_index(file):
+                    file_path = root_path / file
+
+                    try:
+                        lines = len(file_path.read_text().splitlines())
+                        total_lines += lines
+                        total_files += 1
+
+                        # Create chunk
+                        chunk_id = self._generate_chunk_id(file_path)
+                        chunk = CodeChunk(
+                            id=chunk_id,
+                            path=file_path,
+                            language=self._detect_language(file_path.suffix),
+                            size=lines,
+                            dependencies=[],  # Populated by _build_dependency_graph
+                            hash=self._file_hash(file_path)
+                        )
+
+                        chunks[chunk_id] = chunk
+
+                        # Add to module
+                        if module_name not in modules:
+                            modules[module_name] = []
+                        modules[module_name].append(chunk_id)
+
+                    except Exception:
+                        # Skip files we can't read (binary, bad encoding, permissions)
+                        continue
+
+        # Build dependency graph
+        dependency_graph = self._build_dependency_graph(chunks)
+
+        codebase_map = CodebaseMap(
+            root=self.project_path,
+            chunks=chunks,
+            modules=modules,
+            dependency_graph=dependency_graph,
+            total_files=total_files,
+            total_lines=total_lines
+        )
+
+        # Save map
+        self._save_map(codebase_map)
+
+        return codebase_map
+
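The `dirs[:] = [...]` slice assignment in `chunk_codebase` is load-bearing: `os.walk` consults the same list object to decide which subdirectories to descend into, so mutating it in place prunes the walk, while rebinding (`dirs = [...]`) would not. A standalone demonstration:

```python
import os

SKIP = {'.git', 'node_modules', '__pycache__'}

for root, dirs, files in os.walk("."):
    dirs[:] = [d for d in dirs if d not in SKIP]  # in place: skipped dirs are never visited
    print(root, len(files))
```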
+    def get_relevant_context(
+        self,
+        query: str,
+        max_chunks: int = 50,
+        include_dependencies: bool = True
+    ) -> List[CodeChunk]:
+        """
+        Get relevant code chunks for a query (smart context selection)
+
+        Args:
+            query: Search query or task description
+            max_chunks: Maximum chunks to return
+            include_dependencies: Include dependent chunks
+
+        Returns:
+            List of relevant CodeChunk objects
+        """
+        # 1. Keyword-based search
+        keywords = self._extract_keywords(query)
+        scored_chunks = []
+
+        for chunk_id, chunk in self.codebase_map.chunks.items():
+            score = 0
+
+            # Score based on path matching
+            path_str = str(chunk.path).lower()
+            for keyword in keywords:
+                if keyword in path_str:
+                    score += 10
+
+            # Score based on module relevance
+            module = str(chunk.path.parent.relative_to(self.project_path))
+            for keyword in keywords:
+                if keyword in module.lower():
+                    score += 5
+
+            if score > 0:
+                scored_chunks.append((score, chunk))
+
+        # Sort by score (highest first)
+        scored_chunks.sort(reverse=True, key=lambda x: x[0])
+
+        # Take top chunks
+        relevant_chunks = [chunk for _, chunk in scored_chunks[:max_chunks]]
+
+        # 2. Include dependencies if requested. CodeChunk is a mutable
+        # dataclass (unhashable), so track dependency IDs in the set rather
+        # than chunk objects; this also avoids re-adding chunks already selected.
+        if include_dependencies:
+            selected_ids = {chunk.id for chunk in relevant_chunks}
+            dep_ids = set()
+            for chunk in relevant_chunks:
+                for dep_id in self.codebase_map.dependency_graph.get(chunk.id, ()):
+                    if dep_id in self.codebase_map.chunks and dep_id not in selected_ids:
+                        dep_ids.add(dep_id)
+
+            relevant_chunks.extend(
+                self.codebase_map.chunks[dep_id]
+                for dep_id in list(dep_ids)[:max_chunks // 2]
+            )
+
+        return relevant_chunks[:max_chunks]
+
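Selection is purely lexical: keywords from the query are matched against file paths (+10 each) and module directories (+5 each), then dependencies of the top hits are pulled in. For example (using the `handler` from the earlier sketch):

```python
chunks = handler.get_relevant_context(
    "fix token refresh in the auth client",
    max_chunks=20,
)
for chunk in chunks:
    print(chunk.path, f"({chunk.language}, {chunk.size} lines)")
```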
+    def get_module_summary(self, module_name: str) -> Dict[str, Any]:
+        """
+        Get summary of a module (directory)
+
+        Args:
+            module_name: Module path (e.g., "src/core")
+
+        Returns:
+            Module summary
+        """
+        if module_name not in self.codebase_map.modules:
+            return {
+                "error": f"Module not found: {module_name}",
+                "available_modules": list(self.codebase_map.modules.keys())[:20]
+            }
+
+        chunk_ids = self.codebase_map.modules[module_name]
+        chunks = [self.codebase_map.chunks[cid] for cid in chunk_ids]
+
+        # Aggregate statistics
+        total_lines = sum(c.size for c in chunks)
+        languages = {}
+
+        for chunk in chunks:
+            lang = chunk.language
+            if lang not in languages:
+                languages[lang] = {"files": 0, "lines": 0}
+            languages[lang]["files"] += 1
+            languages[lang]["lines"] += chunk.size
+
+        return {
+            "module": module_name,
+            "files": len(chunks),
+            "total_lines": total_lines,
+            "languages": languages,
+            "chunks": [
+                {
+                    "file": chunk.path.name,
+                    "language": chunk.language,
+                    "lines": chunk.size
+                }
+                for chunk in chunks
+            ]
+        }
+
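The returned shape, with illustrative values for a hypothetical `src/core` module:

```python
summary = handler.get_module_summary("src/core")
# {
#     "module": "src/core",
#     "files": 2,
#     "total_lines": 310,
#     "languages": {"python": {"files": 2, "lines": 310}},
#     "chunks": [
#         {"file": "agent.py", "language": "python", "lines": 190},
#         {"file": "types.py", "language": "python", "lines": 120},
#     ],
# }
```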
+    def get_changed_chunks(self, since_last_analysis: bool = True) -> List[CodeChunk]:
+        """
+        Get chunks that have changed (incremental analysis)
+
+        Args:
+            since_last_analysis: Only get chunks changed since last analysis
+                (currently the only supported mode; the flag is not consulted)
+
+        Returns:
+            List of changed chunks
+        """
+        changed = []
+
+        for chunk_id, chunk in self.codebase_map.chunks.items():
+            current_hash = self._file_hash(chunk.path)
+
+            if chunk.hash != current_hash:
+                # File has changed
+                changed.append(chunk)
+
+                # Update hash
+                chunk.hash = current_hash
+
+        # Save updated map
+        if changed:
+            self._save_map(self.codebase_map)
+
+        return changed
+
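Change detection compares the stored MD5 hashes against the files on disk, so an incremental re-analysis loop reduces to:

```python
changed = handler.get_changed_chunks()
if changed:
    print(f"{len(changed)} files changed since last analysis:")
    for chunk in changed:
        print(" ", chunk.path)
    # re-run analysis only for these chunks
```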
+    def get_codebase_overview(self) -> Dict[str, Any]:
+        """
+        Get high-level overview of the entire codebase
+
+        Returns:
+            Codebase overview
+        """
+        # Language statistics
+        languages = {}
+        for chunk in self.codebase_map.chunks.values():
+            lang = chunk.language
+            if lang not in languages:
+                languages[lang] = {"files": 0, "lines": 0}
+            languages[lang]["files"] += 1
+            languages[lang]["lines"] += chunk.size
+
+        # Top modules by size
+        module_sizes = {}
+        for module_name, chunk_ids in self.codebase_map.modules.items():
+            total_lines = sum(
+                self.codebase_map.chunks[cid].size
+                for cid in chunk_ids
+                if cid in self.codebase_map.chunks
+            )
+            module_sizes[module_name] = total_lines
+
+        top_modules = sorted(
+            module_sizes.items(),
+            key=lambda x: x[1],
+            reverse=True
+        )[:10]
+
+        return {
+            "total_files": self.codebase_map.total_files,
+            "total_lines": self.codebase_map.total_lines,
+            "total_chunks": len(self.codebase_map.chunks),
+            "total_modules": len(self.codebase_map.modules),
+            "languages": languages,
+            "top_modules": [
+                {"module": name, "lines": lines}
+                for name, lines in top_modules
+            ]
+        }
+
+    def navigate_to_definition(self, symbol: str) -> List[Dict[str, Any]]:
+        """
+        Find definition of a symbol across the codebase
+
+        Args:
+            symbol: Symbol to find (function, class, variable)
+
+        Returns:
+            List of potential definitions
+        """
+        results = []
+
+        # Search patterns for different languages. These are plain substring
+        # checks, so they can over-match (e.g. "foo = " also matches "my_foo = ").
+        patterns = {
+            "python": [
+                f"def {symbol}(",
+                f"class {symbol}:",
+                f"class {symbol}(",
+                f"{symbol} = "
+            ],
+            "javascript": [
+                f"function {symbol}(",
+                f"const {symbol} =",
+                f"class {symbol}",
+            ],
+            "typescript": [
+                f"function {symbol}(",
+                f"const {symbol}:",
+                f"class {symbol}",
+                f"interface {symbol}",
+            ]
+        }
+
+        for chunk in self.codebase_map.chunks.values():
+            try:
+                content = chunk.path.read_text()
+                lines = content.splitlines()
+
+                # Get patterns for this language
+                lang_patterns = patterns.get(chunk.language, [])
+
+                for pattern in lang_patterns:
+                    if pattern in content:
+                        # Find line numbers
+                        for i, line in enumerate(lines, 1):
+                            if pattern in line:
+                                results.append({
+                                    "file": str(chunk.path.relative_to(self.project_path)),
+                                    "line": i,
+                                    "snippet": line.strip(),
+                                    "language": chunk.language
+                                })
+            except Exception:
+                continue
+
+        return results
+
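Because these are substring checks, they also hit matches inside strings and comments. A word-boundary regex is a natural tightening; a hedged sketch of that alternative for Python sources (not what the package ships):

```python
import re

def find_python_defs(content: str, symbol: str):
    """Yield (line_no, snippet) for def/class/assignment definitions of symbol."""
    sym = re.escape(symbol)
    pattern = re.compile(
        rf"^\s*(?:def\s+{sym}\s*\(|class\s+{sym}\s*[(:]|{sym}\s*=)"
    )
    for i, line in enumerate(content.splitlines(), 1):
        if pattern.match(line):
            yield i, line.strip()
```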
+    # =========================================================================
+    # Helper Methods
+    # =========================================================================
+
+    def _load_or_create_map(self) -> CodebaseMap:
+        """Load existing map or create new one"""
+        if self.map_file.exists():
+            try:
+                data = json.loads(self.map_file.read_text())
+
+                # Reconstruct CodebaseMap
+                chunks = {
+                    cid: CodeChunk(
+                        id=c["id"],
+                        path=Path(c["path"]),
+                        language=c["language"],
+                        size=c["size"],
+                        dependencies=c["dependencies"],
+                        summary=c.get("summary"),
+                        last_analyzed=c.get("last_analyzed"),
+                        hash=c.get("hash")
+                    )
+                    for cid, c in data["chunks"].items()
+                }
+
+                return CodebaseMap(
+                    root=Path(data["root"]),
+                    chunks=chunks,
+                    modules=data["modules"],
+                    dependency_graph={
+                        k: set(v) for k, v in data["dependency_graph"].items()
+                    },
+                    total_files=data["total_files"],
+                    total_lines=data["total_lines"]
+                )
+            except Exception:
+                # Corrupt or stale cache: fall through and rebuild from scratch
+                pass
+
+        # Create new map
+        return self.chunk_codebase()
+
+    def _save_map(self, codebase_map: CodebaseMap) -> None:
+        """Save codebase map to disk"""
+        data = {
+            "root": str(codebase_map.root),
+            "chunks": {
+                cid: {
+                    "id": c.id,
+                    "path": str(c.path),
+                    "language": c.language,
+                    "size": c.size,
+                    "dependencies": c.dependencies,
+                    "summary": c.summary,
+                    "last_analyzed": c.last_analyzed,
+                    "hash": c.hash
+                }
+                for cid, c in codebase_map.chunks.items()
+            },
+            "modules": codebase_map.modules,
+            "dependency_graph": {
+                k: list(v) for k, v in codebase_map.dependency_graph.items()
+            },
+            "total_files": codebase_map.total_files,
+            "total_lines": codebase_map.total_lines
+        }
+
+        self.map_file.write_text(json.dumps(data, indent=2))
+
+    def _generate_chunk_id(self, file_path: Path) -> str:
+        """Generate unique chunk ID (md5 of the project-relative path, truncated)"""
+        relative_path = file_path.relative_to(self.project_path)
+        return hashlib.md5(str(relative_path).encode()).hexdigest()[:16]
+
+    def _file_hash(self, file_path: Path) -> str:
+        """Calculate file hash for change detection (not for security)"""
+        try:
+            content = file_path.read_bytes()
+            return hashlib.md5(content).hexdigest()
+        except Exception:
+            return ""
+
+    def _should_index(self, filename: str) -> bool:
+        """Check if file should be indexed"""
+        # Skip common non-code files
+        skip_extensions = {
+            '.pyc', '.pyo', '.so', '.dylib', '.dll',
+            '.jpg', '.jpeg', '.png', '.gif', '.svg',
+            '.pdf', '.zip', '.tar', '.gz',
+            '.lock', '.log', '.tmp'
+        }
+
+        ext = Path(filename).suffix.lower()
+        return ext not in skip_extensions
+
+    def _detect_language(self, extension: str) -> str:
+        """Detect language from file extension"""
+        lang_map = {
+            '.py': 'python',
+            '.js': 'javascript',
+            '.ts': 'typescript',
+            '.jsx': 'javascript',
+            '.tsx': 'typescript',
+            '.go': 'go',
+            '.rs': 'rust',
+            '.java': 'java',
+            '.cpp': 'cpp',
+            '.c': 'c',
+            '.rb': 'ruby',
+            '.php': 'php',
+            '.md': 'markdown',
+            '.json': 'json',
+            '.yaml': 'yaml',
+            '.yml': 'yaml',
+        }
+        return lang_map.get(extension.lower(), 'unknown')
+
+    def _extract_keywords(self, query: str) -> List[str]:
+        """Extract keywords from query"""
+        # Simple keyword extraction (can be enhanced)
+        words = query.lower().split()
+
+        # Filter out common words
+        stop_words = {'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'and', 'or'}
+        keywords = [w for w in words if w not in stop_words and len(w) > 2]
+
+        return keywords
+
+    def _build_dependency_graph(self, chunks: Dict[str, CodeChunk]) -> Dict[str, Set[str]]:
+        """Build dependency graph between chunks"""
+        graph = {}
+
+        # For Python files, look for imports. Note: each chunk is compared
+        # against every other chunk, so this is O(n^2) in the number of files.
+        for chunk_id, chunk in chunks.items():
+            if chunk.language == 'python':
+                try:
+                    content = chunk.path.read_text()
+                    imports = self._extract_python_imports(content)
+
+                    # Map imports to chunks
+                    deps = set()
+                    for imp in imports:
+                        # Try to find corresponding chunk
+                        for other_id, other_chunk in chunks.items():
+                            if other_id != chunk_id:
+                                # Check if import matches this file
+                                if self._import_matches_file(imp, other_chunk.path):
+                                    deps.add(other_id)
+
+                    if deps:
+                        graph[chunk_id] = deps
+
+                except Exception:
+                    continue
+
+        return graph
+
+    def _extract_python_imports(self, content: str) -> List[str]:
+        """Extract Python imports from content (line-based heuristic)"""
+        imports = []
+
+        for line in content.splitlines():
+            line = line.strip()
+
+            if line.startswith('import '):
+                # import foo  /  import foo as f  /  import foo, bar (first only)
+                module = line[len('import '):].split()[0].rstrip(',')
+                imports.append(module)
+            elif line.startswith('from '):
+                # from foo import bar
+                parts = line.split()
+                if len(parts) >= 2:
+                    module = parts[1]
+                    imports.append(module)
+
+        return imports
+
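The line-based scan above is a heuristic: it keeps only the first module of `import a, b` and cannot distinguish code from text that merely starts with `import`. For valid source, Python's `ast` module gives an exact answer; a hedged alternative sketch (not what the package ships):

```python
import ast

def extract_imports_ast(source: str) -> list:
    """Return imported module names, parsed exactly via the ast module."""
    modules = []
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return modules
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            modules.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module:
            modules.append(node.module)
    return modules
```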
+    def _import_matches_file(self, import_name: str, file_path: Path) -> bool:
+        """Check if an import matches a file (substring heuristic)"""
+        # Convert the dotted import to a path fragment, e.g. "pkg.mod" -> "pkg/mod",
+        # and compare against the POSIX form of the path so this also works on Windows.
+        import_path = import_name.replace('.', '/')
+
+        return import_path in file_path.as_posix()
+