emdash-core 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. emdash_core/__init__.py +3 -0
  2. emdash_core/agent/__init__.py +37 -0
  3. emdash_core/agent/agents.py +225 -0
  4. emdash_core/agent/code_reviewer.py +476 -0
  5. emdash_core/agent/compaction.py +143 -0
  6. emdash_core/agent/context_manager.py +140 -0
  7. emdash_core/agent/events.py +338 -0
  8. emdash_core/agent/handlers.py +224 -0
  9. emdash_core/agent/inprocess_subagent.py +377 -0
  10. emdash_core/agent/mcp/__init__.py +50 -0
  11. emdash_core/agent/mcp/client.py +346 -0
  12. emdash_core/agent/mcp/config.py +302 -0
  13. emdash_core/agent/mcp/manager.py +496 -0
  14. emdash_core/agent/mcp/tool_factory.py +213 -0
  15. emdash_core/agent/prompts/__init__.py +38 -0
  16. emdash_core/agent/prompts/main_agent.py +104 -0
  17. emdash_core/agent/prompts/subagents.py +131 -0
  18. emdash_core/agent/prompts/workflow.py +136 -0
  19. emdash_core/agent/providers/__init__.py +34 -0
  20. emdash_core/agent/providers/base.py +143 -0
  21. emdash_core/agent/providers/factory.py +80 -0
  22. emdash_core/agent/providers/models.py +220 -0
  23. emdash_core/agent/providers/openai_provider.py +463 -0
  24. emdash_core/agent/providers/transformers_provider.py +217 -0
  25. emdash_core/agent/research/__init__.py +81 -0
  26. emdash_core/agent/research/agent.py +143 -0
  27. emdash_core/agent/research/controller.py +254 -0
  28. emdash_core/agent/research/critic.py +428 -0
  29. emdash_core/agent/research/macros.py +469 -0
  30. emdash_core/agent/research/planner.py +449 -0
  31. emdash_core/agent/research/researcher.py +436 -0
  32. emdash_core/agent/research/state.py +523 -0
  33. emdash_core/agent/research/synthesizer.py +594 -0
  34. emdash_core/agent/reviewer_profile.py +475 -0
  35. emdash_core/agent/rules.py +123 -0
  36. emdash_core/agent/runner.py +601 -0
  37. emdash_core/agent/session.py +262 -0
  38. emdash_core/agent/spec_schema.py +66 -0
  39. emdash_core/agent/specification.py +479 -0
  40. emdash_core/agent/subagent.py +397 -0
  41. emdash_core/agent/subagent_prompts.py +13 -0
  42. emdash_core/agent/toolkit.py +482 -0
  43. emdash_core/agent/toolkits/__init__.py +64 -0
  44. emdash_core/agent/toolkits/base.py +96 -0
  45. emdash_core/agent/toolkits/explore.py +47 -0
  46. emdash_core/agent/toolkits/plan.py +55 -0
  47. emdash_core/agent/tools/__init__.py +141 -0
  48. emdash_core/agent/tools/analytics.py +436 -0
  49. emdash_core/agent/tools/base.py +131 -0
  50. emdash_core/agent/tools/coding.py +484 -0
  51. emdash_core/agent/tools/github_mcp.py +592 -0
  52. emdash_core/agent/tools/history.py +13 -0
  53. emdash_core/agent/tools/modes.py +153 -0
  54. emdash_core/agent/tools/plan.py +206 -0
  55. emdash_core/agent/tools/plan_write.py +135 -0
  56. emdash_core/agent/tools/search.py +412 -0
  57. emdash_core/agent/tools/spec.py +341 -0
  58. emdash_core/agent/tools/task.py +262 -0
  59. emdash_core/agent/tools/task_output.py +204 -0
  60. emdash_core/agent/tools/tasks.py +454 -0
  61. emdash_core/agent/tools/traversal.py +588 -0
  62. emdash_core/agent/tools/web.py +179 -0
  63. emdash_core/analytics/__init__.py +5 -0
  64. emdash_core/analytics/engine.py +1286 -0
  65. emdash_core/api/__init__.py +5 -0
  66. emdash_core/api/agent.py +308 -0
  67. emdash_core/api/agents.py +154 -0
  68. emdash_core/api/analyze.py +264 -0
  69. emdash_core/api/auth.py +173 -0
  70. emdash_core/api/context.py +77 -0
  71. emdash_core/api/db.py +121 -0
  72. emdash_core/api/embed.py +131 -0
  73. emdash_core/api/feature.py +143 -0
  74. emdash_core/api/health.py +93 -0
  75. emdash_core/api/index.py +162 -0
  76. emdash_core/api/plan.py +110 -0
  77. emdash_core/api/projectmd.py +210 -0
  78. emdash_core/api/query.py +320 -0
  79. emdash_core/api/research.py +122 -0
  80. emdash_core/api/review.py +161 -0
  81. emdash_core/api/router.py +76 -0
  82. emdash_core/api/rules.py +116 -0
  83. emdash_core/api/search.py +119 -0
  84. emdash_core/api/spec.py +99 -0
  85. emdash_core/api/swarm.py +223 -0
  86. emdash_core/api/tasks.py +109 -0
  87. emdash_core/api/team.py +120 -0
  88. emdash_core/auth/__init__.py +17 -0
  89. emdash_core/auth/github.py +389 -0
  90. emdash_core/config.py +74 -0
  91. emdash_core/context/__init__.py +52 -0
  92. emdash_core/context/models.py +50 -0
  93. emdash_core/context/providers/__init__.py +11 -0
  94. emdash_core/context/providers/base.py +74 -0
  95. emdash_core/context/providers/explored_areas.py +183 -0
  96. emdash_core/context/providers/touched_areas.py +360 -0
  97. emdash_core/context/registry.py +73 -0
  98. emdash_core/context/reranker.py +199 -0
  99. emdash_core/context/service.py +260 -0
  100. emdash_core/context/session.py +352 -0
  101. emdash_core/core/__init__.py +104 -0
  102. emdash_core/core/config.py +454 -0
  103. emdash_core/core/exceptions.py +55 -0
  104. emdash_core/core/models.py +265 -0
  105. emdash_core/core/review_config.py +57 -0
  106. emdash_core/db/__init__.py +67 -0
  107. emdash_core/db/auth.py +134 -0
  108. emdash_core/db/models.py +91 -0
  109. emdash_core/db/provider.py +222 -0
  110. emdash_core/db/providers/__init__.py +5 -0
  111. emdash_core/db/providers/supabase.py +452 -0
  112. emdash_core/embeddings/__init__.py +24 -0
  113. emdash_core/embeddings/indexer.py +534 -0
  114. emdash_core/embeddings/models.py +192 -0
  115. emdash_core/embeddings/providers/__init__.py +7 -0
  116. emdash_core/embeddings/providers/base.py +112 -0
  117. emdash_core/embeddings/providers/fireworks.py +141 -0
  118. emdash_core/embeddings/providers/openai.py +104 -0
  119. emdash_core/embeddings/registry.py +146 -0
  120. emdash_core/embeddings/service.py +215 -0
  121. emdash_core/graph/__init__.py +26 -0
  122. emdash_core/graph/builder.py +134 -0
  123. emdash_core/graph/connection.py +692 -0
  124. emdash_core/graph/schema.py +416 -0
  125. emdash_core/graph/writer.py +667 -0
  126. emdash_core/ingestion/__init__.py +7 -0
  127. emdash_core/ingestion/change_detector.py +150 -0
  128. emdash_core/ingestion/git/__init__.py +5 -0
  129. emdash_core/ingestion/git/commit_analyzer.py +196 -0
  130. emdash_core/ingestion/github/__init__.py +6 -0
  131. emdash_core/ingestion/github/pr_fetcher.py +296 -0
  132. emdash_core/ingestion/github/task_extractor.py +100 -0
  133. emdash_core/ingestion/orchestrator.py +540 -0
  134. emdash_core/ingestion/parsers/__init__.py +10 -0
  135. emdash_core/ingestion/parsers/base_parser.py +66 -0
  136. emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
  137. emdash_core/ingestion/parsers/class_extractor.py +154 -0
  138. emdash_core/ingestion/parsers/function_extractor.py +202 -0
  139. emdash_core/ingestion/parsers/import_analyzer.py +119 -0
  140. emdash_core/ingestion/parsers/python_parser.py +123 -0
  141. emdash_core/ingestion/parsers/registry.py +72 -0
  142. emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
  143. emdash_core/ingestion/parsers/typescript_parser.py +278 -0
  144. emdash_core/ingestion/repository.py +346 -0
  145. emdash_core/models/__init__.py +38 -0
  146. emdash_core/models/agent.py +68 -0
  147. emdash_core/models/index.py +77 -0
  148. emdash_core/models/query.py +113 -0
  149. emdash_core/planning/__init__.py +7 -0
  150. emdash_core/planning/agent_api.py +413 -0
  151. emdash_core/planning/context_builder.py +265 -0
  152. emdash_core/planning/feature_context.py +232 -0
  153. emdash_core/planning/feature_expander.py +646 -0
  154. emdash_core/planning/llm_explainer.py +198 -0
  155. emdash_core/planning/similarity.py +509 -0
  156. emdash_core/planning/team_focus.py +821 -0
  157. emdash_core/server.py +153 -0
  158. emdash_core/sse/__init__.py +5 -0
  159. emdash_core/sse/stream.py +196 -0
  160. emdash_core/swarm/__init__.py +17 -0
  161. emdash_core/swarm/merge_agent.py +383 -0
  162. emdash_core/swarm/session_manager.py +274 -0
  163. emdash_core/swarm/swarm_runner.py +226 -0
  164. emdash_core/swarm/task_definition.py +137 -0
  165. emdash_core/swarm/worker_spawner.py +319 -0
  166. emdash_core/swarm/worktree_manager.py +278 -0
  167. emdash_core/templates/__init__.py +10 -0
  168. emdash_core/templates/defaults/agent-builder.md.template +82 -0
  169. emdash_core/templates/defaults/focus.md.template +115 -0
  170. emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
  171. emdash_core/templates/defaults/pr-review.md.template +80 -0
  172. emdash_core/templates/defaults/project.md.template +85 -0
  173. emdash_core/templates/defaults/research_critic.md.template +112 -0
  174. emdash_core/templates/defaults/research_planner.md.template +85 -0
  175. emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
  176. emdash_core/templates/defaults/reviewer.md.template +81 -0
  177. emdash_core/templates/defaults/spec.md.template +41 -0
  178. emdash_core/templates/defaults/tasks.md.template +78 -0
  179. emdash_core/templates/loader.py +296 -0
  180. emdash_core/utils/__init__.py +45 -0
  181. emdash_core/utils/git.py +84 -0
  182. emdash_core/utils/image.py +502 -0
  183. emdash_core/utils/logger.py +51 -0
  184. emdash_core-0.1.7.dist-info/METADATA +35 -0
  185. emdash_core-0.1.7.dist-info/RECORD +187 -0
  186. emdash_core-0.1.7.dist-info/WHEEL +4 -0
  187. emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,476 @@
1
+ """Code reviewer agent for generating PR reviews using learned reviewer profiles."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import dataclass, field, asdict
8
+ from typing import Any, Optional
9
+
10
+ from rich.console import Console
11
+ from rich.panel import Panel
12
+ from rich.markdown import Markdown
13
+
14
+ from .toolkit import AgentToolkit
15
+ from .providers import get_provider
16
+ from .providers.factory import DEFAULT_MODEL
17
+ from ..templates import load_template_for_agent
18
+ from ..utils.logger import log
19
+
20
+
21
@dataclass
class ReviewComment:
    """An inline review comment on a specific line of code.

    Attributes:
        path: File path relative to the repository root.
        line: Line number in the diff the comment is attached to.
        body: Text of the comment.
        side: ``"LEFT"`` for the old code or ``"RIGHT"`` (default) for the
            new code.
    """

    path: str
    line: int
    body: str
    side: str = "RIGHT"

    def to_dict(self) -> dict:
        """Return the comment's fields as a plain dictionary."""
        as_mapping = asdict(self)
        return as_mapping
32
+
33
+
34
@dataclass
class ReviewResult:
    """Result of a code review.

    Attributes:
        summary: Overall review summary.
        verdict: One of ``APPROVE``, ``REQUEST_CHANGES`` or ``COMMENT``.
        comments: Inline comments attached to the review; a fresh empty list
            is created per instance when none are given.
    """

    summary: str
    verdict: str
    comments: list[ReviewComment] = field(default_factory=list)

    @property
    def comments_count(self) -> int:
        """Number of inline comments in this review."""
        total = len(self.comments)
        return total
45
+
46
+
47
# System prompt template for the review agent.
#
# The text is rendered with ``str.format(profile=...)``: ``{profile}`` is the
# only placeholder, and the literal braces of the JSON example below are
# doubled (``{{`` / ``}}``) so that ``format`` emits them as single braces.
REVIEW_SYSTEM_PROMPT = """You are reviewing a pull request as a senior code reviewer. You have access to tools to explore the codebase and verify your understanding.

## Your Reviewer Profile
{profile}

## Your Task
Review the PR thoroughly. You SHOULD use the available tools to:
1. Understand the context of changes (use semantic_search, expand_node)
2. Check how similar patterns are handled elsewhere (use text_search, get_callers)
3. Verify the impact of changes (use get_impact_analysis)
4. Look at related files (use get_file_dependencies)
5. **Verify your comments with grep** - Before making a comment, use the grep tool to verify your claims are accurate

## Review Process
1. First, explore the PR diff provided
2. Use tools to understand the code context and verify your assumptions
3. **IMPORTANT: Before writing each comment, use the grep tool to verify:**
- The pattern/issue you're commenting on actually exists
- Similar patterns in the codebase to ensure consistency feedback is accurate
- Any claims about "missing" code or "unused" variables are correct
4. Once you have verified your findings, generate your review

## Final Output
When you're ready to submit your review, output a JSON block with this EXACT format:

```json
{{
"summary": "Overall review summary - be constructive and specific",
"verdict": "APPROVE" | "REQUEST_CHANGES" | "COMMENT",
"comments": [
{{
"path": "path/to/file.py",
"line": 42,
"body": "Detailed comment explaining the issue or suggestion",
"side": "RIGHT"
}}
]
}}
```

Guidelines for comments:
- "verdict" must be exactly one of: APPROVE, REQUEST_CHANGES, COMMENT
- "line" is the line number in the NEW version of the file (side=RIGHT)
- Use "side": "LEFT" only when commenting on deleted lines
- Be specific and constructive
- Only comment where there's something meaningful to say

IMPORTANT: When you're done exploring and ready to submit, output the JSON block. Do NOT wrap it in markdown code fences other than ```json.
"""
96
+
97
+
98
class CodeReviewerAgent:
    """Agent that generates PR reviews using a learned reviewer profile and tool access.

    A review runs in three steps: the pull request is fetched through the
    toolkit, rendered into a markdown context, and handed to the LLM, which
    may call tools for up to ``max_iterations`` rounds before it emits the
    final review as a JSON object.
    """

    # Verdicts / sides the review JSON may carry; anything else is normalised.
    _VALID_VERDICTS = ("APPROVE", "REQUEST_CHANGES", "COMMENT")
    _VALID_SIDES = ("LEFT", "RIGHT")
    # Keys that identify a JSON object as the review payload.
    _REVIEW_KEYS = ("summary", "verdict", "comments")
    # Matches fenced code blocks, with or without the ``json`` language tag.
    _FENCE_RE = re.compile(r"```(?:json)?\s*([\s\S]*?)\s*```")

    # Size limits applied when building the LLM context.
    _MAX_DESCRIPTION_CHARS = 2000
    _MAX_EXISTING_COMMENTS = 5
    _MAX_EXISTING_COMMENT_CHARS = 200
    _MAX_FILES = 20
    _MAX_FILE_DIFF_CHARS = 8000
    _MAX_TOOL_RESULT_CHARS = 10000

    def __init__(
        self,
        model: str = DEFAULT_MODEL,
        verbose: bool = True,
        enable_posting: bool = False,
        max_iterations: int = 10,
    ):
        """Initialize the code reviewer agent.

        Args:
            model: LLM model to use
            verbose: Whether to print progress
            enable_posting: Whether to allow posting reviews to GitHub
            max_iterations: Maximum tool call iterations
        """
        self.provider = get_provider(model)
        self.toolkit = AgentToolkit(enable_session=False)
        self.model = model
        self.verbose = verbose
        self.enable_posting = enable_posting
        self.max_iterations = max_iterations
        self.console = Console()

        # Load the reviewer profile, falling back to a built-in default
        try:
            self.profile = load_template_for_agent("reviewer")
        except FileNotFoundError:
            log.warning("Reviewer profile not found, using default template")
            self.profile = self._get_default_profile()

    def _get_default_profile(self) -> str:
        """Get a minimal default profile if no template exists."""
        return (
            "Focus on:\n"
            "- Code correctness and potential bugs\n"
            "- Security issues\n"
            "- Performance concerns\n"
            "- Code clarity and maintainability\n"
            "- Test coverage\n"
            "\n"
            "Be constructive and specific in your feedback."
        )

    def review(self, pr_number: int) -> ReviewResult:
        """Generate a review for a pull request.

        Args:
            pr_number: The PR number to review

        Returns:
            ReviewResult with summary, verdict, and inline comments

        Raises:
            ValueError: If the pull request could not be fetched.
        """
        if self.verbose:
            self.console.print(f"\n[bold cyan]Reviewing PR #{pr_number}[/bold cyan]\n")

        # 1. Fetch PR details
        pr_data = self._fetch_pr(pr_number)
        if not pr_data:
            raise ValueError(f"Failed to fetch PR #{pr_number}")

        # 2. Build initial context
        context = self._build_review_context(pr_data)

        # 3. Run agent loop with tool access
        result = self._run_review_loop(context)

        if self.verbose:
            self._print_review_summary(result)

        return result

    def _fetch_pr(self, pr_number: int) -> Optional[dict]:
        """Fetch PR details including diff.

        Returns:
            The data of the ``github_pr_details`` tool, or ``None`` when the
            tool reports a failure (the error is logged).
        """
        result = self.toolkit.execute(
            "github_pr_details",
            pull_number=pr_number,
            include_diff=True,
            include_comments=True,
            include_reviews=True,
            include_review_comments=True,
        )

        if not result.success:
            log.error(f"Failed to fetch PR: {result.error}")
            return None

        return result.data

    def _build_review_context(self, pr_data: dict) -> str:
        """Build context string for the LLM review.

        Args:
            pr_data: Mapping with the keys ``pr`` (PR metadata), ``diff``
                (unified diff text) and optionally ``review_comments``
                (either a list or a dict holding the list under ``nodes``).

        Returns:
            Markdown text with the PR header, description, up to five
            existing review comments and the per-file diffs.
        """
        # ``or`` guards: these keys may be present but set to None.
        pr = pr_data.get("pr") or {}
        diff = pr_data.get("diff") or ""

        # Parse diff to get file-level context
        files = self._parse_diff_files(diff)

        author = (pr.get("user") or {}).get("login", "unknown")
        context_parts = [
            f"# PR #{pr.get('number')}: {pr.get('title')}",
            f"\n**Author:** {author}",
            f"**State:** {pr.get('state')}",
            f"**Changes:** +{pr.get('additions', 0)} / -{pr.get('deletions', 0)}",
        ]

        # Add PR description if available
        description = pr.get("body")
        if description:
            context_parts.append(
                f"\n## Description\n{description[:self._MAX_DESCRIPTION_CHARS]}"
            )

        # Add existing review comments for context
        existing_comments_data = pr_data.get("review_comments", {})
        existing_comments = []
        if isinstance(existing_comments_data, dict):
            existing_comments = existing_comments_data.get("nodes") or []
        elif isinstance(existing_comments_data, list):
            existing_comments = existing_comments_data

        if existing_comments:
            context_parts.append(f"\n## Existing Review Comments ({len(existing_comments)})")
            for c in existing_comments[:self._MAX_EXISTING_COMMENTS]:
                if not isinstance(c, dict):
                    continue
                # A comment's user or body can be null; treat as missing.
                user = (c.get("user") or {}).get("login", "unknown")
                path = c.get("path") or ""
                text = (c.get("body") or "")[:self._MAX_EXISTING_COMMENT_CHARS]
                context_parts.append(f"- **{user}** on `{path}`: {text}")

        # Add diff with file structure
        context_parts.append("\n## Changes\n")
        for file_info in files[:self._MAX_FILES]:
            context_parts.append(f"\n### {file_info['path']}\n```diff")
            context_parts.append(file_info["content"][:self._MAX_FILE_DIFF_CHARS])
            context_parts.append("```")

        return "\n".join(context_parts)

    def _parse_diff_files(self, diff: str) -> list[dict]:
        """Parse unified diff into file-level chunks.

        Each ``diff --git`` section becomes one entry. The path is taken
        from the ``+++ b/<path>`` header; for deleted files, whose header is
        ``+++ /dev/null``, the ``--- a/<path>`` header is used instead so
        that deletions are not silently left out of the review. Sections
        without any content after the headers (e.g. pure renames) are
        skipped.

        Args:
            diff: Unified diff text as produced by git; may be empty.

        Returns:
            List of ``{"path": str, "content": str}`` dicts in diff order,
            where ``content`` is the hunk text starting at the first ``@@``.
        """
        if not diff:
            return []

        files: list[dict] = []

        def emit(path: Optional[str], lines: list[str]) -> None:
            if path and lines:
                files.append({"path": path, "content": "\n".join(lines)})

        current_file: Optional[str] = None
        old_file: Optional[str] = None
        current_lines: list[str] = []
        in_hunk = False

        for line in diff.splitlines():
            if line.startswith("diff --git"):
                emit(current_file, current_lines)
                current_file = None
                old_file = None
                current_lines = []
                in_hunk = False
            elif in_hunk:
                # Inside a hunk every line is content, even one whose text
                # happens to look like a file header ("+++ b/...").
                current_lines.append(line)
            elif line.startswith("--- a/"):
                old_file = line[6:]
            elif line.startswith("+++ b/"):
                current_file = line[6:]
            elif line.startswith("+++ /dev/null"):
                # Deleted file: only the old path identifies it.
                current_file = old_file
            elif current_file:
                if line.startswith("@@"):
                    in_hunk = True
                current_lines.append(line)

        # Don't forget the last file
        emit(current_file, current_lines)

        return files

    def _run_review_loop(self, context: str) -> ReviewResult:
        """Run the agent loop with tool access.

        The LLM is called up to ``max_iterations`` times. Tool calls are
        executed and their (size-capped) JSON results fed back. A reply
        without tool calls is parsed as the final review. When the budget is
        used up, one last call without tools asks for the final review.

        Args:
            context: Markdown description of the PR to review.

        Returns:
            The parsed review, or a placeholder ``COMMENT`` review when no
            usable review was produced.
        """
        # Build system prompt with profile
        system_prompt = REVIEW_SYSTEM_PROMPT.format(profile=self.profile)

        # Get tool schemas
        tools = self.toolkit.get_all_schemas()

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Please review this pull request:\n\n{context}"},
        ]

        for _ in range(self.max_iterations):
            response = self.provider.chat(messages, tools=tools)
            messages.append(self.provider.format_assistant_message(response))

            if response.tool_calls:
                for tool_call in response.tool_calls:
                    result_json = json.dumps(self._execute_tool_call(tool_call), default=str)

                    # Truncate large results
                    if len(result_json) > self._MAX_TOOL_RESULT_CHARS:
                        result_json = (
                            result_json[:self._MAX_TOOL_RESULT_CHARS] + "...[TRUNCATED]"
                        )

                    messages.append(
                        self.provider.format_tool_result(tool_call.id, result_json)
                    )
                continue

            # No tool calls - try to parse the review
            result = self._parse_review_response(response.content or "")
            if result.summary or result.comments:
                return result

            # Otherwise, ask the LLM to provide the final review
            messages.append({
                "role": "user",
                "content": "Please provide your final review in the JSON format specified.",
            })

        # Max iterations reached - force a final answer without tools
        messages.append({
            "role": "user",
            "content": (
                "Tool budget reached. Provide your final review now in the JSON format specified. "
                "Do not call any tools."
            ),
        })
        final_response = self.provider.chat(messages, tools=None)
        final_result = self._parse_review_response(final_response.content or "")
        if final_result.summary or final_result.comments:
            return final_result

        log.warning("Max iterations reached, returning partial review")
        return ReviewResult(
            summary="Review incomplete - max iterations reached",
            verdict="COMMENT",
            comments=[],
        )

    @staticmethod
    def _decode_tool_arguments(raw: Any) -> dict:
        """Turn a tool call's raw arguments into keyword arguments.

        Accepts an already decoded dict or a JSON string. Missing,
        malformed or non-object arguments yield an empty dict, so the tool
        is still invoked and can report its own error.
        """
        if isinstance(raw, dict):
            return raw
        if not raw:
            return {}
        try:
            decoded = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return {}
        return decoded if isinstance(decoded, dict) else {}

    def _execute_tool_call(self, tool_call) -> dict:
        """Execute a tool call and return the result.

        Args:
            tool_call: Object with ``name`` and ``arguments`` attributes.

        Returns:
            ``{"success": True, "data": ...}`` on success, otherwise
            ``{"success": False, "error": ...}``.
        """
        name = tool_call.name
        args = self._decode_tool_arguments(tool_call.arguments)

        if self.verbose:
            self.console.print(f"[dim]Using tool: {name}[/dim]")

        result = self.toolkit.execute(name, **args)

        if result.success:
            return {"success": True, "data": result.data}
        return {"success": False, "error": result.error}

    @classmethod
    def _extract_review_json(cls, content: str) -> Optional[dict]:
        """Find the review JSON object inside an LLM reply.

        Fenced code blocks are tried in order, then the whole reply. The
        whole-reply fallback matters when a comment body itself contains a
        code fence (which cuts the fenced match short) or when the first
        fenced block is not the review. Only a JSON object carrying at least
        one of the review keys is accepted.
        """
        candidates = cls._FENCE_RE.findall(content)
        candidates.append(content)

        for candidate in candidates:
            start = candidate.find("{")
            end = candidate.rfind("}") + 1
            if start < 0 or end <= start:
                continue
            try:
                data = json.loads(candidate[start:end])
            except json.JSONDecodeError as e:
                log.debug(f"Failed to parse review JSON: {e}")
                continue
            if isinstance(data, dict) and any(key in data for key in cls._REVIEW_KEYS):
                return data

        return None

    def _parse_review_response(self, content: str) -> ReviewResult:
        """Parse LLM response into ReviewResult.

        The parsed values are normalised so the result is safe to post:
        an unknown verdict becomes ``COMMENT``, an unknown side becomes
        ``RIGHT``, a non-integer line becomes ``1``, and comment entries
        that are not objects or lack a path or body are dropped.

        Args:
            content: Raw text of the LLM reply.

        Returns:
            The parsed review; an empty ``COMMENT`` review (no summary, no
            comments) when no review JSON could be extracted.
        """
        data = self._extract_review_json(content or "")
        if data is None:
            return ReviewResult(summary="", verdict="COMMENT", comments=[])

        raw_comments = data.get("comments")
        if not isinstance(raw_comments, list):
            raw_comments = []

        comments = []
        for c in raw_comments:
            if not isinstance(c, dict):
                continue
            path = str(c.get("path") or "")
            body = str(c.get("body") or "")
            if not path or not body:
                # Cannot be attached to a file / carries no text.
                continue
            try:
                line = int(c.get("line", 1))
            except (TypeError, ValueError):
                line = 1
            side = str(c.get("side") or "RIGHT").strip().upper()
            if side not in self._VALID_SIDES:
                side = "RIGHT"
            comments.append(ReviewComment(path=path, line=line, body=body, side=side))

        verdict = str(data.get("verdict") or "COMMENT").strip().upper()
        if verdict not in self._VALID_VERDICTS:
            log.debug(f"Unknown review verdict {verdict!r}, using COMMENT")
            verdict = "COMMENT"

        summary = data.get("summary") or ""
        if not isinstance(summary, str):
            summary = str(summary)

        return ReviewResult(summary=summary, verdict=verdict, comments=comments)

    def _print_review_summary(self, result: ReviewResult):
        """Print a summary of the review to console.

        LLM-provided text is escaped before it is embedded in Rich markup;
        otherwise bracketed code such as ``arr[i]`` would be interpreted as
        a style tag.
        """
        # Function-scope import: only this method needs it.
        from rich.markup import escape

        verdict_colors = {
            "APPROVE": "green",
            "REQUEST_CHANGES": "red",
            "COMMENT": "yellow",
        }
        color = verdict_colors.get(result.verdict, "white")

        # Print summary panel
        self.console.print(Panel(
            f"[bold {color}]{escape(str(result.verdict))}[/bold {color}]"
            f"\n\n{escape(str(result.summary))}",
            title="Review Summary",
            border_style=color,
        ))

        # Print each comment in full
        if result.comments:
            self.console.print(f"\n[bold]Inline Comments ({len(result.comments)}):[/bold]\n")

            for i, comment in enumerate(result.comments, 1):
                self.console.print(Panel(
                    f"[cyan]{escape(str(comment.path))}[/cyan]:"
                    f"[magenta]{comment.line}[/magenta]\n\n{escape(str(comment.body))}",
                    title=f"Comment {i}",
                    border_style="dim",
                ))

    def post_review(self, pr_number: int, result: ReviewResult) -> bool:
        """Post the review to GitHub.

        Args:
            pr_number: The PR number
            result: The review result to post

        Returns:
            True if successful, False otherwise (including when posting is
            disabled for this agent)
        """
        if not self.enable_posting:
            log.error("Posting is disabled. Initialize with enable_posting=True")
            return False

        # Convert comments to dict format; None when there are none
        comments = [c.to_dict() for c in result.comments] if result.comments else None

        post_result = self.toolkit.execute(
            "github_create_review",
            pull_number=pr_number,
            body=result.summary,
            event=result.verdict,
            comments=comments,
        )

        if not post_result.success:
            log.error(f"Failed to post review: {post_result.error}")
            return False

        if self.verbose:
            self.console.print(
                f"\n[bold green]✓[/bold green] Review posted to PR #{pr_number}"
            )

        return True

    def review_and_post(self, pr_number: int) -> tuple[ReviewResult, bool]:
        """Generate and post a review.

        Args:
            pr_number: The PR number to review

        Returns:
            Tuple of (ReviewResult, success_bool)
        """
        result = self.review(pr_number)
        success = self.post_review(pr_number, result)
        return result, success
473
+
474
+
475
# Backwards-compatible alias: API code imports this class as ``CodeReviewer``.
CodeReviewer = CodeReviewerAgent
@@ -0,0 +1,143 @@
1
+ """Context compaction for managing LLM context size.
2
+
3
+ Provides utilities for compressing large payloads to fit within
4
+ token limits while preserving essential information.
5
+ """
6
+
7
+ from typing import Any, Optional
8
+
9
+
10
class LLMCompactor:
    """Compacts payloads to fit within LLM context limits.

    Uses LLM-based summarization to compress large sections of context
    while preserving the most important information. Token counts are
    estimated at four characters per token.

    Example:
        compactor = LLMCompactor(provider)

        payload = {
            "evidence": "Very long evidence text...",
            "claims": "Long claims list...",
        }

        compacted = compactor.compact_payload(payload, goal="Find auth bugs")
    """

    # Sections are handled in this order; earlier ones get budget first.
    _SECTION_PRIORITY = (
        "claims",
        "evidence",
        "questions",
        "gaps",
        "entities",
        "prior_claims",
    )
    # Rough average used for all token estimates.
    _CHARS_PER_TOKEN = 4
    # Upper bound on the text handed to the LLM for one summary.
    _MAX_SUMMARY_INPUT_CHARS = 8000

    def __init__(
        self,
        provider: Any,
        max_section_tokens: int = 4000,
        max_total_tokens: int = 16000,
    ):
        """Initialize the compactor.

        Args:
            provider: LLM provider for summarization; must offer
                ``chat(messages)`` returning an object with ``content``
            max_section_tokens: Max tokens per section
            max_total_tokens: Max total tokens for payload
        """
        self.provider = provider
        self.max_section_tokens = max_section_tokens
        self.max_total_tokens = max_total_tokens

    def compact_payload(
        self,
        payload: dict[str, str],
        goal: str,
    ) -> dict[str, str]:
        """Compact a payload to fit within token limits.

        A payload already within ``max_total_tokens`` is returned as is.
        Otherwise the priority sections are processed in order: a section
        above ``max_section_tokens`` is summarized, and a section that would
        overrun the remaining budget is cut to what is left, so the result
        stays within ``max_total_tokens``. Remaining sections are copied
        unchanged when they still fit and dropped otherwise.

        Args:
            payload: Dict of section name to content
            goal: Research goal for context

        Returns:
            Dict with compacted sections
        """
        total_chars = sum(len(v) for v in payload.values())
        if total_chars // self._CHARS_PER_TOKEN <= self.max_total_tokens:
            return payload

        compacted: dict[str, str] = {}
        remaining_budget = self.max_total_tokens

        for key in self._SECTION_PRIORITY:
            if key not in payload:
                continue
            if remaining_budget <= 0:
                break

            content = payload[key]
            if self.estimate_tokens(content) > self.max_section_tokens:
                content = self._summarize_section(key, content, goal)

            # Never let a single section push the total over the limit.
            if self.estimate_tokens(content) > remaining_budget:
                content = content[: remaining_budget * self._CHARS_PER_TOKEN]

            compacted[key] = content
            remaining_budget -= self.estimate_tokens(content)

        # Copy any remaining sections that fit
        for key, value in payload.items():
            if key in compacted:
                continue
            cost = self.estimate_tokens(value)
            if cost <= remaining_budget:
                compacted[key] = value
                remaining_budget -= cost

        return compacted

    def _summarize_section(
        self,
        section_name: str,
        content: str,
        goal: str,
    ) -> str:
        """Summarize a section using the LLM.

        Summarization is best effort: when the provider fails or returns
        nothing, the content is truncated instead. Either way the result is
        capped at ``max_section_tokens`` (estimated).

        Args:
            section_name: Name of the section
            content: Content to summarize
            goal: Research goal for context

        Returns:
            Summarized (or truncated) content
        """
        max_chars = self.max_section_tokens * self._CHARS_PER_TOKEN
        excerpt = content[: self._MAX_SUMMARY_INPUT_CHARS]  # Limit input
        prompt = (
            f"Summarize this {section_name} section concisely.\n"
            f"Keep the most important information relevant to the goal: {goal}\n"
            "\n"
            "Content:\n"
            f"{excerpt}\n"
            "\n"
            "Provide a concise summary that preserves key facts and references."
        )

        try:
            response = self.provider.chat([{"role": "user", "content": prompt}])
            summary = response.content
        except Exception:
            # Deliberate catch-all: any provider failure falls back to
            # truncation rather than aborting the compaction.
            summary = None

        if not summary:
            return content[:max_chars]
        return summary[:max_chars]

    def estimate_tokens(self, text: str) -> int:
        """Estimate token count for text.

        Args:
            text: Text to estimate

        Returns:
            Estimated token count
        """
        # Rough estimate: 4 chars per token on average
        return len(text) // self._CHARS_PER_TOKEN