repr-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
repr/tools.py ADDED
@@ -0,0 +1,446 @@
1
+ """
2
+ Local tool implementations for the agentic analysis workflow.
3
+
4
+ These tools execute locally on the CLI and return results to the server.
5
+ """
6
+
7
+ import re
8
+ from datetime import datetime, timedelta
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from git import Repo
13
+ from git.exc import GitCommandError
14
+
15
+ from .extractor import detect_languages, detect_dependencies, get_file_tree_flat
16
+
17
+
18
class ToolError(Exception):
    """Signals that a tool could not be run or failed while running."""
21
+
22
+
23
def execute_tool(tool_name: str, args: dict[str, Any], repos_map: dict[str, Path]) -> Any:
    """
    Execute a tool by name with given arguments.

    The caller's ``args`` dict is left untouched: the resolved repository
    path is added to a private copy before the tool is called.

    Args:
        tool_name: Name of the tool to execute
        args: Tool arguments. An optional "repo" entry names a repository in
            ``repos_map``; every entry is forwarded to the tool as a keyword
            argument.
        repos_map: Mapping of repo names to paths

    Returns:
        Tool result

    Raises:
        ToolError: If the tool name or repository name is unknown, or if the
            tool raises. In the latter case the original exception is chained
            as ``__cause__``.
    """
    tools = {
        "get_recent_commits": get_recent_commits,
        "get_commit_detail": get_commit_detail,
        "read_file": read_file,
        "search_code": search_code,
        "get_file_tree": get_file_tree,
        "get_contributor_stats": get_contributor_stats,
        "get_languages": get_languages,
        "get_dependencies": get_dependencies,
    }

    if tool_name not in tools:
        raise ToolError(f"Unknown tool: {tool_name}")

    # Work on a shallow copy so "repo_path" never leaks into the caller's dict.
    call_args = dict(args)

    # Resolve repo path
    repo_name = call_args.get("repo")
    if repo_name:
        if repo_name not in repos_map:
            raise ToolError(f"Unknown repository: {repo_name}")
        call_args["repo_path"] = repos_map[repo_name]

    # NOTE(review): when "repo" is absent, a "repo_path" supplied directly in
    # args is still forwarded as-is -- confirm whether that should be allowed.
    try:
        return tools[tool_name](**call_args)
    except Exception as e:
        # Chain the cause so the original traceback is kept for debugging.
        raise ToolError(f"Tool {tool_name} failed: {str(e)}") from e
63
+
64
+
65
def get_recent_commits(
    repo_path: Path,
    count: int | None = None,
    days: int = 365,
    **kwargs,
) -> list[dict[str, Any]]:
    """
    Get recent commits from a repository.

    Commits are read in the order ``Repo.iter_commits`` yields them, and
    iteration stops at the first commit older than the time window.

    Args:
        repo_path: Path to the repository
        count: Maximum number of commits to retrieve (None = unlimited within time range)
        days: Number of days to look back (default 365 for 1 year)
        **kwargs: Ignored; accepted so extra tool arguments do not fail the call

    Returns:
        List of commit objects with metadata: short and full SHA, first line
        of the message, author name and email, ISO-formatted commit date, and
        file/insertion/deletion counts
    """
    repo = Repo(repo_path)

    # Epoch seconds of the oldest commit time still inside the window.
    cutoff_timestamp = (datetime.now() - timedelta(days=days)).timestamp()

    commits = []
    for commit in repo.iter_commits(max_count=count):
        # Stop if we've gone past the time window
        if commit.committed_date < cutoff_timestamp:
            break

        # Read the stats once: GitPython recomputes them through git on each
        # access to ``commit.stats``, so three reads cost three git calls.
        stats = commit.stats

        commits.append({
            "sha": commit.hexsha[:8],
            "full_sha": commit.hexsha,
            "message": commit.message.strip().split("\n")[0],  # First line only
            "author": commit.author.name,
            "author_email": commit.author.email,
            "date": datetime.fromtimestamp(commit.committed_date).isoformat(),
            "files_changed": len(stats.files),
            "insertions": stats.total["insertions"],
            "deletions": stats.total["deletions"],
        })

    return commits
107
+
108
+
109
def get_commits_with_diffs(
    repo_path: Path,
    count: int = 100,
    days: int = 365,
    max_diff_lines_per_file: int = 50,
    max_files_per_commit: int = 10,
) -> list[dict[str, Any]]:
    """
    Get commits with actual diff content for LLM analysis.

    Each commit is diffed against its first parent. A commit without parents
    is diffed against the empty tree. If the diff cannot be produced, the
    commit is still returned with a plain file list and empty diff text.

    Args:
        repo_path: Path to the repository
        count: Maximum number of commits to retrieve
        days: Number of days to look back (default 365 for 1 year)
        max_diff_lines_per_file: Maximum diff lines to include per file
        max_files_per_commit: Maximum files to include per commit

    Returns:
        List of commit objects with diffs. Each has short and full SHA, the
        full message, author name and email, ISO-formatted commit date,
        insertion/deletion totals, and a "files" list of
        {"path", "change_type", "diff"} entries
    """
    # Local import keeps this block self-contained; the module itself only
    # imports Repo and GitCommandError from git.
    from git import NULL_TREE

    repo = Repo(repo_path)
    commits = []

    # Epoch seconds of the oldest commit time still inside the window.
    cutoff_timestamp = (datetime.now() - timedelta(days=days)).timestamp()

    for commit in repo.iter_commits(max_count=count):
        # Stop if we've gone past the time window
        if commit.committed_date < cutoff_timestamp:
            break

        # Read the stats once per commit instead of on every use below.
        stats = commit.stats

        files = []
        try:
            if commit.parents:
                diffs = commit.parents[0].diff(commit, create_patch=True)
            else:
                # Initial commit: compare with the empty tree. Passing None
                # here would diff against the working tree instead.
                diffs = commit.diff(NULL_TREE, create_patch=True)

            for diff_item in list(diffs)[:max_files_per_commit]:
                file_path = diff_item.b_path or diff_item.a_path
                if not file_path:
                    continue

                # The patch is normally bytes; decode leniently so odd
                # encodings never abort the whole commit.
                raw_diff = diff_item.diff
                if not raw_diff:
                    diff_text = ""
                elif isinstance(raw_diff, bytes):
                    diff_text = raw_diff.decode("utf-8", errors="ignore")
                else:
                    diff_text = str(raw_diff)

                # Truncate diff if too long
                diff_lines = diff_text.split("\n")
                if len(diff_lines) > max_diff_lines_per_file:
                    omitted = len(diff_lines) - max_diff_lines_per_file
                    diff_text = "\n".join(diff_lines[:max_diff_lines_per_file])
                    diff_text += f"\n... ({omitted} more lines)"

                files.append({
                    "path": file_path,
                    "change_type": diff_item.change_type,  # A=added, D=deleted, M=modified, R=renamed
                    "diff": diff_text,
                })
        except Exception:
            # Best effort: if the diff cannot be built, fall back to the file
            # list. Start over so files collected before the failure are not
            # listed twice.
            files = []
            for filename in stats.files:
                if len(files) >= max_files_per_commit:
                    break
                files.append({
                    "path": filename,
                    "change_type": "M",
                    "diff": "",
                })

        commits.append({
            "sha": commit.hexsha[:8],
            "full_sha": commit.hexsha,
            "message": commit.message.strip(),
            "author": commit.author.name,
            "author_email": commit.author.email,
            "date": datetime.fromtimestamp(commit.committed_date).isoformat(),
            "files": files,
            "insertions": stats.total["insertions"],
            "deletions": stats.total["deletions"],
        })

    return commits
201
+
202
+
203
def get_commit_detail(
    repo_path: Path,
    sha: str,
    **kwargs,
) -> dict[str, Any]:
    """
    Get detailed information about a specific commit.

    Args:
        repo_path: Path to the repository
        sha: Commit SHA (full or short)
        **kwargs: Ignored; accepted so extra tool arguments do not fail the call

    Returns:
        Detailed commit object with diff stats: short and full SHA, full
        message, author name and email, ISO-formatted commit date, per-file
        insertion/deletion counts, and the commit-wide totals
    """
    repo = Repo(repo_path)
    commit = repo.commit(sha)

    # Read the stats once and reuse them for the per-file list and the totals.
    stats = commit.stats

    # Get files changed with stats
    files = [
        {
            "path": filename,
            "insertions": file_stats["insertions"],
            "deletions": file_stats["deletions"],
        }
        for filename, file_stats in stats.files.items()
    ]

    return {
        "sha": commit.hexsha[:8],
        "full_sha": commit.hexsha,
        "message": commit.message.strip(),
        "author": commit.author.name,
        "author_email": commit.author.email,
        "date": datetime.fromtimestamp(commit.committed_date).isoformat(),
        "files": files,
        "total_insertions": stats.total["insertions"],
        "total_deletions": stats.total["deletions"],
    }
241
+
242
+
243
def read_file(
    repo_path: Path,
    path: str,
    max_lines: int = 500,
    **kwargs,
) -> dict[str, Any]:
    """
    Read a file from the repository.

    The requested path is resolved and must stay inside ``repo_path``;
    ``..`` segments, absolute paths and symlinks that lead outside the
    repository are rejected instead of being read.

    Args:
        repo_path: Path to the repository
        path: Relative path to the file
        max_lines: Maximum lines to return
        **kwargs: Ignored; accepted so extra tool arguments do not fail the call

    Returns:
        File contents and metadata ("path", "content", "lines", "truncated",
        "size"), or a dict with an "error" key when the path is outside the
        repository, missing, not a regular file, larger than 1MB, or not
        valid UTF-8 text
    """
    root = Path(repo_path).resolve()
    file_path = (root / path).resolve()

    # Containment check on the resolved path, so traversal and symlink tricks
    # cannot expose files outside the repository.
    if not file_path.is_relative_to(root):
        return {"error": "Path outside repository", "path": path}

    if not file_path.exists():
        return {"error": "File not found", "path": path}

    if not file_path.is_file():
        return {"error": "Not a file", "path": path}

    # Check file size
    size = file_path.stat().st_size
    if size > 1024 * 1024:  # 1MB limit
        return {"error": "File too large", "path": path, "size": size}

    try:
        # Decode explicitly as UTF-8 so results do not depend on the
        # platform's default encoding.
        content = file_path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        return {"error": "Binary file", "path": path}

    lines = content.split("\n")
    truncated = len(lines) > max_lines
    if truncated:
        content = "\n".join(lines[:max_lines])

    return {
        "path": path,
        "content": content,
        "lines": min(len(lines), max_lines),
        "truncated": truncated,
        "size": size,
    }
290
+
291
+
292
def search_code(
    repo_path: Path,
    pattern: str,
    max_results: int = 50,
    **kwargs,
) -> list[dict[str, Any]]:
    """
    Search for a pattern in repository code.

    Matching is case-insensitive and done line by line. Files under common
    dependency/build directories, files with common image/archive extensions
    (compared case-insensitively), and files that are unreadable or not valid
    UTF-8 are skipped.

    Args:
        repo_path: Path to the repository
        pattern: Regex pattern to search for
        max_results: Maximum number of results
        **kwargs: Ignored; accepted so extra tool arguments do not fail the call

    Returns:
        List of matches, each with "path" (relative to the repository),
        "line" (1-based) and "content" (stripped, at most 200 characters).
        An invalid pattern yields a single-element list holding an "error"
        entry.
    """
    skip_dirs = {".git", "node_modules", "venv", ".venv", "__pycache__", "dist", "build"}
    skip_suffixes = {".png", ".jpg", ".jpeg", ".gif", ".ico", ".pdf", ".zip", ".tar", ".gz"}

    try:
        regex = re.compile(pattern, re.IGNORECASE)
    except re.error:
        return [{"error": f"Invalid regex pattern: {pattern}"}]

    results = []
    for file_path in repo_path.rglob("*"):
        if len(results) >= max_results:
            break

        if not file_path.is_file():
            continue

        # Skip anything with an excluded name among its relative path parts.
        relative = file_path.relative_to(repo_path)
        if not skip_dirs.isdisjoint(relative.parts):
            continue

        # Lower-case the suffix so ".PNG" is skipped just like ".png".
        if file_path.suffix.lower() in skip_suffixes:
            continue

        try:
            # Decode explicitly as UTF-8 so results do not depend on the
            # platform's default encoding.
            content = file_path.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            # Binary or unreadable file: skip it rather than abort the search.
            continue

        for line_num, line in enumerate(content.split("\n"), 1):
            if regex.search(line):
                results.append({
                    "path": str(relative),
                    "line": line_num,
                    "content": line.strip()[:200],  # Truncate long lines
                })
                if len(results) >= max_results:
                    break

    return results
347
+
348
+
349
def get_file_tree(
    repo_path: Path,
    depth: int = 3,
    **kwargs,
) -> list[str]:
    """
    List the repository's file tree as flat paths.

    Thin wrapper that forwards to ``get_file_tree_flat``, passing ``depth``
    as its ``max_depth`` argument.

    Args:
        repo_path: Path to the repository
        depth: Maximum depth to traverse
        **kwargs: Ignored; accepted so extra tool arguments do not fail the call

    Returns:
        The result of ``get_file_tree_flat`` for this repository
    """
    tree = get_file_tree_flat(repo_path, max_depth=depth)
    return tree
365
+
366
+
367
def get_contributor_stats(
    repo_path: Path,
    **kwargs,
) -> list[dict[str, Any]]:
    """
    Summarise who committed to a repository.

    Commits are grouped by author email. The name stored for a contributor
    is the one on the first commit seen for that email during iteration.

    Args:
        repo_path: Path to the repository
        **kwargs: Ignored; accepted so extra tool arguments do not fail the call

    Returns:
        One entry per author email, ordered by commit count (highest first),
        with name, email, commit count and the ISO-formatted dates of the
        earliest and latest commit
    """
    per_author: dict[str, dict[str, Any]] = {}

    for commit in Repo(repo_path).iter_commits():
        author_email = commit.author.email
        entry = per_author.get(author_email)

        if entry is None:
            # First sighting of this email: seed both dates with this commit.
            seen_at = datetime.fromtimestamp(commit.committed_date)
            entry = {
                "name": commit.author.name,
                "email": author_email,
                "commits": 0,
                "first_commit": seen_at,
                "last_commit": seen_at,
            }
            per_author[author_email] = entry

        entry["commits"] += 1

        # Widen the contributor's activity range to include this commit.
        when = datetime.fromtimestamp(commit.committed_date)
        entry["first_commit"] = min(entry["first_commit"], when)
        entry["last_commit"] = max(entry["last_commit"], when)

    # Stable sort: contributors with equal counts keep their discovery order.
    ranked = sorted(per_author.values(), key=lambda item: item["commits"], reverse=True)

    return [
        {
            "name": item["name"],
            "email": item["email"],
            "commits": item["commits"],
            "first_commit": item["first_commit"].isoformat(),
            "last_commit": item["last_commit"].isoformat(),
        }
        for item in ranked
    ]
414
+
415
+
416
def get_languages(
    repo_path: Path,
    **kwargs,
) -> dict[str, float]:
    """
    Report the language breakdown of a repository.

    Thin wrapper that forwards to ``detect_languages``.

    Args:
        repo_path: Path to the repository
        **kwargs: Ignored; accepted so extra tool arguments do not fail the call

    Returns:
        Dictionary of language -> percentage, as produced by
        ``detect_languages``
    """
    breakdown = detect_languages(repo_path)
    return breakdown
430
+
431
+
432
def get_dependencies(
    repo_path: Path,
    **kwargs,
) -> dict[str, list[str]]:
    """
    Report the dependencies declared by a repository.

    Thin wrapper that forwards to ``detect_dependencies``.

    Args:
        repo_path: Path to the repository
        **kwargs: Ignored; accepted so extra tool arguments do not fail the call

    Returns:
        Dictionary of ecosystem -> list of dependencies, as produced by
        ``detect_dependencies``
    """
    found = detect_dependencies(repo_path)
    return found
446
+