ai-cr 3.2.2__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {ai_cr-3.2.2.dist-info → ai_cr-3.3.0.dist-info}/LICENSE +21 -21
  2. {ai_cr-3.2.2.dist-info → ai_cr-3.3.0.dist-info}/METADATA +1 -1
  3. ai_cr-3.3.0.dist-info/RECORD +41 -0
  4. {ai_cr-3.2.2.dist-info → ai_cr-3.3.0.dist-info}/WHEEL +1 -1
  5. gito/__main__.py +4 -4
  6. gito/bootstrap.py +90 -90
  7. gito/cli.py +255 -244
  8. gito/cli_base.py +104 -94
  9. gito/commands/__init__.py +1 -1
  10. gito/commands/deploy.py +138 -138
  11. gito/commands/fix.py +160 -160
  12. gito/commands/gh_post_review_comment.py +111 -111
  13. gito/commands/gh_react_to_comment.py +217 -217
  14. gito/commands/linear_comment.py +53 -53
  15. gito/commands/repl.py +30 -30
  16. gito/commands/version.py +8 -8
  17. gito/config.toml +450 -448
  18. gito/constants.py +15 -14
  19. gito/context.py +19 -19
  20. gito/core.py +520 -508
  21. gito/env.py +8 -7
  22. gito/gh_api.py +116 -116
  23. gito/issue_trackers.py +50 -50
  24. gito/pipeline.py +83 -83
  25. gito/pipeline_steps/jira.py +62 -62
  26. gito/pipeline_steps/linear.py +85 -85
  27. gito/project_config.py +85 -85
  28. gito/report_struct.py +136 -136
  29. gito/tpl/answer.j2 +25 -25
  30. gito/tpl/github_workflows/components/env-vars.j2 +11 -11
  31. gito/tpl/github_workflows/components/installs.j2 +23 -23
  32. gito/tpl/github_workflows/gito-code-review.yml.j2 +32 -32
  33. gito/tpl/github_workflows/gito-react-to-comments.yml.j2 +70 -70
  34. gito/tpl/partial/aux_files.j2 +8 -8
  35. gito/tpl/questions/changes_summary.j2 +55 -55
  36. gito/tpl/questions/release_notes.j2 +26 -26
  37. gito/tpl/questions/test_cases.j2 +37 -37
  38. gito/utils.py +267 -267
  39. ai_cr-3.2.2.dist-info/RECORD +0 -41
  40. {ai_cr-3.2.2.dist-info → ai_cr-3.3.0.dist-info}/entry_points.txt +0 -0
gito/core.py CHANGED
@@ -1,508 +1,520 @@
1
- import os
2
- import fnmatch
3
- import logging
4
- from typing import Iterable
5
- from pathlib import Path
6
- from functools import partial
7
-
8
- import microcore as mc
9
- from gito.gh_api import gh_api
10
- from microcore import ui
11
- from git import Repo, Commit
12
- from git.exc import GitCommandError
13
- from unidiff import PatchSet, PatchedFile
14
- from unidiff.constants import DEV_NULL
15
-
16
- from .context import Context
17
- from .project_config import ProjectConfig
18
- from .report_struct import Report
19
- from .constants import JSON_REPORT_FILE_NAME
20
- from .utils import make_streaming_function
21
- from .pipeline import Pipeline
22
- from .env import Env
23
-
24
-
25
- def review_subject_is_index(what):
26
- return not what or what == 'INDEX'
27
-
28
-
29
- def is_binary_file(repo: Repo, file_path: str) -> bool:
30
- """
31
- Check if a file is binary by attempting to read it as text.
32
- Returns True if the file is binary, False otherwise.
33
- """
34
- try:
35
- # Attempt to read the file content from the repository tree
36
- content = repo.tree()[file_path].data_stream.read()
37
- # Try decoding as UTF-8; if it fails, it's likely binary
38
- content.decode("utf-8")
39
- return False
40
- except KeyError:
41
- try:
42
- fs_path = Path(repo.working_tree_dir) / file_path
43
- fs_path.read_text(encoding='utf-8')
44
- return False
45
- except FileNotFoundError:
46
- logging.error(f"File {file_path} not found in the repository.")
47
- return True
48
- except UnicodeDecodeError:
49
- return True
50
- except Exception as e:
51
- logging.error(f"Error reading file {file_path}: {e}")
52
- return True
53
- except UnicodeDecodeError:
54
- return True
55
- except Exception as e:
56
- logging.warning(f"Error checking if file {file_path} is binary: {e}")
57
- return True # Conservatively treat errors as binary to avoid issues
58
-
59
-
60
- def commit_in_branch(repo: Repo, commit: Commit, target_branch: str) -> bool:
61
- try:
62
- # exit code 0 if commit is ancestor of branch
63
- repo.git.merge_base('--is-ancestor', commit.hexsha, target_branch)
64
- return True
65
- except GitCommandError:
66
- pass
67
- return False
68
-
69
-
70
- def get_base_branch(repo: Repo, pr: int | str = None):
71
- if os.getenv('GITHUB_ACTIONS'):
72
-
73
- # triggered from PR
74
- if base_ref := os.getenv('GITHUB_BASE_REF'):
75
- logging.info(f"Using GITHUB_BASE_REF:{base_ref} as base branch")
76
- return f'origin/{base_ref}'
77
- logging.info("GITHUB_BASE_REF is not available...")
78
- if pr:
79
- api = gh_api(repo=repo)
80
- pr_obj = api.pulls.get(pr)
81
- logging.info(
82
- f"Using 'origin/{pr_obj.base.ref}' as base branch "
83
- f"(received via GH API for PR#{pr})"
84
- )
85
- return f'origin/{pr_obj.base.ref}'
86
-
87
- try:
88
- logging.info(
89
- "Trying to resolve base branch from repo.remotes.origin.refs.HEAD.reference.name..."
90
- )
91
- # 'origin/main', 'origin/master', etc
92
- # Stopped working in github actions since 07/2025
93
- return repo.remotes.origin.refs.HEAD.reference.name
94
- except AttributeError:
95
- try:
96
- logging.info(
97
- "Checking if repo has 'main' or 'master' branchs to use as --against branch..."
98
- )
99
- remote_refs = repo.remotes.origin.refs
100
- for branch_name in ['main', 'master']:
101
- if hasattr(remote_refs, branch_name):
102
- return f'origin/{branch_name}'
103
- except Exception:
104
- pass
105
-
106
- logging.error("Could not determine default branch from remote refs.")
107
- raise ValueError("No default branch found in the repository.")
108
-
109
-
110
- def get_diff(
111
- repo: Repo = None,
112
- what: str = None,
113
- against: str = None,
114
- use_merge_base: bool = True,
115
- pr: str | int = None
116
- ) -> PatchSet | list[PatchedFile]:
117
- repo = repo or Repo(".")
118
- if not against:
119
- # 'origin/main', 'origin/master', etc
120
- against = get_base_branch(repo, pr=pr)
121
- if review_subject_is_index(what):
122
- what = None # working copy
123
- if use_merge_base:
124
- try:
125
- if review_subject_is_index(what):
126
- try:
127
- current_ref = repo.active_branch.name
128
- except TypeError:
129
- # In detached HEAD state, use HEAD directly
130
- current_ref = "HEAD"
131
- logging.info(
132
- "Detected detached HEAD state, using HEAD as current reference"
133
- )
134
- else:
135
- current_ref = what
136
- merge_base = repo.merge_base(current_ref or repo.active_branch.name, against)[0]
137
- logging.info(
138
- f"Merge base({ui.green(current_ref)},{ui.yellow(against)})"
139
- f" --> {ui.cyan(merge_base.hexsha)}"
140
- )
141
- # if branch is already an ancestor of "against", merge_base == branch ⇒ it’s been merged
142
- if merge_base.hexsha == repo.commit(current_ref or repo.active_branch.name).hexsha:
143
- # @todo: check case: reviewing working copy index in main branch #103
144
- logging.info(
145
- f"Branch is already merged. ({ui.green(current_ref)} vs {ui.yellow(against)})"
146
- )
147
- merge_sha = repo.git.log(
148
- '--merges',
149
- '--ancestry-path',
150
- f'{current_ref}..{against}',
151
- '-n',
152
- '1',
153
- '--pretty=format:%H'
154
- ).strip()
155
- if merge_sha:
156
- logging.info(f"Merge commit is {ui.cyan(merge_sha)}")
157
- merge_commit = repo.commit(merge_sha)
158
-
159
- other_merge_parent = None
160
- for parent in merge_commit.parents:
161
- logging.info(f"Checking merge parent: {parent.hexsha[:8]}")
162
- if parent.hexsha == merge_base.hexsha:
163
- logging.info(f"merge parent is {ui.cyan(parent.hexsha[:8])}, skipping")
164
- continue
165
- if not commit_in_branch(repo, parent, against):
166
- logging.warning(f"merge parent is not in {against}, skipping")
167
- continue
168
- logging.info(f"Found other merge parent: {ui.cyan(parent.hexsha[:8])}")
169
- other_merge_parent = parent
170
- break
171
- if other_merge_parent:
172
- first_common_ancestor = repo.merge_base(other_merge_parent, merge_base)[0]
173
- # for gito remote (feature_branch vs origin/main)
174
- # the same merge base appears in first_common_ancestor again
175
- if first_common_ancestor.hexsha == merge_base.hexsha:
176
- if merge_base.parents:
177
- first_common_ancestor = repo.merge_base(
178
- other_merge_parent, merge_base.parents[0]
179
- )[0]
180
- else:
181
- logging.error(
182
- "merge_base has no parents, "
183
- "using merge_base as first_common_ancestor"
184
- )
185
- logging.info(
186
- f"{what} will be compared to "
187
- f"first common ancestor of {what} and {against}: "
188
- f"{ui.cyan(first_common_ancestor.hexsha[:8])}"
189
- )
190
- against = first_common_ancestor.hexsha
191
- else:
192
- logging.error(f"Can't find other merge parent for {merge_sha}")
193
- else:
194
- logging.warning(
195
- f"No merge‐commit found for {current_ref!r}→{against!r}; "
196
- "falling back to merge‐base diff"
197
- )
198
- else:
199
- # normal case: branch not yet merged
200
- against = merge_base.hexsha
201
- logging.info(
202
- f"Using merge base: {ui.cyan(merge_base.hexsha[:8])} ({merge_base.summary})"
203
- )
204
- except Exception as e:
205
- logging.error(f"Error finding merge base: {e}")
206
- logging.info(
207
- f"Making diff: {ui.green(what or 'INDEX')} vs {ui.yellow(against)}"
208
- )
209
- diff_content = repo.git.diff(against, what)
210
- diff = PatchSet.from_string(diff_content)
211
-
212
- # Filter out binary files
213
- non_binary_diff = PatchSet([])
214
- for patched_file in diff:
215
- # Check if the file is binary using the source or target file path
216
- file_path = (
217
- patched_file.target_file
218
- if patched_file.target_file != DEV_NULL
219
- else patched_file.source_file
220
- )
221
- if file_path == DEV_NULL:
222
- continue
223
- if is_binary_file(repo, file_path.removeprefix("b/")):
224
- logging.info(f"Skipping binary file: {patched_file.path}")
225
- continue
226
- non_binary_diff.append(patched_file)
227
- return non_binary_diff
228
-
229
-
230
- def filter_diff(
231
- patch_set: PatchSet | Iterable[PatchedFile], filters: str | list[str]
232
- ) -> PatchSet | Iterable[PatchedFile]:
233
- """
234
- Filter the diff files by the given fnmatch filters.
235
- """
236
- assert isinstance(filters, (list, str))
237
- if not isinstance(filters, list):
238
- filters = [f.strip() for f in filters.split(",") if f.strip()]
239
- if not filters:
240
- return patch_set
241
- files = [
242
- file
243
- for file in patch_set
244
- if any(fnmatch.fnmatch(file.path, pattern) for pattern in filters)
245
- ]
246
- return files
247
-
248
-
249
- def read_file(repo: Repo, file: str, use_local_files: bool = False) -> str:
250
- if use_local_files:
251
- file_path = Path(repo.working_tree_dir) / file
252
- try:
253
- return file_path.read_text(encoding='utf-8')
254
- except (FileNotFoundError, UnicodeDecodeError) as e:
255
- logging.warning(f"Could not read file {file} from working directory: {e}")
256
-
257
- # Read from HEAD (committed version)
258
- return repo.tree()[file].data_stream.read().decode('utf-8')
259
-
260
-
261
- def file_lines(repo: Repo, file: str, max_tokens: int = None, use_local_files: bool = False) -> str:
262
- text = read_file(repo=repo, file=file, use_local_files=use_local_files)
263
- lines = [f"{i + 1}: {line}\n" for i, line in enumerate(text.splitlines())]
264
- if max_tokens:
265
- lines, removed_qty = mc.tokenizing.fit_to_token_size(lines, max_tokens)
266
- if removed_qty:
267
- lines.append(
268
- f"(!) DISPLAYING ONLY FIRST {len(lines)} LINES DUE TO LARGE FILE SIZE\n"
269
- )
270
- return "".join(lines)
271
-
272
-
273
- def read_files(repo: Repo, files: list[str], max_tokens: int = None) -> dict:
274
- out = dict()
275
- total_tokens = 0
276
- for file in files:
277
- content = read_file(repo=repo, file=file, use_local_files=True)
278
- total_tokens += mc.tokenizing.num_tokens_from_string(file)
279
- total_tokens += mc.tokenizing.num_tokens_from_string(content)
280
- if max_tokens and total_tokens > max_tokens:
281
- logging.warning(
282
- f"Skipping file {file} due to exceeding max_tokens limit ({max_tokens})"
283
- )
284
- continue
285
- out[file] = content
286
- return out
287
-
288
-
289
- def make_cr_summary(ctx: Context, **kwargs) -> str:
290
- return (
291
- mc.prompt(
292
- ctx.config.summary_prompt,
293
- diff=mc.tokenizing.fit_to_token_size(ctx.diff, ctx.config.max_code_tokens)[0],
294
- issues=ctx.report.issues,
295
- pipeline_out=ctx.pipeline_out,
296
- env=Env,
297
- **ctx.config.prompt_vars,
298
- **kwargs,
299
- ).to_llm()
300
- if ctx.config.summary_prompt
301
- else ""
302
- )
303
-
304
-
305
- class NoChangesInContextError(Exception):
306
- """
307
- Exception raised when there are no changes in the context to review /answer questions.
308
- """
309
-
310
-
311
- def _prepare(
312
- repo: Repo = None,
313
- what: str = None,
314
- against: str = None,
315
- filters: str | list[str] = "",
316
- use_merge_base: bool = True,
317
- pr: str | int = None,
318
- ):
319
- repo = repo or Repo(".")
320
- cfg = ProjectConfig.load_for_repo(repo)
321
- diff = get_diff(
322
- repo=repo, what=what, against=against, use_merge_base=use_merge_base, pr=pr,
323
- )
324
- diff = filter_diff(diff, filters)
325
- if not diff:
326
- raise NoChangesInContextError()
327
- lines = {
328
- file_diff.path: (
329
- file_lines(
330
- repo,
331
- file_diff.path,
332
- cfg.max_code_tokens
333
- - mc.tokenizing.num_tokens_from_string(str(file_diff)),
334
- use_local_files=review_subject_is_index(what)
335
- )
336
- if file_diff.target_file != DEV_NULL and not file_diff.is_added_file
337
- else ""
338
- )
339
- for file_diff in diff
340
- }
341
- return repo, cfg, diff, lines
342
-
343
-
344
- def get_affected_code_block(repo: Repo, file: str, start_line: int, end_line: int) -> str | None:
345
- if not start_line or not end_line:
346
- return None
347
- try:
348
- if isinstance(start_line, str):
349
- start_line = int(start_line)
350
- if isinstance(end_line, str):
351
- end_line = int(end_line)
352
- lines = file_lines(repo, file, max_tokens=None, use_local_files=True)
353
- if lines:
354
- lines = [""] + lines.splitlines()
355
- return "\n".join(
356
- lines[start_line: end_line + 1]
357
- )
358
- except Exception as e:
359
- logging.error(
360
- f"Error getting affected code block for {file} from {start_line} to {end_line}: {e}"
361
- )
362
- return None
363
-
364
-
365
- def provide_affected_code_blocks(issues: dict, repo: Repo):
366
- for file, file_issues in issues.items():
367
- for issue in file_issues:
368
- for i in issue.get("affected_lines", []):
369
- file_name = i.get("file", issue.get("file", file))
370
- if block := get_affected_code_block(
371
- repo,
372
- file_name,
373
- i.get("start_line"),
374
- i.get("end_line")
375
- ):
376
- i["affected_code"] = block
377
-
378
-
379
- async def review(
380
- repo: Repo = None,
381
- what: str = None,
382
- against: str = None,
383
- filters: str | list[str] = "",
384
- use_merge_base: bool = True,
385
- out_folder: str | os.PathLike | None = None,
386
- pr: str | int = None
387
- ):
388
- try:
389
- repo, cfg, diff, lines = _prepare(
390
- repo=repo,
391
- what=what,
392
- against=against,
393
- filters=filters,
394
- use_merge_base=use_merge_base,
395
- pr=pr,
396
- )
397
- except NoChangesInContextError:
398
- logging.error("No changes to review")
399
- return
400
- responses = await mc.llm_parallel(
401
- [
402
- mc.prompt(
403
- cfg.prompt,
404
- input=file_diff,
405
- file_lines=lines[file_diff.path],
406
- **cfg.prompt_vars,
407
- )
408
- for file_diff in diff
409
- ],
410
- retries=cfg.retries,
411
- parse_json=True,
412
- )
413
- issues = {file.path: issues for file, issues in zip(diff, responses) if issues}
414
- provide_affected_code_blocks(issues, repo)
415
- exec(cfg.post_process, {"mc": mc, **locals()})
416
- out_folder = Path(out_folder or repo.working_tree_dir)
417
- out_folder.mkdir(parents=True, exist_ok=True)
418
- report = Report(issues=issues, number_of_processed_files=len(diff))
419
- ctx = Context(
420
- report=report,
421
- config=cfg,
422
- diff=diff,
423
- repo=repo,
424
- )
425
- if cfg.pipeline_steps:
426
- pipe = Pipeline(
427
- ctx=ctx,
428
- steps=cfg.pipeline_steps
429
- )
430
- pipe.run()
431
- else:
432
- logging.info("No pipeline steps defined, skipping pipeline execution")
433
-
434
- report.summary = make_cr_summary(ctx)
435
- report.save(file_name=out_folder / JSON_REPORT_FILE_NAME)
436
- report_text = report.render(cfg, Report.Format.MARKDOWN)
437
- text_report_path = out_folder / "code-review-report.md"
438
- text_report_path.write_text(report_text, encoding="utf-8")
439
- report.to_cli()
440
-
441
-
442
- def answer(
443
- question: str,
444
- repo: Repo = None,
445
- what: str = None,
446
- against: str = None,
447
- filters: str | list[str] = "",
448
- use_merge_base: bool = True,
449
- use_pipeline: bool = True,
450
- prompt_file: str = None,
451
- pr: str | int = None,
452
- aux_files: list[str] = None,
453
- ) -> str | None:
454
- try:
455
- repo, config, diff, lines = _prepare(
456
- repo=repo,
457
- what=what,
458
- against=against,
459
- filters=filters,
460
- use_merge_base=use_merge_base,
461
- pr=pr
462
- )
463
- except NoChangesInContextError:
464
- logging.error("No changes to review")
465
- return
466
-
467
- ctx = Context(
468
- repo=repo,
469
- diff=diff,
470
- config=config,
471
- report=Report()
472
- )
473
- if use_pipeline:
474
- pipe = Pipeline(
475
- ctx=ctx,
476
- steps=config.pipeline_steps
477
- )
478
- pipe.run()
479
-
480
- if aux_files or config.aux_files:
481
- aux_files_dict = read_files(
482
- repo,
483
- (aux_files or list()) + config.aux_files,
484
- config.max_code_tokens // 2
485
- )
486
- else:
487
- aux_files_dict = dict()
488
-
489
- if not prompt_file and config.answer_prompt.startswith("tpl:"):
490
- prompt_file = str(config.answer_prompt)[4:]
491
-
492
- if prompt_file:
493
- prompt_func = partial(mc.tpl, prompt_file)
494
- else:
495
- prompt_func = partial(mc.prompt, config.answer_prompt)
496
- prompt = prompt_func(
497
- question=question,
498
- diff=diff,
499
- all_file_lines=lines,
500
- pipeline_out=ctx.pipeline_out,
501
- aux_files=aux_files_dict,
502
- **config.prompt_vars,
503
- )
504
- response = mc.llm(
505
- prompt,
506
- callback=make_streaming_function() if Env.verbosity == 0 else None,
507
- )
508
- return response
1
+ import os
2
+ import fnmatch
3
+ import logging
4
+ from typing import Iterable
5
+ from pathlib import Path
6
+ from functools import partial
7
+
8
+ import microcore as mc
9
+ from gito.constants import REFS_VALUE_ALL
10
+ from gito.gh_api import gh_api
11
+ from microcore import ui
12
+ from git import Repo, Commit
13
+ from git.exc import GitCommandError
14
+ from unidiff import PatchSet, PatchedFile
15
+ from unidiff.constants import DEV_NULL
16
+
17
+ from .context import Context
18
+ from .project_config import ProjectConfig
19
+ from .report_struct import Report
20
+ from .constants import JSON_REPORT_FILE_NAME
21
+ from .utils import make_streaming_function
22
+ from .pipeline import Pipeline
23
+ from .env import Env
24
+
25
+
26
def review_subject_is_index(what):
    """Return True when the review target is the working copy / index (no ref given, or 'INDEX')."""
    if not what:
        return True
    return what == 'INDEX'
28
+
29
+
30
def is_binary_file(repo: Repo, file_path: str) -> bool:
    """
    Check if a file is binary by attempting to read it as text.

    Tries the committed version from the repo tree first; if the path is not
    in the tree (e.g. a new, uncommitted file), falls back to reading the
    working-tree copy. Returns True if the file is binary, False otherwise.
    Any error is treated conservatively as "binary" so the file is skipped.
    """
    try:
        # Attempt to read the file content from the repository tree
        content = repo.tree()[file_path].data_stream.read()
        # Try decoding as UTF-8; if it fails, it's likely binary
        content.decode("utf-8")
        return False
    except KeyError:
        # Path not in the repo tree -> try the working copy on disk instead
        try:
            fs_path = Path(repo.working_tree_dir) / file_path
            fs_path.read_text(encoding='utf-8')
            return False
        except FileNotFoundError:
            logging.error(f"File {file_path} not found in the repository.")
            return True
        except UnicodeDecodeError:
            # Working-copy content is not valid UTF-8 -> treat as binary
            return True
        except Exception as e:
            logging.error(f"Error reading file {file_path}: {e}")
            return True
    except UnicodeDecodeError:
        # Tree content is not valid UTF-8 -> treat as binary
        return True
    except Exception as e:
        logging.warning(f"Error checking if file {file_path} is binary: {e}")
        return True  # Conservatively treat errors as binary to avoid issues
59
+
60
+
61
def commit_in_branch(repo: Repo, commit: Commit, target_branch: str) -> bool:
    """Tell whether `commit` is already contained in (an ancestor of) `target_branch`."""
    try:
        # `git merge-base --is-ancestor` exits with code 0 when the commit is
        # an ancestor; GitPython raises GitCommandError on a non-zero exit.
        repo.git.merge_base('--is-ancestor', commit.hexsha, target_branch)
    except GitCommandError:
        return False
    return True
69
+
70
+
71
def get_base_branch(repo: Repo, pr: int | str = None):
    """
    Resolve the base branch to diff against.

    Resolution order:
      1. In GitHub Actions: GITHUB_BASE_REF (set for PR-triggered workflows),
         then the PR's base ref fetched via the GitHub API when `pr` is given.
      2. The remote HEAD reference name ('origin/<default-branch>').
      3. An existing 'main' or 'master' branch on the 'origin' remote.

    Args:
        repo: repository whose base branch is being resolved.
        pr: optional pull request number for the GitHub API fallback.

    Raises:
        ValueError: if no default branch could be determined.
    """
    if os.getenv('GITHUB_ACTIONS'):

        # triggered from PR
        if base_ref := os.getenv('GITHUB_BASE_REF'):
            logging.info(f"Using GITHUB_BASE_REF:{base_ref} as base branch")
            return f'origin/{base_ref}'
        logging.info("GITHUB_BASE_REF is not available...")
        if pr:
            api = gh_api(repo=repo)
            pr_obj = api.pulls.get(pr)
            logging.info(
                f"Using 'origin/{pr_obj.base.ref}' as base branch "
                f"(received via GH API for PR#{pr})"
            )
            return f'origin/{pr_obj.base.ref}'

    try:
        logging.info(
            "Trying to resolve base branch from repo.remotes.origin.refs.HEAD.reference.name..."
        )
        # 'origin/main', 'origin/master', etc
        # Stopped working in github actions since 07/2025
        return repo.remotes.origin.refs.HEAD.reference.name
    except AttributeError:
        try:
            # fixed log-message typo: "branchs" -> "branches"
            logging.info(
                "Checking if repo has 'main' or 'master' branches to use as --against branch..."
            )
            remote_refs = repo.remotes.origin.refs
            for branch_name in ['main', 'master']:
                if hasattr(remote_refs, branch_name):
                    return f'origin/{branch_name}'
        except Exception:
            pass

    logging.error("Could not determine default branch from remote refs.")
    raise ValueError("No default branch found in the repository.")
109
+
110
+
111
def get_diff(
    repo: Repo = None,
    what: str = None,
    against: str = None,
    use_merge_base: bool = True,
    pr: str | int = None
) -> PatchSet | list[PatchedFile]:
    """
    Build the PatchSet to review, with binary files filtered out.

    Args:
        repo: repository to diff; defaults to the current directory.
        what: ref/commit being reviewed. None/'INDEX' means the working copy;
            REFS_VALUE_ALL means "review the whole codebase" (base branch
            diffed against git's empty tree).
        against: base ref; resolved via get_base_branch() when omitted.
        use_merge_base: when True, diff against the merge base (or, for an
            already-merged branch, the other merge parent's common ancestor)
            instead of the raw base ref.
        pr: PR number, used by get_base_branch() for the GitHub API fallback.

    Returns:
        PatchSet containing only non-binary changed files.
    """
    repo = repo or Repo(".")
    if what == REFS_VALUE_ALL:
        # Whole-codebase mode: diff the base branch against the empty tree,
        # so every tracked file appears as "added".
        what = get_base_branch(repo, pr=pr)
        # Git's canonical empty tree hash
        against = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
        use_merge_base = False
    if not against:
        # 'origin/main', 'origin/master', etc
        against = get_base_branch(repo, pr=pr)
    if review_subject_is_index(what):
        what = None  # working copy
    if use_merge_base:
        try:
            if review_subject_is_index(what):
                try:
                    current_ref = repo.active_branch.name
                except TypeError:
                    # In detached HEAD state, use HEAD directly
                    current_ref = "HEAD"
                    logging.info(
                        "Detected detached HEAD state, using HEAD as current reference"
                    )
            else:
                current_ref = what
            merge_base = repo.merge_base(current_ref or repo.active_branch.name, against)[0]
            logging.info(
                f"Merge base({ui.green(current_ref)},{ui.yellow(against)})"
                f" --> {ui.cyan(merge_base.hexsha)}"
            )
            # if branch is already an ancestor of "against", merge_base == branch ⇒ it’s been merged
            if merge_base.hexsha == repo.commit(current_ref or repo.active_branch.name).hexsha:
                # @todo: check case: reviewing working copy index in main branch #103
                logging.info(
                    f"Branch is already merged. ({ui.green(current_ref)} vs {ui.yellow(against)})"
                )
                # Find the merge commit that brought current_ref into `against`
                merge_sha = repo.git.log(
                    '--merges',
                    '--ancestry-path',
                    f'{current_ref}..{against}',
                    '-n',
                    '1',
                    '--pretty=format:%H'
                ).strip()
                if merge_sha:
                    logging.info(f"Merge commit is {ui.cyan(merge_sha)}")
                    merge_commit = repo.commit(merge_sha)

                    # Pick the merge parent that is NOT our merge base but IS
                    # contained in `against` — i.e. the "other side" of the merge.
                    other_merge_parent = None
                    for parent in merge_commit.parents:
                        logging.info(f"Checking merge parent: {parent.hexsha[:8]}")
                        if parent.hexsha == merge_base.hexsha:
                            logging.info(f"merge parent is {ui.cyan(parent.hexsha[:8])}, skipping")
                            continue
                        if not commit_in_branch(repo, parent, against):
                            logging.warning(f"merge parent is not in {against}, skipping")
                            continue
                        logging.info(f"Found other merge parent: {ui.cyan(parent.hexsha[:8])}")
                        other_merge_parent = parent
                        break
                    if other_merge_parent:
                        first_common_ancestor = repo.merge_base(other_merge_parent, merge_base)[0]
                        # for gito remote (feature_branch vs origin/main)
                        # the same merge base appears in first_common_ancestor again
                        if first_common_ancestor.hexsha == merge_base.hexsha:
                            if merge_base.parents:
                                first_common_ancestor = repo.merge_base(
                                    other_merge_parent, merge_base.parents[0]
                                )[0]
                            else:
                                logging.error(
                                    "merge_base has no parents, "
                                    "using merge_base as first_common_ancestor"
                                )
                        logging.info(
                            f"{what} will be compared to "
                            f"first common ancestor of {what} and {against}: "
                            f"{ui.cyan(first_common_ancestor.hexsha[:8])}"
                        )
                        against = first_common_ancestor.hexsha
                    else:
                        logging.error(f"Can't find other merge parent for {merge_sha}")
                else:
                    logging.warning(
                        f"No merge‐commit found for {current_ref!r}→{against!r}; "
                        "falling back to mergebase diff"
                    )
            else:
                # normal case: branch not yet merged
                against = merge_base.hexsha
                logging.info(
                    f"Using merge base: {ui.cyan(merge_base.hexsha[:8])} ({merge_base.summary})"
                )
        except Exception as e:
            # Best-effort: on any failure fall back to diffing against the raw base ref
            logging.error(f"Error finding merge base: {e}")
    logging.info(
        f"Making diff: {ui.green(what or 'INDEX')} vs {ui.yellow(against)}"
    )
    diff_content = repo.git.diff(against, what)
    diff = PatchSet.from_string(diff_content)

    # Filter out binary files
    non_binary_diff = PatchSet([])
    for patched_file in diff:
        # Check if the file is binary using the source or target file path
        file_path = (
            patched_file.target_file
            if patched_file.target_file != DEV_NULL
            else patched_file.source_file
        )
        if file_path == DEV_NULL:
            continue
        # NOTE(review): only the "b/" prefix is stripped; a deleted file's path
        # keeps its "a/" prefix, fails the tree lookup in is_binary_file, and is
        # conservatively skipped as binary — confirm this is intended.
        if is_binary_file(repo, file_path.removeprefix("b/")):
            logging.info(f"Skipping binary file: {patched_file.path}")
            continue
        non_binary_diff.append(patched_file)
    return non_binary_diff
234
+
235
+
236
def filter_diff(
    patch_set: PatchSet | Iterable[PatchedFile], filters: str | list[str]
) -> PatchSet | Iterable[PatchedFile]:
    """
    Filter the diff files by the given fnmatch filters.

    `filters` may be a list of patterns or a comma-separated string; an empty
    filter set returns the patch set unchanged.
    """
    assert isinstance(filters, (list, str))
    if isinstance(filters, str):
        filters = [part.strip() for part in filters.split(",") if part.strip()]
    if not filters:
        return patch_set

    def matches(entry):
        # keep a file when any pattern matches its path
        return any(fnmatch.fnmatch(entry.path, pattern) for pattern in filters)

    return [entry for entry in patch_set if matches(entry)]
253
+
254
+
255
def read_file(repo: Repo, file: str, use_local_files: bool = False) -> str:
    """
    Return a file's text content.

    With use_local_files=True the working-tree copy is tried first; if it is
    missing or not valid UTF-8, falls back to the committed version in HEAD.
    """
    if use_local_files:
        local_path = Path(repo.working_tree_dir) / file
        try:
            return local_path.read_text(encoding='utf-8')
        except (FileNotFoundError, UnicodeDecodeError) as e:
            logging.warning(f"Could not read file {file} from working directory: {e}")

    # Read from HEAD (committed version)
    return repo.tree()[file].data_stream.read().decode('utf-8')
265
+
266
+
267
def file_lines(repo: Repo, file: str, max_tokens: int = None, use_local_files: bool = False) -> str:
    """
    Render a file as 1-based numbered lines ("<n>: <line>").

    When max_tokens is set, the output is trimmed to fit the budget and a
    trailing notice reports how many lines are actually shown.
    """
    text = read_file(repo=repo, file=file, use_local_files=use_local_files)
    numbered = [
        f"{line_no}: {line}\n"
        for line_no, line in enumerate(text.splitlines(), start=1)
    ]
    if max_tokens:
        numbered, removed_qty = mc.tokenizing.fit_to_token_size(numbered, max_tokens)
        if removed_qty:
            numbered.append(
                f"(!) DISPLAYING ONLY FIRST {len(numbered)} LINES DUE TO LARGE FILE SIZE\n"
            )
    return "".join(numbered)
277
+
278
+
279
def read_files(repo: Repo, files: list[str], max_tokens: int = None) -> dict:
    """
    Read multiple files into a {path: content} dict, honoring a token budget.

    Files are processed in order; a file whose tokens (path + content) would
    push the cumulative total over `max_tokens` is skipped with a warning.
    Fix: a skipped file's tokens no longer count against the budget, so later,
    smaller files are not starved by one oversized file.

    Args:
        repo: repository to read from (working-tree copies preferred).
        files: file paths to read.
        max_tokens: optional cumulative token budget; None disables the limit.
    """
    out = dict()
    total_tokens = 0
    for file in files:
        content = read_file(repo=repo, file=file, use_local_files=True)
        file_tokens = (
            mc.tokenizing.num_tokens_from_string(file)
            + mc.tokenizing.num_tokens_from_string(content)
        )
        if max_tokens and total_tokens + file_tokens > max_tokens:
            logging.warning(
                f"Skipping file {file} due to exceeding max_tokens limit ({max_tokens})"
            )
            continue
        total_tokens += file_tokens
        out[file] = content
    return out
293
+
294
+
295
def make_cr_summary(ctx: Context, **kwargs) -> str:
    """
    Render the review summary via the configured summary prompt.

    Returns an empty string when no summary prompt is configured.
    """
    if not ctx.config.summary_prompt:
        return ""
    trimmed_diff = mc.tokenizing.fit_to_token_size(ctx.diff, ctx.config.max_code_tokens)[0]
    return mc.prompt(
        ctx.config.summary_prompt,
        diff=trimmed_diff,
        issues=ctx.report.issues,
        pipeline_out=ctx.pipeline_out,
        env=Env,
        **ctx.config.prompt_vars,
        **kwargs,
    ).to_llm()
309
+
310
+
311
class NoChangesInContextError(Exception):
    """
    Exception raised when there are no changes in the context to review / answer questions.
    """
315
+
316
+
317
def _prepare(
    repo: Repo = None,
    what: str = None,
    against: str = None,
    filters: str | list[str] = "",
    use_merge_base: bool = True,
    pr: str | int = None,
):
    """
    Shared setup for review()/answer(): load the project config, build and
    filter the diff, and collect numbered source lines per changed file.

    Returns:
        Tuple (repo, cfg, diff, lines) where `lines` maps file path to the
        numbered file content ("" for added/deleted files outside
        REFS_VALUE_ALL mode).

    Raises:
        NoChangesInContextError: when the filtered diff is empty.
    """
    repo = repo or Repo(".")
    cfg = ProjectConfig.load_for_repo(repo)
    diff = get_diff(
        repo=repo, what=what, against=against, use_merge_base=use_merge_base, pr=pr,
    )
    diff = filter_diff(diff, filters)
    if not diff:
        raise NoChangesInContextError()
    lines = {
        file_diff.path: (
            file_lines(
                repo,
                file_diff.path,
                # leave room in the token budget for the diff text itself
                cfg.max_code_tokens
                - mc.tokenizing.num_tokens_from_string(str(file_diff)),
                # working-copy reads for index reviews and whole-codebase mode
                use_local_files=review_subject_is_index(what) or what == REFS_VALUE_ALL
            )
            # full file content for modified files, and for every file in
            # whole-codebase (REFS_VALUE_ALL) mode; "" for added/deleted files
            if (
                file_diff.target_file != DEV_NULL and not file_diff.is_added_file
            ) or what == REFS_VALUE_ALL
            else ""
        )
        for file_diff in diff
    }
    return repo, cfg, diff, lines
350
+
351
+
352
def get_affected_code_block(repo: Repo, file: str, start_line: int, end_line: int) -> str | None:
    """
    Extract the numbered source lines [start_line..end_line] (1-based,
    inclusive) from `file`, or None when the range is missing or unreadable.
    """
    if not start_line or not end_line:
        return None
    try:
        start = int(start_line) if isinstance(start_line, str) else start_line
        end = int(end_line) if isinstance(end_line, str) else end_line
        numbered = file_lines(repo, file, max_tokens=None, use_local_files=True)
        if numbered:
            # pad with a dummy first entry so indices match 1-based line numbers
            padded = [""] + numbered.splitlines()
            return "\n".join(padded[start: end + 1])
    except Exception as e:
        logging.error(
            f"Error getting affected code block for {file} from {start_line} to {end_line}: {e}"
        )
    return None
371
+
372
+
373
def provide_affected_code_blocks(issues: dict, repo: Repo):
    """
    Enrich issues in place: for each "affected_lines" range, attach the
    corresponding source snippet under its "affected_code" key when resolvable.
    """
    for file, file_issues in issues.items():
        for issue in file_issues:
            for line_range in issue.get("affected_lines", []):
                # a range may name its own file; fall back to the issue's file,
                # then to the dict key
                target_file = line_range.get("file", issue.get("file", file))
                snippet = get_affected_code_block(
                    repo,
                    target_file,
                    line_range.get("start_line"),
                    line_range.get("end_line"),
                )
                if snippet:
                    line_range["affected_code"] = snippet
385
+
386
+
387
async def review(
    repo: Repo = None,
    what: str = None,
    against: str = None,
    filters: str | list[str] = "",
    use_merge_base: bool = True,
    out_folder: str | os.PathLike | None = None,
    pr: str | int = None
):
    """
    Run the LLM code review and write the JSON and markdown reports.

    Reviews `what` against `against` (see get_diff for ref semantics). With
    what == REFS_VALUE_ALL, full numbered file contents are reviewed instead
    of per-file diffs. Writes JSON_REPORT_FILE_NAME and
    code-review-report.md into `out_folder` (defaults to the repo working
    tree) and prints the report to the CLI. Logs and returns early when there
    is nothing to review.
    """
    reviewing_all = what == REFS_VALUE_ALL
    try:
        repo, cfg, diff, lines = _prepare(
            repo=repo,
            what=what,
            against=against,
            filters=filters,
            use_merge_base=use_merge_base,
            pr=pr,
        )
    except NoChangesInContextError:
        logging.error("No changes to review")
        return
    # One LLM request per changed file, issued in parallel; responses parsed as JSON
    responses = await mc.llm_parallel(
        [
            mc.prompt(
                cfg.prompt,
                # whole-codebase mode feeds the full numbered file content
                # as the input instead of a diff
                input=(
                    file_diff if not reviewing_all
                    else str(file_diff.path) + ":\n" + lines[file_diff.path]
                ),
                file_lines=lines[file_diff.path] if not reviewing_all else None,
                **cfg.prompt_vars,
            )
            for file_diff in diff
        ],
        retries=cfg.retries,
        parse_json=True,
    )
    issues = {file.path: issues for file, issues in zip(diff, responses) if issues}
    provide_affected_code_blocks(issues, repo)
    # Runs project-configured post-processing code with the local scope exposed;
    # cfg.post_process is assumed to come from trusted project configuration.
    exec(cfg.post_process, {"mc": mc, **locals()})
    out_folder = Path(out_folder or repo.working_tree_dir)
    out_folder.mkdir(parents=True, exist_ok=True)
    report = Report(issues=issues, number_of_processed_files=len(diff))
    ctx = Context(
        report=report,
        config=cfg,
        diff=diff,
        repo=repo,
    )
    if cfg.pipeline_steps:
        pipe = Pipeline(
            ctx=ctx,
            steps=cfg.pipeline_steps
        )
        pipe.run()
    else:
        logging.info("No pipeline steps defined, skipping pipeline execution")

    report.summary = make_cr_summary(ctx)
    report.save(file_name=out_folder / JSON_REPORT_FILE_NAME)
    report_text = report.render(cfg, Report.Format.MARKDOWN)
    text_report_path = out_folder / "code-review-report.md"
    text_report_path.write_text(report_text, encoding="utf-8")
    report.to_cli()
452
+
453
+
454
def answer(
    question: str,
    repo: Repo = None,
    what: str = None,
    against: str = None,
    filters: str | list[str] = "",
    use_merge_base: bool = True,
    use_pipeline: bool = True,
    prompt_file: str = None,
    pr: str | int = None,
    aux_files: list[str] = None,
) -> str | None:
    """
    Answer a free-form question about the change set using the LLM.

    Args:
        question: the user's question.
        repo, what, against, filters, use_merge_base, pr: diff selection,
            with the same semantics as in review().
        use_pipeline: run the configured pipeline steps before answering.
        prompt_file: template file overriding the configured answer prompt
            (a "tpl:" prefix in config.answer_prompt selects a template too).
        aux_files: extra context files, merged with config.aux_files and
            capped at half of the configured code-token budget.

    Returns:
        The LLM response, or None when there are no changes to discuss.
    """
    try:
        repo, config, diff, lines = _prepare(
            repo=repo,
            what=what,
            against=against,
            filters=filters,
            use_merge_base=use_merge_base,
            pr=pr
        )
    except NoChangesInContextError:
        logging.error("No changes to review")
        return

    ctx = Context(
        repo=repo,
        diff=diff,
        config=config,
        report=Report()
    )
    if use_pipeline:
        pipe = Pipeline(
            ctx=ctx,
            steps=config.pipeline_steps
        )
        pipe.run()

    if aux_files or config.aux_files:
        aux_files_dict = read_files(
            repo,
            (aux_files or list()) + config.aux_files,
            config.max_code_tokens // 2
        )
    else:
        aux_files_dict = dict()

    # "tpl:<path>" in the config selects a template file instead of an inline prompt
    if not prompt_file and config.answer_prompt.startswith("tpl:"):
        prompt_file = str(config.answer_prompt)[4:]

    if prompt_file:
        prompt_func = partial(mc.tpl, prompt_file)
    else:
        prompt_func = partial(mc.prompt, config.answer_prompt)
    prompt = prompt_func(
        question=question,
        diff=diff,
        all_file_lines=lines,
        pipeline_out=ctx.pipeline_out,
        aux_files=aux_files_dict,
        **config.prompt_vars,
    )
    response = mc.llm(
        prompt,
        # stream tokens to stdout only when verbose logging is off
        callback=make_streaming_function() if Env.verbosity == 0 else None,
    )
    return response