ai-cr 2.0.0.dev1__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gito/constants.py CHANGED
@@ -1,9 +1,12 @@
1
- from pathlib import Path
2
-
3
- PROJECT_GITO_FOLDER = ".gito"
4
- PROJECT_CONFIG_FILE_NAME = "config.toml"
5
- PROJECT_CONFIG_FILE_PATH = Path(".gito") / PROJECT_CONFIG_FILE_NAME
6
- PROJECT_CONFIG_BUNDLED_DEFAULTS_FILE = Path(__file__).resolve().parent / PROJECT_CONFIG_FILE_NAME
7
- HOME_ENV_PATH = Path("~/.gito/.env").expanduser()
8
- JSON_REPORT_FILE_NAME = "code-review-report.json"
9
- EXECUTABLE = "gito"
1
from pathlib import Path

# Name of the per-project Gito folder (created in the repository root).
PROJECT_GITO_FOLDER = ".gito"
# File name of the project-level configuration file.
PROJECT_CONFIG_FILE_NAME = "config.toml"
# Location of the project configuration, relative to the repository root.
PROJECT_CONFIG_FILE_PATH = Path(".gito") / PROJECT_CONFIG_FILE_NAME
# Bundled default configuration shipped next to this module.
PROJECT_CONFIG_BUNDLED_DEFAULTS_FILE = Path(__file__).resolve().parent / PROJECT_CONFIG_FILE_NAME
# Per-user .env file with credentials / environment overrides.
HOME_ENV_PATH = Path("~/.gito/.env").expanduser()
# Output file names for the generated review reports.
JSON_REPORT_FILE_NAME = "code-review-report.json"
GITHUB_MD_REPORT_FILE_NAME = "code-review-report.md"
# Name of the command-line executable.
EXECUTABLE = "gito"
# Bot logo used when rendering reports as HTML/Markdown.
TEXT_ICON_URL = 'https://raw.githubusercontent.com/Nayjest/Gito/main/press-kit/logo/gito-bot-1_64top.png'  # noqa: E501
HTML_TEXT_ICON = f'<a href="https://github.com/Nayjest/Gito"><img src="{TEXT_ICON_URL}" align="left" width=64 height=50 /></a>'  # noqa: E501
gito/core.py CHANGED
@@ -1,221 +1,288 @@
1
- import fnmatch
2
- import logging
3
- from os import PathLike
4
- from typing import Iterable
5
- from pathlib import Path
6
-
7
- import microcore as mc
8
- from git import Repo
9
- from unidiff import PatchSet, PatchedFile
10
- from unidiff.constants import DEV_NULL
11
-
12
- from .project_config import ProjectConfig
13
- from .report_struct import Report
14
- from .constants import JSON_REPORT_FILE_NAME
15
-
16
-
17
- def review_subject_is_index(what):
18
- return not what or what == 'INDEX'
19
-
20
-
21
- def is_binary_file(repo: Repo, file_path: str) -> bool:
22
- """
23
- Check if a file is binary by attempting to read it as text.
24
- Returns True if the file is binary, False otherwise.
25
- """
26
- try:
27
- # Attempt to read the file content from the repository tree
28
- content = repo.tree()[file_path].data_stream.read()
29
- # Try decoding as UTF-8; if it fails, it's likely binary
30
- content.decode("utf-8")
31
- return False
32
- except KeyError:
33
- try:
34
- fs_path = Path(repo.working_tree_dir) / file_path
35
- fs_path.read_text(encoding='utf-8')
36
- return False
37
- except FileNotFoundError:
38
- logging.error(f"File {file_path} not found in the repository.")
39
- return True
40
- except UnicodeDecodeError:
41
- return True
42
- except Exception as e:
43
- logging.error(f"Error reading file {file_path}: {e}")
44
- return True
45
- except UnicodeDecodeError:
46
- return True
47
- except Exception as e:
48
- logging.warning(f"Error checking if file {file_path} is binary: {e}")
49
- return True # Conservatively treat errors as binary to avoid issues
50
-
51
-
52
- def get_diff(
53
- repo: Repo = None,
54
- what: str = None,
55
- against: str = None,
56
- use_merge_base: bool = True,
57
- ) -> PatchSet | list[PatchedFile]:
58
- repo = repo or Repo(".")
59
- if not against:
60
- # 'origin/main', 'origin/master', etc
61
- against = repo.remotes.origin.refs.HEAD.reference.name
62
- if review_subject_is_index(what):
63
- what = None # working copy
64
- if use_merge_base:
65
- if review_subject_is_index(what):
66
- try:
67
- current_ref = repo.active_branch.name
68
- except TypeError:
69
- # In detached HEAD state, use HEAD directly
70
- current_ref = "HEAD"
71
- logging.info(
72
- "Detected detached HEAD state, using HEAD as current reference"
73
- )
74
- else:
75
- current_ref = what
76
- merge_base = repo.merge_base(current_ref or repo.active_branch.name, against)[0]
77
- against = merge_base.hexsha
78
- logging.info(
79
- f"Using merge base: {mc.ui.cyan(merge_base.hexsha[:8])} ({merge_base.summary})"
80
- )
81
- logging.info(
82
- f"Making diff: {mc.ui.green(what or 'INDEX')} vs {mc.ui.yellow(against)}"
83
- )
84
- diff_content = repo.git.diff(against, what)
85
- diff = PatchSet.from_string(diff_content)
86
-
87
- # Filter out binary files
88
- non_binary_diff = PatchSet([])
89
- for patched_file in diff:
90
- # Check if the file is binary using the source or target file path
91
- file_path = (
92
- patched_file.target_file
93
- if patched_file.target_file != DEV_NULL
94
- else patched_file.source_file
95
- )
96
- if file_path == DEV_NULL:
97
- continue
98
- if is_binary_file(repo, file_path.lstrip("b/")):
99
- logging.info(f"Skipping binary file: {patched_file.path}")
100
- continue
101
- non_binary_diff.append(patched_file)
102
- return non_binary_diff
103
-
104
-
105
- def filter_diff(
106
- patch_set: PatchSet | Iterable[PatchedFile], filters: str | list[str]
107
- ) -> PatchSet | Iterable[PatchedFile]:
108
- """
109
- Filter the diff files by the given fnmatch filters.
110
- """
111
- assert isinstance(filters, (list, str))
112
- if not isinstance(filters, list):
113
- filters = [f.strip() for f in filters.split(",") if f.strip()]
114
- if not filters:
115
- return patch_set
116
- files = [
117
- file
118
- for file in patch_set
119
- if any(fnmatch.fnmatch(file.path, pattern) for pattern in filters)
120
- ]
121
- return files
122
-
123
-
124
- def file_lines(repo: Repo, file: str, max_tokens: int = None, use_local_files: bool = False) -> str:
125
- if use_local_files:
126
- file_path = Path(repo.working_tree_dir) / file
127
- try:
128
- text = file_path.read_text(encoding='utf-8')
129
- except (FileNotFoundError, UnicodeDecodeError) as e:
130
- logging.warning(f"Could not read file {file} from working directory: {e}")
131
- text = repo.tree()[file].data_stream.read().decode('utf-8')
132
- else:
133
- # Read from HEAD (committed version)
134
- text = repo.tree()[file].data_stream.read().decode('utf-8')
135
-
136
- lines = [f"{i + 1}: {line}\n" for i, line in enumerate(text.splitlines())]
137
- if max_tokens:
138
- lines, removed_qty = mc.tokenizing.fit_to_token_size(lines, max_tokens)
139
- if removed_qty:
140
- lines.append(
141
- f"(!) DISPLAYING ONLY FIRST {len(lines)} LINES DUE TO LARGE FILE SIZE\n"
142
- )
143
- return "".join(lines)
144
-
145
-
146
- def make_cr_summary(cfg: ProjectConfig, report: Report, diff):
147
- return (
148
- mc.prompt(
149
- cfg.summary_prompt,
150
- diff=mc.tokenizing.fit_to_token_size(diff, cfg.max_code_tokens)[0],
151
- issues=report.issues,
152
- **cfg.prompt_vars,
153
- ).to_llm()
154
- if cfg.summary_prompt
155
- else ""
156
- )
157
-
158
-
159
- async def review(
160
- repo: Repo = None,
161
- what: str = None,
162
- against: str = None,
163
- filters: str | list[str] = "",
164
- use_merge_base: bool = True,
165
- out_folder: str | PathLike | None = None,
166
- ):
167
- repo = repo or Repo(".")
168
- cfg = ProjectConfig.load_for_repo(repo)
169
- out_folder = Path(out_folder or repo.working_tree_dir)
170
- diff = get_diff(
171
- repo=repo, what=what, against=against, use_merge_base=use_merge_base
172
- )
173
- diff = filter_diff(diff, filters)
174
- if not diff:
175
- logging.error("Nothing to review")
176
- return
177
- lines = {
178
- file_diff.path: (
179
- file_lines(
180
- repo,
181
- file_diff.path,
182
- cfg.max_code_tokens
183
- - mc.tokenizing.num_tokens_from_string(str(file_diff)),
184
- use_local_files=review_subject_is_index(what)
185
- )
186
- if file_diff.target_file != DEV_NULL and not file_diff.is_added_file
187
- else ""
188
- )
189
- for file_diff in diff
190
- }
191
- responses = await mc.llm_parallel(
192
- [
193
- mc.prompt(
194
- cfg.prompt,
195
- input=file_diff,
196
- file_lines=lines[file_diff.path],
197
- **cfg.prompt_vars,
198
- )
199
- for file_diff in diff
200
- ],
201
- retries=cfg.retries,
202
- parse_json=True,
203
- )
204
- issues = {file.path: issues for file, issues in zip(diff, responses) if issues}
205
- for file, file_issues in issues.items():
206
- for issue in file_issues:
207
- for i in issue.get("affected_lines", []):
208
- if lines[file]:
209
- f_lines = [""] + lines[file].splitlines()
210
- i["affected_code"] = "\n".join(
211
- f_lines[i["start_line"]: i["end_line"] + 1]
212
- )
213
- exec(cfg.post_process, {"mc": mc, **locals()})
214
- out_folder.mkdir(parents=True, exist_ok=True)
215
- report = Report(issues=issues, number_of_processed_files=len(diff))
216
- report.summary = make_cr_summary(cfg, report, diff)
217
- report.save(file_name=out_folder / JSON_REPORT_FILE_NAME)
218
- report_text = report.render(cfg, Report.Format.MARKDOWN)
219
- text_report_path = out_folder / "code-review-report.md"
220
- text_report_path.write_text(report_text, encoding="utf-8")
221
- report.to_cli()
1
+ import fnmatch
2
+ import logging
3
+ from os import PathLike
4
+ from typing import Iterable
5
+ from pathlib import Path
6
+
7
+ import microcore as mc
8
+ from git import Repo
9
+ from gito.pipeline import Pipeline
10
+ from unidiff import PatchSet, PatchedFile
11
+ from unidiff.constants import DEV_NULL
12
+
13
+ from .project_config import ProjectConfig
14
+ from .report_struct import Report
15
+ from .constants import JSON_REPORT_FILE_NAME
16
+ from .utils import stream_to_cli
17
+
18
+
19
def review_subject_is_index(what):
    """True when the review target is the working copy / index (no explicit ref given)."""
    if not what:
        return True
    return what == 'INDEX'
21
+
22
+
23
def is_binary_file(repo: Repo, file_path: str) -> bool:
    """
    Heuristically detect whether a file is binary.

    Attempts to UTF-8 decode the committed blob first; when the path is not
    present in the tree (e.g. a newly added file), falls back to reading the
    working-tree copy. Returns True for binary or unreadable files,
    False for text files.
    """
    try:
        blob = repo.tree()[file_path].data_stream.read()
        blob.decode("utf-8")
    except KeyError:
        # Path not in the committed tree -- inspect the working copy instead.
        try:
            (Path(repo.working_tree_dir) / file_path).read_text(encoding='utf-8')
        except FileNotFoundError:
            logging.error(f"File {file_path} not found in the repository.")
            return True
        except UnicodeDecodeError:
            return True
        except Exception as e:
            logging.error(f"Error reading file {file_path}: {e}")
            return True
        return False
    except UnicodeDecodeError:
        return True
    except Exception as e:
        logging.warning(f"Error checking if file {file_path} is binary: {e}")
        return True  # Conservatively treat errors as binary to avoid issues
    return False
52
+
53
+
54
def get_diff(
    repo: Repo = None,
    what: str = None,
    against: str = None,
    use_merge_base: bool = True,
) -> PatchSet | list[PatchedFile]:
    """
    Build the patch set to review, with binary files filtered out.

    Args:
        repo: Repository to diff; defaults to the repo in the current directory.
        what: Ref to review; None / 'INDEX' means the working copy.
        against: Base ref; defaults to the remote HEAD (e.g. 'origin/main').
        use_merge_base: Diff against merge-base(what, against) instead of
            `against` itself (typical PR-style review).
    Returns:
        PatchSet containing only the non-binary changed files.
    """
    repo = repo or Repo(".")
    if not against:
        # 'origin/main', 'origin/master', etc
        against = repo.remotes.origin.refs.HEAD.reference.name
    if review_subject_is_index(what):
        what = None  # working copy
    if use_merge_base:
        if review_subject_is_index(what):
            try:
                current_ref = repo.active_branch.name
            except TypeError:
                # In detached HEAD state, use HEAD directly
                current_ref = "HEAD"
                logging.info(
                    "Detected detached HEAD state, using HEAD as current reference"
                )
        else:
            current_ref = what
        merge_base = repo.merge_base(current_ref or repo.active_branch.name, against)[0]
        against = merge_base.hexsha
        logging.info(
            f"Using merge base: {mc.ui.cyan(merge_base.hexsha[:8])} ({merge_base.summary})"
        )
    logging.info(
        f"Making diff: {mc.ui.green(what or 'INDEX')} vs {mc.ui.yellow(against)}"
    )
    diff_content = repo.git.diff(against, what)
    diff = PatchSet.from_string(diff_content)

    # Filter out binary files
    non_binary_diff = PatchSet([])
    for patched_file in diff:
        # Check if the file is binary using the source or target file path
        file_path = (
            patched_file.target_file
            if patched_file.target_file != DEV_NULL
            else patched_file.source_file
        )
        if file_path == DEV_NULL:
            continue
        # Strip the git diff prefix ("a/" for source, "b/" for target) exactly
        # once. BUGFIX: the previous `file_path.lstrip("b/")` stripped the
        # *character set* {'b', '/'}, corrupting paths such as
        # "b/build/x" -> "uild/x", and never removed the "a/" prefix of
        # deleted files, so they were misclassified as binary and skipped.
        repo_path = file_path.removeprefix("a/").removeprefix("b/")
        if is_binary_file(repo, repo_path):
            logging.info(f"Skipping binary file: {patched_file.path}")
            continue
        non_binary_diff.append(patched_file)
    return non_binary_diff
105
+
106
+
107
def filter_diff(
    patch_set: PatchSet | Iterable[PatchedFile], filters: str | list[str]
) -> PatchSet | Iterable[PatchedFile]:
    """
    Keep only the files whose path matches at least one fnmatch pattern.

    `filters` may be a list of patterns or a comma-separated string;
    an empty filter set returns the patch set unchanged.
    """
    assert isinstance(filters, (list, str))
    if isinstance(filters, str):
        filters = [chunk.strip() for chunk in filters.split(",") if chunk.strip()]
    if not filters:
        return patch_set

    def matches(path: str) -> bool:
        # A file is kept if any pattern accepts its path.
        return any(fnmatch.fnmatch(path, pattern) for pattern in filters)

    return [file for file in patch_set if matches(file.path)]
124
+
125
+
126
def file_lines(repo: Repo, file: str, max_tokens: int = None, use_local_files: bool = False) -> str:
    """
    Return the file content with 1-based line numbers prepended, optionally
    truncated to fit within `max_tokens`.

    When `use_local_files` is set, the working-tree copy is preferred and the
    committed (HEAD) version is used as a fallback; otherwise the committed
    version is read directly.
    """
    text = None
    if use_local_files:
        try:
            text = (Path(repo.working_tree_dir) / file).read_text(encoding='utf-8')
        except (FileNotFoundError, UnicodeDecodeError) as e:
            logging.warning(f"Could not read file {file} from working directory: {e}")
    if text is None:
        # Read from HEAD (committed version)
        text = repo.tree()[file].data_stream.read().decode('utf-8')

    numbered = [f"{idx + 1}: {line}\n" for idx, line in enumerate(text.splitlines())]
    if max_tokens:
        numbered, removed_qty = mc.tokenizing.fit_to_token_size(numbered, max_tokens)
        if removed_qty:
            numbered.append(
                f"(!) DISPLAYING ONLY FIRST {len(numbered)} LINES DUE TO LARGE FILE SIZE\n"
            )
    return "".join(numbered)
146
+
147
+
148
def make_cr_summary(config: ProjectConfig, report: Report, diff, **kwargs) -> str:
    """
    Render the code-review summary via the configured summary prompt.

    Extra keyword arguments are forwarded to the prompt as template variables.
    Returns an empty string when no summary prompt is configured.
    """
    if not config.summary_prompt:
        return ""
    # Trim the diff so the prompt stays within the configured token budget.
    fitted_diff = mc.tokenizing.fit_to_token_size(diff, config.max_code_tokens)[0]
    return mc.prompt(
        config.summary_prompt,
        diff=fitted_diff,
        issues=report.issues,
        **config.prompt_vars,
        **kwargs,
    ).to_llm()
160
+
161
+
162
class NoChangesInContextError(Exception):
    """
    Raised when the requested diff contains no changes to review
    or to answer questions about.
    """
166
+
167
+
168
def _prepare(
    repo: Repo = None,
    what: str = None,
    against: str = None,
    filters: str | list[str] = "",
    use_merge_base: bool = True,
):
    """
    Shared setup for review()/answer(): load the project config, build the
    filtered diff and collect numbered source lines per changed file.

    Returns:
        (repo, cfg, diff, lines) tuple, where `lines` maps file path to its
        numbered content ("" for added/removed files).
    Raises:
        NoChangesInContextError: when the filtered diff is empty.
    """
    repo = repo or Repo(".")
    cfg = ProjectConfig.load_for_repo(repo)
    diff = filter_diff(
        get_diff(repo=repo, what=what, against=against, use_merge_base=use_merge_base),
        filters,
    )
    if not diff:
        raise NoChangesInContextError()
    lines = {}
    for file_diff in diff:
        if file_diff.target_file == DEV_NULL or file_diff.is_added_file:
            # Deleted/added files need no pre-existing source context.
            lines[file_diff.path] = ""
            continue
        # Leave token budget for the file content after accounting for the diff itself.
        budget = cfg.max_code_tokens - mc.tokenizing.num_tokens_from_string(str(file_diff))
        lines[file_diff.path] = file_lines(
            repo,
            file_diff.path,
            budget,
            use_local_files=review_subject_is_index(what),
        )
    return repo, cfg, diff, lines
198
+
199
+
200
async def review(
    repo: Repo = None,
    what: str = None,
    against: str = None,
    filters: str | list[str] = "",
    use_merge_base: bool = True,
    out_folder: str | PathLike | None = None,
):
    """
    Run an LLM code review over the selected diff and write the reports.

    Collects per-file issues via parallel LLM calls, attaches the affected
    code snippets, runs the configured post-processing and pipeline steps,
    then writes the JSON and Markdown reports into ``out_folder`` (defaults
    to the repository root) and prints the result to the CLI.
    """
    try:
        repo, cfg, diff, lines = _prepare(
            repo=repo, what=what, against=against, filters=filters, use_merge_base=use_merge_base
        )
    except NoChangesInContextError:
        logging.error("No changes to review")
        return
    # One prompt per changed file, executed in parallel; responses parsed as JSON.
    responses = await mc.llm_parallel(
        [
            mc.prompt(
                cfg.prompt,
                input=file_diff,
                file_lines=lines[file_diff.path],
                **cfg.prompt_vars,
            )
            for file_diff in diff
        ],
        retries=cfg.retries,
        parse_json=True,
    )
    # Map file path -> issue list, dropping files with no reported issues.
    issues = {file.path: issues for file, issues in zip(diff, responses) if issues}
    for file, file_issues in issues.items():
        for issue in file_issues:
            for i in issue.get("affected_lines", []):
                if lines[file]:
                    # Dummy "" at index 0 makes the list 1-based, so the LLM's
                    # start/end line numbers can be used as direct indices.
                    f_lines = [""] + lines[file].splitlines()
                    i["affected_code"] = "\n".join(
                        f_lines[i["start_line"]: i["end_line"] + 1]
                    )
    # NOTE(review): executes config-supplied code with access to all locals;
    # assumes the project config is trusted -- confirm before accepting
    # configs from untrusted sources.
    exec(cfg.post_process, {"mc": mc, **locals()})
    out_folder = Path(out_folder or repo.working_tree_dir)
    out_folder.mkdir(parents=True, exist_ok=True)
    report = Report(issues=issues, number_of_processed_files=len(diff))
    # Shared context for the pipeline steps and the summary prompt; extra keys
    # (repo, pipeline_out) are absorbed by make_cr_summary's **kwargs.
    ctx = dict(
        report=report,
        config=cfg,
        diff=diff,
        repo=repo,
        pipeline_out={},
    )
    if cfg.pipeline_steps:
        pipe = Pipeline(
            ctx=ctx,
            steps=cfg.pipeline_steps
        )
        pipe.run()
    else:
        logging.info("No pipeline steps defined, skipping pipeline execution")

    report.summary = make_cr_summary(**ctx)
    report.save(file_name=out_folder / JSON_REPORT_FILE_NAME)
    report_text = report.render(cfg, Report.Format.MARKDOWN)
    # NOTE(review): duplicates GITHUB_MD_REPORT_FILE_NAME from .constants;
    # consider importing the constant instead of hard-coding the name.
    text_report_path = out_folder / "code-review-report.md"
    text_report_path.write_text(report_text, encoding="utf-8")
    report.to_cli()
263
+
264
+
265
def answer(
    question: str,
    repo: Repo = None,
    what: str = None,
    against: str = None,
    filters: str | list[str] = "",
    use_merge_base: bool = True,
) -> str | None:
    """
    Answer a free-form question about the current change set using the LLM.

    Args:
        question: The question to answer.
        repo/what/against/filters/use_merge_base: Diff selection, as in review().
    Returns:
        The LLM response text, or None when there are no changes in context.
    """
    try:
        repo, cfg, diff, lines = _prepare(
            repo=repo, what=what, against=against, filters=filters, use_merge_base=use_merge_base
        )
    except NoChangesInContextError:
        logging.error("No changes to review")
        return
    # BUGFIX: `callback` is an option of the LLM call, not a prompt template
    # variable. It was previously passed to mc.prompt(), so streamed output
    # never reached the CLI and `callback` leaked into the template context.
    response = mc.llm(
        mc.prompt(
            cfg.answer_prompt,
            question=question,
            diff=diff,
            all_file_lines=lines,
            **cfg.prompt_vars,
        ),
        callback=stream_to_cli,
    )
    return response
gito/gh_api.py ADDED
@@ -0,0 +1,35 @@
1
+ import logging
2
+
3
+ import requests
4
+
5
+
6
def post_gh_comment(
    gh_repository: str,  # e.g. "owner/repo"
    pr_or_issue_number: int,
    gh_token: str,
    text: str,
    timeout: float = 30.0,
) -> bool:
    """
    Post a comment to a GitHub pull request or issue.
    Arguments:
        gh_repository (str): The GitHub repository in the format "owner/repo".
        pr_or_issue_number (int): The pull request or issue number.
        gh_token (str): GitHub personal access token with permissions to post comments.
        text (str): The comment text to post.
        timeout (float): Max seconds to wait for the GitHub API response.
    Returns:
        True if the comment was posted successfully, False otherwise.
    """
    api_url = f"https://api.github.com/repos/{gh_repository}/issues/{pr_or_issue_number}/comments"
    headers = {
        "Authorization": f"token {gh_token}",
        "Accept": "application/vnd.github+json",
    }
    data = {"body": text}

    try:
        # FIX: an explicit timeout prevents CI jobs from hanging forever on a
        # stalled connection (requests has no default timeout).
        resp = requests.post(api_url, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as e:
        # Honor the documented bool contract instead of propagating network errors.
        logging.error(f"Failed to post comment: {e}")
        return False
    if 200 <= resp.status_code < 300:
        logging.info(f"Posted review comment to #{pr_or_issue_number} in {gh_repository}")
        return True
    else:
        logging.error(f"Failed to post comment: {resp.status_code} {resp.reason}\n{resp.text}")
        return False
gito/issue_trackers.py ADDED
@@ -0,0 +1,49 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ from dataclasses import dataclass, field
5
+
6
+ import git
7
+ from gito.utils import is_running_in_github_action
8
+
9
+
10
@dataclass
class IssueTrackerIssue:
    """Minimal representation of an issue fetched from an external issue tracker."""
    title: str = ""
    description: str = ""
    url: str = ""
15
+
16
+
17
+ def extract_issue_key(branch_name: str, min_len=2, max_len=10) -> str | None:
18
+ pattern = fr"\b[A-Z][A-Z0-9]{{{min_len - 1},{max_len - 1}}}-\d+\b"
19
+ match = re.search(pattern, branch_name)
20
+ return match.group(0) if match else None
21
+
22
+
23
def get_branch(repo: git.Repo):
    """
    Resolve the branch name under review.

    Inside a GitHub Action the branch comes from GITHUB_HEAD_REF (pull
    requests) or GITHUB_REF (pushes); otherwise the repository's active
    branch is used. Returns None when it cannot be determined
    (e.g. detached HEAD).
    """
    if is_running_in_github_action():
        if pr_branch := os.getenv('GITHUB_HEAD_REF'):
            return pr_branch
        ref = os.getenv('GITHUB_REF', '')
        if ref.startswith('refs/heads/'):
            return ref.replace('refs/heads/', '')
    try:
        return repo.active_branch.name
    except Exception as e:  # @todo: specify more precise exception
        logging.error("Could not determine the active branch name: %s", e)
        return None
38
+
39
+
40
def resolve_issue_key(repo: git.Repo):
    """
    Derive the issue-tracker key from the repository's current branch name.

    Returns the key (e.g. "ABC-123"), or None (with an error logged) when no
    branch or no key can be determined.
    """
    branch_name = get_branch(repo)
    if not branch_name:
        logging.error("No active branch found in the repository, cannot determine issue key.")
        return None
    issue_key = extract_issue_key(branch_name)
    if not issue_key:
        logging.error(f"No issue key found in branch name: {branch_name}")
        return None
    return issue_key