ai-cr 2.0.0.dev1__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gito/constants.py CHANGED
@@ -1,9 +1,12 @@
1
- from pathlib import Path
2
-
3
- PROJECT_GITO_FOLDER = ".gito"
4
- PROJECT_CONFIG_FILE_NAME = "config.toml"
5
- PROJECT_CONFIG_FILE_PATH = Path(".gito") / PROJECT_CONFIG_FILE_NAME
6
- PROJECT_CONFIG_BUNDLED_DEFAULTS_FILE = Path(__file__).resolve().parent / PROJECT_CONFIG_FILE_NAME
7
- HOME_ENV_PATH = Path("~/.gito/.env").expanduser()
8
- JSON_REPORT_FILE_NAME = "code-review-report.json"
9
- EXECUTABLE = "gito"
1
from pathlib import Path

# Name of the per-project Gito folder (created in the repository root).
PROJECT_GITO_FOLDER = ".gito"
# File name of the project-level configuration file.
PROJECT_CONFIG_FILE_NAME = "config.toml"
# Location of the project configuration, relative to the repository root.
PROJECT_CONFIG_FILE_PATH = Path(".gito") / PROJECT_CONFIG_FILE_NAME
# Bundled default configuration shipped next to this module.
PROJECT_CONFIG_BUNDLED_DEFAULTS_FILE = Path(__file__).resolve().parent / PROJECT_CONFIG_FILE_NAME
# Per-user .env file with credentials / environment overrides.
HOME_ENV_PATH = Path("~/.gito/.env").expanduser()
# Output file names for the generated review reports.
JSON_REPORT_FILE_NAME = "code-review-report.json"
GITHUB_MD_REPORT_FILE_NAME = "code-review-report.md"
# Name of the command-line executable.
EXECUTABLE = "gito"
# Bot logo used when rendering reports as HTML/Markdown.
TEXT_ICON_URL = 'https://raw.githubusercontent.com/Nayjest/Gito/main/press-kit/logo/gito-bot-1_64top.png'  # noqa: E501
HTML_TEXT_ICON = f'<a href="https://github.com/Nayjest/Gito"><img src="{TEXT_ICON_URL}" align="left" width=64 height=50 /></a>'  # noqa: E501
gito/core.py CHANGED
@@ -1,221 +1,288 @@
1
- import fnmatch
2
- import logging
3
- from os import PathLike
4
- from typing import Iterable
5
- from pathlib import Path
6
-
7
- import microcore as mc
8
- from git import Repo
9
- from unidiff import PatchSet, PatchedFile
10
- from unidiff.constants import DEV_NULL
11
-
12
- from .project_config import ProjectConfig
13
- from .report_struct import Report
14
- from .constants import JSON_REPORT_FILE_NAME
15
-
16
-
17
- def review_subject_is_index(what):
18
- return not what or what == 'INDEX'
19
-
20
-
21
- def is_binary_file(repo: Repo, file_path: str) -> bool:
22
- """
23
- Check if a file is binary by attempting to read it as text.
24
- Returns True if the file is binary, False otherwise.
25
- """
26
- try:
27
- # Attempt to read the file content from the repository tree
28
- content = repo.tree()[file_path].data_stream.read()
29
- # Try decoding as UTF-8; if it fails, it's likely binary
30
- content.decode("utf-8")
31
- return False
32
- except KeyError:
33
- try:
34
- fs_path = Path(repo.working_tree_dir) / file_path
35
- fs_path.read_text(encoding='utf-8')
36
- return False
37
- except FileNotFoundError:
38
- logging.error(f"File {file_path} not found in the repository.")
39
- return True
40
- except UnicodeDecodeError:
41
- return True
42
- except Exception as e:
43
- logging.error(f"Error reading file {file_path}: {e}")
44
- return True
45
- except UnicodeDecodeError:
46
- return True
47
- except Exception as e:
48
- logging.warning(f"Error checking if file {file_path} is binary: {e}")
49
- return True # Conservatively treat errors as binary to avoid issues
50
-
51
-
52
- def get_diff(
53
- repo: Repo = None,
54
- what: str = None,
55
- against: str = None,
56
- use_merge_base: bool = True,
57
- ) -> PatchSet | list[PatchedFile]:
58
- repo = repo or Repo(".")
59
- if not against:
60
- # 'origin/main', 'origin/master', etc
61
- against = repo.remotes.origin.refs.HEAD.reference.name
62
- if review_subject_is_index(what):
63
- what = None # working copy
64
- if use_merge_base:
65
- if review_subject_is_index(what):
66
- try:
67
- current_ref = repo.active_branch.name
68
- except TypeError:
69
- # In detached HEAD state, use HEAD directly
70
- current_ref = "HEAD"
71
- logging.info(
72
- "Detected detached HEAD state, using HEAD as current reference"
73
- )
74
- else:
75
- current_ref = what
76
- merge_base = repo.merge_base(current_ref or repo.active_branch.name, against)[0]
77
- against = merge_base.hexsha
78
- logging.info(
79
- f"Using merge base: {mc.ui.cyan(merge_base.hexsha[:8])} ({merge_base.summary})"
80
- )
81
- logging.info(
82
- f"Making diff: {mc.ui.green(what or 'INDEX')} vs {mc.ui.yellow(against)}"
83
- )
84
- diff_content = repo.git.diff(against, what)
85
- diff = PatchSet.from_string(diff_content)
86
-
87
- # Filter out binary files
88
- non_binary_diff = PatchSet([])
89
- for patched_file in diff:
90
- # Check if the file is binary using the source or target file path
91
- file_path = (
92
- patched_file.target_file
93
- if patched_file.target_file != DEV_NULL
94
- else patched_file.source_file
95
- )
96
- if file_path == DEV_NULL:
97
- continue
98
- if is_binary_file(repo, file_path.lstrip("b/")):
99
- logging.info(f"Skipping binary file: {patched_file.path}")
100
- continue
101
- non_binary_diff.append(patched_file)
102
- return non_binary_diff
103
-
104
-
105
- def filter_diff(
106
- patch_set: PatchSet | Iterable[PatchedFile], filters: str | list[str]
107
- ) -> PatchSet | Iterable[PatchedFile]:
108
- """
109
- Filter the diff files by the given fnmatch filters.
110
- """
111
- assert isinstance(filters, (list, str))
112
- if not isinstance(filters, list):
113
- filters = [f.strip() for f in filters.split(",") if f.strip()]
114
- if not filters:
115
- return patch_set
116
- files = [
117
- file
118
- for file in patch_set
119
- if any(fnmatch.fnmatch(file.path, pattern) for pattern in filters)
120
- ]
121
- return files
122
-
123
-
124
- def file_lines(repo: Repo, file: str, max_tokens: int = None, use_local_files: bool = False) -> str:
125
- if use_local_files:
126
- file_path = Path(repo.working_tree_dir) / file
127
- try:
128
- text = file_path.read_text(encoding='utf-8')
129
- except (FileNotFoundError, UnicodeDecodeError) as e:
130
- logging.warning(f"Could not read file {file} from working directory: {e}")
131
- text = repo.tree()[file].data_stream.read().decode('utf-8')
132
- else:
133
- # Read from HEAD (committed version)
134
- text = repo.tree()[file].data_stream.read().decode('utf-8')
135
-
136
- lines = [f"{i + 1}: {line}\n" for i, line in enumerate(text.splitlines())]
137
- if max_tokens:
138
- lines, removed_qty = mc.tokenizing.fit_to_token_size(lines, max_tokens)
139
- if removed_qty:
140
- lines.append(
141
- f"(!) DISPLAYING ONLY FIRST {len(lines)} LINES DUE TO LARGE FILE SIZE\n"
142
- )
143
- return "".join(lines)
144
-
145
-
146
- def make_cr_summary(cfg: ProjectConfig, report: Report, diff):
147
- return (
148
- mc.prompt(
149
- cfg.summary_prompt,
150
- diff=mc.tokenizing.fit_to_token_size(diff, cfg.max_code_tokens)[0],
151
- issues=report.issues,
152
- **cfg.prompt_vars,
153
- ).to_llm()
154
- if cfg.summary_prompt
155
- else ""
156
- )
157
-
158
-
159
- async def review(
160
- repo: Repo = None,
161
- what: str = None,
162
- against: str = None,
163
- filters: str | list[str] = "",
164
- use_merge_base: bool = True,
165
- out_folder: str | PathLike | None = None,
166
- ):
167
- repo = repo or Repo(".")
168
- cfg = ProjectConfig.load_for_repo(repo)
169
- out_folder = Path(out_folder or repo.working_tree_dir)
170
- diff = get_diff(
171
- repo=repo, what=what, against=against, use_merge_base=use_merge_base
172
- )
173
- diff = filter_diff(diff, filters)
174
- if not diff:
175
- logging.error("Nothing to review")
176
- return
177
- lines = {
178
- file_diff.path: (
179
- file_lines(
180
- repo,
181
- file_diff.path,
182
- cfg.max_code_tokens
183
- - mc.tokenizing.num_tokens_from_string(str(file_diff)),
184
- use_local_files=review_subject_is_index(what)
185
- )
186
- if file_diff.target_file != DEV_NULL and not file_diff.is_added_file
187
- else ""
188
- )
189
- for file_diff in diff
190
- }
191
- responses = await mc.llm_parallel(
192
- [
193
- mc.prompt(
194
- cfg.prompt,
195
- input=file_diff,
196
- file_lines=lines[file_diff.path],
197
- **cfg.prompt_vars,
198
- )
199
- for file_diff in diff
200
- ],
201
- retries=cfg.retries,
202
- parse_json=True,
203
- )
204
- issues = {file.path: issues for file, issues in zip(diff, responses) if issues}
205
- for file, file_issues in issues.items():
206
- for issue in file_issues:
207
- for i in issue.get("affected_lines", []):
208
- if lines[file]:
209
- f_lines = [""] + lines[file].splitlines()
210
- i["affected_code"] = "\n".join(
211
- f_lines[i["start_line"]: i["end_line"] + 1]
212
- )
213
- exec(cfg.post_process, {"mc": mc, **locals()})
214
- out_folder.mkdir(parents=True, exist_ok=True)
215
- report = Report(issues=issues, number_of_processed_files=len(diff))
216
- report.summary = make_cr_summary(cfg, report, diff)
217
- report.save(file_name=out_folder / JSON_REPORT_FILE_NAME)
218
- report_text = report.render(cfg, Report.Format.MARKDOWN)
219
- text_report_path = out_folder / "code-review-report.md"
220
- text_report_path.write_text(report_text, encoding="utf-8")
221
- report.to_cli()
1
+ import fnmatch
2
+ import logging
3
+ from os import PathLike
4
+ from typing import Iterable
5
+ from pathlib import Path
6
+
7
+ import microcore as mc
8
+ from git import Repo
9
+ from gito.pipeline import Pipeline
10
+ from unidiff import PatchSet, PatchedFile
11
+ from unidiff.constants import DEV_NULL
12
+
13
+ from .project_config import ProjectConfig
14
+ from .report_struct import Report
15
+ from .constants import JSON_REPORT_FILE_NAME
16
+ from .utils import stream_to_cli
17
+
18
+
19
def review_subject_is_index(what):
    """True when the review target is the working copy / index (no explicit ref given)."""
    if not what:
        return True
    return what == 'INDEX'
21
+
22
+
23
def is_binary_file(repo: Repo, file_path: str) -> bool:
    """
    Heuristically detect whether a file is binary.

    Attempts to UTF-8 decode the committed blob first; when the path is not
    present in the tree (e.g. a newly added file), falls back to reading the
    working-tree copy. Returns True for binary or unreadable files,
    False for text files.
    """
    try:
        blob = repo.tree()[file_path].data_stream.read()
        blob.decode("utf-8")
    except KeyError:
        # Path not in the committed tree -- inspect the working copy instead.
        try:
            (Path(repo.working_tree_dir) / file_path).read_text(encoding='utf-8')
        except FileNotFoundError:
            logging.error(f"File {file_path} not found in the repository.")
            return True
        except UnicodeDecodeError:
            return True
        except Exception as e:
            logging.error(f"Error reading file {file_path}: {e}")
            return True
        return False
    except UnicodeDecodeError:
        return True
    except Exception as e:
        logging.warning(f"Error checking if file {file_path} is binary: {e}")
        return True  # Conservatively treat errors as binary to avoid issues
    return False
52
+
53
+
54
def get_diff(
    repo: Repo = None,
    what: str = None,
    against: str = None,
    use_merge_base: bool = True,
) -> PatchSet | list[PatchedFile]:
    """
    Build the patch set to review, with binary files filtered out.

    Args:
        repo: Repository to diff; defaults to the repo in the current directory.
        what: Ref to review; None / 'INDEX' means the working copy.
        against: Base ref; defaults to the remote HEAD (e.g. 'origin/main').
        use_merge_base: Diff against merge-base(what, against) instead of
            `against` itself (typical PR-style review).
    Returns:
        PatchSet containing only the non-binary changed files.
    """
    repo = repo or Repo(".")
    if not against:
        # 'origin/main', 'origin/master', etc
        against = repo.remotes.origin.refs.HEAD.reference.name
    if review_subject_is_index(what):
        what = None  # working copy
    if use_merge_base:
        if review_subject_is_index(what):
            try:
                current_ref = repo.active_branch.name
            except TypeError:
                # In detached HEAD state, use HEAD directly
                current_ref = "HEAD"
                logging.info(
                    "Detected detached HEAD state, using HEAD as current reference"
                )
        else:
            current_ref = what
        merge_base = repo.merge_base(current_ref or repo.active_branch.name, against)[0]
        against = merge_base.hexsha
        logging.info(
            f"Using merge base: {mc.ui.cyan(merge_base.hexsha[:8])} ({merge_base.summary})"
        )
    logging.info(
        f"Making diff: {mc.ui.green(what or 'INDEX')} vs {mc.ui.yellow(against)}"
    )
    diff_content = repo.git.diff(against, what)
    diff = PatchSet.from_string(diff_content)

    # Filter out binary files
    non_binary_diff = PatchSet([])
    for patched_file in diff:
        # Check if the file is binary using the source or target file path
        file_path = (
            patched_file.target_file
            if patched_file.target_file != DEV_NULL
            else patched_file.source_file
        )
        if file_path == DEV_NULL:
            continue
        # Strip the git diff prefix ("a/" for source, "b/" for target) exactly
        # once. BUGFIX: the previous `file_path.lstrip("b/")` stripped the
        # *character set* {'b', '/'}, corrupting paths such as
        # "b/build/x" -> "uild/x", and never removed the "a/" prefix of
        # deleted files, so they were misclassified as binary and skipped.
        repo_path = file_path.removeprefix("a/").removeprefix("b/")
        if is_binary_file(repo, repo_path):
            logging.info(f"Skipping binary file: {patched_file.path}")
            continue
        non_binary_diff.append(patched_file)
    return non_binary_diff
105
+
106
+
107
def filter_diff(
    patch_set: PatchSet | Iterable[PatchedFile], filters: str | list[str]
) -> PatchSet | Iterable[PatchedFile]:
    """
    Keep only the files whose path matches at least one fnmatch pattern.

    `filters` may be a list of patterns or a comma-separated string;
    an empty filter set returns the patch set unchanged.
    """
    assert isinstance(filters, (list, str))
    if isinstance(filters, str):
        filters = [chunk.strip() for chunk in filters.split(",") if chunk.strip()]
    if not filters:
        return patch_set

    def matches(path: str) -> bool:
        # A file is kept if any pattern accepts its path.
        return any(fnmatch.fnmatch(path, pattern) for pattern in filters)

    return [file for file in patch_set if matches(file.path)]
124
+
125
+
126
def file_lines(repo: Repo, file: str, max_tokens: int = None, use_local_files: bool = False) -> str:
    """
    Return the file content with 1-based line numbers prepended, optionally
    truncated to fit within `max_tokens`.

    When `use_local_files` is set, the working-tree copy is preferred and the
    committed (HEAD) version is used as a fallback; otherwise the committed
    version is read directly.
    """
    text = None
    if use_local_files:
        try:
            text = (Path(repo.working_tree_dir) / file).read_text(encoding='utf-8')
        except (FileNotFoundError, UnicodeDecodeError) as e:
            logging.warning(f"Could not read file {file} from working directory: {e}")
    if text is None:
        # Read from HEAD (committed version)
        text = repo.tree()[file].data_stream.read().decode('utf-8')

    numbered = [f"{idx + 1}: {line}\n" for idx, line in enumerate(text.splitlines())]
    if max_tokens:
        numbered, removed_qty = mc.tokenizing.fit_to_token_size(numbered, max_tokens)
        if removed_qty:
            numbered.append(
                f"(!) DISPLAYING ONLY FIRST {len(numbered)} LINES DUE TO LARGE FILE SIZE\n"
            )
    return "".join(numbered)
146
+
147
+
148
def make_cr_summary(config: ProjectConfig, report: Report, diff, **kwargs) -> str:
    """
    Render the code-review summary via the configured summary prompt.

    Extra keyword arguments are forwarded to the prompt as template variables.
    Returns an empty string when no summary prompt is configured.
    """
    if not config.summary_prompt:
        return ""
    # Trim the diff so the prompt stays within the configured token budget.
    fitted_diff = mc.tokenizing.fit_to_token_size(diff, config.max_code_tokens)[0]
    return mc.prompt(
        config.summary_prompt,
        diff=fitted_diff,
        issues=report.issues,
        **config.prompt_vars,
        **kwargs,
    ).to_llm()
160
+
161
+
162
class NoChangesInContextError(Exception):
    """
    Raised when the requested diff contains no changes to review
    or to answer questions about.
    """
166
+
167
+
168
def _prepare(
    repo: Repo = None,
    what: str = None,
    against: str = None,
    filters: str | list[str] = "",
    use_merge_base: bool = True,
):
    """
    Shared setup for review()/answer(): load the project config, build the
    filtered diff and collect numbered source lines per changed file.

    Returns:
        (repo, cfg, diff, lines) tuple, where `lines` maps file path to its
        numbered content ("" for added/removed files).
    Raises:
        NoChangesInContextError: when the filtered diff is empty.
    """
    repo = repo or Repo(".")
    cfg = ProjectConfig.load_for_repo(repo)
    diff = filter_diff(
        get_diff(repo=repo, what=what, against=against, use_merge_base=use_merge_base),
        filters,
    )
    if not diff:
        raise NoChangesInContextError()
    lines = {}
    for file_diff in diff:
        if file_diff.target_file == DEV_NULL or file_diff.is_added_file:
            # Deleted/added files need no pre-existing source context.
            lines[file_diff.path] = ""
            continue
        # Leave token budget for the file content after accounting for the diff itself.
        budget = cfg.max_code_tokens - mc.tokenizing.num_tokens_from_string(str(file_diff))
        lines[file_diff.path] = file_lines(
            repo,
            file_diff.path,
            budget,
            use_local_files=review_subject_is_index(what),
        )
    return repo, cfg, diff, lines
198
+
199
+
200
async def review(
    repo: Repo = None,
    what: str = None,
    against: str = None,
    filters: str | list[str] = "",
    use_merge_base: bool = True,
    out_folder: str | PathLike | None = None,
):
    """
    Run an LLM code review over the selected diff and write the reports.

    Collects per-file issues via parallel LLM calls, attaches the affected
    code snippets, runs the configured post-processing and pipeline steps,
    then writes the JSON and Markdown reports into ``out_folder`` (defaults
    to the repository root) and prints the result to the CLI.
    """
    try:
        repo, cfg, diff, lines = _prepare(
            repo=repo, what=what, against=against, filters=filters, use_merge_base=use_merge_base
        )
    except NoChangesInContextError:
        logging.error("No changes to review")
        return
    # One prompt per changed file, executed in parallel; responses parsed as JSON.
    responses = await mc.llm_parallel(
        [
            mc.prompt(
                cfg.prompt,
                input=file_diff,
                file_lines=lines[file_diff.path],
                **cfg.prompt_vars,
            )
            for file_diff in diff
        ],
        retries=cfg.retries,
        parse_json=True,
    )
    # Map file path -> issue list, dropping files with no reported issues.
    issues = {file.path: issues for file, issues in zip(diff, responses) if issues}
    for file, file_issues in issues.items():
        for issue in file_issues:
            for i in issue.get("affected_lines", []):
                if lines[file]:
                    # Dummy "" at index 0 makes the list 1-based, so the LLM's
                    # start/end line numbers can be used as direct indices.
                    f_lines = [""] + lines[file].splitlines()
                    i["affected_code"] = "\n".join(
                        f_lines[i["start_line"]: i["end_line"] + 1]
                    )
    # NOTE(review): executes config-supplied code with access to all locals;
    # assumes the project config is trusted -- confirm before accepting
    # configs from untrusted sources.
    exec(cfg.post_process, {"mc": mc, **locals()})
    out_folder = Path(out_folder or repo.working_tree_dir)
    out_folder.mkdir(parents=True, exist_ok=True)
    report = Report(issues=issues, number_of_processed_files=len(diff))
    # Shared context for the pipeline steps and the summary prompt; extra keys
    # (repo, pipeline_out) are absorbed by make_cr_summary's **kwargs.
    ctx = dict(
        report=report,
        config=cfg,
        diff=diff,
        repo=repo,
        pipeline_out={},
    )
    if cfg.pipeline_steps:
        pipe = Pipeline(
            ctx=ctx,
            steps=cfg.pipeline_steps
        )
        pipe.run()
    else:
        logging.info("No pipeline steps defined, skipping pipeline execution")

    report.summary = make_cr_summary(**ctx)
    report.save(file_name=out_folder / JSON_REPORT_FILE_NAME)
    report_text = report.render(cfg, Report.Format.MARKDOWN)
    # NOTE(review): duplicates GITHUB_MD_REPORT_FILE_NAME from .constants;
    # consider importing the constant instead of hard-coding the name.
    text_report_path = out_folder / "code-review-report.md"
    text_report_path.write_text(report_text, encoding="utf-8")
    report.to_cli()
263
+
264
+
265
def answer(
    question: str,
    repo: Repo = None,
    what: str = None,
    against: str = None,
    filters: str | list[str] = "",
    use_merge_base: bool = True,
) -> str | None:
    """
    Answer a free-form question about the current change set using the LLM.

    Args:
        question: The question to answer.
        repo/what/against/filters/use_merge_base: Diff selection, as in review().
    Returns:
        The LLM response text, or None when there are no changes in context.
    """
    try:
        repo, cfg, diff, lines = _prepare(
            repo=repo, what=what, against=against, filters=filters, use_merge_base=use_merge_base
        )
    except NoChangesInContextError:
        logging.error("No changes to review")
        return
    # BUGFIX: `callback` is an option of the LLM call, not a prompt template
    # variable. It was previously passed to mc.prompt(), so streamed output
    # never reached the CLI and `callback` leaked into the template context.
    response = mc.llm(
        mc.prompt(
            cfg.answer_prompt,
            question=question,
            diff=diff,
            all_file_lines=lines,
            **cfg.prompt_vars,
        ),
        callback=stream_to_cli,
    )
    return response
gito/gh_api.py ADDED
@@ -0,0 +1,35 @@
1
+ import logging
2
+
3
+ import requests
4
+
5
+
6
def post_gh_comment(
    gh_repository: str,  # e.g. "owner/repo"
    pr_or_issue_number: int,
    gh_token: str,
    text: str,
    timeout: float = 30.0,
) -> bool:
    """
    Post a comment to a GitHub pull request or issue.
    Arguments:
        gh_repository (str): The GitHub repository in the format "owner/repo".
        pr_or_issue_number (int): The pull request or issue number.
        gh_token (str): GitHub personal access token with permissions to post comments.
        text (str): The comment text to post.
        timeout (float): Max seconds to wait for the GitHub API response.
    Returns:
        True if the comment was posted successfully, False otherwise.
    """
    api_url = f"https://api.github.com/repos/{gh_repository}/issues/{pr_or_issue_number}/comments"
    headers = {
        "Authorization": f"token {gh_token}",
        "Accept": "application/vnd.github+json",
    }
    data = {"body": text}

    try:
        # FIX: an explicit timeout prevents CI jobs from hanging forever on a
        # stalled connection (requests has no default timeout).
        resp = requests.post(api_url, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as e:
        # Honor the documented bool contract instead of propagating network errors.
        logging.error(f"Failed to post comment: {e}")
        return False
    if 200 <= resp.status_code < 300:
        logging.info(f"Posted review comment to #{pr_or_issue_number} in {gh_repository}")
        return True
    else:
        logging.error(f"Failed to post comment: {resp.status_code} {resp.reason}\n{resp.text}")
        return False
gito/issue_trackers.py ADDED
@@ -0,0 +1,49 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ from dataclasses import dataclass, field
5
+
6
+ import git
7
+ from gito.utils import is_running_in_github_action
8
+
9
+
10
@dataclass
class IssueTrackerIssue:
    """Minimal representation of an issue fetched from an external issue tracker."""
    title: str = ""
    description: str = ""
    url: str = ""
15
+
16
+
17
+ def extract_issue_key(branch_name: str, min_len=2, max_len=10) -> str | None:
18
+ pattern = fr"\b[A-Z][A-Z0-9]{{{min_len - 1},{max_len - 1}}}-\d+\b"
19
+ match = re.search(pattern, branch_name)
20
+ return match.group(0) if match else None
21
+
22
+
23
def get_branch(repo: git.Repo):
    """
    Resolve the branch name under review.

    Inside a GitHub Action the branch comes from GITHUB_HEAD_REF (pull
    requests) or GITHUB_REF (pushes); otherwise the repository's active
    branch is used. Returns None when it cannot be determined
    (e.g. detached HEAD).
    """
    if is_running_in_github_action():
        if pr_branch := os.getenv('GITHUB_HEAD_REF'):
            return pr_branch
        ref = os.getenv('GITHUB_REF', '')
        if ref.startswith('refs/heads/'):
            return ref.replace('refs/heads/', '')
    try:
        return repo.active_branch.name
    except Exception as e:  # @todo: specify more precise exception
        logging.error("Could not determine the active branch name: %s", e)
        return None
38
+
39
+
40
def resolve_issue_key(repo: git.Repo):
    """
    Derive the issue-tracker key from the repository's current branch name.

    Returns the key (e.g. "ABC-123"), or None (with an error logged) when no
    branch or no key can be determined.
    """
    branch_name = get_branch(repo)
    if not branch_name:
        logging.error("No active branch found in the repository, cannot determine issue key.")
        return None
    issue_key = extract_issue_key(branch_name)
    if not issue_key:
        logging.error(f"No issue key found in branch name: {branch_name}")
        return None
    return issue_key