prlens-core 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. prlens_core-0.1.4/PKG-INFO +23 -0
  2. prlens_core-0.1.4/pyproject.toml +41 -0
  3. prlens_core-0.1.4/setup.cfg +4 -0
  4. prlens_core-0.1.4/src/prlens_core/__init__.py +0 -0
  5. prlens_core-0.1.4/src/prlens_core/config.py +65 -0
  6. prlens_core-0.1.4/src/prlens_core/gh/__init__.py +0 -0
  7. prlens_core-0.1.4/src/prlens_core/gh/pull_request.py +39 -0
  8. prlens_core-0.1.4/src/prlens_core/guidelines/backend.md +37 -0
  9. prlens_core-0.1.4/src/prlens_core/guidelines/frontend.md +65 -0
  10. prlens_core-0.1.4/src/prlens_core/providers/__init__.py +0 -0
  11. prlens_core-0.1.4/src/prlens_core/providers/anthropic.py +36 -0
  12. prlens_core-0.1.4/src/prlens_core/providers/base.py +194 -0
  13. prlens_core-0.1.4/src/prlens_core/providers/openai.py +34 -0
  14. prlens_core-0.1.4/src/prlens_core/reviewer.py +482 -0
  15. prlens_core-0.1.4/src/prlens_core/utils/__init__.py +0 -0
  16. prlens_core-0.1.4/src/prlens_core/utils/code.py +30 -0
  17. prlens_core-0.1.4/src/prlens_core/utils/context.py +350 -0
  18. prlens_core-0.1.4/src/prlens_core.egg-info/PKG-INFO +23 -0
  19. prlens_core-0.1.4/src/prlens_core.egg-info/SOURCES.txt +28 -0
  20. prlens_core-0.1.4/src/prlens_core.egg-info/dependency_links.txt +1 -0
  21. prlens_core-0.1.4/src/prlens_core.egg-info/requires.txt +20 -0
  22. prlens_core-0.1.4/src/prlens_core.egg-info/top_level.txt +1 -0
  23. prlens_core-0.1.4/tests/test_code_utils.py +27 -0
  24. prlens_core-0.1.4/tests/test_config.py +91 -0
  25. prlens_core-0.1.4/tests/test_context.py +437 -0
  26. prlens_core-0.1.4/tests/test_diff_positions.py +89 -0
  27. prlens_core-0.1.4/tests/test_providers.py +181 -0
  28. prlens_core-0.1.4/tests/test_pull_request.py +64 -0
  29. prlens_core-0.1.4/tests/test_reviewer_core.py +641 -0
  30. prlens_core-0.1.4/tests/test_reviewer_helpers.py +101 -0
@@ -0,0 +1,23 @@
1
+ Metadata-Version: 2.4
2
+ Name: prlens-core
3
+ Version: 0.1.4
4
+ Summary: Core review engine for prlens — AI-powered GitHub PR code reviewer
5
+ License: MIT
6
+ Requires-Python: >=3.9
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: PyGithub>=2.1
9
+ Requires-Dist: pyyaml>=6.0
10
+ Requires-Dist: python-dotenv>=1.0
11
+ Provides-Extra: anthropic
12
+ Requires-Dist: anthropic>=0.25; extra == "anthropic"
13
+ Provides-Extra: openai
14
+ Requires-Dist: openai>=1.0; extra == "openai"
15
+ Provides-Extra: all
16
+ Requires-Dist: anthropic>=0.25; extra == "all"
17
+ Requires-Dist: openai>=1.0; extra == "all"
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest>=8.0; extra == "dev"
20
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
21
+ Requires-Dist: pytest-mock>=3.12; extra == "dev"
22
+ Requires-Dist: black>=24.0; extra == "dev"
23
+ Requires-Dist: flake8>=7.0; extra == "dev"
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "prlens-core"
7
+ version = "0.1.4"
8
+ description = "Core review engine for prlens — AI-powered GitHub PR code reviewer"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ dependencies = [
13
+ "PyGithub>=2.1",
14
+ "pyyaml>=6.0",
15
+ "python-dotenv>=1.0",
16
+ ]
17
+
18
+ [project.optional-dependencies]
19
+ anthropic = ["anthropic>=0.25"]
20
+ openai = ["openai>=1.0"]
21
+ all = ["anthropic>=0.25", "openai>=1.0"]
22
+ dev = [
23
+ "pytest>=8.0",
24
+ "pytest-cov>=5.0",
25
+ "pytest-mock>=3.12",
26
+ "black>=24.0",
27
+ "flake8>=7.0",
28
+ ]
29
+
30
+ [tool.setuptools.packages.find]
31
+ where = ["src"]
32
+
33
+ [tool.setuptools.package-data]
34
+ prlens_core = ["guidelines/*.md"]
35
+
36
+ [tool.pytest.ini_options]
37
+ testpaths = ["tests"]
38
+
39
+ [tool.black]
40
+ line-length = 120
41
+ target-version = ["py39", "py310", "py311", "py312"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
@@ -0,0 +1,65 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ import yaml
6
+
7
# Built-in defaults. Values from the YAML config file and then CLI overrides
# are layered on top of these by load_config().
DEFAULT_CONFIG: dict = {
    "model": "anthropic",
    "max_chars_per_file": 20000,
    "batch_limit": 60,
    "guidelines": None,  # None = use built-in default; set to a path string to override
    "exclude": [],  # fnmatch patterns or directory names to skip (e.g. "migrations/", "*.min.js")
    "review_draft_prs": False,
}

# Directory holding the guideline files shipped inside the package.
BUILTIN_GUIDELINES_DIR = Path(__file__).parent / "guidelines"
_BUILTIN_DEFAULT = BUILTIN_GUIDELINES_DIR / "backend.md"


def load_config(config_path: str = ".prlens.yml", cli_overrides: Optional[dict] = None) -> dict:
    """
    Load configuration by merging, from lowest to highest precedence:
      1. Built-in defaults (``DEFAULT_CONFIG``)
      2. The YAML file at ``config_path`` (``.prlens.yml`` by default), if it exists
      3. CLI argument overrides whose value is not ``None``

    The ``github_token``, ``anthropic_api_key`` and ``openai_api_key`` entries
    are always taken from the ``GITHUB_TOKEN``, ``ANTHROPIC_API_KEY`` and
    ``OPENAI_API_KEY`` environment variables (``None`` when unset), replacing
    any value of the same name coming from the file or the CLI.

    Args:
        config_path: Path of the YAML config file; a missing file is not an error.
        cli_overrides: Optional mapping of option name to value. ``None`` values
            mean "not given on the command line" and are skipped.

    Returns:
        A new dict; ``DEFAULT_CONFIG`` itself is never mutated.

    Raises:
        ValueError: If the YAML document's top level is not a mapping.
    """
    # Copy the mutable "exclude" default so callers cannot mutate the module-level list.
    config = {**DEFAULT_CONFIG, "exclude": list(DEFAULT_CONFIG["exclude"])}

    path = Path(config_path)
    if path.exists():
        # Decode explicitly as UTF-8 so parsing does not depend on the platform locale.
        with open(path, encoding="utf-8") as f:
            file_config = yaml.safe_load(f) or {}
        if not isinstance(file_config, dict):
            # safe_load returns a list or scalar for non-mapping documents; fail
            # with a clear message instead of an opaque error from dict.update().
            raise ValueError(
                f"Config file {config_path} must contain a YAML mapping at the top level, "
                f"got {type(file_config).__name__}."
            )
        config.update(file_config)

    if cli_overrides:
        for key, value in cli_overrides.items():
            if value is not None:
                config[key] = value

    # Resolve credentials from environment variables
    config["github_token"] = os.environ.get("GITHUB_TOKEN")
    config["anthropic_api_key"] = os.environ.get("ANTHROPIC_API_KEY")
    config["openai_api_key"] = os.environ.get("OPENAI_API_KEY")

    return config
46
+
47
+
48
def load_guidelines(config: dict) -> str:
    """
    Load review guidelines.

    If ``guidelines`` is set in config, loads from that path (relative to cwd).
    Otherwise falls back to the built-in default.

    Files are decoded as UTF-8 regardless of the platform locale, because the
    guideline text is injected verbatim into the model prompt and the bundled
    files contain non-ASCII punctuation.

    Raises:
        FileNotFoundError: If the configured path does not exist, or if no path
            is configured and the built-in default file is missing.
    """
    custom_path = config.get("guidelines")
    if custom_path:
        p = Path(custom_path)
        if not p.exists():
            raise FileNotFoundError(f"Guidelines file not found: {custom_path}")
        return p.read_text(encoding="utf-8")

    if _BUILTIN_DEFAULT.exists():
        return _BUILTIN_DEFAULT.read_text(encoding="utf-8")

    raise FileNotFoundError("No guidelines configured and built-in default is missing.")
File without changes
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from github import Github
6
+
7
# Matches an HTML comment of the form "<!-- prlens-sha: <40 lowercase hex chars> -->"; group 1 is the SHA.
+ _SHA_MARKER_RE = re.compile(r"<!-- prlens-sha: ([0-9a-f]{40}) -->")
8
+
9
+
10
def get_repo(repo_name: str, token: str):
    """Return the repository ``repo_name`` from a ``Github`` client created with ``token``."""
    client = Github(token)
    return client.get_repo(repo_name)
12
+
13
+
14
def get_pull(repo, pr_number: int):
    """Return the pull request numbered ``pr_number`` from ``repo``."""
    pull = repo.get_pull(pr_number)
    return pull
16
+
17
+
18
def get_pull_requests(repo, state: str = "open"):
    """Return the pull requests of ``repo`` that are in ``state`` (``"open"`` by default)."""
    pulls = repo.get_pulls(state=state)
    return pulls
20
+
21
+
22
def get_diff(pr):
    """Return the changed files of ``pr`` as reported by ``pr.get_files()``."""
    changed_files = pr.get_files()
    return changed_files
24
+
25
+
26
def get_last_reviewed_sha(pr) -> str | None:
    """Return the SHA from the last review body carrying a prlens SHA marker, or None.

    Every review returned by ``pr.get_reviews()`` is scanned in order; a review
    without a body is treated as an empty string. When several reviews carry a
    marker, the one that comes last in iteration order wins.
    """
    matches = (_SHA_MARKER_RE.search(review.body or "") for review in pr.get_reviews())
    found_shas = [m.group(1) for m in matches if m]
    return found_shas[-1] if found_shas else None
34
+
35
+
36
def get_incremental_files(repo, base_sha: str, head_sha: str):
    """Return the ``files`` of ``repo.compare(base_sha, head_sha)``, i.e. what changed between the two commits."""
    return repo.compare(base_sha, head_sha).files
@@ -0,0 +1,37 @@
1
+ # Backend Code Review Guidelines
2
+
3
+ ## REST & Architecture
4
+
5
+ - Use proper HTTP verbs (e.g., `POST /users` not `POST /users/create`).
6
+ - Keep business logic in service layers — views/controllers should only orchestrate.
7
+ - Avoid putting business logic in model `save()` methods or view handlers.
8
+ - Avoid boolean flags that alter method behavior; create explicit, separate methods instead.
9
+
10
+ ## Code Structure & Reusability
11
+
12
+ - Place shared logic in a shared library if it is reused across multiple services.
13
+ - Wrap external integrations (e.g., Slack, email, storage) in clean service layers that do not depend on internal app logic.
14
+ - Avoid adding executable code in `__init__.py` files — use them only for imports and package exposure.
15
+
16
+ ## Django-Specific Practices (if applicable)
17
+
18
+ - Use `select_related` / `prefetch_related` to avoid N+1 queries.
19
+ - Validate inputs in serializers, not in views or services.
20
+ - Use custom domain exceptions instead of generic ones for consistent error handling.
21
+ - Organize code modularly by domain (e.g., `users`, `payments`, `notifications`).
22
+
23
+ ## Python Best Practices
24
+
25
+ - Use type hints in all function and method signatures.
26
+ - Avoid wildcard imports (`from module import *`).
27
+ - Use `logging` with appropriate levels instead of `print()`.
28
+ - Replace magic strings and numbers with named constants or enums.
29
+ - Prefer `pathlib.Path` over string-based file paths.
30
+ - Use context managers (`with` statements) for files and resources.
31
+
32
+ ## Testing & Maintainability
33
+
34
+ - Write unit tests for all new service logic.
35
+ - Keep tests fast and deterministic; mock all external dependencies.
36
+ - Use environment variables for secrets and configuration — never hardcode them.
37
+ - Add docstrings and API documentation for new public endpoints and logic.
@@ -0,0 +1,65 @@
1
+ # Frontend Code Review Guidelines
2
+
3
+ ## API Handling
4
+
5
+ - Store API responses in global state (e.g., Redux) only if needed across multiple components.
6
+ - Use component-local state (`useState`/`useEffect`) for view-specific or session-specific data.
7
+ - Avoid flag-based conditional API logic inside components — extract it to helper functions or hooks.
8
+ - Optimize for performance: debounce search inputs, paginate large datasets, cache static responses.
9
+
10
+ ## State Management
11
+
12
+ - Use global state slices for shared state only.
13
+ - Avoid duplicating state between global state and component-local state.
14
+ - Encapsulate side effects and data-fetching in reusable custom hooks.
15
+
16
+ ## Component Architecture
17
+
18
+ - Follow a clean separation of concerns:
19
+ - `components/` — Dumb, reusable UI elements
20
+ - `containers/` — Smart components with data-fetching
21
+ - `hooks/` — Reusable logic for side effects
22
+ - `utils/`, `constants/` — Low-level modules and config
23
+
24
+ ## Do's
25
+
26
+ - Use constants or enums for repeated value-label pairs (e.g., statuses, categories).
27
+ - Keep components small and composable.
28
+ - Write tests for custom hooks, logic, and critical UI behaviors.
29
+
30
+ ## Don'ts
31
+
32
+ - Don't use wildcard imports (e.g., `import * as lib`) — prefer named imports.
33
+ - Don't hardcode magic values — define them as constants or enums.
34
+ - Don't embed conditional API logic directly in components.
35
+ - Don't bloat container components — move logic to hooks or services.
36
+
37
+ ## Value-to-Label Mapping
38
+
39
+ Use a structured class with static getters for value-label constants:
40
+
41
+ ```js
42
+ // Good
43
+ class DocumentType {
44
+ static get PASSPORT() {
45
+ return { code: "passport", title: "Passport" };
46
+ }
47
+ static get ALL() {
48
+ return [DocumentType.PASSPORT];
49
+ }
50
+ }
51
+
52
+ // Bad
53
+ const DocumentType = {
54
+ PASSPORT: { code: "passport", title: "Passport" },
55
+ };
56
+ ```
57
+
58
+ Using a class prevents unintentional mutation and supports lazy initialization.
59
+
60
+ ## Syntax & Language Notes
61
+
62
+ - Use named imports: `import { Button } from "antd";`
63
+ - Avoid wildcard imports: `import * as antd from "antd";`
64
+ - Do not suggest removing fallback logic (e.g., `|| []`) unless the value is guaranteed non-null.
65
+ Note: JavaScript's `Map.get()` does not support default values like Python's `dict.get()`.
@@ -0,0 +1,36 @@
1
+ from __future__ import annotations
2
+
3
+ from prlens_core.providers.base import BaseReviewer
4
+
5
+
6
class AnthropicReviewer(BaseReviewer):
    """Reviewer that sends prompts through the Anthropic SDK's ``messages.create``."""

    MODEL = "claude-sonnet-4-20250514"
    # temperature=0.3 for Anthropic — slightly higher than OpenAI's 0.2 to
    # allow more natural phrasing in review comments while keeping the output
    # deterministic enough for consistent JSON structure.
    TEMPERATURE = 0.3

    def __init__(self, api_key: str):
        """Create the Anthropic client.

        Args:
            api_key: API key passed to the ``Anthropic`` client.

        Raises:
            ImportError: If the optional ``anthropic`` package is not installed.
        """
        try:
            from anthropic import Anthropic
        except ImportError as exc:
            # Chain the original error so the traceback shows the root cause
            # (e.g. a broken transitive dependency) and not only this hint.
            raise ImportError(
                "The 'anthropic' package is required for this provider. "
                "Install it with: pip install 'prlens[anthropic]'"
            ) from exc
        self.client = Anthropic(api_key=api_key)

    def _call_api(self, system_prompt: str, user_prompt: str) -> str:
        """Make one ``messages.create`` call and return the joined text blocks, stripped.

        Exceptions raised by the SDK propagate to the caller.
        """
        # Imported inside the method because the anthropic package is optional;
        # __init__ already validated it is installed before we reach here.
        from anthropic.types import TextBlock

        response = self.client.messages.create(
            model=self.MODEL,
            system=system_prompt,
            messages=[{"role": "user", "content": user_prompt}],
            temperature=self.TEMPERATURE,
            max_tokens=self.MAX_TOKENS,
        )
        # Only text blocks carry review output; any other block type is skipped.
        text_blocks = [block.text for block in response.content if isinstance(block, TextBlock)]
        return "".join(text_blocks).strip()
@@ -0,0 +1,194 @@
1
+ """Base reviewer implementing the Template Method pattern.
2
+
3
+ All providers share the same review algorithm:
4
+ review() → _build_system_prompt() + _build_user_prompt()
5
+ → _call_with_retry() → _call_api() ← only this differs per provider
6
+ → _parse()
7
+
8
+ Subclasses implement two things only:
9
+ - __init__: validate and store the SDK client
10
+ - _call_api: make one raw API call and return the text response
11
+
12
+ Everything else — prompt construction, JSON parsing, retry logic — lives here
13
+ so it is defined once and inherited consistently by every provider.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import logging
20
+ import time
21
+ from abc import ABC, abstractmethod
22
+ from typing import TYPE_CHECKING
23
+
24
+ from prlens_core.utils.context import build_context_section
25
+
26
+ if TYPE_CHECKING:
27
+ from prlens_core.utils.context import RepoContext
28
+
29
logger = logging.getLogger(__name__)

# Shared defaults — subclasses may override as class attributes if needed.
_MAX_RETRIES = 3
_MAX_TOKENS = 4096


class BaseReviewer(ABC):
    """Provider-independent review flow; subclasses supply the SDK client and ``_call_api``."""

    MAX_RETRIES: int = _MAX_RETRIES
    MAX_TOKENS: int = _MAX_TOKENS

    # ------------------------------------------------------------------ #
    # Public interface                                                     #
    # ------------------------------------------------------------------ #

    def review(
        self,
        description: str,
        file_name: str,
        diff_patch: str,
        file_content: str,
        guidelines: str,
        repo_context: RepoContext | None = None,
    ) -> list[dict]:
        """Orchestrate a single-file review and return inline comments.

        Concrete here because the algorithm is identical for every provider:
        build prompts → call API with retry → parse JSON response.
        Only the raw API call (_call_api) is delegated to subclasses.

        Returns:
            The parsed comment dicts, or ``[]`` when the API call produced no
            response or the response could not be parsed.
        """
        system = self._build_system_prompt(guidelines)
        user = self._build_user_prompt(description, file_name, diff_patch, file_content, repo_context)
        raw = self._call_with_retry(system, user)
        if raw is None:
            return []
        return self._parse(raw)

    # ------------------------------------------------------------------ #
    # Abstract — implement in each provider                                #
    # ------------------------------------------------------------------ #

    @abstractmethod
    def _call_api(self, system_prompt: str, user_prompt: str) -> str:
        """Make a single API call and return the raw text response.

        This is the only method subclasses must implement. It should raise
        on failure — _call_with_retry handles retries and logging.
        """

    # ------------------------------------------------------------------ #
    # Shared implementations                                               #
    # ------------------------------------------------------------------ #

    def _call_with_retry(self, system_prompt: str, user_prompt: str) -> str | None:
        """Retry _call_api up to MAX_RETRIES times with exponential backoff.

        Separating retry logic from the raw API call means each provider's
        _call_api stays focused on a single attempt, and the backoff/logging
        behaviour is defined once rather than copied into every provider.

        Returns:
            The first successful ``_call_api`` result, or ``None`` once every
            attempt has failed (or when ``MAX_RETRIES`` is not positive).
        """
        for attempt in range(self.MAX_RETRIES):
            try:
                return self._call_api(system_prompt, user_prompt)
            except Exception as e:
                # Deliberately broad: this is the boundary that turns any
                # provider failure into "no review" instead of a crash.
                if attempt == self.MAX_RETRIES - 1:
                    logger.error(
                        "%s API failed after %d attempts: %s",
                        self.__class__.__name__,
                        self.MAX_RETRIES,
                        e,
                    )
                    return None
                delay = 2**attempt  # 1s, 2s, 4s, ...
                logger.warning(
                    "%s API error (attempt %d/%d): %s. Retrying in %ds...",
                    self.__class__.__name__,
                    attempt + 1,
                    self.MAX_RETRIES,
                    e,
                    delay,
                )
                time.sleep(delay)
        # Only reachable when MAX_RETRIES <= 0 and the loop body never ran.
        return None

    def _build_system_prompt(self, guidelines: str) -> str:
        """Build the system prompt injected once per review call.

        Kept in base so all providers produce a consistent reviewer persona
        and rule set — the only variable is the guidelines content itself.
        """
        return f"""You are a strict and precise senior code reviewer.
Review the patch below and identify issues according to the guidelines.

{guidelines}

Rules:
- Focus on added lines (starting with '+') for direct violations.
- Also consider implications of removed lines (starting with '-') — e.g. deleted null checks,
  removed error handling, dropped permission guards.
- Do not comment on code that already follows best practices.
- Avoid assumptions when context is unclear. Be concise and actionable."""

    def _build_user_prompt(
        self,
        description: str,
        file_name: str,
        diff_patch: str,
        file_content: str,
        repo_context: RepoContext | None = None,
    ) -> str:
        """Build the per-file user prompt including any codebase context.

        Kept in base so both providers produce structurally identical prompts.
        The output format instructions are here rather than in the system
        prompt because they are specific to the file being reviewed, not to
        the reviewer's general behaviour.
        """
        context_section = build_context_section(repo_context)
        return f"""You are reviewing `{file_name}` in the context of the full repository.
{context_section}
## PR Description
{description}

## Diff
{diff_patch}

## Full File Content
{file_content}

### Output Format:
Respond with **only** a valid JSON list:

[
  {{
    "line": <line number in the new file (integer)>,
    "severity": "<critical|major|minor|nitpick>",
    "comment": "<concise, actionable comment>"
  }},
  ...
]

Severity guide:
- critical: security vulnerability, data loss risk, crash
- major: logic bug, missing error handling, significant performance issue
- minor: code smell, unclear naming, missing type hint
- nitpick: style preference, minor formatting

If there are no issues, return: []
Do not return any text outside the JSON block."""

    def _parse(self, raw: str) -> list[dict]:
        """Parse the model's raw text response into a list of comment dicts.

        Kept in base because the expected JSON schema is identical for every
        provider — stripping markdown fences and loading JSON is not
        provider-specific behaviour.

        Returns:
            The dict entries of the decoded JSON list. Invalid JSON, or valid
            JSON whose top level is not a list, yields ``[]``; non-dict list
            entries are dropped so callers always receive ``list[dict]``.
        """
        try:
            cleaned = raw.replace("```json", "").replace("```", "").strip()
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            logger.warning(
                "%s: failed to parse response as JSON: %s",
                self.__class__.__name__,
                raw[:200],
            )
            return []

        if not isinstance(parsed, list):
            # Valid JSON but the wrong shape (e.g. a single object or a bare string).
            logger.warning(
                "%s: expected a JSON list but got %s: %s",
                self.__class__.__name__,
                type(parsed).__name__,
                raw[:200],
            )
            return []
        return [entry for entry in parsed if isinstance(entry, dict)]
@@ -0,0 +1,34 @@
1
+ from __future__ import annotations
2
+
3
+ try:
4
+ from openai import OpenAI as _OpenAI
5
+ except ImportError:
6
+ _OpenAI = None # type: ignore[assignment,misc]
7
+
8
+ from prlens_core.providers.base import BaseReviewer
9
+
10
+
11
class OpenAIReviewer(BaseReviewer):
    """Reviewer that sends prompts through the OpenAI SDK's ``chat.completions.create``."""

    MODEL = "gpt-4o"
    # temperature=0.2 for OpenAI — lower than Anthropic's 0.3 to lean toward
    # more deterministic, structured JSON output from GPT-4o.
    TEMPERATURE = 0.2

    def __init__(self, api_key: str):
        """Create the OpenAI client.

        Args:
            api_key: API key passed to the ``OpenAI`` client.

        Raises:
            ImportError: If the optional ``openai`` package is not installed.
        """
        if _OpenAI is None:
            raise ImportError(
                "The 'openai' package is required for this provider. "
                "Install it with: pip install 'prlens[openai]'"
            )
        self.client = _OpenAI(api_key=api_key)

    def _call_api(self, system_prompt: str, user_prompt: str) -> str:
        """Make one chat-completion call and return the first choice's text, stripped.

        Exceptions raised by the SDK propagate to the caller.
        """
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=self.TEMPERATURE,
            max_tokens=self.MAX_TOKENS,
        )
        # The SDK types message.content as optional; normalise a missing value
        # to "" so the declared ``str`` return holds, and strip surrounding
        # whitespace to match what the Anthropic provider returns.
        content = response.choices[0].message.content
        return (content or "").strip()