ai-agentreflect 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_agentreflect-0.1.0/AGENTS.md +12 -0
- ai_agentreflect-0.1.0/CHANGELOG.md +26 -0
- ai_agentreflect-0.1.0/CLAUDE.md +41 -0
- ai_agentreflect-0.1.0/PKG-INFO +135 -0
- ai_agentreflect-0.1.0/README.md +106 -0
- ai_agentreflect-0.1.0/agentreflect/__init__.py +3 -0
- ai_agentreflect-0.1.0/agentreflect/cli.py +171 -0
- ai_agentreflect-0.1.0/agentreflect/generators/__init__.py +5 -0
- ai_agentreflect-0.1.0/agentreflect/generators/llm_generator.py +96 -0
- ai_agentreflect-0.1.0/agentreflect/generators/pattern_generator.py +307 -0
- ai_agentreflect-0.1.0/agentreflect/models.py +29 -0
- ai_agentreflect-0.1.0/agentreflect/output/__init__.py +7 -0
- ai_agentreflect-0.1.0/agentreflect/output/apply.py +54 -0
- ai_agentreflect-0.1.0/agentreflect/output/diff_formatter.py +57 -0
- ai_agentreflect-0.1.0/agentreflect/output/markdown_formatter.py +42 -0
- ai_agentreflect-0.1.0/agentreflect/parsers/__init__.py +7 -0
- ai_agentreflect-0.1.0/agentreflect/parsers/git_parser.py +91 -0
- ai_agentreflect-0.1.0/agentreflect/parsers/notes_parser.py +79 -0
- ai_agentreflect-0.1.0/agentreflect/parsers/pytest_parser.py +167 -0
- ai_agentreflect-0.1.0/pyproject.toml +61 -0
- ai_agentreflect-0.1.0/tests/__init__.py +0 -0
- ai_agentreflect-0.1.0/tests/fixtures/sample-agents.md +17 -0
- ai_agentreflect-0.1.0/tests/fixtures/sample-failures.txt +60 -0
- ai_agentreflect-0.1.0/tests/test_cli.py +145 -0
- ai_agentreflect-0.1.0/tests/test_generators.py +103 -0
- ai_agentreflect-0.1.0/tests/test_output.py +109 -0
- ai_agentreflect-0.1.0/tests/test_parsers.py +226 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# AGENTS.md — agentreflect repo
|
|
2
|
+
|
|
3
|
+
## Rules for AI coding agents working in this repo
|
|
4
|
+
|
|
5
|
+
- Run `pytest tests/ -v` before committing any changes
|
|
6
|
+
- All public functions must have docstrings
|
|
7
|
+
- Use type annotations on all function signatures
|
|
8
|
+
- Never modify `tests/fixtures/` without updating corresponding tests
|
|
9
|
+
- The basic mode (no `--llm`) MUST work without any API key
|
|
10
|
+
- Keep `anthropic` import inside the function that needs it (lazy import)
|
|
11
|
+
- Confidence scores must be floats between 0.0 and 1.0
|
|
12
|
+
- When adding new error type mappings, add tests in `test_generators.py`
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to `agentreflect` will be documented here.
|
|
4
|
+
|
|
5
|
+
## [0.1.0] - 2026-03-11
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- `agentreflect generate` command with three input modes:
|
|
9
|
+
- `--from-pytest <file>`: parse pytest output for failure patterns
|
|
10
|
+
- `--from-git`: analyze git log for fix/bug/revert commits
|
|
11
|
+
- `--from-notes "text"`: parse plain text failure descriptions
|
|
12
|
+
- Pattern-based analysis engine (no API key required)
|
|
13
|
+
- Maps 15+ exception types to targeted rule suggestions
|
|
14
|
+
- Confidence scoring for each suggestion
|
|
15
|
+
- LLM-enhanced mode (`--llm`) using `claude-3-5-haiku-latest`
|
|
16
|
+
- Requires `ANTHROPIC_API_KEY`
|
|
17
|
+
- Graceful fallback to pattern mode if key not set
|
|
18
|
+
- Three output formats:
|
|
19
|
+
- Markdown (default): formatted suggestion report
|
|
20
|
+
- Diff (`--format diff`): unified diff ready to apply
|
|
21
|
+
- Apply (`--apply <file>`): direct AGENTS.md modification
|
|
22
|
+
- `--yes` flag to skip confirmation on file apply
|
|
23
|
+
- `--min-confidence` threshold filtering
|
|
24
|
+
- 40+ tests covering parsers, generators, output, and CLI integration
|
|
25
|
+
|
|
26
|
+
[0.1.0]: https://github.com/mikiships/agentreflect/releases/tag/v0.1.0
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# CLAUDE.md — agentreflect repo
|
|
2
|
+
|
|
3
|
+
## Project overview
|
|
4
|
+
|
|
5
|
+
`agentreflect` is a Python CLI tool that analyzes AI coding agent failure logs and generates targeted AGENTS.md rule suggestions. It closes the feedback loop of the agent quality trilogy (coderace → agentmd → agentlint) as the final "learn" step: coderace → agentmd → agentlint → agentreflect.
|
|
6
|
+
|
|
7
|
+
## Commands
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Install in dev mode
|
|
11
|
+
pip install -e '.[dev]'
|
|
12
|
+
|
|
13
|
+
# Run tests
|
|
14
|
+
pytest tests/ -v
|
|
15
|
+
|
|
16
|
+
# Run with coverage
|
|
17
|
+
pytest tests/ --cov=agentreflect --cov-report=term-missing
|
|
18
|
+
|
|
19
|
+
# Build package
|
|
20
|
+
python -m build
|
|
21
|
+
|
|
22
|
+
# Test CLI
|
|
23
|
+
agentreflect --version
|
|
24
|
+
agentreflect generate --from-notes "agent forgot to check None"
|
|
25
|
+
agentreflect generate --from-pytest tests/fixtures/sample-failures.txt
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Architecture
|
|
29
|
+
|
|
30
|
+
- `agentreflect/parsers/` — input parsers (pytest, git, notes)
|
|
31
|
+
- `agentreflect/generators/` — suggestion generators (pattern-based, LLM)
|
|
32
|
+
- `agentreflect/output/` — formatters (markdown, diff, apply)
|
|
33
|
+
- `agentreflect/models.py` — FailureRecord, RuleSuggestion dataclasses
|
|
34
|
+
- `agentreflect/cli.py` — click-based CLI entry point
|
|
35
|
+
|
|
36
|
+
## Key constraints
|
|
37
|
+
|
|
38
|
+
1. Basic mode (no `--llm`) must work without any API key
|
|
39
|
+
2. The `anthropic` package is only imported when `--llm` is used
|
|
40
|
+
3. Pattern generator must always return at least one suggestion
|
|
41
|
+
4. Apply mode appends to file, never overwrites existing content
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ai-agentreflect
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Generate AGENTS.md rules from agent failure logs
|
|
5
|
+
Project-URL: Homepage, https://github.com/mikiships/agentreflect
|
|
6
|
+
Project-URL: Repository, https://github.com/mikiships/agentreflect
|
|
7
|
+
Author: agentreflect contributors
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: AGENTS.md,agent-quality,agents,ai,claude,code-quality,codex
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
19
|
+
Classifier: Topic :: Software Development :: Testing
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Requires-Dist: click>=8.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: anthropic>=0.20.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
26
|
+
Provides-Extra: llm
|
|
27
|
+
Requires-Dist: anthropic>=0.20.0; extra == 'llm'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# agentreflect
|
|
31
|
+
|
|
32
|
+
Generate `AGENTS.md` rules from AI coding agent failure logs. Closes the feedback loop in the agent quality trilogy.
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
Measure (coderace) → Generate (agentmd) → Guard (agentlint) → Learn (agentreflect)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## What it does
|
|
39
|
+
|
|
40
|
+
Every developer using Claude Code or Codex has this problem: their agent makes a mistake, they fix it manually, update `AGENTS.md`, and hope it doesn't happen again. `agentreflect` automates the "update AGENTS.md" step.
|
|
41
|
+
|
|
42
|
+
Feed it failure logs → get targeted rule suggestions → apply them to your `AGENTS.md`.
|
|
43
|
+
|
|
44
|
+
## Install
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install ai-agentreflect
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
For LLM-enhanced mode:
|
|
51
|
+
```bash
|
|
52
|
+
pip install 'ai-agentreflect[llm]'
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Usage
|
|
56
|
+
|
|
57
|
+
### From pytest output
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Capture failures
|
|
61
|
+
pytest --tb=short 2>&1 | tee failures.txt
|
|
62
|
+
|
|
63
|
+
# Generate suggestions
|
|
64
|
+
agentreflect generate --from-pytest failures.txt
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### From git log
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
agentreflect generate --from-git
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Analyzes `fix:`, `bug:`, `revert:` commits and agent-related mistake commits.
|
|
74
|
+
|
|
75
|
+
### From plain text notes
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
agentreflect generate --from-notes "agent forgot to check for None before accessing .value"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Output formats
|
|
82
|
+
|
|
83
|
+
### Markdown (default)
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
## agentreflect suggestions (2026-03-11)
|
|
87
|
+
|
|
88
|
+
### From: pytest failures (failures.txt)
|
|
89
|
+
- [ ] Always check for None before attribute access: use `if obj is not None` or `hasattr(obj, 'attr')`
|
|
90
|
+
- [ ] When catching AttributeError, log the object type with `type(obj).__name__`
|
|
91
|
+
|
|
92
|
+
_Source: 3 failures analyzed, 2 suggestions generated_
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Diff format
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
agentreflect generate --from-pytest failures.txt --format diff
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Outputs a unified diff ready to apply to `AGENTS.md`.
|
|
102
|
+
|
|
103
|
+
### Apply directly
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
agentreflect generate --from-notes "agent used wrong variable" --apply AGENTS.md
|
|
107
|
+
# Asks for confirmation
|
|
108
|
+
|
|
109
|
+
agentreflect generate --from-pytest failures.txt --apply AGENTS.md --yes
|
|
110
|
+
# Applies without confirmation
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## LLM-enhanced mode
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
export ANTHROPIC_API_KEY=your_key_here
|
|
117
|
+
agentreflect generate --from-pytest failures.txt --llm
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Uses `claude-3-5-haiku-latest` for contextual, specific suggestions tailored to your actual failures. Cost: ~$0.001 per analysis.
|
|
121
|
+
|
|
122
|
+
Basic pattern mode works without any API key.
|
|
123
|
+
|
|
124
|
+
## Integration with the trilogy
|
|
125
|
+
|
|
126
|
+
| Tool | Role |
|
|
127
|
+
|------|------|
|
|
128
|
+
| [coderace](https://pypi.org/project/ai-coderace/) | Measure agent output quality |
|
|
129
|
+
| [agentmd](https://pypi.org/project/ai-agentmd/) | Generate AGENTS.md from scratch |
|
|
130
|
+
| [agentlint](https://pypi.org/project/ai-agentlint/) | Guard/validate AGENTS.md rules |
|
|
131
|
+
| **agentreflect** | **Learn from failures → update rules** |
|
|
132
|
+
|
|
133
|
+
## License
|
|
134
|
+
|
|
135
|
+
MIT
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# agentreflect
|
|
2
|
+
|
|
3
|
+
Generate `AGENTS.md` rules from AI coding agent failure logs. Closes the feedback loop in the agent quality trilogy.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
Measure (coderace) → Generate (agentmd) → Guard (agentlint) → Learn (agentreflect)
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## What it does
|
|
10
|
+
|
|
11
|
+
Every developer using Claude Code or Codex has this problem: their agent makes a mistake, they fix it manually, update `AGENTS.md`, and hope it doesn't happen again. `agentreflect` automates the "update AGENTS.md" step.
|
|
12
|
+
|
|
13
|
+
Feed it failure logs → get targeted rule suggestions → apply them to your `AGENTS.md`.
|
|
14
|
+
|
|
15
|
+
## Install
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install ai-agentreflect
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
For LLM-enhanced mode:
|
|
22
|
+
```bash
|
|
23
|
+
pip install 'ai-agentreflect[llm]'
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
### From pytest output
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
# Capture failures
|
|
32
|
+
pytest --tb=short 2>&1 | tee failures.txt
|
|
33
|
+
|
|
34
|
+
# Generate suggestions
|
|
35
|
+
agentreflect generate --from-pytest failures.txt
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### From git log
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
agentreflect generate --from-git
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Analyzes `fix:`, `bug:`, `revert:` commits and agent-related mistake commits.
|
|
45
|
+
|
|
46
|
+
### From plain text notes
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
agentreflect generate --from-notes "agent forgot to check for None before accessing .value"
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Output formats
|
|
53
|
+
|
|
54
|
+
### Markdown (default)
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
## agentreflect suggestions (2026-03-11)
|
|
58
|
+
|
|
59
|
+
### From: pytest failures (failures.txt)
|
|
60
|
+
- [ ] Always check for None before attribute access: use `if obj is not None` or `hasattr(obj, 'attr')`
|
|
61
|
+
- [ ] When catching AttributeError, log the object type with `type(obj).__name__`
|
|
62
|
+
|
|
63
|
+
_Source: 3 failures analyzed, 2 suggestions generated_
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Diff format
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
agentreflect generate --from-pytest failures.txt --format diff
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Outputs a unified diff ready to apply to `AGENTS.md`.
|
|
73
|
+
|
|
74
|
+
### Apply directly
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
agentreflect generate --from-notes "agent used wrong variable" --apply AGENTS.md
|
|
78
|
+
# Asks for confirmation
|
|
79
|
+
|
|
80
|
+
agentreflect generate --from-pytest failures.txt --apply AGENTS.md --yes
|
|
81
|
+
# Applies without confirmation
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## LLM-enhanced mode
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
export ANTHROPIC_API_KEY=your_key_here
|
|
88
|
+
agentreflect generate --from-pytest failures.txt --llm
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Uses `claude-3-5-haiku-latest` for contextual, specific suggestions tailored to your actual failures. Cost: ~$0.001 per analysis.
|
|
92
|
+
|
|
93
|
+
Basic pattern mode works without any API key.
|
|
94
|
+
|
|
95
|
+
## Integration with the trilogy
|
|
96
|
+
|
|
97
|
+
| Tool | Role |
|
|
98
|
+
|------|------|
|
|
99
|
+
| [coderace](https://pypi.org/project/ai-coderace/) | Measure agent output quality |
|
|
100
|
+
| [agentmd](https://pypi.org/project/ai-agentmd/) | Generate AGENTS.md from scratch |
|
|
101
|
+
| [agentlint](https://pypi.org/project/ai-agentlint/) | Guard/validate AGENTS.md rules |
|
|
102
|
+
| **agentreflect** | **Learn from failures → update rules** |
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
|
|
106
|
+
MIT
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""CLI entry point for agentreflect."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from . import __version__
|
|
10
|
+
from .parsers import PytestParser, GitLogParser, NotesParser
|
|
11
|
+
from .generators import PatternGenerator
|
|
12
|
+
from .models import FailureRecord
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Root command group for the CLI. It has no behavior of its own; subcommands
# (e.g. `generate`) register themselves on it via `@cli.command()`, and the
# version option reports the package's `__version__` under the program name
# "agentreflect".
@click.group()
@click.version_option(version=__version__, prog_name="agentreflect")
def cli() -> None:
    """agentreflect — generate AGENTS.md rules from agent failure logs.

    Analyze AI coding agent failures and get targeted rule suggestions
    to prevent recurrence. Part of the agent quality trilogy:
    coderace → agentmd → agentlint → agentreflect.
    """
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@cli.command()
@click.option(
    "--from-pytest",
    "pytest_file",
    # dir_okay=False: a directory would pass `exists=True` and then crash in
    # read_text(); let click reject it with a normal usage error instead.
    type=click.Path(exists=True, dir_okay=False),
    default=None,
    help="Path to pytest output file (e.g. captured with `pytest ... 2>&1 | tee failures.txt`)",
)
@click.option(
    "--from-git",
    "from_git",
    is_flag=True,
    default=False,
    help="Analyze git log of current directory for fix/bug/revert commits",
)
@click.option(
    "--from-notes",
    "notes",
    default=None,
    help="Plain text description of what went wrong (quoted string)",
)
@click.option(
    "--llm",
    "use_llm",
    is_flag=True,
    default=False,
    help="Use Anthropic API for enhanced analysis (requires ANTHROPIC_API_KEY)",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["markdown", "diff"]),
    default="markdown",
    show_default=True,
    help="Output format",
)
@click.option(
    "--apply",
    "apply_file",
    # The target may not exist yet, but it must never be a directory.
    type=click.Path(dir_okay=False),
    default=None,
    help="Apply suggestions directly to an AGENTS.md file",
)
@click.option(
    "--yes",
    "-y",
    is_flag=True,
    default=False,
    help="Skip confirmation when applying to file",
)
@click.option(
    "--min-confidence",
    type=float,
    default=0.0,
    help="Minimum confidence threshold for suggestions (0.0–1.0)",
)
def generate(
    pytest_file: Optional[str],
    from_git: bool,
    notes: Optional[str],
    use_llm: bool,
    output_format: str,
    apply_file: Optional[str],
    yes: bool,
    min_confidence: float,
) -> None:
    """Generate AGENTS.md rule suggestions from failure logs."""
    # Flow: collect FailureRecords from every requested input, turn them into
    # suggestions (LLM or pattern based), filter by confidence, then either
    # apply them to a file or print them as a diff / markdown report.
    #
    # Raises click.UsageError when none of the three input options is given.
    # Warnings and diagnostics go to stderr so stdout only carries the report.

    if not any([pytest_file, from_git, notes]):
        raise click.UsageError(
            "At least one input is required: --from-pytest, --from-git, or --from-notes"
        )

    all_records: list[FailureRecord] = []
    source_labels: list[str] = []

    # Parse inputs
    if pytest_file:
        # Captured terminal output is not guaranteed to be valid UTF-8;
        # replace undecodable bytes instead of aborting with a traceback.
        content = Path(pytest_file).read_text(encoding="utf-8", errors="replace")
        parser = PytestParser()
        records = parser.parse(content)
        all_records.extend(records)
        source_labels.append(f"pytest failures ({pytest_file})")
        if not records:
            click.echo(
                f"[warn] No failures found in {pytest_file}. "
                "Make sure the file contains pytest output with FAILED/ERROR lines.",
                err=True,
            )

    if from_git:
        records = GitLogParser.from_repo(".")
        all_records.extend(records)
        source_labels.append("git log")
        if not records:
            click.echo(
                "[info] No fix/bug/revert commits found in git log.",
                err=True,
            )

    if notes:
        parser = NotesParser()
        records = parser.parse(notes)
        all_records.extend(records)
        source_labels.append("notes")

    # Generate suggestions
    if use_llm:
        # Imported lazily so the basic mode never touches the LLM code path.
        from .generators.llm_generator import generate_with_llm

        # When applying to an existing file, pass its content along as
        # context for the LLM generator.
        agents_content = None
        if apply_file and Path(apply_file).exists():
            agents_content = Path(apply_file).read_text(encoding="utf-8")

        try:
            suggestions = generate_with_llm(all_records, agents_content)
        except Exception as e:
            # CLI boundary: besides a missing package (ImportError) or missing
            # key (ValueError), the API call itself can fail (network, auth,
            # rate limit). Report the failure and fall back rather than
            # aborting, since pattern mode needs no API access.
            click.echo(f"[error] LLM mode failed: {e}", err=True)
            click.echo("[info] Falling back to pattern-based analysis.", err=True)
            suggestions = PatternGenerator().generate(all_records)
    else:
        suggestions = PatternGenerator().generate(all_records)

    # Filter by confidence
    if min_confidence > 0:
        suggestions = [s for s in suggestions if s.confidence >= min_confidence]

    source_label = " + ".join(source_labels)

    # Output: --apply takes precedence over --format.
    if apply_file:
        from .output.apply import apply_to_file

        apply_to_file(suggestions, apply_file, yes=yes, echo=click.echo)
    elif output_format == "diff":
        from .output.diff_formatter import format_diff

        # The diff is computed against ./AGENTS.md when it is a regular file.
        existing = None
        agents_path = Path("AGENTS.md")
        if agents_path.is_file():
            existing = agents_path.read_text(encoding="utf-8")

        click.echo(format_diff(suggestions, "AGENTS.md", existing), nl=False)
    else:
        from .output.markdown_formatter import format_markdown

        click.echo(format_markdown(suggestions, all_records, source_label), nl=False)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""LLM-enhanced rule generator using Anthropic API."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
from ..models import FailureRecord, RuleSuggestion
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def generate_with_llm(
    records: List[FailureRecord],
    agents_md_content: Optional[str] = None,
) -> List[RuleSuggestion]:
    """Generate rule suggestions using the Anthropic API.

    Requires ANTHROPIC_API_KEY environment variable.

    Args:
        records: Failure records to summarize in the prompt. Only the first
            20 are sent, and each message is cut to 200 characters.
        agents_md_content: Optional text of an existing AGENTS.md, included
            (truncated to 3000 characters) so the model can avoid duplicates.

    Returns:
        One RuleSuggestion per bullet line in the model's reply, each with
        category "llm-generated" and a fixed confidence of 0.85. Empty when
        the reply has no content or no bullet lines.

    Raises:
        ImportError: If the ``anthropic`` package is not installed.
        ValueError: If ANTHROPIC_API_KEY is not set.
    """
    # Import only when needed
    try:
        import anthropic
    except ImportError as exc:
        raise ImportError(
            "anthropic package is required for LLM mode. Install with: pip install anthropic"
        ) from exc

    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise ValueError(
            "ANTHROPIC_API_KEY environment variable is not set. "
            "Set it to use LLM mode, or omit --llm to use pattern-based analysis."
        )

    # Single definition so the request and the recorded source cannot drift.
    model_name = "claude-3-5-haiku-latest"

    client = anthropic.Anthropic(api_key=api_key)

    # Build failure summary
    failure_lines = [
        f"{i}. [{record.type}] {record.message[:200]}"
        for i, record in enumerate(records[:20], 1)  # Cap at 20 records
    ]
    failures_text = "\n".join(failure_lines) if failure_lines else "No specific failures provided."

    # Build prompt
    agents_section = ""
    if agents_md_content:
        # Truncate to avoid token limits
        truncated = agents_md_content[:3000]
        agents_section = f"""
Current AGENTS.md content (for context, avoid duplicates):
```
{truncated}
```
"""

    prompt = f"""You are helping a developer improve their AGENTS.md file — a set of rules for AI coding agents like Claude Code or Codex.

The following failures were detected from their agent's recent work:

{failures_text}
{agents_section}
Based on these failures, suggest 3-5 specific, actionable rules that would prevent these failures in the future.

Requirements:
- Each rule should be a single, concrete action item
- Rules should be specific enough to be immediately actionable
- Avoid vague advice like "be careful" — give specific patterns, checks, or practices
- Format each rule as a markdown list item starting with "- "
- Focus on preventing the exact type of failure shown

Respond ONLY with the markdown list items, nothing else."""

    message = client.messages.create(
        model=model_name,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )

    # An empty reply yields no suggestions instead of an IndexError.
    content = message.content
    if not content:
        return []
    response_text = (getattr(content[0], "text", "") or "").strip()

    # Parse the response into RuleSuggestion objects
    return [
        RuleSuggestion(
            category="llm-generated",
            text=rule_text,
            confidence=0.85,
            source=f"anthropic/{model_name}",
        )
        for rule_text in _parse_rule_lines(response_text)
    ]


def _parse_rule_lines(response_text: str) -> List[str]:
    """Return the rule text of every markdown bullet line in *response_text*."""
    rules = []
    for raw_line in response_text.splitlines():
        line = raw_line.strip()
        # The prompt asks for "- " bullets; "* " is tolerated as an
        # equivalent markdown bullet. Any other line is ignored.
        if not line.startswith(("- ", "* ")):
            continue
        text = line[2:].strip()
        # Remove a leading markdown checkbox, checked or not.
        for checkbox in ("[ ]", "[x]", "[X]"):
            if text.startswith(checkbox):
                text = text[len(checkbox):].strip()
                break
        if text:
            rules.append(text)
    return rules
|