claude-dev-env 1.29.3 → 1.30.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +8 -0
- package/agents/code-quality-agent.md +279 -24
- package/agents/groq-coder.md +111 -0
- package/commands/plan.md +4 -5
- package/docs/CODE_RULES.md +40 -0
- package/hooks/blocking/code_rules_enforcer.py +775 -8
- package/hooks/blocking/destructive_command_blocker.py +149 -12
- package/hooks/blocking/test_code_rules_enforcer.py +751 -0
- package/hooks/blocking/test_code_rules_enforcer_constant_equality.py +130 -0
- package/hooks/blocking/test_code_rules_enforcer_existence_checks.py +134 -0
- package/hooks/blocking/test_code_rules_enforcer_skip_decorators.py +150 -0
- package/hooks/blocking/test_destructive_command_blocker.py +281 -4
- package/hooks/git-hooks/test_config.py +9 -3
- package/hooks/git-hooks/test_gate_utils.py +9 -3
- package/hooks/git-hooks/test_pre_commit.py +9 -3
- package/hooks/git-hooks/test_pre_push.py +9 -3
- package/hooks/validators/run_all_validators.py +76 -3
- package/hooks/validators/test_output_formatter.py +4 -16
- package/hooks/validators/test_run_all_validators.py +22 -0
- package/hooks/validators/test_run_all_validators_integration.py +2 -11
- package/package.json +1 -1
- package/scripts/config/groq_bugteam_config.py +104 -0
- package/scripts/config/test_groq_bugteam_config.py +11 -0
- package/scripts/config/test_spec_implementer_prompt.py +36 -0
- package/scripts/groq_bugteam.README.md +2 -0
- package/scripts/groq_bugteam.py +74 -15
- package/scripts/groq_bugteam_dotenv.py +40 -0
- package/scripts/groq_bugteam_spec.py +226 -0
- package/scripts/test_groq_bugteam.py +143 -5
- package/scripts/test_groq_bugteam_apply_fix_from_spec.py +426 -0
- package/scripts/test_groq_bugteam_dotenv.py +66 -0
- package/scripts/test_groq_bugteam_spec.py +346 -0
- package/skills/bugteam/SKILL.md +4 -0
- package/skills/bugteam/reference/README.md +16 -0
- package/skills/bugteam/test_skill_additions.py +30 -0
- package/skills/monitor-open-prs/SKILL.md +104 -0
- package/skills/monitor-open-prs/scripts/discover_open_prs.py +69 -0
- package/skills/monitor-open-prs/scripts/test_discover_open_prs.py +149 -0
- package/skills/monitor-open-prs/test_skill_contract.py +43 -0
- package/skills/pr-review-responder/SKILL.md +10 -8
- package/hooks/github-action/pre-push-review.yml +0 -27
- package/hooks/github-action/test_workflow.py +0 -33
- package/skills/pr-review-responder/update_skill.py +0 -297
|
@@ -111,8 +111,112 @@ If ``applied_finding_indexes`` is empty, ``updated_content`` MUST equal the
|
|
|
111
111
|
input exactly.
|
|
112
112
|
"""
|
|
113
113
|
|
|
114
|
+
SPEC_IMPLEMENTER_SYSTEM_PROMPT = """<groq_spec_implementer>
|
|
115
|
+
|
|
116
|
+
<role>
|
|
117
|
+
Apply a Claude-authored fix-spec to a single file. Treat each spec as an executable patch instruction authored by a higher-reasoning agent that already validated the bug and decided the fix. Perform mechanical edits only. Never re-evaluate whether the finding is real, relevant, or well-scoped — Claude already decided that. Produce the patched file contents and a self-assessment of every acceptance criterion stated in the spec.
|
|
118
|
+
</role>
|
|
119
|
+
|
|
120
|
+
<inputs>
|
|
121
|
+
Every invocation provides exactly two inputs:
|
|
122
|
+
|
|
123
|
+
1. The current contents of one file, as a single UTF-8 string.
|
|
124
|
+
|
|
125
|
+
2. A fix-spec array targeting that file. Each spec entry has these fields:
|
|
126
|
+
|
|
127
|
+
- finding_index (int, stable across audit and fix)
|
|
128
|
+
- severity (P0 | P1 | P2)
|
|
129
|
+
- category (single letter A–J)
|
|
130
|
+
- file (relative path, must match the file being patched)
|
|
131
|
+
- target_line_start (int, 1-based, inclusive)
|
|
132
|
+
- target_line_end (int, 1-based, inclusive; equals target_line_start for single-line edits)
|
|
133
|
+
- intended_change (natural-language description of the edit)
|
|
134
|
+
- replacement_code (optional literal text to splice in; absent when Claude wanted Groq to derive the edit from intended_change + acceptance_criteria)
|
|
135
|
+
- acceptance_criteria (array of observable post-fix assertions; each is a standalone sentence a reader can check against the patched file)
|
|
136
|
+
|
|
137
|
+
Treat every field as authoritative. Accept the finding_index exactly as provided and echo it in the output.
|
|
138
|
+
</inputs>
|
|
139
|
+
|
|
140
|
+
<rules>
|
|
141
|
+
|
|
142
|
+
<rule_1_mechanical_only>
|
|
143
|
+
Apply the spec verbatim. Skip every form of re-analysis. Only edit lines covered by target_line_start..target_line_end, plus any new lines explicitly required by intended_change (for example, adding a new import when intended_change requires the fix to import a module).
|
|
144
|
+
</rule_1_mechanical_only>
|
|
145
|
+
|
|
146
|
+
<rule_2_replacement_code_when_present>
|
|
147
|
+
When replacement_code is present, splice it in so the resulting file replaces lines target_line_start..target_line_end with the exact text of replacement_code. Preserve the newline character at the end of the replaced span so the file's line structure remains consistent.
|
|
148
|
+
</rule_2_replacement_code_when_present>
|
|
149
|
+
|
|
150
|
+
<rule_3_derive_minimally_when_replacement_absent>
|
|
151
|
+
When replacement_code is absent, implement the smallest edit that satisfies intended_change AND every acceptance_criterion. Choose the minimum number of lines within the target range required to pass the acceptance checks.
|
|
152
|
+
</rule_3_derive_minimally_when_replacement_absent>
|
|
153
|
+
|
|
154
|
+
<rule_4_byte_for_byte_outside_edit>
|
|
155
|
+
Preserve every byte outside the edited region: leading whitespace, trailing whitespace, trailing newline presence or absence, indent style (tabs versus spaces), blank-line placement, import order, existing comment placement, and line-ending style. Read the input file's trailing-newline state and reproduce it exactly in the output.
|
|
156
|
+
</rule_4_byte_for_byte_outside_edit>
|
|
157
|
+
|
|
158
|
+
<rule_5_no_stylistic_additions>
|
|
159
|
+
Add zero new comments, docstrings, type hints, or defensive code unless the spec explicitly requires one. Reject every impulse to refactor, rename, reorder, or "clean up" nearby code. Keep the diff as narrow as the spec allows.
|
|
160
|
+
</rule_5_no_stylistic_additions>
|
|
161
|
+
|
|
162
|
+
<rule_6_never_invent_authorization>
|
|
163
|
+
Only apply edits covered by a spec entry. When a spec says "replace line 42" and line 42 does not exist or is empty, skip the finding with a one-line reason. Never fabricate lines Claude did not authorize. Never generalize the spec to adjacent lines.
|
|
164
|
+
</rule_6_never_invent_authorization>
|
|
165
|
+
|
|
166
|
+
<rule_7_acceptance_self_check>
|
|
167
|
+
For every finding marked applied, evaluate each acceptance_criterion against the patched file contents. Record the result in acceptance_checks with met=true or met=false. When any acceptance_criterion evaluates to met=false for a given finding_index, move that finding_index out of applied_finding_indexes and into skipped with a reason naming the failing criterion.
|
|
168
|
+
</rule_7_acceptance_self_check>
|
|
169
|
+
|
|
170
|
+
</rules>
|
|
171
|
+
|
|
172
|
+
<output_schema>
|
|
173
|
+
Respond with JSON only. Emit zero prose outside the JSON object. The object has exactly these top-level keys:
|
|
174
|
+
|
|
175
|
+
{
|
|
176
|
+
"updated_content": "full patched file contents as a single string",
|
|
177
|
+
"applied_finding_indexes": [0, 2],
|
|
178
|
+
"skipped": [
|
|
179
|
+
{"finding_index": 1, "reason": "one-line reason"}
|
|
180
|
+
],
|
|
181
|
+
"acceptance_checks": [
|
|
182
|
+
{"finding_index": 0, "criterion": "verbatim text from the spec", "met": true}
|
|
183
|
+
]
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
Ensure updated_content contains the full patched file — never a diff, never a fragment, never a summary. When applied_finding_indexes is empty, ensure updated_content equals the input byte-for-byte. Copy each acceptance_criterion string verbatim from the spec into the corresponding acceptance_checks entry.
|
|
187
|
+
</output_schema>
|
|
188
|
+
|
|
189
|
+
<failure_mode>
|
|
190
|
+
Skip the finding and preserve the file unchanged when any of these hold:
|
|
191
|
+
|
|
192
|
+
- target_line_start or target_line_end points outside the file.
|
|
193
|
+
- target_line_start > target_line_end.
|
|
194
|
+
- replacement_code contains a syntax error detectable on inspection.
|
|
195
|
+
- acceptance_criteria contradict the current file state in a way no valid patch can satisfy.
|
|
196
|
+
- intended_change and acceptance_criteria disagree with each other.
|
|
197
|
+
- Applying the spec would require editing lines outside target_line_start..target_line_end AND intended_change does not explicitly authorize that wider scope.
|
|
198
|
+
|
|
199
|
+
In every skip case, set the corresponding entry in skipped with a one-line reason naming the exact condition that failed. Return updated_content equal to the input when every finding is skipped. Never guess. Never partially apply. Never emit prose explanations outside the JSON object.
|
|
200
|
+
</failure_mode>
|
|
201
|
+
|
|
202
|
+
</groq_spec_implementer>
|
|
203
|
+
"""
|
|
204
|
+
|
|
114
205
|
JSON_INDENT_SPACES = 2
|
|
115
206
|
PIPELINE_FAILURE_EXIT_CODE = 2
|
|
116
207
|
|
|
117
208
|
TEXT_CLAMP_HEAD_PARTS = 1
|
|
118
209
|
TEXT_CLAMP_TOTAL_PARTS = 2
|
|
210
|
+
|
|
211
|
+
SPEC_MODE_FLAG = "--mode"
|
|
212
|
+
SPEC_MODE_VALUE = "spec"
|
|
213
|
+
MISSING_API_KEY_ERROR = (
|
|
214
|
+
"GROQ_API_KEY not set in environment; create packages/claude-dev-env/.env "
|
|
215
|
+
"from packages/claude-dev-env/.env.example (gitignored) or export GROQ_API_KEY"
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
REQUIRED_GROQ_BUGTEAM_ATTRIBUTES: tuple[str, ...] = (
|
|
219
|
+
"call_groq_with_fallback",
|
|
220
|
+
"parse_json_object",
|
|
221
|
+
"preserve_trailing_newline",
|
|
222
|
+
)
|
|
@@ -70,3 +70,14 @@ def test_fix_budget_exceeds_audit_budget():
|
|
|
70
70
|
groq_bugteam_config.GROQ_FIX_MAX_COMPLETION_TOKENS
|
|
71
71
|
> groq_bugteam_config.GROQ_AUDIT_MAX_COMPLETION_TOKENS
|
|
72
72
|
)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_spec_implementer_prompt_is_distinct_from_fix_prompt():
    """The spec-implementer prompt must not be a copy of the fix prompt."""
    spec_prompt = groq_bugteam_config.SPEC_IMPLEMENTER_SYSTEM_PROMPT
    fix_prompt = groq_bugteam_config.FIX_SYSTEM_PROMPT
    assert spec_prompt != fix_prompt
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_spec_implementer_prompt_contains_mechanical_discipline_marker():
    """The prompt must carry the literal 'mechanical edits only' phrase."""
    discipline_marker = "mechanical edits only"
    spec_prompt = groq_bugteam_config.SPEC_IMPLEMENTER_SYSTEM_PROMPT
    assert discipline_marker in spec_prompt
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Existence and coherence check for SPEC_IMPLEMENTER_SYSTEM_PROMPT."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib.util
|
|
6
|
+
import pathlib
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _load_config_module():
    """Load ``groq_bugteam_config.py`` from this directory under an alias name.

    The module is registered in ``sys.modules`` under the alias before its
    body is executed, then returned to the caller.
    """
    alias_name = "groq_bugteam_config_spec"
    config_path = pathlib.Path(__file__).parent / "groq_bugteam_config.py"
    config_spec = importlib.util.spec_from_file_location(alias_name, config_path)
    config_module = importlib.util.module_from_spec(config_spec)
    sys.modules[alias_name] = config_module
    config_spec.loader.exec_module(config_module)
    return config_module
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
groq_bugteam_config = _load_config_module()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_spec_implementer_prompt_exists():
    """The config module must expose SPEC_IMPLEMENTER_SYSTEM_PROMPT."""
    prompt_is_defined = hasattr(groq_bugteam_config, "SPEC_IMPLEMENTER_SYSTEM_PROMPT")
    assert prompt_is_defined
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_spec_implementer_prompt_is_non_empty_string():
    """The prompt must be a ``str`` with content beyond whitespace."""
    spec_prompt = groq_bugteam_config.SPEC_IMPLEMENTER_SYSTEM_PROMPT
    assert isinstance(spec_prompt, str)
    assert spec_prompt.strip() != ""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_spec_implementer_prompt_declares_mechanical_only_discipline():
    """The prompt must state the 'mechanical edits only' discipline verbatim."""
    discipline_marker = "mechanical edits only"
    assert discipline_marker in groq_bugteam_config.SPEC_IMPLEMENTER_SYSTEM_PROMPT
|
|
@@ -58,6 +58,7 @@ The caller posts the review to GitHub. This script does not touch the GitHub API
|
|
|
58
58
|
## Required environment
|
|
59
59
|
|
|
60
60
|
- `GROQ_API_KEY` — from https://console.groq.com/keys. Free tier is enough for a single PR.
|
|
61
|
+
- **Local file (preferred):** copy `packages/claude-dev-env/.env.example` to `packages/claude-dev-env/.env`, set `GROQ_API_KEY=...` inside the copy. That path is gitignored; `groq_bugteam.py` loads it on startup when the file exists (without overriding variables already exported in your shell).
|
|
61
62
|
- `git` on PATH, configured to push to the target remote.
|
|
62
63
|
- Python 3.10+. No external deps (stdlib `urllib.request` only).
|
|
63
64
|
|
|
@@ -65,6 +66,7 @@ The caller posts the review to GitHub. This script does not touch the GitHub API
|
|
|
65
66
|
|
|
66
67
|
```bash
|
|
67
68
|
# Assumes you already have a git worktree checked out to the PR head branch.
|
|
69
|
+
# Either use packages/claude-dev-env/.env (see Required environment) or:
|
|
68
70
|
export GROQ_API_KEY=gsk_...
|
|
69
71
|
|
|
70
72
|
python3 - <<'EOF' | python3 packages/claude-dev-env/scripts/groq_bugteam.py
|
package/scripts/groq_bugteam.py
CHANGED
|
@@ -7,7 +7,9 @@ team, no 10-loop convergence: one audit call, one fix call, one commit and
|
|
|
7
7
|
push per PR.
|
|
8
8
|
|
|
9
9
|
Stateless and PII-free. All GitHub identifiers arrive on stdin as JSON;
|
|
10
|
-
``GROQ_API_KEY`` is read from the environment
|
|
10
|
+
``GROQ_API_KEY`` is read from the environment after loading
|
|
11
|
+
``packages/claude-dev-env/.env`` when that file exists (gitignored; see
|
|
12
|
+
``.env.example``). Output is JSON on stdout.
|
|
11
13
|
|
|
12
14
|
Pipeline (per invocation):
|
|
13
15
|
1. Read PR metadata, unified diff, file contents from stdin.
|
|
@@ -74,6 +76,7 @@ from config.groq_bugteam_config import (
|
|
|
74
76
|
MAXIMUM_DIFF_CHARACTERS,
|
|
75
77
|
MAXIMUM_FILE_CONTENT_CHARACTERS,
|
|
76
78
|
MAXIMUM_FINDINGS_PER_PR,
|
|
79
|
+
MISSING_API_KEY_ERROR,
|
|
77
80
|
NO_FINDINGS_REVIEW_BODY,
|
|
78
81
|
PIPELINE_FAILURE_EXIT_CODE,
|
|
79
82
|
REVIEW_BODY_HEADER_TEMPLATE,
|
|
@@ -81,6 +84,9 @@ from config.groq_bugteam_config import (
|
|
|
81
84
|
TEXT_CLAMP_TOTAL_PARTS,
|
|
82
85
|
)
|
|
83
86
|
|
|
87
|
+
from groq_bugteam_dotenv import load_claude_dev_env_dotenv_file
|
|
88
|
+
|
|
89
|
+
|
|
84
90
|
@dataclass(frozen=True)
|
|
85
91
|
class GroqCallResult:
|
|
86
92
|
content: str
|
|
@@ -98,12 +104,21 @@ def should_skip_to_next_model(error: urllib.error.HTTPError) -> bool:
|
|
|
98
104
|
def clamp_text(text: str, max_characters: int) -> str:
    """Shorten ``text`` to at most ``max_characters`` characters.

    Text that already fits is returned unchanged. Otherwise the middle of the
    text is replaced by a ``... [truncated N chars] ...`` marker, keeping a
    head and a tail whose split follows ``TEXT_CLAMP_HEAD_PARTS`` out of
    ``TEXT_CLAMP_TOTAL_PARTS``. When the budget cannot even hold the marker,
    a plain prefix of ``max_characters`` characters is returned instead.

    A non-positive budget yields an empty string. Without that guard a
    negative ``max_characters`` would reach ``text[:max_characters]``, which
    slices from the end and returns more characters than the budget allows.
    """
    if max_characters <= 0:
        return ""
    if len(text) <= max_characters:
        return text
    # The marker embeds the truncated-character count, and the width of that
    # number changes the marker length (and so the count itself). Iterate
    # until the count written in the marker matches the real truncation.
    truncated_count = len(text)
    while True:
        truncation_marker = f"\n\n... [truncated {truncated_count} chars] ...\n\n"
        if len(truncation_marker) >= max_characters:
            return text[:max_characters]
        content_budget = max_characters - len(truncation_marker)
        refined_truncated_count = len(text) - content_budget
        if refined_truncated_count == truncated_count:
            break
        truncated_count = refined_truncated_count
    head_length = content_budget * TEXT_CLAMP_HEAD_PARTS // TEXT_CLAMP_TOTAL_PARTS
    tail_length = content_budget - head_length
    head = text[:head_length]
    # ``text[-0:]`` would return the whole string, so an empty tail is explicit.
    tail = text[-tail_length:] if tail_length else ""
    return f"{head}{truncation_marker}{tail}"
|
|
107
122
|
|
|
108
123
|
|
|
109
124
|
def post_to_groq(
|
|
@@ -160,7 +175,9 @@ def call_groq_with_fallback(
|
|
|
160
175
|
if should_skip_to_next_model(http_error):
|
|
161
176
|
break
|
|
162
177
|
if not is_recoverable_http_error(http_error):
|
|
163
|
-
|
|
178
|
+
raise RuntimeError(
|
|
179
|
+
f"Groq request failed with non-recoverable HTTP error: {http_error}"
|
|
180
|
+
) from http_error
|
|
164
181
|
except (
|
|
165
182
|
urllib.error.URLError,
|
|
166
183
|
TimeoutError,
|
|
@@ -181,6 +198,30 @@ def parse_json_object(raw_text: str) -> dict:
|
|
|
181
198
|
return json.loads(match.group(0))
|
|
182
199
|
|
|
183
200
|
|
|
201
|
+
def coerce_indexes_to_int_set(raw_indexes: list | None) -> set[int]:
|
|
202
|
+
coerced: set[int] = set()
|
|
203
|
+
for each_raw_index in raw_indexes or []:
|
|
204
|
+
try:
|
|
205
|
+
coerced.add(int(each_raw_index))
|
|
206
|
+
except (TypeError, ValueError):
|
|
207
|
+
continue
|
|
208
|
+
return coerced
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def coerce_skipped_entries(raw_skipped: list | None) -> dict[int, str]:
|
|
212
|
+
coerced: dict[int, str] = {}
|
|
213
|
+
for each_entry in raw_skipped or []:
|
|
214
|
+
if not isinstance(each_entry, dict):
|
|
215
|
+
continue
|
|
216
|
+
try:
|
|
217
|
+
finding_index = int(each_entry.get("finding_index"))
|
|
218
|
+
except (TypeError, ValueError):
|
|
219
|
+
continue
|
|
220
|
+
raw_reason = each_entry.get("reason", "")
|
|
221
|
+
coerced[finding_index] = "" if raw_reason is None else str(raw_reason)
|
|
222
|
+
return coerced
|
|
223
|
+
|
|
224
|
+
|
|
184
225
|
def normalize_findings(raw_findings: list, files_content: dict) -> list:
|
|
185
226
|
normalized = []
|
|
186
227
|
for each_raw in raw_findings:
|
|
@@ -248,7 +289,10 @@ def should_write_fixed_file(
|
|
|
248
289
|
def is_safe_relative_path(each_path: str) -> bool:
|
|
249
290
|
if os.path.isabs(each_path):
|
|
250
291
|
return False
|
|
251
|
-
|
|
292
|
+
posix_style_each_path = each_path.replace("\\", "/")
|
|
293
|
+
if posix_style_each_path.startswith("/"):
|
|
294
|
+
return False
|
|
295
|
+
if each_path.startswith("\\"):
|
|
252
296
|
return False
|
|
253
297
|
normalized = os.path.normpath(each_path)
|
|
254
298
|
if normalized.startswith(".." + os.sep) or normalized == "..":
|
|
@@ -436,9 +480,10 @@ def build_review_body(
|
|
|
436
480
|
|
|
437
481
|
|
|
438
482
|
def run_pipeline(input_data: dict) -> dict:
|
|
483
|
+
load_claude_dev_env_dotenv_file()
|
|
439
484
|
api_key = os.environ.get("GROQ_API_KEY", "").strip()
|
|
440
485
|
if not api_key:
|
|
441
|
-
return {"error":
|
|
486
|
+
return {"error": MISSING_API_KEY_ERROR}
|
|
442
487
|
|
|
443
488
|
diff_text = input_data.get("diff", "")
|
|
444
489
|
files_content = input_data.get("files_content", {})
|
|
@@ -477,11 +522,10 @@ def run_pipeline(input_data: dict) -> dict:
|
|
|
477
522
|
)
|
|
478
523
|
continue
|
|
479
524
|
raw_updated_content = fix_result.get("updated_content", current_content)
|
|
480
|
-
applied_indexes =
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
}
|
|
525
|
+
applied_indexes = coerce_indexes_to_int_set(
|
|
526
|
+
fix_result.get("applied_finding_indexes", [])
|
|
527
|
+
)
|
|
528
|
+
skipped_entries = coerce_skipped_entries(fix_result.get("skipped", []))
|
|
485
529
|
updated_content = preserve_trailing_newline(current_content, raw_updated_content)
|
|
486
530
|
content_changed = updated_content != current_content
|
|
487
531
|
if should_write_fixed_file(applied_indexes, updated_content, current_content):
|
|
@@ -565,7 +609,7 @@ def run_pipeline(input_data: dict) -> dict:
|
|
|
565
609
|
}
|
|
566
610
|
|
|
567
611
|
|
|
568
|
-
def
|
|
612
|
+
def run_default_pipeline_main() -> None:
|
|
569
613
|
try:
|
|
570
614
|
stdin_text = sys.stdin.read()
|
|
571
615
|
input_data = json.loads(stdin_text)
|
|
@@ -584,5 +628,20 @@ def main() -> None:
|
|
|
584
628
|
sys.exit(PIPELINE_FAILURE_EXIT_CODE)
|
|
585
629
|
|
|
586
630
|
|
|
631
|
+
from groq_bugteam_spec import (
|
|
632
|
+
apply_fix_from_spec,
|
|
633
|
+
is_spec_mode_invocation,
|
|
634
|
+
run_spec_mode_main,
|
|
635
|
+
)
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def main() -> None:
    """CLI entry point: load the local dotenv file, then dispatch on argv.

    Runs the spec-mode entry point when the command-line arguments request
    it, otherwise the default pipeline entry point.
    """
    load_claude_dev_env_dotenv_file()
    command_line_arguments = sys.argv[1:]
    if not is_spec_mode_invocation(command_line_arguments):
        run_default_pipeline_main()
        return
    run_spec_mode_main()
|
|
644
|
+
|
|
645
|
+
|
|
587
646
|
if __name__ == "__main__":
|
|
588
647
|
main()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Load ``packages/claude-dev-env/.env`` into ``os.environ`` for local Groq use.
|
|
2
|
+
|
|
3
|
+
Does not override variables already set in the process environment. Uses a
|
|
4
|
+
minimal KEY=value parser (stdlib only) so ``groq_bugteam.py`` stays dependency
|
|
5
|
+
free.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def claude_dev_env_dotenv_path() -> Path:
    """Return the absolute ``.env`` path in the parent of this module's directory."""
    module_directory = Path(__file__).resolve().parent
    package_directory = module_directory.parent
    return package_directory / ".env"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def load_claude_dev_env_dotenv_file(dotenv_path: Path | None = None) -> None:
|
|
20
|
+
"""Apply KEY=value lines from the dotenv file when the file exists."""
|
|
21
|
+
resolved_path = dotenv_path if dotenv_path is not None else claude_dev_env_dotenv_path()
|
|
22
|
+
if not resolved_path.is_file():
|
|
23
|
+
return
|
|
24
|
+
raw_text = resolved_path.read_text(encoding="utf-8")
|
|
25
|
+
for each_line in raw_text.splitlines():
|
|
26
|
+
stripped_line = each_line.strip()
|
|
27
|
+
if not stripped_line or stripped_line.startswith("#"):
|
|
28
|
+
continue
|
|
29
|
+
if stripped_line.startswith("export "):
|
|
30
|
+
stripped_line = stripped_line.removeprefix("export ").strip()
|
|
31
|
+
if "=" not in stripped_line:
|
|
32
|
+
continue
|
|
33
|
+
key_part, _, value_part = stripped_line.partition("=")
|
|
34
|
+
key_name = key_part.strip()
|
|
35
|
+
value_text = value_part.strip()
|
|
36
|
+
if len(value_text) >= 2 and value_text[0] == value_text[-1] and value_text[0] in "\"'":
|
|
37
|
+
value_text = value_text[1:-1]
|
|
38
|
+
if not key_name or key_name in os.environ:
|
|
39
|
+
continue
|
|
40
|
+
os.environ[key_name] = value_text
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""Spec-mode implementer for groq_bugteam.
|
|
2
|
+
|
|
3
|
+
Splits the Claude-authored fix-spec pipeline into its own module so
|
|
4
|
+
groq_bugteam.py can keep the single-shot audit+fix pipeline isolated
|
|
5
|
+
from the mechanical patch applier. Both entrypoints share the same
|
|
6
|
+
HTTP client (``call_groq_with_fallback``), string helpers
|
|
7
|
+
(``parse_json_object``, ``preserve_trailing_newline``), and config
|
|
8
|
+
constants via the parent groq_bugteam module, resolved at call time
|
|
9
|
+
through resolve_groq_bugteam_module(). That resolver handles both
|
|
10
|
+
contexts: tests register the parent as ``sys.modules["groq_bugteam"]``
|
|
11
|
+
via spec_from_file_location, while a direct CLI invocation of
|
|
12
|
+
``python groq_bugteam.py --mode spec`` runs the parent as
|
|
13
|
+
``sys.modules["__main__"]``. The resolver also keeps tests able to
|
|
14
|
+
monkeypatch ``groq_bugteam.call_groq_with_fallback`` and have the
|
|
15
|
+
patch reach this module.
|
|
16
|
+
|
|
17
|
+
This module is imported from the bottom of groq_bugteam.py so
|
|
18
|
+
groq_bugteam.apply_fix_from_spec remains attribute-accessible to
|
|
19
|
+
existing callers and tests. The module must not import groq_bugteam
|
|
20
|
+
at its top level -- that would close the cycle during CLI startup
|
|
21
|
+
and raise ImportError before groq_bugteam_spec finishes defining its
|
|
22
|
+
public names.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import json
|
|
28
|
+
import os
|
|
29
|
+
import sys
|
|
30
|
+
from types import ModuleType
|
|
31
|
+
|
|
32
|
+
from config.groq_bugteam_config import (
|
|
33
|
+
GROQ_FIX_MAX_COMPLETION_TOKENS,
|
|
34
|
+
GROQ_FIX_TEMPERATURE,
|
|
35
|
+
JSON_INDENT_SPACES,
|
|
36
|
+
MISSING_API_KEY_ERROR,
|
|
37
|
+
PIPELINE_FAILURE_EXIT_CODE,
|
|
38
|
+
REQUIRED_GROQ_BUGTEAM_ATTRIBUTES,
|
|
39
|
+
SPEC_IMPLEMENTER_SYSTEM_PROMPT,
|
|
40
|
+
SPEC_MODE_FLAG,
|
|
41
|
+
SPEC_MODE_VALUE,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
from groq_bugteam_dotenv import load_claude_dev_env_dotenv_file
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def extract_failing_criteria_by_finding(
    acceptance_checks: list[dict],
) -> dict[int, list[str]]:
    """Group the text of unmet acceptance criteria by finding index.

    Checks with a truthy ``met`` are ignored. Malformed input is skipped
    rather than raising: entries that are not dicts, and entries whose
    ``finding_index`` is not a plain int (booleans are rejected even though
    ``bool`` subclasses ``int``). Criterion text is always stored as ``str``
    (``None`` becomes ``""``) so callers can join the values safely.
    """
    failing_by_finding: dict[int, list[str]] = {}
    for each_check in acceptance_checks:
        if not isinstance(each_check, dict):
            continue
        if each_check.get("met"):
            continue
        each_finding_index = each_check.get("finding_index")
        if isinstance(each_finding_index, bool) or not isinstance(
            each_finding_index, int
        ):
            continue
        raw_criterion = each_check.get("criterion", "")
        if raw_criterion is None:
            each_criterion_text = ""
        elif isinstance(raw_criterion, str):
            each_criterion_text = raw_criterion
        else:
            each_criterion_text = str(raw_criterion)
        failing_by_finding.setdefault(each_finding_index, []).append(
            each_criterion_text
        )
    return failing_by_finding
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def demote_findings_with_failing_criteria(
    applied_finding_indexes: list[int],
    skipped_entries: list[dict],
    failing_criteria_by_finding: dict[int, list[str]],
) -> tuple[list[int], list[dict]]:
    """Move findings with unmet acceptance criteria from applied to skipped.

    Returns ``(applied, skipped)`` where ``applied`` drops every index that
    appears in ``failing_criteria_by_finding`` and ``skipped`` is a copy of
    ``skipped_entries`` with one extra entry per failing finding that was not
    already listed; its reason is the failing criteria joined by ``"; "``.

    Skipped entries that are not dicts are passed through untouched instead
    of crashing on ``.get``, and only int ``finding_index`` values are used to
    detect already-skipped findings, since only those can match the int keys
    of ``failing_criteria_by_finding``.
    """
    demoted_applied = [
        each_index
        for each_index in applied_finding_indexes
        if each_index not in failing_criteria_by_finding
    ]
    already_skipped_indexes = {
        each_entry.get("finding_index")
        for each_entry in skipped_entries
        if isinstance(each_entry, dict)
        and isinstance(each_entry.get("finding_index"), int)
    }
    augmented_skipped = list(skipped_entries)
    for (
        each_finding_index,
        each_failing_criteria,
    ) in failing_criteria_by_finding.items():
        if each_finding_index in already_skipped_indexes:
            continue
        # str() keeps the join safe if a criterion arrived as a non-string.
        reason_text = "; ".join(
            str(each_criterion) for each_criterion in each_failing_criteria
        )
        augmented_skipped.append(
            {"finding_index": each_finding_index, "reason": reason_text}
        )
    return demoted_applied, augmented_skipped
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def build_spec_user_message(spec_list: list[dict], current_content: str) -> str:
    """Serialize the spec list and the current file contents into one JSON string."""
    return json.dumps(
        {"spec": spec_list, "current_content": current_content},
        indent=JSON_INDENT_SPACES,
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def find_missing_required_attributes(candidate_module: ModuleType) -> list[str]:
    """List the names in REQUIRED_GROQ_BUGTEAM_ATTRIBUTES that the module lacks."""
    missing_names: list[str] = []
    for required_name in REQUIRED_GROQ_BUGTEAM_ATTRIBUTES:
        if hasattr(candidate_module, required_name):
            continue
        missing_names.append(required_name)
    return missing_names
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def resolve_groq_bugteam_module() -> ModuleType:
    """Return the loaded module that provides the groq_bugteam helpers.

    ``sys.modules["groq_bugteam"]`` is tried first, then
    ``sys.modules["__main__"]``; the first one with every required attribute
    is returned.

    Raises:
        RuntimeError: when a candidate exists but lacks required attributes
            (the message names what the first existing candidate is missing),
            or when neither module is present in ``sys.modules``.
    """
    first_incomplete_missing: list[str] | None = None
    for each_module_name in ("groq_bugteam", "__main__"):
        candidate_module = sys.modules.get(each_module_name)
        if candidate_module is None:
            continue
        missing_attributes = find_missing_required_attributes(candidate_module)
        if not missing_attributes:
            return candidate_module
        if first_incomplete_missing is None:
            first_incomplete_missing = missing_attributes
    if first_incomplete_missing is None:
        raise RuntimeError(
            "groq_bugteam module not found in sys.modules; "
            "groq_bugteam_spec must be invoked from a context where "
            "groq_bugteam is the parent module (test loader or CLI)."
        )
    raise RuntimeError(
        "groq_bugteam module found but missing required attributes: "
        + ", ".join(first_incomplete_missing)
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def coerce_to_list(candidate_value: object) -> list:
    """Return ``candidate_value`` itself when it is a list, else a new empty list."""
    return candidate_value if isinstance(candidate_value, list) else []
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def coerce_to_string_or_fallback(
    candidate_value: object, fallback_value: str
) -> str:
    """Return ``candidate_value`` when it is a ``str``, otherwise ``fallback_value``."""
    return candidate_value if isinstance(candidate_value, str) else fallback_value
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def apply_fix_from_spec(spec_list: list[dict], current_content: str) -> dict:
    """Ask Groq to apply a fix-spec to one file and normalize its reply.

    Loads the local dotenv file, reads ``GROQ_API_KEY``, sends the
    spec-implementer system prompt plus the serialized spec and file contents,
    and parses the reply as a JSON object. Findings whose acceptance checks
    report an unmet criterion are moved from applied to skipped.

    Returns:
        A dict with ``updated_content``, ``applied_finding_indexes``,
        ``skipped`` and ``acceptance_checks``.

    Raises:
        RuntimeError: when ``GROQ_API_KEY`` is missing or blank.
    """
    load_claude_dev_env_dotenv_file()
    api_key = os.environ.get("GROQ_API_KEY", "").strip()
    if api_key == "":
        raise RuntimeError(MISSING_API_KEY_ERROR)

    # Helpers are looked up on the parent module at call time.
    parent_module = resolve_groq_bugteam_module()
    conversation = [
        {"role": "system", "content": SPEC_IMPLEMENTER_SYSTEM_PROMPT},
        {
            "role": "user",
            "content": build_spec_user_message(spec_list, current_content),
        },
    ]
    model_reply = parent_module.call_groq_with_fallback(
        api_key,
        messages=conversation,
        temperature=GROQ_FIX_TEMPERATURE,
        max_completion_tokens=GROQ_FIX_MAX_COMPLETION_TOKENS,
    )
    reply_fields = parent_module.parse_json_object(model_reply.content)

    # A non-string ``updated_content`` falls back to the unmodified input.
    reported_content = coerce_to_string_or_fallback(
        reply_fields.get("updated_content"), current_content
    )
    reported_applied = coerce_to_list(reply_fields.get("applied_finding_indexes"))
    reported_skipped = coerce_to_list(reply_fields.get("skipped"))
    reported_checks = coerce_to_list(reply_fields.get("acceptance_checks"))

    kept_applied, all_skipped = demote_findings_with_failing_criteria(
        reported_applied,
        reported_skipped,
        extract_failing_criteria_by_finding(reported_checks),
    )
    final_content = parent_module.preserve_trailing_newline(
        current_content, reported_content
    )

    return {
        "updated_content": final_content,
        "applied_finding_indexes": kept_applied,
        "skipped": all_skipped,
        "acceptance_checks": reported_checks,
    }
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def read_spec_input_from_stdin() -> tuple[list[dict], str]:
    """Read the spec-mode request from stdin.

    Returns:
        ``(spec, current_content)`` taken from the top-level JSON object;
        a missing ``spec`` defaults to ``[]`` and a missing
        ``current_content`` to ``""``.

    Raises:
        json.JSONDecodeError: when stdin is not valid JSON.
        ValueError: when the JSON root is not an object. Without this check a
            root such as ``[]`` would fail later with ``AttributeError`` on
            ``.get``, which is not a ``ValueError`` and would surface as a
            traceback instead of a handled input error.
    """
    stdin_text = sys.stdin.read()
    parsed_input = json.loads(stdin_text)
    if not isinstance(parsed_input, dict):
        raise ValueError(
            "top-level JSON value must be an object, got "
            + type(parsed_input).__name__
        )
    spec_list = parsed_input.get("spec", [])
    current_content = parsed_input.get("current_content", "")
    return spec_list, current_content
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def run_spec_mode() -> dict:
    """Run one spec-mode request and return its outcome as a dict.

    Failures are reported as ``{"error": ...}`` instead of being raised:
    one message for unreadable stdin input, another for any exception raised
    while applying the spec.
    """
    try:
        spec_list, current_content = read_spec_input_from_stdin()
    except ValueError as parse_error:
        # json.JSONDecodeError is a ValueError subclass, so this covers both.
        return {"error": f"stdin is not valid JSON: {parse_error}"}
    try:
        outcome = apply_fix_from_spec(spec_list, current_content)
    except Exception as spec_error:
        return {"error": f"spec-mode fix failed: {spec_error}"}
    return outcome
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def is_spec_mode_invocation(argv: list[str]) -> bool:
    """Report whether ``argv`` contains SPEC_MODE_FLAG directly followed by SPEC_MODE_VALUE."""
    # Pair every token with its successor; a trailing flag has no successor
    # and therefore never matches.
    adjacent_tokens = zip(argv, argv[1:])
    return any(
        flag_token == SPEC_MODE_FLAG and value_token == SPEC_MODE_VALUE
        for flag_token, value_token in adjacent_tokens
    )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def emit_outcome(outcome: dict) -> None:
    """Write ``outcome`` to stdout as indented JSON followed by a newline."""
    output_stream = sys.stdout
    json.dump(outcome, output_stream, indent=JSON_INDENT_SPACES)
    output_stream.write("\n")
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def run_spec_mode_main() -> None:
    """Run spec mode, print the outcome, and exit non-zero when it holds an error."""
    outcome = run_spec_mode()
    emit_outcome(outcome)
    spec_mode_failed = "error" in outcome
    if spec_mode_failed:
        sys.exit(PIPELINE_FAILURE_EXIT_CODE)
|