pi-lens 2.0.44 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/README.md +121 -1
- package/clients/agent-behavior-client.js +110 -0
- package/clients/agent-behavior-client.test.js +94 -0
- package/clients/agent-behavior-client.test.ts +116 -0
- package/clients/agent-behavior-client.ts +152 -0
- package/clients/architect-client.js +1 -0
- package/clients/architect-client.ts +1 -0
- package/clients/ast-grep-client.js +15 -14
- package/clients/ast-grep-client.ts +30 -15
- package/clients/auto-loop.js +5 -1
- package/clients/auto-loop.ts +23 -7
- package/clients/biome-client.js +3 -12
- package/clients/biome-client.test.js +3 -1
- package/clients/biome-client.test.ts +3 -1
- package/clients/biome-client.ts +3 -12
- package/clients/cache-manager.js +245 -0
- package/clients/cache-manager.test.js +197 -0
- package/clients/cache-manager.test.ts +299 -0
- package/clients/cache-manager.ts +331 -0
- package/clients/complexity-client.js +3 -7
- package/clients/complexity-client.ts +12 -13
- package/clients/dependency-checker.js +26 -5
- package/clients/dependency-checker.ts +32 -6
- package/clients/dispatch/dispatcher.js +242 -0
- package/clients/dispatch/dispatcher.test.js +115 -0
- package/clients/dispatch/dispatcher.test.ts +148 -0
- package/clients/dispatch/dispatcher.ts +333 -0
- package/clients/dispatch/integration.js +58 -0
- package/clients/dispatch/integration.ts +71 -0
- package/clients/dispatch/plan.js +131 -0
- package/clients/dispatch/plan.ts +145 -0
- package/clients/dispatch/runners/architect.js +69 -0
- package/clients/dispatch/runners/architect.ts +87 -0
- package/clients/dispatch/runners/ast-grep.js +115 -0
- package/clients/dispatch/runners/ast-grep.ts +139 -0
- package/clients/dispatch/runners/biome.js +69 -0
- package/clients/dispatch/runners/biome.ts +93 -0
- package/clients/dispatch/runners/go-vet.js +72 -0
- package/clients/dispatch/runners/go-vet.ts +92 -0
- package/clients/dispatch/runners/index.js +22 -0
- package/clients/dispatch/runners/index.ts +24 -0
- package/clients/dispatch/runners/ruff.js +68 -0
- package/clients/dispatch/runners/ruff.ts +88 -0
- package/clients/dispatch/runners/rust-clippy.js +106 -0
- package/clients/dispatch/runners/rust-clippy.ts +131 -0
- package/clients/dispatch/runners/secrets.js +109 -0
- package/clients/dispatch/runners/ts-lsp.js +53 -0
- package/clients/dispatch/runners/ts-lsp.ts +71 -0
- package/clients/dispatch/runners/type-safety.js +142 -0
- package/clients/dispatch/runners/type-safety.ts +183 -0
- package/clients/dispatch/runners/utils.js +53 -0
- package/clients/dispatch/runners/utils.ts +60 -0
- package/clients/dispatch/types.js +13 -0
- package/clients/dispatch/types.ts +152 -0
- package/clients/file-kinds.js +177 -0
- package/clients/file-kinds.test.js +169 -0
- package/clients/file-kinds.test.ts +210 -0
- package/clients/file-kinds.ts +216 -0
- package/clients/fix-scanners.js +23 -4
- package/clients/fix-scanners.ts +25 -7
- package/clients/jscpd-client.js +22 -1
- package/clients/jscpd-client.test.js +1 -1
- package/clients/jscpd-client.test.ts +1 -1
- package/clients/jscpd-client.ts +25 -1
- package/clients/metrics-history.js +3 -2
- package/clients/metrics-history.ts +3 -2
- package/clients/ruff-client.js +3 -2
- package/clients/ruff-client.ts +3 -2
- package/clients/rules-scanner.js +97 -0
- package/clients/rules-scanner.ts +120 -0
- package/clients/sanitize.js +291 -0
- package/clients/sanitize.test.js +177 -0
- package/clients/sanitize.test.ts +223 -0
- package/clients/sanitize.ts +356 -0
- package/clients/secrets-scanner.js +113 -0
- package/clients/secrets-scanner.test.js +100 -0
- package/clients/secrets-scanner.test.ts +113 -0
- package/clients/secrets-scanner.ts +134 -0
- package/clients/sg-runner.js +25 -5
- package/clients/sg-runner.ts +44 -7
- package/clients/todo-scanner.js +1 -1
- package/clients/todo-scanner.ts +1 -1
- package/clients/tool-availability.js +213 -0
- package/clients/tool-availability.ts +256 -0
- package/commands/booboo.js +114 -48
- package/commands/booboo.ts +119 -53
- package/commands/fix.js +60 -54
- package/commands/fix.ts +118 -67
- package/commands/refactor.js +35 -13
- package/commands/refactor.ts +48 -16
- package/default-architect.yaml +15 -5
- package/index.ts +406 -422
- package/package.json +14 -1
- package/rules/ast-grep-rules/rules/large-class.yml +1 -1
- package/rules/ast-grep-rules/rules/no-default-export.yml +19 -0
- package/rules/ast-grep-rules/rules/no-hardcoded-secrets.yml +9 -6
- package/rules/ast-grep-rules/rules/no-process-env.yml +12 -12
- package/rules/ast-grep-rules/rules/no-relative-imports.yml +21 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to pi-lens will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [2.1.1] - 2026-03-29
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- **Content-level secret scanning**: Catches secrets in ANY file type on write/edit (`.env`, `.yaml`, `.json`, not just TypeScript). Blocks before save with patterns for `sk-*`, `ghp_*`, `AKIA*`, private keys, hardcoded passwords.
|
|
9
|
+
- **Project rules integration**: Scans for `.claude/rules/`, `.agents/rules/`, `CLAUDE.md`, `AGENTS.md` at session start and surfaces them in the system prompt.
|
|
10
|
+
- **Grep-ability rules**: New ast-grep rules for `no-default-export` and `no-relative-cross-package-import` to improve agent searchability.
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
- **Inline feedback stripped to blocking only**: Warnings no longer shown inline (noise). Only blocking violations and test failures interrupt the agent.
|
|
14
|
+
- **booboo-fix output compacted**: Summary in terminal, full plan in `.pi-lens/reports/fix-plan.tsv`.
|
|
15
|
+
- **booboo-refactor output compacted**: Top 5 worst offenders in terminal, full ranked list in `.pi-lens/reports/refactor-ranked.tsv`.
|
|
16
|
+
- **`ast_grep_search` new params**: Added `selector` (extract specific AST node) and `context` (show surrounding lines).
|
|
17
|
+
- **`ast_grep_replace` mode indicator**: Shows `[DRY-RUN]` or `[APPLIED]` prefix.
|
|
18
|
+
- **no-hardcoded-secrets**: Fixed to only flag actual hardcoded strings (not `process.env` assignments).
|
|
19
|
+
- **no-process-env**: Now only flags secret-related env vars (not PORT, NODE_ENV, etc.).
|
|
20
|
+
- **Removed Factory AI article reference** from architect.yaml.
|
|
21
|
+
|
|
5
22
|
## [2.0.40] - 2026-03-27
|
|
6
23
|
|
|
7
24
|
### Changed
|
package/README.md
CHANGED
|
@@ -16,6 +16,107 @@ pi install git:github.com/apmantza/pi-lens
|
|
|
16
16
|
|
|
17
17
|
---
|
|
18
18
|
|
|
19
|
+
## What's New (v2.1)
|
|
20
|
+
|
|
21
|
+
### Content-Level Secret Scanning
|
|
22
|
+
|
|
23
|
+
Secrets are now blocked **before** they're saved, on **all file types**:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
🔴 STOP — 1 potential secret(s) in src/config.ts:
|
|
27
|
+
L12: Possible Stripe or OpenAI API key (sk-*)
|
|
28
|
+
→ Remove before continuing. Use env vars instead.
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Works on `.env`, `.yaml`, `.json`, `.md` — not just TypeScript. Catches `sk-*`, `ghp_*`, `AKIA*`, private keys, hardcoded passwords.
|
|
32
|
+
|
|
33
|
+
### Compact Output
|
|
34
|
+
|
|
35
|
+
Inline feedback stripped to **blocking only** — no more warning noise:
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
🔴 STOP — 2 issue(s) must be fixed:
|
|
39
|
+
L23: var total = sum(items); — use 'let' or 'const'
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Warnings are tracked and surfaced via `/lens-booboo`. booboo-fix and booboo-refactor output compacted to summaries with TSV files for full details.
|
|
43
|
+
|
|
44
|
+
### Project Rules Integration
|
|
45
|
+
|
|
46
|
+
Scans for `.claude/rules/`, `.agents/rules/`, `CLAUDE.md`, `AGENTS.md` at session start. Project-specific rules are surfaced in the system prompt — the agent knows to read them when relevant. Works alongside pi-lens architect rules.
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## What's New (v2.0)
|
|
51
|
+
|
|
52
|
+
### Declarative Dispatch System
|
|
53
|
+
|
|
54
|
+
The core linting engine has been redesigned from ~400 lines of nested `if/else` blocks into a clean, extensible dispatch architecture:
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
file → detectFileKind() → getRunnersForKind() → run all runners → aggregate output
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Key improvements:**
|
|
61
|
+
- **Extensible**: Add new linters by dropping a runner file in `clients/dispatch/runners/` — no need to touch the core
|
|
62
|
+
- **Unified output**: All tools report through the same format (🔴 blocking, 🟡 warning, ✅ fixed)
|
|
63
|
+
- **Delta mode built-in**: Each runner supports baseline tracking to show only *new* violations
|
|
64
|
+
- **Conditional execution**: Runners can have `when` conditions (e.g., only run when `--autofix` is enabled)
|
|
65
|
+
|
|
66
|
+
**Runners:** `ts-lsp`, `biome`, `ruff`, `ast-grep`, `type-safety`, `architect`, `go-vet`, `rust-clippy`
|
|
67
|
+
|
|
68
|
+
### Asynchronous Session Start
|
|
69
|
+
|
|
70
|
+
Session initialization now runs all scans concurrently with caching:
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
session_start
|
|
74
|
+
├─ TODO/FIXME scan (fast, uncached)
|
|
75
|
+
├─ Knip dead code (cached 30 min)
|
|
76
|
+
├─ jscpd duplicates (cached 30 min)
|
|
77
|
+
├─ Type coverage (cached 30 min)
|
|
78
|
+
└─ Export scanning (async, for duplicate export detection)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Each scan runs independently — expensive scans (jscpd, knip) are cached in `.pi-lens/cache/` with a 30-minute TTL. The agent sees results immediately without waiting for slow tools.
|
|
82
|
+
|
|
83
|
+
### Inline Messaging
|
|
84
|
+
|
|
85
|
+
Every `write` or `edit` operation returns structured feedback directly in the tool result:
|
|
86
|
+
|
|
87
|
+
```typescript
|
|
88
|
+
// tool_result handler runs dispatchLint() → formats output → appends to result
|
|
89
|
+
return {
|
|
90
|
+
content: [...event.content, { type: "text", text: lspOutput }],
|
|
91
|
+
};
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
**Message types:**
|
|
95
|
+
| Prefix | Meaning |
|
|
96
|
+
|--------|---------|
|
|
97
|
+
| 🔴 | Blocking error — must fix before continuing |
|
|
98
|
+
| 🟡 | Warning — should fix, but not blocking |
|
|
99
|
+
| ✅ | Auto-fixed issue — no action needed |
|
|
100
|
+
| 📊 | Silent metric — tracked but not shown |
|
|
101
|
+
| 📐 | Architectural rule — reference only |
|
|
102
|
+
|
|
103
|
+
### File Type Detection
|
|
104
|
+
|
|
105
|
+
Centralized file-kind detection (`clients/file-kinds.ts`) replaces scattered regex checks:
|
|
106
|
+
|
|
107
|
+
```typescript
|
|
108
|
+
const kind = detectFileKind(filePath); // "jsts" | "python" | "go" | "rust" | ...
|
|
109
|
+
const runners = getRunnersForKind(kind); // All applicable runners
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Project Rules Integration
|
|
113
|
+
|
|
114
|
+
pi-lens now scans for project-specific rule files (`.claude/rules/`, `.agents/rules/`, `CLAUDE.md`, `AGENTS.md`) at session start. These are surfaced in the system prompt so the agent knows to read them when relevant.
|
|
115
|
+
|
|
116
|
+
This works **alongside** pi-lens architect rules — your project's markdown rules provide general guidance, while pi-lens handles automated regex-based checks on every write.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
19
120
|
## Features
|
|
20
121
|
|
|
21
122
|
### On every write / edit
|
|
@@ -80,6 +181,25 @@ On every new session, scans run silently in the background. Data is cached for r
|
|
|
80
181
|
| **jscpd** | Duplicate detection on write; `/lens-booboo` reports |
|
|
81
182
|
| **type-coverage** | `/lens-booboo` reports |
|
|
82
183
|
| **Complexity baselines** | Regressed/improved delta tracking via `/lens-metrics` |
|
|
184
|
+
| **Project rules** | Scans for `.claude/rules/`, `.agents/rules/`, `CLAUDE.md`, `AGENTS.md` |
|
|
185
|
+
|
|
186
|
+
### Project Rules Integration
|
|
187
|
+
|
|
188
|
+
pi-lens scans for project-specific rule files at session start. If found, they're listed in the system prompt so the agent knows to read them when relevant:
|
|
189
|
+
|
|
190
|
+
```
|
|
191
|
+
📋 Project rules found: 2 file(s) in .claude/rules, root. These apply alongside pi-lens defaults.
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
**Scanned locations:**
|
|
195
|
+
| Location | Description |
|
|
196
|
+
|----------|-------------|
|
|
197
|
+
| `.claude/rules/` | Claude Code rule files (recursive) |
|
|
198
|
+
| `.agents/rules/` | Generic agent rule files (recursive) |
|
|
199
|
+
| `CLAUDE.md` | Claude Code project context |
|
|
200
|
+
| `AGENTS.md` | Generic agent context |
|
|
201
|
+
|
|
202
|
+
These files provide **general project guidance** (coding conventions, workflow rules, architecture notes). They coexist with pi-lens architect rules — your rules inform the agent's behavior, while pi-lens provides automated regex-based checks on every write.
|
|
83
203
|
|
|
84
204
|
### On-demand commands
|
|
85
205
|
|
|
@@ -95,7 +215,7 @@ On every new session, scans run silently in the background. Data is cached for r
|
|
|
95
215
|
|
|
96
216
|
| Tool | Description |
|
|
97
217
|
|---|---|
|
|
98
|
-
| **`ast_grep_search`** | Search code patterns using AST-aware matching. Supports meta-variables: `$VAR` (single node), `$$$` (multiple). Example: `console.log($MSG)` |
|
|
218
|
+
| **`ast_grep_search`** | Search code patterns using AST-aware matching. Supports meta-variables: `$VAR` (single node), `$$$` (multiple). Optional: `selector` (extract specific AST node), `context` (show surrounding lines). Example: `console.log($MSG)` |
|
|
99
219
|
| **`ast_grep_replace`** | Replace code patterns with AST-aware rewriting. Dry-run by default, use `apply=true` to apply changes. Example: `pattern='console.log($MSG)' rewrite='logger.info($MSG)'` |
|
|
100
220
|
|
|
101
221
|
Supported languages: c, cpp, csharp, css, dart, elixir, go, haskell, html, java, javascript, json, kotlin, lua, php, python, ruby, rust, scala, sql, swift, tsx, typescript, yaml
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AgentBehaviorClient for pi-lens
|
|
3
|
+
*
|
|
4
|
+
* Tracks tool call sequences and flags anti-patterns in real-time:
|
|
5
|
+
* - Blind writes: editing or writing without reading first
|
|
6
|
+
* - Thrashing: repeated identical tool calls with no progress
|
|
7
|
+
*
|
|
8
|
+
* No external dependencies — purely tracks tool call history.
|
|
9
|
+
*/
|
|
10
|
+
// --- Constants ---
|
|
11
|
+
const WRITE_OPS = new Set(["edit", "write", "multiedit"]);
const READ_OPS = new Set(["read", "bash", "grep", "glob", "find", "rg"]);
const BLIND_WRITE_WINDOW = 5; // Check last N tool calls for a read
const BLIND_WRITE_MIN_PRIOR_WRITES = 2; // Prior writes in the window needed to flag
const THRASH_THRESHOLD = 3; // Flag after N consecutive identical tool calls
const THRASH_TIMEOUT_MS = 30000; // Reset thrash counter if gap > 30s
const MAX_HISTORY = 50; // Tool call records retained for window checks
// --- Client ---
/**
 * Tracks the sequence of tool calls in a session and reports two
 * anti-patterns: blind writes and thrashing.
 */
export class AgentBehaviorClient {
	/** Most recent tool calls, oldest first, capped at MAX_HISTORY entries. */
	toolHistory = [];
	/** Length of the current streak of identical calls. */
	consecutiveCount = 0;
	/** Tool name of the previous call, or null before the first call. */
	lastToolName = null;
	/** File path of the previous call (undefined when none was given). */
	lastFilePath = undefined;
	/** Date.now() value recorded for the previous call. */
	lastToolTimestamp = 0;
	/** Per-file count of write operations. */
	fileEditCount = new Map();

	/**
	 * Record a tool call and return any warnings triggered.
	 * Called from tool_result handler.
	 *
	 * @param toolName Name of the tool that was invoked.
	 * @param filePath Optional path the tool operated on.
	 * @returns Warnings raised by this call (empty array when there are none).
	 */
	recordToolCall(toolName, filePath) {
		const warnings = [];
		const now = Date.now();

		// A call extends the streak only when it repeats the previous call:
		// same tool, same target, and within the timeout. Comparing the target
		// keeps e.g. three reads of three different files from being reported
		// as thrashing.
		const isRepeat =
			toolName === this.lastToolName &&
			filePath === this.lastFilePath &&
			now - this.lastToolTimestamp < THRASH_TIMEOUT_MS;
		this.consecutiveCount = isRepeat ? this.consecutiveCount + 1 : 1;
		this.lastToolName = toolName;
		this.lastFilePath = filePath;
		this.lastToolTimestamp = now;

		// Strict equality: warn once, on the call where the streak reaches the
		// threshold; later calls in the same streak do not warn again.
		if (this.consecutiveCount === THRASH_THRESHOLD) {
			warnings.push({
				type: "thrashing",
				message: `🔴 THRASHING — ${THRASH_THRESHOLD} consecutive \`${toolName}\` calls with no other action. Consider fixing the root cause instead of re-running.`,
				severity: "error",
				details: {
					toolName,
					callCount: this.consecutiveCount,
				},
			});
		}

		if (WRITE_OPS.has(toolName)) {
			// The window holds only earlier calls; the current one is appended below.
			const recentWindow = this.toolHistory.slice(-BLIND_WRITE_WINDOW);
			const hasRecentRead = recentWindow.some((r) => READ_OPS.has(r.toolName));

			if (!hasRecentRead && recentWindow.length > 0) {
				const priorWrites = recentWindow.filter((r) => WRITE_OPS.has(r.toolName)).length;
				if (priorWrites >= BLIND_WRITE_MIN_PRIOR_WRITES) {
					warnings.push({
						type: "blind-write",
						message: `⚠ BLIND WRITE — editing \`${filePath ?? "file"}\` without reading in the last ${BLIND_WRITE_WINDOW} tool calls. Read the file first to avoid assumptions.`,
						severity: "warning",
						details: {
							filePath,
							windowSize: recentWindow.length,
						},
					});
				}
			}

			// Track edits per file
			if (filePath) {
				this.fileEditCount.set(filePath, (this.fileEditCount.get(filePath) ?? 0) + 1);
			}
		}

		// Add to history, keeping only the newest MAX_HISTORY entries.
		this.toolHistory.push({ toolName, filePath, timestamp: now });
		if (this.toolHistory.length > MAX_HISTORY) {
			this.toolHistory = this.toolHistory.slice(-MAX_HISTORY);
		}
		return warnings;
	}

	/**
	 * Format warnings for LLM consumption.
	 *
	 * @param warnings Warnings as returned by recordToolCall.
	 * @returns The warning messages joined by newlines, or "" when there are none.
	 */
	formatWarnings(warnings) {
		if (warnings.length === 0) return "";
		return warnings.map((w) => w.message).join("\n");
	}

	/**
	 * Get edit count for a file in this session.
	 *
	 * @param filePath Path to look up.
	 * @returns Number of write operations recorded for the path (0 if none).
	 */
	getEditCount(filePath) {
		return this.fileEditCount.get(filePath) ?? 0;
	}

	/**
	 * Reset state (e.g., on session start).
	 */
	reset() {
		this.toolHistory = [];
		this.consecutiveCount = 0;
		this.lastToolName = null;
		this.lastFilePath = undefined;
		this.lastToolTimestamp = 0;
		this.fileEditCount.clear();
	}
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it } from "vitest";
|
|
2
|
+
import { AgentBehaviorClient } from "./agent-behavior-client.js";
|
|
3
|
+
// Behavioural tests for AgentBehaviorClient: blind-write detection,
// thrashing detection, per-file edit counting, warning formatting and reset.
describe("AgentBehaviorClient", () => {
	let client;
	beforeEach(() => {
		// A fresh instance per test keeps history and counters from leaking
		// between cases.
		client = new AgentBehaviorClient();
	});
	describe("blind write detection", () => {
		it("should NOT warn when read precedes write", () => {
			client.recordToolCall("read", "src/file.ts");
			client.recordToolCall("edit", "src/file.ts");
			const warnings = client.recordToolCall("write", "src/file.ts");
			expect(warnings).toHaveLength(0);
		});
		it("should warn when multiple writes happen without reads", () => {
			// First write is OK (no history)
			client.recordToolCall("write", "src/file1.ts");
			// Second write - still in window, accumulates
			client.recordToolCall("edit", "src/file2.ts");
			// Third write without any read - now we have a pattern
			const warnings = client.recordToolCall("edit", "src/file3.ts");
			expect(warnings).toHaveLength(1);
			expect(warnings[0].type).toBe("blind-write");
		});
		it("should not warn for single write with no history", () => {
			const warnings = client.recordToolCall("write", "src/file.ts");
			expect(warnings).toHaveLength(0);
		});
	});
	describe("thrashing detection", () => {
		it("should warn after 3 consecutive identical tool calls", () => {
			client.recordToolCall("bash", undefined);
			// Second call - no warning yet
			let warnings = client.recordToolCall("bash", undefined);
			expect(warnings).toHaveLength(0);
			// Third consecutive - should warn
			warnings = client.recordToolCall("bash", undefined);
			expect(warnings).toHaveLength(1);
			expect(warnings[0].type).toBe("thrashing");
			expect(warnings[0].details.callCount).toBe(3);
		});
		it("should NOT warn for different tool calls", () => {
			client.recordToolCall("read", "src/file.ts");
			// bash has no target file; the second argument is a file path,
			// not a command line.
			client.recordToolCall("bash", undefined);
			const warnings = client.recordToolCall("edit", "src/file.ts");
			expect(warnings).toHaveLength(0);
		});
		it("should reset count when tool changes", () => {
			client.recordToolCall("bash", undefined);
			client.recordToolCall("bash", undefined);
			// Different tool resets the count
			client.recordToolCall("read", "src/file.ts");
			// Now start new consecutive sequence
			client.recordToolCall("bash", undefined);
			const warnings = client.recordToolCall("bash", undefined);
			expect(warnings).toHaveLength(0); // Only 2 consecutive, not 3
		});
	});
	describe("edit counting", () => {
		it("should track edit count per file", () => {
			client.recordToolCall("edit", "src/a.ts");
			client.recordToolCall("edit", "src/a.ts");
			client.recordToolCall("edit", "src/b.ts");
			expect(client.getEditCount("src/a.ts")).toBe(2);
			expect(client.getEditCount("src/b.ts")).toBe(1);
			expect(client.getEditCount("src/c.ts")).toBe(0);
		});
	});
	describe("formatWarnings", () => {
		it("should format multiple warnings", () => {
			const warnings = [
				{
					type: "blind-write",
					message: "⚠ BLIND WRITE — editing file",
					severity: "warning",
					details: {},
				},
				{
					type: "thrashing",
					message: "🔴 THRASHING — 3 consecutive calls",
					severity: "error",
					details: {},
				},
			];
			const formatted = client.formatWarnings(warnings);
			expect(formatted).toContain("BLIND WRITE");
			expect(formatted).toContain("THRASHING");
		});
		it("should return empty string for no warnings", () => {
			expect(client.formatWarnings([])).toBe("");
		});
	});
	describe("reset", () => {
		it("should clear edit counts, history and the thrash streak", () => {
			client.recordToolCall("edit", "src/a.ts");
			client.recordToolCall("edit", "src/a.ts");
			client.reset();
			expect(client.getEditCount("src/a.ts")).toBe(0);
			// Without the reset this third edit would raise warnings; with
			// history and the streak cleared it is treated as a first call.
			expect(client.recordToolCall("edit", "src/a.ts")).toHaveLength(0);
		});
	});
});
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { beforeEach, describe, expect, it } from "vitest";
|
|
2
|
+
import { AgentBehaviorClient } from "./agent-behavior-client.js";
|
|
3
|
+
|
|
4
|
+
// Behavioural tests for AgentBehaviorClient: blind-write detection,
// thrashing detection, per-file edit counting, warning formatting and reset.
describe("AgentBehaviorClient", () => {
	let client: AgentBehaviorClient;

	beforeEach(() => {
		// A fresh instance per test keeps history and counters from leaking
		// between cases.
		client = new AgentBehaviorClient();
	});

	describe("blind write detection", () => {
		it("should NOT warn when read precedes write", () => {
			client.recordToolCall("read", "src/file.ts");
			client.recordToolCall("edit", "src/file.ts");

			const warnings = client.recordToolCall("write", "src/file.ts");
			expect(warnings).toHaveLength(0);
		});

		it("should warn when multiple writes happen without reads", () => {
			// First write is OK (no history)
			client.recordToolCall("write", "src/file1.ts");

			// Second write - still in window, accumulates
			client.recordToolCall("edit", "src/file2.ts");

			// Third write without any read - now we have a pattern
			const warnings = client.recordToolCall("edit", "src/file3.ts");
			expect(warnings).toHaveLength(1);
			expect(warnings[0].type).toBe("blind-write");
		});

		it("should not warn for single write with no history", () => {
			const warnings = client.recordToolCall("write", "src/file.ts");
			expect(warnings).toHaveLength(0);
		});
	});

	describe("thrashing detection", () => {
		it("should warn after 3 consecutive identical tool calls", () => {
			client.recordToolCall("bash", undefined);

			// Second call - no warning yet
			let warnings = client.recordToolCall("bash", undefined);
			expect(warnings).toHaveLength(0);

			// Third consecutive - should warn
			warnings = client.recordToolCall("bash", undefined);
			expect(warnings).toHaveLength(1);
			expect(warnings[0].type).toBe("thrashing");
			expect(warnings[0].details.callCount).toBe(3);
		});

		it("should NOT warn for different tool calls", () => {
			client.recordToolCall("read", "src/file.ts");
			// bash has no target file; the second argument is a file path,
			// not a command line.
			client.recordToolCall("bash", undefined);

			const warnings = client.recordToolCall("edit", "src/file.ts");
			expect(warnings).toHaveLength(0);
		});

		it("should reset count when tool changes", () => {
			client.recordToolCall("bash", undefined);
			client.recordToolCall("bash", undefined);

			// Different tool resets the count
			client.recordToolCall("read", "src/file.ts");

			// Now start new consecutive sequence
			client.recordToolCall("bash", undefined);

			const warnings = client.recordToolCall("bash", undefined);
			expect(warnings).toHaveLength(0); // Only 2 consecutive, not 3
		});
	});

	describe("edit counting", () => {
		it("should track edit count per file", () => {
			client.recordToolCall("edit", "src/a.ts");
			client.recordToolCall("edit", "src/a.ts");
			client.recordToolCall("edit", "src/b.ts");

			expect(client.getEditCount("src/a.ts")).toBe(2);
			expect(client.getEditCount("src/b.ts")).toBe(1);
			expect(client.getEditCount("src/c.ts")).toBe(0);
		});
	});

	describe("formatWarnings", () => {
		it("should format multiple warnings", () => {
			const warnings = [
				{
					type: "blind-write" as const,
					message: "⚠ BLIND WRITE — editing file",
					severity: "warning" as const,
					details: {},
				},
				{
					type: "thrashing" as const,
					message: "🔴 THRASHING — 3 consecutive calls",
					severity: "error" as const,
					details: {},
				},
			];

			const formatted = client.formatWarnings(warnings);
			expect(formatted).toContain("BLIND WRITE");
			expect(formatted).toContain("THRASHING");
		});

		it("should return empty string for no warnings", () => {
			expect(client.formatWarnings([])).toBe("");
		});
	});

	describe("reset", () => {
		it("should clear edit counts, history and the thrash streak", () => {
			client.recordToolCall("edit", "src/a.ts");
			client.recordToolCall("edit", "src/a.ts");

			client.reset();

			expect(client.getEditCount("src/a.ts")).toBe(0);
			// Without the reset this third edit would raise warnings; with
			// history and the streak cleared it is treated as a first call.
			expect(client.recordToolCall("edit", "src/a.ts")).toHaveLength(0);
		});
	});
});
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AgentBehaviorClient for pi-lens
|
|
3
|
+
*
|
|
4
|
+
* Tracks tool call sequences and flags anti-patterns in real-time:
|
|
5
|
+
* - Blind writes: editing or writing without reading first
|
|
6
|
+
* - Thrashing: repeated identical tool calls with no progress
|
|
7
|
+
*
|
|
8
|
+
* No external dependencies — purely tracks tool call history.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// --- Types ---
|
|
12
|
+
|
|
13
|
+
/** A single anti-pattern report produced by AgentBehaviorClient. */
export type BehaviorWarning = {
	type: "blind-write" | "thrashing";
	message: string;
	severity: "warning" | "error";
	details: {
		filePath?: string;
		callCount?: number;
		toolName?: string;
		windowSize?: number;
	};
};

/** One entry of the retained tool call history. */
interface ToolCallRecord {
	toolName: string;
	filePath?: string;
	timestamp: number;
}

// --- Constants ---

const WRITE_OPS = new Set(["edit", "write", "multiedit"]);
const READ_OPS = new Set(["read", "bash", "grep", "glob", "find", "rg"]);

const BLIND_WRITE_WINDOW = 5; // Check last N tool calls for a read
const BLIND_WRITE_MIN_PRIOR_WRITES = 2; // Prior writes in the window needed to flag
const THRASH_THRESHOLD = 3; // Flag after N consecutive identical tool calls
const THRASH_TIMEOUT_MS = 30_000; // Reset thrash counter if gap > 30s
const MAX_HISTORY = 50; // Tool call records retained for window checks

// --- Client ---

/**
 * Tracks the sequence of tool calls in a session and reports two
 * anti-patterns: blind writes and thrashing.
 */
export class AgentBehaviorClient {
	/** Most recent tool calls, oldest first, capped at MAX_HISTORY entries. */
	private toolHistory: ToolCallRecord[] = [];
	/** Length of the current streak of identical calls. */
	private consecutiveCount = 0;
	/** Tool name of the previous call, or null before the first call. */
	private lastToolName: string | null = null;
	/** File path of the previous call (undefined when none was given). */
	private lastFilePath: string | undefined = undefined;
	/** Date.now() value recorded for the previous call. */
	private lastToolTimestamp = 0;

	/** Per-file count of write operations. */
	private fileEditCount = new Map<string, number>();

	/**
	 * Record a tool call and return any warnings triggered.
	 * Called from tool_result handler.
	 *
	 * @param toolName Name of the tool that was invoked.
	 * @param filePath Optional path the tool operated on.
	 * @returns Warnings raised by this call (empty array when there are none).
	 */
	recordToolCall(toolName: string, filePath?: string): BehaviorWarning[] {
		const warnings: BehaviorWarning[] = [];
		const now = Date.now();

		// A call extends the streak only when it repeats the previous call:
		// same tool, same target, and within the timeout. Comparing the target
		// keeps e.g. three reads of three different files from being reported
		// as thrashing.
		const isRepeat =
			toolName === this.lastToolName &&
			filePath === this.lastFilePath &&
			now - this.lastToolTimestamp < THRASH_TIMEOUT_MS;
		this.consecutiveCount = isRepeat ? this.consecutiveCount + 1 : 1;
		this.lastToolName = toolName;
		this.lastFilePath = filePath;
		this.lastToolTimestamp = now;

		// Strict equality: warn once, on the call where the streak reaches the
		// threshold; later calls in the same streak do not warn again.
		if (this.consecutiveCount === THRASH_THRESHOLD) {
			warnings.push({
				type: "thrashing",
				message: `🔴 THRASHING — ${THRASH_THRESHOLD} consecutive \`${toolName}\` calls with no other action. Consider fixing the root cause instead of re-running.`,
				severity: "error",
				details: {
					toolName,
					callCount: this.consecutiveCount,
				},
			});
		}

		if (WRITE_OPS.has(toolName)) {
			// The window holds only earlier calls; the current one is appended below.
			const recentWindow = this.toolHistory.slice(-BLIND_WRITE_WINDOW);
			const hasRecentRead = recentWindow.some((r) => READ_OPS.has(r.toolName));

			if (!hasRecentRead && recentWindow.length > 0) {
				const priorWrites = recentWindow.filter((r) =>
					WRITE_OPS.has(r.toolName),
				).length;

				if (priorWrites >= BLIND_WRITE_MIN_PRIOR_WRITES) {
					warnings.push({
						type: "blind-write",
						message: `⚠ BLIND WRITE — editing \`${filePath ?? "file"}\` without reading in the last ${BLIND_WRITE_WINDOW} tool calls. Read the file first to avoid assumptions.`,
						severity: "warning",
						details: {
							filePath,
							windowSize: recentWindow.length,
						},
					});
				}
			}

			// Track edits per file
			if (filePath) {
				this.fileEditCount.set(
					filePath,
					(this.fileEditCount.get(filePath) ?? 0) + 1,
				);
			}
		}

		// Add to history, keeping only the newest MAX_HISTORY entries.
		this.toolHistory.push({ toolName, filePath, timestamp: now });
		if (this.toolHistory.length > MAX_HISTORY) {
			this.toolHistory = this.toolHistory.slice(-MAX_HISTORY);
		}

		return warnings;
	}

	/**
	 * Format warnings for LLM consumption.
	 *
	 * @param warnings Warnings as returned by recordToolCall.
	 * @returns The warning messages joined by newlines, or "" when there are none.
	 */
	formatWarnings(warnings: BehaviorWarning[]): string {
		if (warnings.length === 0) return "";

		return warnings.map((w) => w.message).join("\n");
	}

	/**
	 * Get edit count for a file in this session.
	 *
	 * @param filePath Path to look up.
	 * @returns Number of write operations recorded for the path (0 if none).
	 */
	getEditCount(filePath: string): number {
		return this.fileEditCount.get(filePath) ?? 0;
	}

	/**
	 * Reset state (e.g., on session start).
	 */
	reset(): void {
		this.toolHistory = [];
		this.consecutiveCount = 0;
		this.lastToolName = null;
		this.lastFilePath = undefined;
		this.lastToolTimestamp = 0;
		this.fileEditCount.clear();
	}
}
|