pi-lens 3.8.40 → 3.8.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/CHANGELOG.md +43 -0
  2. package/README.md +37 -1
  3. package/clients/cache/rule-cache.ts +1 -1
  4. package/clients/complexity-client.ts +1 -1
  5. package/clients/dependency-checker.ts +1 -1
  6. package/clients/dispatch/diagnostic-taxonomy.ts +13 -1
  7. package/clients/dispatch/dispatcher.ts +9 -0
  8. package/clients/dispatch/fact-scheduler.ts +1 -1
  9. package/clients/dispatch/integration.ts +56 -3
  10. package/clients/dispatch/runners/index.ts +2 -0
  11. package/clients/dispatch/runners/semgrep.ts +269 -0
  12. package/clients/dispatch/runners/shellcheck.ts +2 -8
  13. package/clients/dispatch/runners/tree-sitter.ts +32 -11
  14. package/clients/dispatch/tool-profile.ts +1 -0
  15. package/clients/format-service.ts +10 -0
  16. package/clients/formatters.ts +22 -8
  17. package/clients/installer/index.ts +3 -3
  18. package/clients/knip-client.ts +360 -362
  19. package/clients/lsp/aggregation.ts +91 -0
  20. package/clients/lsp/client.ts +29 -11
  21. package/clients/lsp/index.ts +76 -71
  22. package/clients/lsp/server-strategies.ts +71 -0
  23. package/clients/path-utils.ts +17 -0
  24. package/clients/pipeline.ts +2 -0
  25. package/clients/production-readiness.ts +2 -2
  26. package/clients/read-guard-logger.ts +41 -1
  27. package/clients/read-guard-tool-lines.ts +2 -2
  28. package/clients/read-guard.ts +40 -11
  29. package/clients/runtime-agent-end.ts +3 -0
  30. package/clients/runtime-session.ts +3 -0
  31. package/clients/runtime-tool-result.ts +24 -0
  32. package/clients/runtime-turn.ts +48 -4
  33. package/clients/sanitize.ts +1 -1
  34. package/clients/semgrep-config.ts +213 -0
  35. package/clients/tree-sitter-client.ts +1 -1
  36. package/clients/widget-state.ts +283 -0
  37. package/commands/booboo.ts +1 -1
  38. package/index.ts +201 -9
  39. package/package.json +2 -1
package/CHANGELOG.md CHANGED
@@ -4,6 +4,49 @@ All notable changes to pi-lens will be documented in this file.
4
4
 
5
5
  ## [Unreleased]
6
6
 
7
+ ## [3.8.41] - 2026-05-05
8
+
9
+ ### Fixed
10
+
11
+ - **tree-sitter wasm abort loop and memory leak (fixes #56)** — when the emscripten wasm runtime aborts (OOM or assertion failure on large workspaces), the module-level heap is permanently corrupted. pi-lens was re-invoking the dead runtime on every subsequent file write, printing `Aborted()` to stderr on each query and leaking memory on each retry. Added a module-level `_wasmAborted` flag: the first abort detected in the query catch loop poisons the singleton and prevents any further tree-sitter calls for the session. The runner skips cleanly with `reason: wasm_aborted_fatal` logged to `tree-sitter.log`.
12
+ - **`turn_end` phases now instrumented in latency log** — `handleTurnEnd` previously had no `logLatency` calls; all timing data was buried in plain-text `dbg()` lines in `sessionstart.log`. Added per-phase latency entries for `cascade_merge`, `jscpd`, `knip`, and `madge`, plus a `tool_result` total with `fileCount` and `blockerSections`. This gives a baseline for measuring the cost of future turn_end additions (e.g. LSP re-query).
13
+ - **Cascade ran graph build on non-code files** — markdown, YAML, JSON, and other files without a dispatchable kind were reaching `buildOrUpdateGraph`, causing cold graph builds that took up to 3–4 seconds per write with zero useful output. `computeCascadeForFile` now exits immediately with `cascade_skip / non_code_file` when `detectFileKind` returns `undefined`, consistent with the existing `shouldDispatch` gate used by the lint pipeline.
14
+
15
+ ### Added
16
+
17
+ - **Per-server LSP diagnostic strategies** — new `clients/lsp/server-strategies.ts` codifies known server behavior (TypeScript, rust-analyzer, pyright, ESLint) so timing decisions are automatic rather than one-size-fits-all. Strategies control first-push seeding, debounce window, pull retry budget, aggregate wait timeout, and whether a server benefits from a semantic second pull pass. Env var overrides (`PI_LENS_LSP_*`) take precedence. Unknown servers get a conservative default.
18
+ - **Result-aware diagnostic racing (`raceToCompletion`)** — new `clients/lsp/aggregation.ts` replaces the simple `Promise.race` + grace window pattern with a result-quality-aware aggregator. The grace window only triggers when at least one client has returned non-empty diagnostics, preventing premature resolution when the fastest client returns empty (e.g., TypeScript's syntactic pass). Document mode uses 0ms grace; full mode keeps the 400ms default.
19
+ - **`seedFirstPush` early-exit for clean files** — `raceToCompletion`'s completion predicate now also fires when a `seedFirstPush` server (TypeScript, ESLint) returns any result, even an empty one. These servers' first push is authoritative — waiting further yields nothing. Cuts clean-file diagnostic latency from ~1000ms to ~450ms in full mode and to near-zero in document mode (cascade neighbor touches).
20
+
21
+ - **`/lens-toggle` session switch** — added a single command to toggle pi-lens on/off at runtime without restarting pi. When off, write/edit analysis, read-guard, formatting, cascade, turn-end checks, and context injection are paused; running `/lens-toggle` again resumes them. `--no-lens` starts a session in the disabled state. Closes #49.
22
+ - **Experimental Semgrep CLI dispatch integration** — added a config-gated `semgrep` dispatch runner that normalizes Semgrep JSON findings into pi-lens diagnostics. The runner never auto-installs Semgrep and only runs when a local `.semgrep.yml`/`.semgrep.yaml`/`semgrep.yml`/`semgrep.yaml` is discovered or when explicitly configured with `--lens-semgrep --lens-semgrep-config <auto|p/pack|path>` / `/lens-semgrep enable --config <...>`. Dispatch scans pass `--metrics=off`; local rule scans do not require a Semgrep token, while Semgrep AppSec/Pro/managed configs may require `semgrep login` or `SEMGREP_APP_TOKEN`.
23
+ - **`/lens-semgrep` command** — new project command for managing Semgrep dispatch: `status` shows CLI/config/effective state, `init` writes a starter `.semgrep.yml` and enables dispatch, `enable [--config <auto|p/pack|path>]` persists activation in `.pi-lens/semgrep.json`, `disable` persists opt-out, and `clear` removes the pi-lens Semgrep config to return to local-config auto-discovery.
24
+ - **Semgrep severity policy metadata** — Semgrep rules can opt into pi-lens blocking semantics with metadata such as `metadata.pi-lens.semantic: blocking` and `metadata.pi-lens.defect_class: injection`. Otherwise, pi-lens promotes only high-signal Semgrep `ERROR` findings in security defect classes (`injection`, `secrets`, `safety`) to blockers and leaves other findings as warnings.
25
+ - **Experimental terminal dashboard** — `--lens-dashboard` / `PI_LENS_DASHBOARD=1` streams redacted session telemetry to a per-session JSONL file (`~/.pi-lens/dashboard-events/{sessionId}.jsonl`) and opens a live terminal dashboard. The dashboard shows the working folder, detected languages, formatter/linter activity, LSP servers spawned, diagnostics grouped by file with OSC-8 clickable links, and a session-start summary of languages, tools, configs, and autoinstalls. Each session gets its own event file; old files are pruned after 7 days (configurable via `PI_LENS_DASHBOARD_RETENTION_DAYS`). Use `PI_LENS_DASHBOARD_LOG_ONLY=1` to emit JSONL without opening a terminal. The viewer auto-scrolls to the latest content on each render.
26
+
27
+ ### Changed
28
+
29
+ - **LSP diagnostic pipeline latency optimization** — six targeted refactors reduce per-file diagnostic wait times by 50–900ms depending on the language server: first-push seeding skips the debounce timer for TypeScript and ESLint (~150–200ms saved); adaptive debounce computes remaining wait from `pushDiagnosticTimestamps` (50–140ms saved); per-server aggregate wait times (1000ms for TypeScript, 3000ms for rust-analyzer, 1500ms default); semantic settle pass gated to rust-analyzer only; pull retry budget zeroed for TypeScript/ESLint. Global constants `DIAGNOSTICS_DEBOUNCE_MS`, `PULL_DIAGNOSTICS_RETRY_BUDGET_MS`, and `DIAGNOSTICS_AGGREGATE_WAIT_MS` replaced by per-server strategy values from the new `server-strategies.ts`.
30
+
31
+ ### Fixed
32
+
33
+ - **Cascade neighbor touch cache ignores `writeSeq` on hit** — the A5 neighbor touch cache checked only `turnSeq` on cache hits, so a neighbor diagnosed at writeSeq=1 was served stale results when a second file write (writeSeq=2) cascaded to the same neighbor in the same turn. Fixed by requiring both `turnSeq` and `writeSeq` to match before using the cached entry.
34
+ - **Cascade fallback neighbors include other primary files** — `appendFallbackNeighbors` (the degraded-LSP path) excluded only the current primary file from the passive diagnostic snapshot sweep, but not other files edited as primary this turn. Those files could appear as cascade neighbors even though their own pipeline run is the authoritative diagnostic source. Fixed by adding a `primaryFilesThisTurn` check consistent with the B10 filter in the main neighbor path.
35
+
36
+ - **Semgrep dispatch plan regression** — kept the experimental Semgrep runner out of static `TOOL_PLANS` exposure and appends it only at runtime when Semgrep is actually configured. Fixes CI regressions in plan-shape tests while preserving config-gated Semgrep dispatch.
37
+ - **Widget theme method binding crash** — `renderWidget` now calls `theme.fg(...)` directly instead of destructuring `fg`, preserving the `this` binding required by pi's `Theme` class. Fixes the `Cannot read properties of undefined (reading 'fgColors')` widget render crash. Closes #53.
38
+ - **Read-guard follow-up edits after own writes** — tuned `file_modified` handling so a file changed by the agent's own prior allowed edit, immediate format, autofix, or deferred `agent_end` formatting does not force a redundant re-read when the next edit is still within already-read ranges. The guard still blocks zero-read and out-of-range edits, and external/stale changes outside the own-edit grace window remain protected. `PI_LENS_READ_GUARD_OWN_EDIT_GRACE_MS` controls the default 120s grace window.
39
+ - **Read-guard log noise and growth** — `~/.pi-lens/read-guard.log` now defaults to block/warn/anomaly events instead of logging every read and allowed edit. Verbose logging is available with `PI_LENS_READ_GUARD_VERBOSE=1` or `PI_LENS_READ_GUARD_LOG=verbose`; allowed-edit logging can be restored with `PI_LENS_READ_GUARD_LOG_ALLOWS=1`. The log now rotates at 1MB by default (`PI_LENS_READ_GUARD_MAX_BYTES`).
40
+ - **Pipelines skipped for external and vendor files** — agents reading dependency source (global npm packages, project-local `node_modules`) previously triggered LSP server spawns, tree-sitter read-range expansion, read-guard recording, and complexity baseline capture on those files — all noise with no diagnostic value. Added `isExternalOrVendorFile()` (built on the existing `isUnderDir` helper for correct Windows case handling) and gated all five pipeline paths: LSP auto-touch, tree-sitter expansion, read-guard recording, complexity baseline, and the full dispatch pipeline on write/edit.
41
+ - **Security: absolute paths for `cmd.exe` and `osascript` spawn calls** — dashboard terminal launch now resolves both executables via `process.env.SystemRoot` / absolute macOS path instead of relying on `PATH`, eliminating the SonarCloud S4036 PATH-injection finding.
42
+ - **Security: installed binary permissions tightened** — `chmod` calls on downloaded tool binaries changed from `0o755` to `0o750`, removing world read and execute permissions (SonarCloud S2612). GitHub Actions `contents: write` permission moved from workflow level to the `release` job only (S8233).
43
+ - **Agent messages: full-file-read options removed** — read-guard block messages no longer offer "read the full file" as an alternative. The out-of-range block now presents only the pre-computed targeted `offset`/`limit`; the zero-read block gives a single imperative directive. "Re-read the file" fallback text in ambiguous-edit messages replaced with "Re-read the relevant section" throughout.
44
+ - **Agent messages: indentation-mismatch RETRYABLE made explicitly directive** — the block now opens with "Retry the same edit call immediately with the corrected oldText shown below — copy it exactly as-is" and labels each corrected entry with "do not shorten, do not change newText", preventing agents from improvising instead of copying the corrected text verbatim.
45
+ - **SonarCloud reliability fixes** — five `.sort()` calls on string arrays given explicit `localeCompare` comparators (S2871); three identical-branch conditionals collapsed (S3923 in `knip-client.ts`, `shellcheck.ts`, `production-readiness.ts`); emoji character class converted to alternation to handle multi-codepoint variation-selector emojis (S5868); regex alternation precedence made explicit with non-capturing groups (S5850); `| 0` in hash function annotated as intentional 32-bit truncation (S7767).
46
+ - **CI: build step added before tests** — Vitest's native ESM resolver requires compiled `.js` output when `vi.resetModules()` is used; without a prior `tsc` build, imports of newly-added exports resolved as `undefined` in CI.
47
+ - **Widget: diagnostic rows exceeded terminal width** — the custom `truncate()` helper stripped ANSI sequences to measure length but then sliced the raw string, so OSC-8 hyperlinks and SGR sequences were left out of the width count. Replaced with pi-tui's `truncateToWidth()` / `visibleWidth()`, which correctly account for all escape sequences. All widget lines (header, file rows, separators, diagnostic detail, LSP status) are now clamped. Closes #54.
48
+ - **Widget: file list capped at 5 entries, basename deduplication** — reduced max file rows from 6 to 5 to keep the widget compact. Added basename deduplication (last write wins) so that different files with the same name (e.g. `pi-lens/index.ts` and `pi-webaio/index.ts`) show as a single merged entry instead of flooding the widget with near-identical labels.
49
+
7
50
  ## [3.8.40] - 2026-05-04
8
51
 
9
52
  ### Added
package/README.md CHANGED
@@ -16,7 +16,7 @@ On every `write` and `edit`, pi-lens runs a fast, language-aware pipeline (check
16
16
  2. **Auto-format** — deferred to `agent_end` by default; queued files are formatted once after all agent tool calls complete. Use `--immediate-format` for per-edit formatting
17
17
  3. **Auto-fix** — safe autofixes from 6 tools (Biome `check --write`, Ruff `check --fix`, ESLint `--fix`, stylelint `--fix`, sqlfluff `fix`, RuboCop `-a`) applied before analysis
18
18
  4. **LSP file sync** — opens/updates the file in active language servers
19
- 5. **Dispatch lint** — parallel runner groups: LSP diagnostics, tree-sitter structural rules, ast-grep security/correctness rules, fact rules, language-specific linters, similarity detection
19
+ 5. **Dispatch lint** — parallel runner groups: LSP diagnostics, tree-sitter structural rules, ast-grep security/correctness rules, fact rules, language-specific linters, experimental Semgrep security scans, similarity detection
20
20
  6. **Cascade diagnostics** — review-graph impact cascade showing which other files were affected and how diagnostics propagated
21
21
 
22
22
  Results are inline and actionable:
@@ -153,6 +153,36 @@ Structural rules are organized by language in `rules/tree-sitter-queries/`:
153
153
  - **Style/smells** — nested-ternary, long-parameter-list, large-class, prefer-optional-chain, redundant-state, require-await
154
154
  - **Agent stubs** — no-unimplemented-stub, no-raise-not-implemented, no-ellipsis-body
155
155
 
156
+ ### Semgrep CLI Integration (Experimental)
157
+
158
+ pi-lens can run the locally installed `semgrep` CLI as an optional dispatch runner for security-focused findings. Semgrep diagnostics are normalized into the same pi-lens `Diagnostic` model as LSP, tree-sitter, ast-grep, and linters: high-signal security findings can become blocking, while other findings remain warnings for `/lens-booboo`/history.
159
+
160
+ Activation is intentionally gated:
161
+
162
+ - pi-lens **does not auto-install Semgrep**.
163
+ - A local `.semgrep.yml`, `.semgrep.yaml`, `semgrep.yml`, or `semgrep.yaml` enables the runner when the `semgrep` CLI is available.
164
+ - Without a local config, Semgrep stays skipped unless explicitly configured with `--lens-semgrep --lens-semgrep-config <auto|p/pack|path>` or `/lens-semgrep enable --config <auto|p/pack|path>`.
165
+ - Local `.semgrep.yml` scans do not require a Semgrep token. Semgrep AppSec/Pro/managed configurations may require `semgrep login` or `SEMGREP_APP_TOKEN`.
166
+ - pi-lens passes `--metrics=off` for dispatch scans.
167
+
168
+ Commands:
169
+
170
+ - `/lens-semgrep status` — show CLI availability, discovered local config, persisted pi-lens config, and effective dispatch state
171
+ - `/lens-semgrep init` — create a starter `.semgrep.yml` with a blocking `eval(...)` rule and enable Semgrep dispatch
172
+ - `/lens-semgrep enable [--config <auto|p/pack|path>]` — persist Semgrep dispatch activation in `.pi-lens/semgrep.json`
173
+ - `/lens-semgrep disable` — persistently disable Semgrep dispatch for this project
174
+ - `/lens-semgrep clear` — remove `.pi-lens/semgrep.json` and return to local-config auto-discovery
175
+
176
+ Local rules can opt into pi-lens blocking semantics with metadata:
177
+
178
+ ```yaml
179
+ metadata:
180
+ pi-lens:
181
+ semantic: blocking
182
+ defect_class: injection
183
+ confidence: high
184
+ ```
185
+
156
186
  ## Dependencies
157
187
 
158
188
  Auto-install behavior depends on gate type:
@@ -199,6 +229,7 @@ Auto-install behavior depends on gate type:
199
229
  | `vscode-html-languageserver-bin` | HTML LSP | Yes | Language-default |
200
230
  | `svelte-language-server` | Svelte LSP | Yes | Flow-gated |
201
231
  | `@vue/language-server` | Vue LSP | Yes | Flow-gated |
232
+ | `semgrep` | Experimental security dispatch | Manual | Local config / explicit opt-in |
202
233
  | `psscriptanalyzer` | PowerShell linting | Manual | — |
203
234
 
204
235
  Additional language servers (gopls, ruby-lsp, solargraph, etc.) are auto-detected from PATH or installed via native package managers (`go install`, `gem install`) when their language is detected.
@@ -210,6 +241,7 @@ Additional language servers (gopls, ruby-lsp, solargraph, etc.) are auto-detecte
210
241
  pi
211
242
 
212
243
  # Optional switches
244
+ pi --no-lens # Start pi-lens disabled for this session; /lens-toggle can re-enable
213
245
  pi --no-lsp # Disable unified LSP diagnostics
214
246
  pi --no-autoformat # Skip auto-formatting entirely
215
247
  pi --immediate-format # Format immediately after each edit instead of deferring to agent_end
@@ -217,6 +249,8 @@ pi --no-autofix # Skip auto-fix (Biome, Ruff, ESLint, stylelint, sqlfl
217
249
  pi --no-tests # Skip test runner
218
250
  pi --no-delta # Disable delta mode (show all diagnostics, not just new ones)
219
251
  pi --lens-guard # Block git commit/push when unresolved blockers exist (experimental)
252
+ pi --lens-semgrep # Enable Semgrep dispatch when a local/configured Semgrep config exists
253
+ pi --lens-semgrep-config p/ci # Explicit Semgrep config for dispatch (requires --lens-semgrep)
220
254
  ```
221
255
 
222
256
  ## Environment Variables
@@ -233,10 +267,12 @@ pi --lens-guard # Block git commit/push when unresolved blockers exist
233
267
 
234
268
  ## Key Commands
235
269
 
270
+ - `/lens-toggle` — toggle pi-lens on/off for the current session without restarting
236
271
  - `/lens-booboo` — full quality report for current project state
237
272
  - `/lens-health` — runtime health, latency, and diagnostic telemetry
238
273
  - `/lens-tools` — tool installation status: globally installed, auto-installed, or npx fallback
239
274
  - `/lens-tdi` — Technical Debt Index (TDI) and project health trend
275
+ - `/lens-semgrep` — manage experimental Semgrep dispatch (`status`, `init`, `enable`, `disable`, `clear`)
240
276
 
241
277
  ## Language Coverage
242
278
 
@@ -50,7 +50,7 @@ export class RuleCache {
50
50
 
51
51
  private computeRuleHash(ruleFiles: string[]): string {
52
52
  const hash = crypto.createHash("sha256");
53
- for (const file of ruleFiles.sort()) {
53
+ for (const file of ruleFiles.sort((a, b) => a.localeCompare(b))) {
54
54
  if (fs.existsSync(file)) {
55
55
  const stat = fs.statSync(file);
56
56
  hash.update(`${file}:${stat.mtimeMs}:${stat.size}`);
@@ -386,7 +386,7 @@ export class ComplexityClient {
386
386
  let count = 0;
387
387
 
388
388
  const aiPatterns = [
389
- /[🔍✅📝🔧🐛⚠️🚀💡🎯📌🏷️🔑🏗️🧪🗑️🔄♻️📋🔖📊💬🔥💎⭐🌟🎯🎨🔧🛠️]/u,
389
+ /(?:🔍|✅|📝|🔧|🐛|⚠️|🚀|💡|🎯|📌|🏷️|🔑|🏗️|🧪|🗑️|🔄|♻️|📋|🔖|📊|💬|🔥|💎|⭐|🌟|🎨|🛠️)/u,
390
390
  /\/\/\s*(Initialize|Setup|Clean up|Create|Define|Check if|Handle|Process|Validate|Return|Get|Set|Add|Remove|Update|Fetch)\b/i,
391
391
  /\/\/\s*(This function|This method|This code|Here we|Now we)\b/i,
392
392
  /\/\*\*?\s*(Overview|Summary|Description|Example|Usage)\s*\*?\//i,
@@ -408,7 +408,7 @@ export class DependencyChecker {
408
408
  let output = `[Circular Deps] ${circular.length} cycle(s) found:\n`;
409
409
 
410
410
  for (const dep of circular) {
411
- const cycleKey = dep.path.sort().join("→");
411
+ const cycleKey = dep.path.sort((a, b) => a.localeCompare(b)).join("→");
412
412
  if (seen.has(cycleKey)) continue;
413
413
  seen.add(cycleKey);
414
414
 
@@ -12,6 +12,9 @@ const SILENT_ERROR_HINTS = [
12
12
 
13
13
  const INJECTION_HINTS = [
14
14
  "sql-injection",
15
+ "command-injection",
16
+ "template-injection",
17
+ "xss",
15
18
  "eval",
16
19
  "exec",
17
20
  "inner-html",
@@ -56,7 +59,16 @@ export function classifyDefect(
56
59
  return "correctness";
57
60
  }
58
61
 
59
- if (text.includes("unsafe") || text.includes("security")) return "safety";
62
+ if (
63
+ text.includes("unsafe") ||
64
+ text.includes("security") ||
65
+ text.includes("ssrf") ||
66
+ text.includes("path-traversal") ||
67
+ text.includes("deserial") ||
68
+ text.includes("auth-bypass") ||
69
+ text.includes("crypto")
70
+ )
71
+ return "safety";
60
72
  if (text.includes("style") || text.includes("format")) return "style";
61
73
 
62
74
  return "unknown";
@@ -16,6 +16,7 @@
16
16
 
17
17
  import * as path from "node:path";
18
18
  import type { FileKind } from "../file-kinds.js";
19
+ import { recordRunner } from "../widget-state.js";
19
20
  import { detectFileKind } from "../file-kinds.js";
20
21
  import { isTestFile } from "../file-utils.js";
21
22
  import { getPrimaryDispatchGroup } from "../language-policy.js";
@@ -389,6 +390,7 @@ function buildCoverageNotice(
389
390
  "similarity",
390
391
  "spellcheck",
391
392
  "fact-rules",
393
+ "semgrep",
392
394
  ]);
393
395
  const anyLinterHasCoverage = runnerLatencies.some(
394
396
  (r) =>
@@ -597,6 +599,13 @@ async function runGroup(
597
599
  diagnosticCount: result.diagnostics.length,
598
600
  semantic: result.semantic ?? semantic,
599
601
  });
602
+ recordRunner(
603
+ ctx.filePath,
604
+ runnerId,
605
+ result.status,
606
+ result.diagnostics.length,
607
+ duration,
608
+ );
600
609
 
601
610
  diagnostics.push(...result.diagnostics);
602
611
 
@@ -69,7 +69,7 @@ export function scheduleProviders(providers: FactProvider[]): FactProvider[] {
69
69
  const cycleParticipants = providers
70
70
  .filter((p) => !result.includes(p))
71
71
  .map((p) => p.id)
72
- .sort();
72
+ .sort((a, b) => a.localeCompare(b));
73
73
  throw new Error(
74
74
  `Cycle detected among FactProviders: ${cycleParticipants.join(", ")}`,
75
75
  );
@@ -16,6 +16,7 @@ import {
16
16
  formatSlopScoreSummary,
17
17
  type SlopScoreSummary,
18
18
  } from "../session-summary.js";
19
+ import { resolveSemgrepConfig } from "../semgrep-config.js";
19
20
  import {
20
21
  clearCoverageNoticeState,
21
22
  clearLatencyReports,
@@ -290,6 +291,52 @@ export function getDispatchSlopScoreLine(): string {
290
291
  return formatSlopScoreSummary(summary);
291
292
  }
292
293
 
294
+ const SEMGREP_SUPPORTED_KINDS = new Set<FileKind>([
295
+ "csharp",
296
+ "css",
297
+ "cxx",
298
+ "dart",
299
+ "docker",
300
+ "go",
301
+ "html",
302
+ "java",
303
+ "json",
304
+ "jsts",
305
+ "kotlin",
306
+ "lua",
307
+ "php",
308
+ "python",
309
+ "ruby",
310
+ "rust",
311
+ "shell",
312
+ "swift",
313
+ "terraform",
314
+ "yaml",
315
+ ]);
316
+
317
+ function withSemgrepGroup(
318
+ kind: FileKind,
319
+ groups: RunnerGroup[],
320
+ ctx: ReturnType<typeof createDispatchContext>,
321
+ ): RunnerGroup[] {
322
+ if (!SEMGREP_SUPPORTED_KINDS.has(kind)) return groups;
323
+ const config = resolveSemgrepConfig(ctx.cwd, {
324
+ enabled: Boolean(ctx.pi.getFlag("lens-semgrep")),
325
+ config: ctx.pi.getFlag("lens-semgrep-config"),
326
+ });
327
+ if (!config.enabled) return groups;
328
+ if (groups.some((group) => group.runnerIds.includes("semgrep"))) return groups;
329
+ return [
330
+ ...groups,
331
+ {
332
+ mode: "all",
333
+ runnerIds: ["semgrep"],
334
+ filterKinds: [kind],
335
+ semantic: "warning",
336
+ },
337
+ ];
338
+ }
339
+
293
340
  function withPrimaryPolicyGroup(
294
341
  kind: keyof typeof TOOL_PLANS,
295
342
  groups: RunnerGroup[],
@@ -430,6 +477,11 @@ export async function computeCascadeForFile(
430
477
  return undefined;
431
478
  }
432
479
 
480
+ if (!detectFileKind(filePath)) {
481
+ logCascade({ phase: "cascade_skip", filePath, reason: "non_code_file" });
482
+ return undefined;
483
+ }
484
+
433
485
  const normalizedFile = resolveRunnerPath(cwd, filePath);
434
486
  const normalizedFileKey = normalizeMapKey(normalizedFile);
435
487
 
@@ -586,7 +638,7 @@ export async function computeCascadeForFile(
586
638
  // write sequence. A new write (higher writeSeq) invalidates the cache entry.
587
639
  const cached =
588
640
  writeSeq != null ? neighborTouchCache.get(cacheKey) : undefined;
589
- if (cached?.turnSeq === turnSeq) {
641
+ if (cached?.turnSeq === turnSeq && cached?.writeSeq === writeSeq) {
590
642
  producedLspData = true;
591
643
  const durationMs = Date.now() - neighborStart;
592
644
  logCascade({
@@ -821,6 +873,7 @@ function appendFallbackNeighbors(
821
873
  for (const [diagPath, { diags, ts }] of allDiags) {
822
874
  const diagKey = normalizeMapKey(diagPath);
823
875
  if (diagKey === normalizedFileKey || seen.has(diagKey)) continue;
876
+ if (primaryFilesThisTurn.has(diagKey)) continue;
824
877
  if (!nodeFs.existsSync(diagPath)) continue;
825
878
  if (now - ts > CASCADE_TTL_MS) continue;
826
879
  const errors = convertLspDiagnostics(
@@ -919,7 +972,7 @@ export async function dispatchLint(
919
972
  const kind = ctx.kind;
920
973
  if (!kind) return "";
921
974
 
922
- const groups = getDispatchGroupsForKind(kind, pi);
975
+ const groups = withSemgrepGroup(kind, getDispatchGroupsForKind(kind, pi), ctx);
923
976
  if (groups.length === 0) return "";
924
977
 
925
978
  await runProviders(ctx);
@@ -962,7 +1015,7 @@ export async function dispatchLintWithResult(
962
1015
  };
963
1016
  }
964
1017
 
965
- const groups = getDispatchGroupsForKind(kind, pi);
1018
+ const groups = withSemgrepGroup(kind, getDispatchGroupsForKind(kind, pi), ctx);
966
1019
  if (groups.length === 0) {
967
1020
  return {
968
1021
  diagnostics: [],
@@ -34,6 +34,7 @@ import pythonSlopRunner from "./python-slop.js";
34
34
  import rubocopRunner from "./rubocop.js";
35
35
  import ruffRunner from "./ruff.js";
36
36
  import rustClippyRunner from "./rust-clippy.js";
37
+ import semgrepRunner from "./semgrep.js";
37
38
  import shellcheckRunner from "./shellcheck.js";
38
39
  import shfmtRunner from "./shfmt.js";
39
40
  // Import similarity runner
@@ -65,6 +66,7 @@ export function registerDefaultRunners(registry: RunnerRegistry): void {
65
66
  registry.register(pythonSlopRunner); // Python slop via CLI (priority 25)
66
67
  registry.register(typeSafetyRunner); // Type safety checks (priority 20)
67
68
  registry.register(shellcheckRunner); // Shell script linting (priority 20)
69
+ registry.register(semgrepRunner); // Semgrep security/deep static analysis (config/flag-gated, priority 50)
68
70
  // DISABLED: registerRunner(astGrepRunner); // Replaced by ast-grep-napi for dispatch
69
71
  // CLI ast-grep kept for ast_grep_search/ast_grep_replace tools only
70
72
  registry.register(similarityRunner); // Semantic reuse detection (priority 35)
@@ -0,0 +1,269 @@
1
+ import * as path from "node:path";
2
+ import { classifyDefect } from "../diagnostic-taxonomy.js";
3
+ import { PRIORITY } from "../priorities.js";
4
+ import type {
5
+ DefectClass,
6
+ Diagnostic,
7
+ DispatchContext,
8
+ OutputSemantic,
9
+ RunnerDefinition,
10
+ RunnerResult,
11
+ } from "../types.js";
12
+ import { safeSpawnAsync } from "../../safe-spawn.js";
13
+ import { resolveSemgrepConfig } from "../../semgrep-config.js";
14
+ import { createAvailabilityChecker } from "./utils/runner-helpers.js";
15
+
16
/** Upper bound on the number of findings turned into diagnostics for one parsed Semgrep output. */
const MAX_DIAGNOSTICS = 50;

/**
 * Availability checker for the `semgrep` executable.
 * NOTE(review): the ".exe" argument is presumably the Windows binary suffix —
 * confirm against `createAvailabilityChecker` in runner-helpers.
 */
const semgrep = createAvailabilityChecker("semgrep", ".exe");
18
+
19
/** Source position of a finding as reported by Semgrep. */
interface SemgrepPosition {
	line?: number;
	col?: number;
}

/** The `extra` payload of a finding: message, severity, rule metadata and fix hints. */
interface SemgrepResultExtra {
	message?: string;
	severity?: string;
	metadata?: Record<string, unknown>;
	fix?: string;
	fix_regex?: unknown;
}

/** One entry of the `results` array; every field is optional because the JSON is not validated. */
interface SemgrepResult {
	check_id?: string;
	path?: string;
	start?: SemgrepPosition;
	extra?: SemgrepResultExtra;
}

/** One entry of the `errors` array in the Semgrep JSON output. */
interface SemgrepErrorEntry {
	message?: string;
	type?: string;
	level?: string;
}

/** Subset of the `semgrep scan --json` document that this runner models. */
interface SemgrepJsonOutput {
	results?: SemgrepResult[];
	errors?: SemgrepErrorEntry[];
}
36
+
37
/**
 * Extracts the pi-lens specific section from a rule's metadata.
 *
 * The section is looked up under the `pi-lens` key first and, when that is
 * null/undefined, under `pi_lens`.
 *
 * @param metadata Rule metadata as reported by Semgrep.
 * @returns The nested section when it is an object, otherwise a fresh empty object.
 */
function getPiLensMetadata(
	metadata: Record<string, unknown>,
): Record<string, unknown> {
	const section = metadata["pi-lens"] ?? metadata.pi_lens;
	if (typeof section !== "object" || section === null) {
		return {};
	}
	return section as Record<string, unknown>;
}
45
+
46
/**
 * Reads a non-empty string setting from rule metadata.
 *
 * The nested pi-lens section (`piLens[key]`) takes precedence; the flat
 * `pi_lens_<key>` entry on the top-level metadata is the fallback. A candidate
 * that is not a string, or is blank after trimming, is skipped so that an
 * unusable nested value no longer hides a valid flat one.
 *
 * @param metadata Top-level rule metadata.
 * @param piLens Nested pi-lens section of that metadata.
 * @param key Setting name (without the `pi_lens_` prefix).
 * @returns The trimmed string, or `undefined` when neither location holds one.
 */
function metadataString(
	metadata: Record<string, unknown>,
	piLens: Record<string, unknown>,
	key: string,
): string | undefined {
	const candidates = [piLens[key], metadata[`pi_lens_${key}`]];
	for (const candidate of candidates) {
		if (typeof candidate !== "string") continue;
		const trimmed = candidate.trim();
		if (trimmed) return trimmed;
	}
	return undefined;
}
56
+
57
/**
 * Reports whether a flag is switched on in rule metadata.
 *
 * Only the literal boolean `true` counts; it may appear either in the nested
 * pi-lens section or as a flat `pi_lens_<key>` entry.
 *
 * @param metadata Top-level rule metadata.
 * @param piLens Nested pi-lens section of that metadata.
 * @param key Flag name (without the `pi_lens_` prefix).
 */
function metadataBoolean(
	metadata: Record<string, unknown>,
	piLens: Record<string, unknown>,
	key: string,
): boolean {
	if (piLens[key] === true) {
		return true;
	}
	const flatKey = `pi_lens_${key}`;
	return metadata[flatKey] === true;
}
64
+
65
/**
 * Maps a free-form defect label from rule metadata onto a known defect class.
 *
 * The label is lower-cased and underscores become hyphens. An exact match
 * against the known class names is returned as-is; otherwise a label that
 * contains one of a few security-related fragments is classified as "safety".
 *
 * @param value Raw label, possibly missing.
 * @returns The matching defect class, or `undefined` when nothing matches.
 */
function normalizeDefectClass(
	value: string | undefined,
): DefectClass | undefined {
	if (!value) return undefined;

	const label = value.toLowerCase().replace(/_/g, "-");

	const knownClasses: readonly string[] = [
		"silent-error",
		"injection",
		"secrets",
		"async-misuse",
		"correctness",
		"safety",
		"style",
		"unknown",
		"unused-value",
	];
	if (knownClasses.includes(label)) {
		return label as DefectClass;
	}

	// Substring hints that identify a security-flavoured label.
	const safetyHints: readonly string[] = [
		"traversal",
		"ssrf",
		"xss",
		"deserial",
		"crypto",
		"auth",
	];
	return safetyHints.some((hint) => label.includes(hint))
		? "safety"
		: undefined;
}
95
+
96
/**
 * Decides how a Semgrep finding is surfaced: "blocking", "warning" or "silent".
 *
 * Explicit rule metadata wins: a `semantic` of "blocking" or a `blocking: true`
 * flag forces "blocking"; a `semantic` of "warning" or "silent" is returned
 * as-is. Without explicit metadata, a finding blocks only when all of these
 * hold: its severity is error-level, its defect class is injection, secrets or
 * safety, and its confidence is not "low". Everything else is a warning.
 *
 * Error-level covers the legacy `ERROR` value and the newer `HIGH` and
 * `CRITICAL` values, so rules written with the current severity scale are not
 * silently demoted to warnings.
 *
 * @param result The Semgrep finding.
 * @param defectClass Defect class already resolved for this finding.
 */
function semgrepSemantic(
	result: SemgrepResult,
	defectClass: DefectClass,
): OutputSemantic {
	const metadata = result.extra?.metadata ?? {};
	const piLens = getPiLensMetadata(metadata);
	const explicitSemantic = metadataString(metadata, piLens, "semantic");
	if (
		explicitSemantic === "blocking" ||
		metadataBoolean(metadata, piLens, "blocking")
	) {
		return "blocking";
	}
	if (explicitSemantic === "warning" || explicitSemantic === "silent") {
		return explicitSemantic;
	}

	const severity = String(result.extra?.severity ?? "").toUpperCase();
	const errorLevel =
		severity === "ERROR" || severity === "HIGH" || severity === "CRITICAL";
	const confidence = String(
		metadata.confidence ??
			metadata.semgrep_confidence ??
			piLens.confidence ??
			"",
	).toLowerCase();
	const highSignalSecurity =
		defectClass === "injection" ||
		defectClass === "secrets" ||
		defectClass === "safety";

	if (errorLevel && highSignalSecurity && confidence !== "low") {
		return "blocking";
	}

	return "warning";
}
131
+
132
/**
 * Translates a Semgrep severity into a diagnostic severity.
 *
 * A blocking finding is always an "error". Otherwise the severity string is
 * compared case-insensitively: `ERROR`, `HIGH` and `CRITICAL` map to "error",
 * `INFO` and `LOW` map to "info", and anything else (including `WARNING`,
 * `MEDIUM` and a missing value) maps to "warning".
 *
 * @param semgrepSeverity Severity reported for the finding, if any.
 * @param semantic Output semantic already chosen for the finding.
 */
function mapSeverity(
	semgrepSeverity: string | undefined,
	semantic: OutputSemantic,
): Diagnostic["severity"] {
	if (semantic === "blocking") return "error";
	switch (String(semgrepSeverity ?? "").toUpperCase()) {
		case "ERROR":
		case "HIGH":
		case "CRITICAL":
			return "error";
		case "INFO":
		case "LOW":
			return "info";
		default:
			return "warning";
	}
}
142
+
143
/**
 * Converts the JSON document printed by `semgrep scan --json` into diagnostics.
 *
 * Blank input, unparsable input, a document that is not an object (for example
 * the JSON literal `null`) and a missing or non-array `results` field all yield
 * an empty list rather than throwing. Entries of `results` that are not objects
 * are skipped. At most `MAX_DIAGNOSTICS` diagnostics are produced, in the order
 * Semgrep reported them.
 *
 * @param raw Standard output captured from Semgrep.
 * @param ctx Dispatch context; its `filePath` is used when a finding has no path.
 * @returns The diagnostics derived from the findings.
 */
function parseSemgrepJson(raw: string, ctx: DispatchContext): Diagnostic[] {
	if (!raw.trim()) return [];

	let parsed: unknown;
	try {
		parsed = JSON.parse(raw);
	} catch {
		return [];
	}
	// JSON.parse may legitimately return null or a primitive; only an object can carry results.
	if (typeof parsed !== "object" || parsed === null) return [];

	const { results } = parsed as SemgrepJsonOutput;
	if (!Array.isArray(results)) return [];

	const diagnostics: Diagnostic[] = [];

	for (const [index, result] of results.entries()) {
		if (diagnostics.length >= MAX_DIAGNOSTICS) break;
		// The payload is unvalidated, so tolerate malformed entries.
		if (typeof result !== "object" || result === null) continue;

		const rule = result.check_id || "semgrep";
		const message = result.extra?.message || rule;
		const metadata = result.extra?.metadata ?? {};
		const piLens = getPiLensMetadata(metadata);
		const explicitDefect = normalizeDefectClass(
			metadataString(metadata, piLens, "defect_class"),
		);
		// Rule metadata takes precedence over the generic classifier.
		const defectClass =
			explicitDefect ?? classifyDefect(rule, "semgrep", message);
		const semantic = semgrepSemantic(result, defectClass);
		const filePath = result.path || ctx.filePath;
		const line = result.start?.line ?? 1;
		const column = result.start?.col ?? 1;
		const fixSuggestion =
			metadataString(metadata, piLens, "fix") ??
			(typeof result.extra?.fix === "string" ? result.extra.fix : undefined);
		const hasFixHint = Boolean(fixSuggestion || result.extra?.fix_regex);

		diagnostics.push({
			// The index of the finding keeps ids unique when rule and position coincide.
			id: `semgrep:${rule}:${path.basename(filePath)}:${line}:${column}:${index}`,
			message: `[${rule}] ${message}`,
			filePath,
			line,
			column,
			severity: mapSeverity(result.extra?.severity, semantic),
			semantic,
			tool: "semgrep",
			rule,
			defectClass,
			fixable: hasFixHint,
			autoFixAvailable: false,
			fixKind: hasFixHint ? "suggestion" : undefined,
			fixSuggestion,
		});
	}

	return diagnostics;
}
195
+
196
/**
 * Resolves the working directory and Semgrep configuration for a dispatch.
 *
 * Shared by `when` and `run` so both use the same directory (falling back to
 * the process working directory when the context has none) and the same
 * `lens-semgrep` / `lens-semgrep-config` flag values.
 */
function resolveSemgrepForContext(ctx: DispatchContext) {
	const cwd = ctx.cwd || process.cwd();
	const resolved = resolveSemgrepConfig(cwd, {
		enabled: Boolean(ctx.pi.getFlag("lens-semgrep")),
		config: ctx.pi.getFlag("lens-semgrep-config"),
	});
	return { cwd, resolved };
}

/**
 * Dispatch runner that scans the current file with the Semgrep CLI.
 *
 * The runner is off by default; it runs only when the resolved Semgrep
 * configuration reports `enabled` and the `semgrep` executable is available.
 */
const semgrepRunner: RunnerDefinition = {
	id: "semgrep",
	appliesTo: [
		"csharp",
		"css",
		"cxx",
		"dart",
		"docker",
		"go",
		"html",
		"java",
		"json",
		"jsts",
		"kotlin",
		"lua",
		"php",
		"python",
		"ruby",
		"rust",
		"shell",
		"swift",
		"terraform",
		"yaml",
	],
	priority: PRIORITY.DEEP_LANGUAGE_ANALYSIS,
	enabledByDefault: false,

	/** Gate: true only when the resolved Semgrep configuration is enabled. */
	async when(ctx: DispatchContext): Promise<boolean> {
		return resolveSemgrepForContext(ctx).resolved.enabled;
	},

	/**
	 * Runs `semgrep scan --json` on `ctx.filePath` and converts the findings.
	 *
	 * Returns "skipped" when Semgrep is disabled or not installed. With no
	 * findings the status reflects whether the spawn reported an error, and the
	 * first 500 characters of stderr are attached as raw output. With findings
	 * the run is "failed" when at least one of them is blocking.
	 */
	async run(ctx: DispatchContext): Promise<RunnerResult> {
		const { cwd, resolved } = resolveSemgrepForContext(ctx);
		if (!resolved.enabled) {
			return { status: "skipped", diagnostics: [], semantic: "none" };
		}

		if (!semgrep.isAvailable(cwd)) {
			return { status: "skipped", diagnostics: [], semantic: "none" };
		}
		const cmd = semgrep.getCommand(cwd) ?? "semgrep";
		// `--timeout 5` is Semgrep's own time limit; the spawn timeout below is the outer guard.
		const args = ["scan", "--json", "--metrics=off", "--timeout", "5"];
		if (resolved.configArg) args.push("--config", resolved.configArg);
		args.push(ctx.filePath);

		const result = await safeSpawnAsync(cmd, args, { cwd, timeout: 20000 });
		const raw = result.stdout || "";
		const diagnostics = parseSemgrepJson(raw, ctx);
		if (diagnostics.length === 0) {
			return {
				status: result.error ? "failed" : "succeeded",
				diagnostics: [],
				semantic: "none",
				rawOutput: (result.stderr || "").slice(0, 500),
			};
		}

		const hasBlocking = diagnostics.some((d) => d.semantic === "blocking");
		return {
			status: hasBlocking ? "failed" : "succeeded",
			diagnostics,
			semantic: hasBlocking ? "blocking" : "warning",
		};
	},
};

export default semgrepRunner;
@@ -148,14 +148,8 @@ const shellcheckRunner: RunnerDefinition = {
148
148
  }
149
149
  if (!cmd) return { status: "skipped", diagnostics: [], semantic: "none" };
150
150
 
151
- // Determine shell dialect from file extension
152
- const shellDialect = ctx.filePath.endsWith(".zsh")
153
- ? "bash"
154
- : ctx.filePath.endsWith(".fish")
155
- ? "bash"
156
- : ctx.filePath.endsWith(".sh")
157
- ? "bash"
158
- : "bash"; // Default to bash for generic shell files
151
+ // Determine shell dialect from file extension (all map to bash for shellcheck)
152
+ const shellDialect = "bash";
159
153
 
160
154
  // Build args
161
155
  // --format json: JSON output