@vigolium/piolium 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,9 +6,11 @@
6
6
  <p align="center"><a href="https://www.vigolium.com">www.vigolium.com</a> - <a href="https://docs.vigolium.com">docs.vigolium.com</a></p>
7
7
  </p>
8
8
 
9
+ ![Vigolium Audit](https://github.com/vigolium/docs/blob/main/images/audit/vigolium-audit-with-piolium.png?raw=true)
10
+
9
11
  # Piolium
10
12
 
11
- Piolium is Vigolium's Pi-native repository security audit agent. It runs multi-phase source audits with specialist sub-agents, resumable state, controlled concurrency, PoC generation, and final reporting.
13
+ Piolium is [Vigolium](https://www.vigolium.com/)'s Pi-native repository security audit agent. It runs multi-phase source audits with specialist sub-agents, resumable state, controlled concurrency, PoC generation, and final reporting.
12
14
 
13
15
  Piolium is packaged as a Pi extension. Once installed, it registers `/piolium-*` slash commands inside Pi sessions and also provides a standalone `piolium` launcher when installed through the quick installer.
14
16
 
@@ -36,7 +38,7 @@ For development from this checkout:
36
38
  ```bash
37
39
  bun install
38
40
  bun run import-archon -- --src /path/to/archon-audit
39
- pi install ./
41
+ pi install .
40
42
  ```
41
43
 
42
44
  More install, build, release, auth, and development details are in [HACKING.md](HACKING.md).
@@ -35,40 +35,48 @@ For each finding, extract:
35
35
 
36
36
  ### 2. Categorize Results
37
37
 
38
- Group findings into confirmation categories. Each finding gets ONE category — when both V4 and V5 produced verdicts, pick the strongest in this priority order: `confirmed-live` > `confirmed-test` > `confirmed-fp` > `analytical-only` > `unconfirmed` > `inconclusive` > `blocked` > `no-poc` > `error`.
38
+ Group findings into confirmation categories. Each finding gets ONE category — when both V4 and V5 produced verdicts, pick the strongest in this priority order: `live-verified` > `test-verified` > `false-positive` > `analytical` > `not-reproduced` > `flaky` > `blocked` > `no-poc` > `errored`.
39
39
 
40
- The category is independent of `Documented-Intent`. A `match: yes` finding can still be `confirmed-live` — the PoC ran and the documented behavior was exactly what it produced. The reader uses both columns together to decide whether to triage further.
40
+ The category is independent of `Documented-Intent`. A `match: yes` finding can still be `live-verified` — the PoC ran and the documented behavior was exactly what it produced. The reader uses both columns together to decide whether to triage further.
41
41
 
42
42
  | Category | Criteria |
43
43
  |----------|---------|
44
- | `confirmed-live` | PoC executed successfully against live environment (structured-output `status: confirmed`) |
45
- | `confirmed-test` | Generated test demonstrated the vulnerability |
46
- | `confirmed-fp` | fp-check determined the original draft was a false positive (drain from severity counts) |
47
- | `analytical-only` | Finding's `Protocol: non-exploitable` — confirmation is structural, not behavioural |
48
- | `unconfirmed` | PoC failed AND test could not confirm |
49
- | `inconclusive` | PoC's structured output reported `inconclusive` (e.g., race condition that didn't trigger) |
44
+ | `live-verified` | PoC executed successfully against live environment (structured-output `status: confirmed`) |
45
+ | `test-verified` | Generated test demonstrated the vulnerability |
46
+ | `false-positive` | fp-check determined the original draft was a false positive (drain from severity counts) |
47
+ | `analytical` | Finding's `Protocol: non-exploitable` — confirmation is structural, not behavioural |
48
+ | `not-reproduced` | PoC ran cleanly AND/OR test ran cleanly without demonstrating the issue (covers both V4 `Confirm-Status: not-reproduced` and V5 `Confirm-Status: not-reproduced` — `Confirm-Method` tells the two apart) |
49
+ | `flaky` | PoC's structured output reported `inconclusive` (e.g., race condition that didn't trigger deterministically) |
50
50
  | `blocked` | App unreachable, missing interpreter, missing auth token, install failure, test timeout, or no test framework |
51
51
  | `no-poc` | Finding had no PoC script and no testable code path |
52
- | `error` | Pipeline error during confirmation (record the failure for re-run) |
52
+ | `errored` | Pipeline error during confirmation (record the failure for re-run) |
53
53
 
54
54
  **Deduplication rule**: a single finding ID appears in EXACTLY ONE category. Do not double-count when a finding was attempted by both V4 and V5 — the priority order above resolves it.
55
55
 
56
- ### 3. Stage Confirmed Findings
56
+ ### 3. Stage Findings by Verdict
57
57
 
58
- Before writing the report, mirror every finding that received a verdict into `archon/confirm-workspace/confirmed-findings/`, grouped by category. This gives reviewers a single place to scan only the findings the confirmer reached a conclusion on, without having to cross-reference `confirmation-report.md` against `archon/findings/`.
58
+ Before writing the report, mirror every finding that received a verdict into two top-level buckets under `archon/confirm-workspace/`, each grouped by category. This makes the outcome self-evident from the directory layout — a reviewer sees at a glance which findings the confirmer stood behind and which it could not, without cross-referencing `confirmation-report.md` against `archon/findings/`.
59
59
 
60
- Included categories: `confirmed-live`, `confirmed-test`, `analytical-only`, `confirmed-fp`. Findings in `unconfirmed | inconclusive | blocked | no-poc | error` are NOT staged — they remain only in `archon/findings/` and the report.
60
+ - `archon/confirm-workspace/report-ready/<category>/` — findings the confirmer reached a positive conclusion on (the ship list). Categories: `live-verified`, `test-verified`, `analytical`, `false-positive`.
61
+ - `archon/confirm-workspace/needs-review/<category>/` — every finding that did NOT confirm (the followup queue). Categories: `not-reproduced`, `flaky`, `blocked`, `no-poc`, `errored`.
62
+
63
+ Both buckets are derived, disposable copies, regenerated each run. `archon/findings/` remains the canonical source of truth, and each staged `report.md` still carries the exact `Confirm-Status`, so the category folder is a convenience index, not authoritative.
61
64
 
62
65
  ```bash
63
- # Wipe any prior staging so the folder reflects only this run.
64
- rm -rf archon/confirm-workspace/confirmed-findings
65
- mkdir -p archon/confirm-workspace/confirmed-findings/{confirmed-live,confirmed-test,analytical-only,confirmed-fp}
66
+ # Wipe any prior staging so the folders reflect only this run.
67
+ rm -rf archon/confirm-workspace/report-ready archon/confirm-workspace/needs-review
68
+ mkdir -p archon/confirm-workspace/report-ready/{live-verified,test-verified,analytical,false-positive}
69
+ mkdir -p archon/confirm-workspace/needs-review/{not-reproduced,flaky,blocked,no-poc,errored}
66
70
  ```
67
71
 
68
- For each finding whose resolved category is one of the four above:
72
+ For each finding, copy its directory into the bucket matching its resolved category from §2 — ship-list categories go to `report-ready/<category>/`, the rest to `needs-review/<category>/`:
69
73
 
70
74
  ```bash
71
- cp -R "archon/findings/<ID>-<slug>/" "archon/confirm-workspace/confirmed-findings/<category>/"
75
+ # live-verified | test-verified | analytical | false-positive
76
+ cp -R "archon/findings/<ID>-<slug>/" "archon/confirm-workspace/report-ready/<category>/"
77
+
78
+ # not-reproduced | flaky | blocked | no-poc | errored
79
+ cp -R "archon/findings/<ID>-<slug>/" "archon/confirm-workspace/needs-review/<category>/"
72
80
  ```
73
81
 
74
82
  `cp -R` copies the full directory (report.md, PoC scripts, `confirm-evidence/`, `confirm-test*`, etc.) so each staged entry is self-contained for review. If the source directory is missing (e.g., a finding ID survived in the report but its directory was deleted), log a warning and skip — do not abort report generation.
@@ -87,35 +95,43 @@ Write `archon/confirmation-report.md`:
87
95
  | Confirmed at | <ISO timestamp> |
88
96
  | Environment | <method_used from env-connection.json or "test-only" or "--target URL"> |
89
97
  | Original audit mode | <mode from audit-state.json, or "unknown"> |
90
- | Confirmed-findings staging | `archon/confirm-workspace/confirmed-findings/` (grouped by verdict) |
98
+ | Findings staging | `archon/confirm-workspace/report-ready/` + `needs-review/` (grouped by verdict category) |
91
99
 
92
100
  ## Summary
93
101
 
94
- | Status | Count | Findings |
95
- |--------|-------|----------|
96
- | confirmed-live | N | C1, H2, ... |
97
- | confirmed-test | N | H3, M1, ... |
98
- | confirmed-fp | N | ... |
99
- | analytical-only | N | ... |
100
- | unconfirmed | N | M2, ... |
101
- | inconclusive | N | ... |
102
+ | Verdict | Count | Findings |
103
+ |---------|-------|----------|
104
+ | live-verified | N | C1, H2, ... |
105
+ | test-verified | N | H3, M1, ... |
106
+ | false-positive | N | ... |
107
+ | analytical | N | ... |
108
+ | not-reproduced | N | M2, ... |
109
+ | flaky | N | ... |
102
110
  | blocked | N | ... |
103
111
  | no-poc | N | ... |
104
- | error | N | ... |
112
+ | errored | N | ... |
105
113
 
106
- **Confirmation rate**: X/Y findings confirmed (Z%) — `confirmed-fp` and `analytical-only` are excluded from the denominator (they're not pending verification).
114
+ **Confirmation rate**: X/Y findings confirmed (Z%) — `false-positive` and `analytical` are excluded from the denominator (they're not pending verification).
107
115
 
108
116
  ## Breakdown by Exploitability Class
109
117
 
110
118
  (read from `archon/confirm-workspace/findings-inventory.json:by_class`)
111
119
 
112
- | Class | Total | confirmed-live | confirmed-test | unconfirmed | blocked | analytical-only |
113
- |-------|-------|----------------|----------------|-------------|---------|-----------------|
120
+ | Class | Total | live-verified | test-verified | not-reproduced | blocked | analytical |
121
+ |-------|-------|---------------|---------------|----------------|---------|------------|
114
122
  | network-exploitable | N | N | N | N | N | — |
115
123
  | local-exploitable | N | — | N | N | N | — |
116
124
  | non-exploitable | N | — | — | — | — | N |
117
125
 
118
- ## Confirmed Findings (Live)
126
+ ## Pre-Auth Exposure
127
+
128
+ (cross-cut index — list every finding whose `report.md` has `Auth-Required: no`, regardless of verdict. These are exploitable without credentials and are the highest priority for client reports. Omit the section entirely if no finding has `Auth-Required: no`.)
129
+
130
+ | ID | Title | Severity | Verdict | Vector |
131
+ |----|-------|----------|---------|--------|
132
+ | C1 | ... | CRITICAL | live-verified | unauthenticated HTTP |
133
+
134
+ ## Report-Ready — Live Verified
119
135
 
120
136
  ### <ID> — <title> [<severity>]
121
137
 
@@ -127,7 +143,7 @@ Write `archon/confirmation-report.md`:
127
143
 
128
144
  ---
129
145
 
130
- ## Confirmed Findings (Test)
146
+ ## Report-Ready — Test Verified
131
147
 
132
148
  ### <ID> — <title> [<severity>]
133
149
 
@@ -139,7 +155,7 @@ Write `archon/confirmation-report.md`:
139
155
 
140
156
  ---
141
157
 
142
- ## Unconfirmed Findings
158
+ ## Needs-Review — Not Reproduced
143
159
 
144
160
  ### <ID> — <title> [<severity>]
145
161
 
@@ -151,7 +167,7 @@ Write `archon/confirmation-report.md`:
151
167
 
152
168
  ---
153
169
 
154
- ## Blocked Findings
170
+ ## Needs-Review — Blocked
155
171
 
156
172
  ### <ID> — <title> [<severity>]
157
173
 
@@ -215,15 +231,15 @@ If `archon/audit-state.json` exists, update the latest audit entry. Two writes:
215
231
  "environment_method": "<method_used or 'remote' or 'test-only'>",
216
232
  "target_url": "<base_url or --target URL>",
217
233
  "results": {
218
- "confirmed_live": <count>,
219
- "confirmed_test": <count>,
220
- "confirmed_fp": <count>,
221
- "analytical_only": <count>,
222
- "unconfirmed": <count>,
223
- "inconclusive": <count>,
234
+ "live_verified": <count>,
235
+ "test_verified": <count>,
236
+ "false_positive": <count>,
237
+ "analytical": <count>,
238
+ "not_reproduced": <count>,
239
+ "flaky": <count>,
224
240
  "blocked": <count>,
225
241
  "no_poc": <count>,
226
- "error": <count>
242
+ "errored": <count>
227
243
  },
228
244
  "by_class": {"network-exploitable": <count>, "local-exploitable": <count>, "non-exploitable": <count>},
229
245
  "confirmation_rate": "<X/Y (Z%)>"
@@ -241,7 +257,7 @@ If `archon/audit-state.json` exists, update the latest audit entry. Two writes:
241
257
  "started_at": "<ISO timestamp>",
242
258
  "completed_at": "<ISO timestamp>",
243
259
  "target_url": "<base_url>",
244
- "results": {"confirmed_live": N, "confirmed_test": N, "...": "..."}
260
+ "results": {"live_verified": N, "test_verified": N, "...": "..."}
245
261
  }
246
262
  ]
247
263
  }
@@ -43,9 +43,9 @@ Read the finding report at `archon/findings/<ID>-<slug>/report.md`. Extract:
43
43
  - `Protocol:` field (`http`, `grpc`, `graphql`, `websocket`, `tcp`, `local`, `non-exploitable`) — written by poc-author. Defaults to `http` if absent.
44
44
  - `Auth-Required:` field (`yes` / `no`) — defaults to `no` if absent.
45
45
  - Expected security effect (what the PoC should demonstrate)
46
- - Current `Confirm-Status` (skip if already `confirmed-live` from a previous run)
46
+ - Current `Confirm-Status` (skip if already `live-verified` from a previous run)
47
47
 
48
- If `Protocol: non-exploitable`, write `Confirm-Status: analytical-only` and exit cleanly — there is no live verification to run.
48
+ If `Protocol: non-exploitable`, write `Confirm-Status: analytical` and exit cleanly — there is no live verification to run.
49
49
 
50
50
  ### 2. Locate the PoC Script
51
51
 
@@ -158,17 +158,17 @@ Allowed `status` values: `confirmed`, `failed`, `inconclusive`.
158
158
 
159
159
  Parse the LAST line of `exploit.log` matching `^\{.*"status".*\}$`. Map directly:
160
160
 
161
- - `confirmed` → `Confirm-Status: confirmed-live`
162
- - `failed` → `Confirm-Status: failed` (try variant 2 if not yet attempted)
163
- - `inconclusive` → `Confirm-Status: inconclusive` (treated like failed for V5 fallback purposes; reporter surfaces it distinctly)
161
+ - `confirmed` → `Confirm-Status: live-verified`
162
+ - `failed` → `Confirm-Status: not-reproduced` (try variant 2 if not yet attempted)
163
+ - `inconclusive` → `Confirm-Status: flaky` (treated like not-reproduced for V5 fallback purposes; reporter surfaces it distinctly)
164
164
 
165
- **Legacy PoC fallback**: if no structured line is present (older PoCs from before the contract), apply the heuristic — non-zero exit + no security marker = `failed`; security marker present = `confirmed-live`. Add `Confirm-Notes: legacy-poc-format` so the operator knows to upgrade.
165
+ **Legacy PoC fallback**: if no structured line is present (older PoCs from before the contract), apply the heuristic — non-zero exit + no security marker = `not-reproduced`; security marker present = `live-verified`. Add `Confirm-Notes: legacy-poc-format` so the operator knows to upgrade.
166
166
 
167
- For **failed** results from variant 1: run variant 2 with a different payload encoding, alternate endpoint path, or alternative auth identity (e.g., switch `{{TOKEN_user}}` ↔ `{{TOKEN_admin}}` for privilege-escalation-shaped findings).
167
+ For **not-reproduced** results from variant 1: run variant 2 with a different payload encoding, alternate endpoint path, or alternative auth identity (e.g., switch `{{TOKEN_user}}` ↔ `{{TOKEN_admin}}` for privilege-escalation-shaped findings).
168
168
 
169
- For **failed** results after both variants: run the `fp-check` skill on the original draft (`archon/findings/<ID>-<slug>/draft.md`) using the live evidence as context. Two outcomes:
170
- - fp-check confirms the draft is itself a false positive → `Confirm-Status: confirmed-fp`
171
- - fp-check finds the draft sound but the live PoC weak → keep `Confirm-Status: failed` and let V5 generate a reproducer test
169
+ For **not-reproduced** results after both variants: run the `fp-check` skill on the original draft (`archon/findings/<ID>-<slug>/draft.md`) using the live evidence as context. Two outcomes:
170
+ - fp-check confirms the draft is itself a false positive → `Confirm-Status: false-positive`
171
+ - fp-check finds the draft sound but the live PoC weak → keep `Confirm-Status: not-reproduced` and let V5 generate a reproducer test
172
172
 
173
173
  Record each attempt and the fp-check verdict in `archon/findings/<ID>-<slug>/confirm-evidence/attempts.log`.
174
174
 
@@ -176,7 +176,7 @@ Record each attempt and the fp-check verdict in `archon/findings/<ID>-<slug>/con
176
176
 
177
177
  Write confirmation status back to the finding:
178
178
  ```
179
- Confirm-Status: confirmed-live | failed | inconclusive | error | blocked | confirmed-fp | analytical-only | no-poc
179
+ Confirm-Status: live-verified | not-reproduced | flaky | errored | blocked | false-positive | analytical | no-poc
180
180
  Confirm-Timestamp: <ISO timestamp>
181
181
  Confirm-Evidence: archon/findings/<ID>-<slug>/confirm-evidence/
182
182
  Confirm-Variant-Count: <1 or 2>
@@ -184,9 +184,9 @@ Confirm-FpCheck: ran | not-run
184
184
  Confirm-Notes: <brief description of what was observed>
185
185
  ```
186
186
 
187
- If **failed** or **inconclusive** after all attempts, the finding is queued for test-locator (V5) fallback.
187
+ If **not-reproduced** or **flaky** after all attempts, the finding is queued for test-locator (V5) fallback.
188
188
  If **blocked** (missing interpreter, missing auth token, app unreachable), the finding is queued for V5 too — V5 may succeed where the live PoC could not.
189
- If **confirmed-fp** or **analytical-only**, the finding skips V5 entirely.
189
+ If **false-positive** or **analytical**, the finding skips V5 entirely.
190
190
 
191
191
  ## Completion
192
192
 
@@ -5,7 +5,7 @@ model: sonnet
5
5
  color: blue
6
6
  permissionMode: bypassPermissions
7
7
  effort: low
8
- description: Confirmation phase V5 test-based verification agent that maps unconfirmed findings to existing test files, generates minimal reproducer tests targeting each vulnerability, executes them in isolation within archon/findings/<ID>/, and updates confirmation status
8
+ description: Confirmation phase V5 test-based verification agent that maps not-reproduced / blocked / no-poc findings to existing test files, generates minimal reproducer tests targeting each vulnerability, executes them in isolation within archon/findings/<ID>/, and updates confirmation status
9
9
  ---
10
10
 
11
11
  You are a test mapper for the confirmation phase of a security audit. You verify findings by generating and running targeted test cases when live PoC execution is not possible.
@@ -184,17 +184,17 @@ The outer `timeout 90` is a belt-and-suspenders cap — if the runner ignores it
184
184
  ### 8. Assess Result
185
185
 
186
186
  - **Test passes** (exit 0): the vulnerability is confirmed — malicious input reached the sink
187
- → `Confirm-Status: confirmed-test`
187
+ → `Confirm-Status: test-verified`
188
188
  - **Test fails** (assertion error): the application sanitized/blocked the input — not confirmed this way
189
- → `Confirm-Status: unconfirmed`
189
+ → `Confirm-Status: not-reproduced`
190
190
  - **Test errors** (import error, syntax error, runtime crash): test couldn't execute
191
- → `Confirm-Status: unconfirmed` with `Confirm-Notes` explaining the error
191
+ → `Confirm-Status: not-reproduced` with `Confirm-Notes` explaining the error
192
192
 
193
193
  ### 9. Update Finding
194
194
 
195
195
  Write back to the finding report:
196
196
  ```
197
- Confirm-Status: confirmed-test | unconfirmed | blocked
197
+ Confirm-Status: test-verified | not-reproduced | blocked
198
198
  Confirm-Method: generated-test
199
199
  Confirm-Test: archon/findings/<ID>-<slug>/confirm-test.{ext}
200
200
  Confirm-Test-Output: archon/findings/<ID>-<slug>/confirm-test-output.log
@@ -2,6 +2,7 @@ import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSy
2
2
  import { basename, dirname, extname, isAbsolute, join, relative, resolve } from "node:path";
3
3
  import { splitFrontmatter } from "./agents.ts";
4
4
  import { type FindingDraft, listFindingDirs, readFindingFrontmatter } from "./findings.ts";
5
+ import type { ConfirmVerdict } from "./modes/confirm.ts";
5
6
 
6
7
  export type ExportFormat = "json" | "md-dir";
7
8
 
@@ -197,8 +198,8 @@ function includeFinding(
197
198
 
198
199
  function isConfirmed(confirmStatus: string | undefined): boolean {
199
200
  return (
200
- confirmStatus === "confirmed-live" ||
201
- confirmStatus === "confirmed-test" ||
201
+ confirmStatus === ("live-verified" satisfies ConfirmVerdict) ||
202
+ confirmStatus === ("test-verified" satisfies ConfirmVerdict) ||
202
203
  confirmStatus === "confirmed"
203
204
  );
204
205
  }
@@ -2,7 +2,7 @@
2
2
  * Confirm mode (`/piolium-confirm`).
3
3
  *
4
4
  * Verification pass over an already-completed audit (command-defs/confirm.md,
5
- * archon-audit @ 2026-05-16). Seven phases:
5
+ * vigolium-audit @ 2026-05-20). Seven phases:
6
6
  *
7
7
  * V1 findings inventory (env-profiler surveys & classifies findings by
8
8
  * exploitability: network / local / non-exploitable)
@@ -28,15 +28,7 @@
28
28
  * `piolium/confirm-workspace/env-connection.json`.
29
29
  */
30
30
 
31
- import {
32
- existsSync,
33
- mkdirSync,
34
- readFileSync,
35
- readdirSync,
36
- renameSync,
37
- statSync,
38
- writeFileSync,
39
- } from "node:fs";
31
+ import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
40
32
  import { basename, extname, join } from "node:path";
41
33
  import type { AgentRuntimeModel } from "../agent-runner.ts";
42
34
  import { loadAgents } from "../agents.ts";
@@ -73,12 +65,28 @@ const WORK = CONFIRM_WORKSPACE;
73
65
  const REPORT = CONFIRM_REPORT;
74
66
  export const POC_RESULTS = `${WORK}/poc-results.json`;
75
67
  export const INTENT_CORPUS = `${WORK}/intent-corpus.json`;
76
- const FP_RENAMES = `${WORK}/false-positive-renames.json`;
77
68
  export const CLEANUP_SUMMARY = `${WORK}/cleanup-summary.json`;
78
69
  const MAX_REDACTABLE_BYTES = 5 * 1024 * 1024;
79
70
 
80
71
  export const CONFIRM_AGENT_PHASES = ["V1", "V1.5", "V2", "V3", "V4", "V5", "V6"] as const;
81
72
 
73
+ export const REPORT_READY_VERDICTS = [
74
+ "live-verified",
75
+ "test-verified",
76
+ "analytical",
77
+ "false-positive",
78
+ ] as const;
79
+ export const NEEDS_REVIEW_VERDICTS = [
80
+ "not-reproduced",
81
+ "flaky",
82
+ "blocked",
83
+ "no-poc",
84
+ "errored",
85
+ ] as const;
86
+ export type ConfirmVerdict =
87
+ | (typeof REPORT_READY_VERDICTS)[number]
88
+ | (typeof NEEDS_REVIEW_VERDICTS)[number];
89
+
82
90
  const TEXT_EXTENSIONS = new Set([
83
91
  ".csv",
84
92
  ".curl",
@@ -159,7 +167,7 @@ const CONFIRMATION_STANDARD = [
159
167
  "- Write evidence under each finding's `evidence/` directory; include enough detail for replay.",
160
168
  "- Do not mark confirmed from code plausibility alone.",
161
169
  "- Mark `Confirm-Status: false-positive` only when real execution or a targeted reproducer proves the claimed exploit path is blocked, unreachable, or contradicted by code/runtime behavior.",
162
- "- If evidence is incomplete, use `blocked`, `inconclusive`, or `unconfirmed` instead of false-positive.",
170
+ "- If evidence is incomplete, use `blocked`, `flaky`, or `not-reproduced` instead of false-positive.",
163
171
  ].join("\n");
164
172
 
165
173
  export function buildConfirmTask(phase: string, target: string | undefined): string {
@@ -214,12 +222,12 @@ export function buildConfirmTask(phase: string, target: string | undefined): str
214
222
  case "V4":
215
223
  return [
216
224
  "You are running V4 (PoC Execution) of /piolium-confirm.",
217
- "Read findings-inventory.json and env-connection.json. Skip non-exploitable findings as `Confirm-Status: analytical-only`; route local-only findings to V5.",
225
+ "Read findings-inventory.json and env-connection.json. Skip non-exploitable findings as `Confirm-Status: analytical`; route local-only findings to V5.",
218
226
  "Before per-finding execution, run one reachability check against base_url with a 5s timeout; if unreachable, mark queued network findings `blocked` and record the reason.",
219
227
  "For every network-exploitable finding with a PoC, execute the real PoC against the target. Use a 30s timeout per variant, max 2 variants.",
220
228
  "Capture exact command, relevant env, HTTP request/response or stdout/stderr, and observable before/after state to `<finding-dir>/evidence/confirmed-<timestamp>.log`.",
221
229
  "Parse structured PoC output if present: final JSON line `{status,evidence,notes}`.",
222
- "Update each `report.md` with `Confirm-Status: confirmed-live | failed | blocked | analytical-only | false-positive` and `Confirm-Evidence:` pointing at the evidence file.",
230
+ "Update each `report.md` with `Confirm-Status: live-verified | not-reproduced | flaky | blocked | analytical | false-positive` and `Confirm-Evidence:` pointing at the evidence file.",
223
231
  `Write aggregate results to \`${POC_RESULTS}\`.`,
224
232
  CONFIRMATION_STANDARD,
225
233
  ].join("\n\n");
@@ -229,20 +237,29 @@ export function buildConfirmTask(phase: string, target: string | undefined): str
229
237
  "For findings whose live PoC did not confirm, had no PoC, or are local-exploitable, generate the smallest reproducer test in the existing test framework.",
230
238
  "Actually run the test with a 60s cap (pytest timeout, jest --testTimeout, go test -timeout, etc.).",
231
239
  "Keep reproducer files/evidence under each finding dir and write command/output logs under `evidence/`.",
232
- "Update `report.md`: `Confirm-Status: confirmed-test | failed | blocked | false-positive` and `Confirm-Evidence:`.",
240
+ "Update `report.md`: `Confirm-Status: test-verified | not-reproduced | blocked | false-positive` and `Confirm-Evidence:`.",
233
241
  "Only mark `false-positive` when the reproducer proves the claimed vulnerable path is unreachable, patched, protected, or based on an invalid assumption.",
234
242
  `Write \`${WORK}/test-mapping.json\` with per-finding verdicts and evidence pointers.`,
235
243
  CONFIRMATION_STANDARD,
236
244
  ].join("\n\n");
237
- case "V6":
245
+ case "V6": {
246
+ const reportReady = REPORT_READY_VERDICTS.join(", ");
247
+ const needsReview = NEEDS_REVIEW_VERDICTS.join(", ");
248
+ const allVerdicts = [...REPORT_READY_VERDICTS, ...NEEDS_REVIEW_VERDICTS].join(", ");
238
249
  return [
239
250
  "You are running V6 (Confirmation Report) of /piolium-confirm.",
240
- "Read `piolium/findings/`, including any directories renamed with `FP-` after V5.",
241
- `Compose \`${REPORT}\` with: confirmed-live, confirmed-test, analytical-only, blocked, inconclusive/unconfirmed, and false-positive counts.`,
242
- "Include one line per finding with status, evidence pointer, and reproduction command summary.",
243
- "Create a dedicated false-positive section listing every `FP-*` directory and the evidence that disproved it.",
251
+ "Read every `report.md` under `piolium/findings/` and treat it as the source of truth.",
252
+ "Stage every finding into one of two derived buckets under `piolium/confirm-workspace/` (regenerated each run, wipe prior staging first):",
253
+ ` - \`piolium/confirm-workspace/report-ready/<category>/\` for ${reportReady} (the ship list)`,
254
+ ` - \`piolium/confirm-workspace/needs-review/<category>/\` for ${needsReview} (the followup queue)`,
255
+ "Use `cp -R` so each staged entry is self-contained (report.md, PoC scripts, confirm-evidence/, confirm-test*).",
256
+ `Compose \`${REPORT}\` with: a Summary table of all nine verdicts (${allVerdicts}), a Breakdown by Exploitability Class section, and a Pre-Auth Exposure cross-cut index that lists every finding whose \`report.md\` has \`Auth-Required: no\` (omit the section if none).`,
257
+ "For each verdict category that has findings, include a section with one entry per finding (ID — title [severity], vulnerability class, method, evidence pointer, observation).",
258
+ "Confirmation rate denominator excludes `false-positive` and `analytical`.",
259
+ "If `piolium/audit-state.json` exists, append a new entry to `audits[-1].confirmation_history[]` and refresh `audits[-1].confirmation` with the latest run's summary — never overwrite the history array.",
244
260
  "Include environment setup notes, target URL/base_url, cleanup result, and methodology.",
245
261
  ].join("\n\n");
262
+ }
246
263
  default:
247
264
  return "Unknown V phase.";
248
265
  }
@@ -288,58 +305,11 @@ export function writeRemoteConnection(cwd: string, target: string): void {
288
305
  );
289
306
  }
290
307
 
291
- function reportMarksFalsePositive(text: string): boolean {
292
- return (
293
- /^(?:Confirm-Status|Confirmation|Confirm-Verdict|Verdict)\s*:\s*(?:false[-_ ]positive|fp)\b/im.test(
294
- text,
295
- ) || /"confirm_status"\s*:\s*"false[-_ ]positive"/i.test(text)
296
- );
297
- }
298
-
299
- function uniqueDest(root: string, name: string): string {
300
- let candidate = join(root, name);
301
- let suffix = 2;
302
- while (existsSync(candidate)) {
303
- candidate = join(root, `${name}-${suffix}`);
304
- suffix++;
305
- }
306
- return candidate;
307
- }
308
-
309
- export function renameFalsePositiveFindings(cwd: string): string[] {
310
- const root = join(cwd, "piolium", "findings");
311
- if (!existsSync(root)) return [];
312
- const renames: string[] = [];
313
- for (const entry of readdirSync(root).sort()) {
314
- if (entry.startsWith("FP-")) continue;
315
- const dir = join(root, entry);
316
- try {
317
- if (!statSync(dir).isDirectory()) continue;
318
- const reportPath = join(dir, "report.md");
319
- if (!existsSync(reportPath)) continue;
320
- if (!reportMarksFalsePositive(readFileSync(reportPath, "utf8"))) continue;
321
- const destName = `FP-${entry}`;
322
- const dest = uniqueDest(root, destName);
323
- renameSync(dir, dest);
324
- renames.push(`${entry} -> ${basename(dest)}`);
325
- } catch {
326
- // Keep confirmation moving; V6 will still report available evidence.
327
- }
328
- }
329
- ensureConfirmWorkdir(cwd);
330
- writeFileSync(
331
- join(cwd, FP_RENAMES),
332
- `${JSON.stringify({ renamed_at: new Date().toISOString(), renames }, null, "\t")}\n`,
333
- );
334
- return renames;
335
- }
336
-
337
308
  export interface ConfirmCleanupResult {
338
309
  summaryPath: string;
339
310
  checkedFindingDirs: string[];
340
311
  createdEvidenceDirs: string[];
341
312
  formatIssues: string[];
342
- falsePositiveRenames: string[];
343
313
  redactedFiles: Array<{ path: string; replacements: Record<string, number> }>;
344
314
  skippedFiles: Array<{ path: string; reason: string }>;
345
315
  }
@@ -546,7 +516,6 @@ function normalizeFindingLayout(
546
516
 
547
517
  export function cleanupConfirmArtifacts(cwd: string): ConfirmCleanupResult {
548
518
  ensureConfirmWorkdir(cwd);
549
- const falsePositiveRenames = renameFalsePositiveFindings(cwd);
550
519
  const layout = normalizeFindingLayout(cwd);
551
520
  const skippedFiles: ConfirmCleanupResult["skippedFiles"] = [];
552
521
  const candidates: string[] = [];
@@ -560,7 +529,6 @@ export function cleanupConfirmArtifacts(cwd: string): ConfirmCleanupResult {
560
529
  const result: ConfirmCleanupResult = {
561
530
  summaryPath: CLEANUP_SUMMARY,
562
531
  ...layout,
563
- falsePositiveRenames,
564
532
  redactedFiles,
565
533
  skippedFiles,
566
534
  };
@@ -618,15 +586,6 @@ export async function runConfirmAudit(opts: RunConfirmOptions): Promise<RunConfi
618
586
  });
619
587
  continue;
620
588
  }
621
- if (name === "V6") {
622
- const renames = renameFalsePositiveFindings(cwd);
623
- if (renames.length > 0) {
624
- ui?.notify?.(
625
- `Renamed ${renames.length} false-positive finding folder(s) with FP- prefix.`,
626
- "warning",
627
- );
628
- }
629
- }
630
589
  try {
631
590
  await runAgentPhase({
632
591
  cwd,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vigolium/piolium",
3
- "version": "0.0.2",
3
+ "version": "0.0.3",
4
4
  "description": "Pi-native port of archon-audit. Multi-phase security audits with specialist sub-agents, isolated context windows, capped concurrency, and resumable state — packaged as a Pi extension.",
5
5
  "keywords": ["pi-package", "security", "audit", "subagents", "piolium"],
6
6
  "license": "MIT",