@vigolium/piolium 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -17
- package/agents/confirm-writer.md +58 -42
- package/agents/poc-runner.md +13 -13
- package/agents/test-locator.md +5 -5
- package/extensions/piolium/export-results.ts +3 -2
- package/extensions/piolium/modes/confirm.ts +37 -78
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,9 +6,11 @@
|
|
|
6
6
|
<p align="center"><a href="https://www.vigolium.com">www.vigolium.com</a> - <a href="https://docs.vigolium.com">docs.vigolium.com</a></p>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
+

|
|
10
|
+
|
|
9
11
|
# Piolium
|
|
10
12
|
|
|
11
|
-
Piolium is Vigolium's Pi-native repository security audit agent. It runs multi-phase source audits with specialist sub-agents, resumable state, controlled concurrency, PoC generation, and final reporting.
|
|
13
|
+
Piolium is [Vigolium](https://www.vigolium.com/)'s Pi-native repository security audit agent. It runs multi-phase source audits with specialist sub-agents, resumable state, controlled concurrency, PoC generation, and final reporting.
|
|
12
14
|
|
|
13
15
|
Piolium is packaged as a Pi extension. Once installed, it registers `/piolium-*` slash commands inside Pi sessions and also provides a standalone `piolium` launcher when installed through the quick installer.
|
|
14
16
|
|
|
@@ -17,36 +19,26 @@ Piolium is packaged as a Pi extension. Once installed, it registers `/piolium-*`
|
|
|
17
19
|
|
|
18
20
|
## Install
|
|
19
21
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
```bash
|
|
23
|
-
pi install npm:@vigolium/piolium
|
|
24
|
-
```
|
|
25
|
-
|
|
26
|
-
Or use the standalone quick installer (bundles an isolated Pi if you don't have one):
|
|
22
|
+
Piolium is a Pi extension, so you need Pi first. If you don't have it:
|
|
27
23
|
|
|
28
24
|
```bash
|
|
29
|
-
|
|
25
|
+
npm install -g @earendil-works/pi-coding-agent
|
|
30
26
|
```
|
|
31
27
|
|
|
32
|
-
Then
|
|
28
|
+
Then install Piolium (recommended):
|
|
33
29
|
|
|
34
30
|
```bash
|
|
35
|
-
piolium
|
|
31
|
+
pi install npm:@vigolium/piolium
|
|
36
32
|
```
|
|
37
33
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
```bash
|
|
41
|
-
piolium auth sync
|
|
42
|
-
```
|
|
34
|
+
Piolium loads in every Pi session and reuses your existing Pi auth — no separate login step.
|
|
43
35
|
|
|
44
36
|
For development from this checkout:
|
|
45
37
|
|
|
46
38
|
```bash
|
|
47
39
|
bun install
|
|
48
40
|
bun run import-archon -- --src /path/to/archon-audit
|
|
49
|
-
pi install
|
|
41
|
+
pi install .
|
|
50
42
|
```
|
|
51
43
|
|
|
52
44
|
More install, build, release, auth, and development details are in [HACKING.md](HACKING.md).
|
package/agents/confirm-writer.md
CHANGED
|
@@ -35,40 +35,48 @@ For each finding, extract:
|
|
|
35
35
|
|
|
36
36
|
### 2. Categorize Results
|
|
37
37
|
|
|
38
|
-
Group findings into confirmation categories. Each finding gets ONE category — when both V4 and V5 produced verdicts, pick the strongest in this priority order: `
|
|
38
|
+
Group findings into confirmation categories. Each finding gets ONE category — when both V4 and V5 produced verdicts, pick the strongest in this priority order: `live-verified` > `test-verified` > `false-positive` > `analytical` > `not-reproduced` > `flaky` > `blocked` > `no-poc` > `errored`.
|
|
39
39
|
|
|
40
|
-
The category is independent of `Documented-Intent`. A `match: yes` finding can still be `
|
|
40
|
+
The category is independent of `Documented-Intent`. A `match: yes` finding can still be `live-verified` — the PoC ran and the documented behavior was exactly what it produced. The reader uses both columns together to decide whether to triage further.
|
|
41
41
|
|
|
42
42
|
| Category | Criteria |
|
|
43
43
|
|----------|---------|
|
|
44
|
-
| `
|
|
45
|
-
| `
|
|
46
|
-
| `
|
|
47
|
-
| `analytical
|
|
48
|
-
| `
|
|
49
|
-
| `
|
|
44
|
+
| `live-verified` | PoC executed successfully against live environment (structured-output `status: confirmed`) |
|
|
45
|
+
| `test-verified` | Generated test demonstrated the vulnerability |
|
|
46
|
+
| `false-positive` | fp-check determined the original draft was a false positive (excluded from severity counts) |
|
|
47
|
+
| `analytical` | Finding is marked `Protocol: non-exploitable` — confirmation is structural, not behavioural |
|
|
48
|
+
| `not-reproduced` | PoC ran cleanly AND/OR test ran cleanly without demonstrating the issue (covers both V4 `Confirm-Status: not-reproduced` and V5 `Confirm-Status: not-reproduced` — `Confirm-Method` tells the two apart) |
|
|
49
|
+
| `flaky` | PoC's structured output reported `inconclusive` (e.g., race condition that didn't trigger deterministically) |
|
|
50
50
|
| `blocked` | App unreachable, missing interpreter, missing auth token, install failure, test timeout, or no test framework |
|
|
51
51
|
| `no-poc` | Finding had no PoC script and no testable code path |
|
|
52
|
-
| `
|
|
52
|
+
| `errored` | Pipeline error during confirmation (record the failure for re-run) |
|
|
53
53
|
|
|
54
54
|
**Deduplication rule**: a single finding ID appears in EXACTLY ONE category. Do not double-count when a finding was attempted by both V4 and V5 — the priority order above resolves it.
|
|
55
55
|
|
|
56
|
-
### 3. Stage
|
|
56
|
+
### 3. Stage Findings by Verdict
|
|
57
57
|
|
|
58
|
-
Before writing the report, mirror every finding that received a verdict into `archon/confirm-workspace
|
|
58
|
+
Before writing the report, mirror every finding that received a verdict into two top-level buckets under `archon/confirm-workspace/`, each grouped by category. This makes the outcome self-evident from the directory layout — a reviewer sees at a glance which findings the confirmer stood behind and which it could not, without cross-referencing `confirmation-report.md` against `archon/findings/`.
|
|
59
59
|
|
|
60
|
-
|
|
60
|
+
- `archon/confirm-workspace/report-ready/<category>/` — findings the confirmer reached a positive conclusion on (the ship list). Categories: `live-verified`, `test-verified`, `analytical`, `false-positive`.
|
|
61
|
+
- `archon/confirm-workspace/needs-review/<category>/` — every finding that did NOT confirm (the followup queue). Categories: `not-reproduced`, `flaky`, `blocked`, `no-poc`, `errored`.
|
|
62
|
+
|
|
63
|
+
Both buckets are derived, disposable copies, regenerated each run. `archon/findings/` remains the canonical source of truth, and each staged `report.md` still carries the exact `Confirm-Status`, so the category folder is a convenience index, not authoritative.
|
|
61
64
|
|
|
62
65
|
```bash
|
|
63
|
-
# Wipe any prior staging so the
|
|
64
|
-
rm -rf archon/confirm-workspace/
|
|
65
|
-
mkdir -p archon/confirm-workspace/
|
|
66
|
+
# Wipe any prior staging so the folders reflect only this run.
|
|
67
|
+
rm -rf archon/confirm-workspace/report-ready archon/confirm-workspace/needs-review
|
|
68
|
+
mkdir -p archon/confirm-workspace/report-ready/{live-verified,test-verified,analytical,false-positive}
|
|
69
|
+
mkdir -p archon/confirm-workspace/needs-review/{not-reproduced,flaky,blocked,no-poc,errored}
|
|
66
70
|
```
|
|
67
71
|
|
|
68
|
-
For each finding
|
|
72
|
+
For each finding, copy its directory into the bucket matching its resolved category from §2 — ship-list categories go to `report-ready/<category>/`, the rest to `needs-review/<category>/`:
|
|
69
73
|
|
|
70
74
|
```bash
|
|
71
|
-
|
|
75
|
+
# live-verified | test-verified | analytical | false-positive
|
|
76
|
+
cp -R "archon/findings/<ID>-<slug>/" "archon/confirm-workspace/report-ready/<category>/"
|
|
77
|
+
|
|
78
|
+
# not-reproduced | flaky | blocked | no-poc | errored
|
|
79
|
+
cp -R "archon/findings/<ID>-<slug>/" "archon/confirm-workspace/needs-review/<category>/"
|
|
72
80
|
```
|
|
73
81
|
|
|
74
82
|
`cp -R` copies the full directory (report.md, PoC scripts, `confirm-evidence/`, `confirm-test*`, etc.) so each staged entry is self-contained for review. If the source directory is missing (e.g., a finding ID survived in the report but its directory was deleted), log a warning and skip — do not abort report generation.
|
|
@@ -87,35 +95,43 @@ Write `archon/confirmation-report.md`:
|
|
|
87
95
|
| Confirmed at | <ISO timestamp> |
|
|
88
96
|
| Environment | <method_used from env-connection.json or "test-only" or "--target URL"> |
|
|
89
97
|
| Original audit mode | <mode from audit-state.json, or "unknown"> |
|
|
90
|
-
|
|
|
98
|
+
| Findings staging | `archon/confirm-workspace/report-ready/` + `needs-review/` (grouped by verdict category) |
|
|
91
99
|
|
|
92
100
|
## Summary
|
|
93
101
|
|
|
94
|
-
|
|
|
95
|
-
|
|
96
|
-
|
|
|
97
|
-
|
|
|
98
|
-
|
|
|
99
|
-
| analytical
|
|
100
|
-
|
|
|
101
|
-
|
|
|
102
|
+
| Verdict | Count | Findings |
|
|
103
|
+
|---------|-------|----------|
|
|
104
|
+
| live-verified | N | C1, H2, ... |
|
|
105
|
+
| test-verified | N | H3, M1, ... |
|
|
106
|
+
| false-positive | N | ... |
|
|
107
|
+
| analytical | N | ... |
|
|
108
|
+
| not-reproduced | N | M2, ... |
|
|
109
|
+
| flaky | N | ... |
|
|
102
110
|
| blocked | N | ... |
|
|
103
111
|
| no-poc | N | ... |
|
|
104
|
-
|
|
|
112
|
+
| errored | N | ... |
|
|
105
113
|
|
|
106
|
-
**Confirmation rate**: X/Y findings confirmed (Z%) — `
|
|
114
|
+
**Confirmation rate**: X/Y findings confirmed (Z%) — `false-positive` and `analytical` are excluded from the denominator (they're not pending verification).
|
|
107
115
|
|
|
108
116
|
## Breakdown by Exploitability Class
|
|
109
117
|
|
|
110
118
|
(read from `archon/confirm-workspace/findings-inventory.json:by_class`)
|
|
111
119
|
|
|
112
|
-
| Class | Total |
|
|
113
|
-
|
|
120
|
+
| Class | Total | live-verified | test-verified | not-reproduced | blocked | analytical |
|
|
121
|
+
|-------|-------|---------------|---------------|----------------|---------|------------|
|
|
114
122
|
| network-exploitable | N | N | N | N | N | — |
|
|
115
123
|
| local-exploitable | N | — | N | N | N | — |
|
|
116
124
|
| non-exploitable | N | — | — | — | — | N |
|
|
117
125
|
|
|
118
|
-
##
|
|
126
|
+
## Pre-Auth Exposure
|
|
127
|
+
|
|
128
|
+
(cross-cut index — list every finding whose `report.md` has `Auth-Required: no`, regardless of verdict. These are exploitable without credentials and are the highest priority for client reports. Omit the section entirely if no finding has `Auth-Required: no`.)
|
|
129
|
+
|
|
130
|
+
| ID | Title | Severity | Verdict | Vector |
|
|
131
|
+
|----|-------|----------|---------|--------|
|
|
132
|
+
| C1 | ... | CRITICAL | live-verified | unauthenticated HTTP |
|
|
133
|
+
|
|
134
|
+
## Report-Ready — Live Verified
|
|
119
135
|
|
|
120
136
|
### <ID> — <title> [<severity>]
|
|
121
137
|
|
|
@@ -127,7 +143,7 @@ Write `archon/confirmation-report.md`:
|
|
|
127
143
|
|
|
128
144
|
---
|
|
129
145
|
|
|
130
|
-
##
|
|
146
|
+
## Report-Ready — Test Verified
|
|
131
147
|
|
|
132
148
|
### <ID> — <title> [<severity>]
|
|
133
149
|
|
|
@@ -139,7 +155,7 @@ Write `archon/confirmation-report.md`:
|
|
|
139
155
|
|
|
140
156
|
---
|
|
141
157
|
|
|
142
|
-
##
|
|
158
|
+
## Needs-Review — Not Reproduced
|
|
143
159
|
|
|
144
160
|
### <ID> — <title> [<severity>]
|
|
145
161
|
|
|
@@ -151,7 +167,7 @@ Write `archon/confirmation-report.md`:
|
|
|
151
167
|
|
|
152
168
|
---
|
|
153
169
|
|
|
154
|
-
## Blocked
|
|
170
|
+
## Needs-Review — Blocked
|
|
155
171
|
|
|
156
172
|
### <ID> — <title> [<severity>]
|
|
157
173
|
|
|
@@ -215,15 +231,15 @@ If `archon/audit-state.json` exists, update the latest audit entry. Two writes:
|
|
|
215
231
|
"environment_method": "<method_used or 'remote' or 'test-only'>",
|
|
216
232
|
"target_url": "<base_url or --target URL>",
|
|
217
233
|
"results": {
|
|
218
|
-
"
|
|
219
|
-
"
|
|
220
|
-
"
|
|
221
|
-
"
|
|
222
|
-
"
|
|
223
|
-
"
|
|
234
|
+
"live_verified": <count>,
|
|
235
|
+
"test_verified": <count>,
|
|
236
|
+
"false_positive": <count>,
|
|
237
|
+
"analytical": <count>,
|
|
238
|
+
"not_reproduced": <count>,
|
|
239
|
+
"flaky": <count>,
|
|
224
240
|
"blocked": <count>,
|
|
225
241
|
"no_poc": <count>,
|
|
226
|
-
"
|
|
242
|
+
"errored": <count>
|
|
227
243
|
},
|
|
228
244
|
"by_class": {"network-exploitable": <count>, "local-exploitable": <count>, "non-exploitable": <count>},
|
|
229
245
|
"confirmation_rate": "<X/Y (Z%)>"
|
|
@@ -241,7 +257,7 @@ If `archon/audit-state.json` exists, update the latest audit entry. Two writes:
|
|
|
241
257
|
"started_at": "<ISO timestamp>",
|
|
242
258
|
"completed_at": "<ISO timestamp>",
|
|
243
259
|
"target_url": "<base_url>",
|
|
244
|
-
"results": {"
|
|
260
|
+
"results": {"live_verified": N, "test_verified": N, "...": "..."}
|
|
245
261
|
}
|
|
246
262
|
]
|
|
247
263
|
}
|
package/agents/poc-runner.md
CHANGED
|
@@ -43,9 +43,9 @@ Read the finding report at `archon/findings/<ID>-<slug>/report.md`. Extract:
|
|
|
43
43
|
- `Protocol:` field (`http`, `grpc`, `graphql`, `websocket`, `tcp`, `local`, `non-exploitable`) — written by poc-author. Defaults to `http` if absent.
|
|
44
44
|
- `Auth-Required:` field (`yes` / `no`) — defaults to `no` if absent.
|
|
45
45
|
- Expected security effect (what the PoC should demonstrate)
|
|
46
|
-
- Current `Confirm-Status` (skip if already `
|
|
46
|
+
- Current `Confirm-Status` (skip if already `live-verified` from a previous run)
|
|
47
47
|
|
|
48
|
-
If `Protocol: non-exploitable`, write `Confirm-Status: analytical
|
|
48
|
+
If `Protocol: non-exploitable`, write `Confirm-Status: analytical` and exit cleanly — there is no live verification to run.
|
|
49
49
|
|
|
50
50
|
### 2. Locate the PoC Script
|
|
51
51
|
|
|
@@ -158,17 +158,17 @@ Allowed `status` values: `confirmed`, `failed`, `inconclusive`.
|
|
|
158
158
|
|
|
159
159
|
Parse the LAST line of `exploit.log` matching `^\{.*"status".*\}$`. Map directly:
|
|
160
160
|
|
|
161
|
-
- `confirmed` → `Confirm-Status:
|
|
162
|
-
- `failed` → `Confirm-Status:
|
|
163
|
-
- `inconclusive` → `Confirm-Status:
|
|
161
|
+
- `confirmed` → `Confirm-Status: live-verified`
|
|
162
|
+
- `failed` → `Confirm-Status: not-reproduced` (try variant 2 if not yet attempted)
|
|
163
|
+
- `inconclusive` → `Confirm-Status: flaky` (treated like not-reproduced for V5 fallback purposes; reporter surfaces it distinctly)
|
|
164
164
|
|
|
165
|
-
**Legacy PoC fallback**: if no structured line is present (older PoCs from before the contract), apply the heuristic — non-zero exit + no security marker = `
|
|
165
|
+
**Legacy PoC fallback**: if no structured line is present (older PoCs from before the contract), apply the heuristic — non-zero exit + no security marker = `not-reproduced`; security marker present = `live-verified`. Add `Confirm-Notes: legacy-poc-format` so the operator knows to upgrade.
|
|
166
166
|
|
|
167
|
-
For **
|
|
167
|
+
For **not-reproduced** results from variant 1: run variant 2 with a different payload encoding, alternate endpoint path, or alternative auth identity (e.g., switch `{{TOKEN_user}}` ↔ `{{TOKEN_admin}}` for privilege-escalation-shaped findings).
|
|
168
168
|
|
|
169
|
-
For **
|
|
170
|
-
- fp-check confirms the draft is itself a false positive → `Confirm-Status:
|
|
171
|
-
- fp-check finds the draft sound but the live PoC weak → keep `Confirm-Status:
|
|
169
|
+
For **not-reproduced** results after both variants: run the `fp-check` skill on the original draft (`archon/findings/<ID>-<slug>/draft.md`) using the live evidence as context. Two outcomes:
|
|
170
|
+
- fp-check confirms the draft is itself a false positive → `Confirm-Status: false-positive`
|
|
171
|
+
- fp-check finds the draft sound but the live PoC weak → keep `Confirm-Status: not-reproduced` and let V5 generate a reproducer test
|
|
172
172
|
|
|
173
173
|
Record each attempt and the fp-check verdict in `archon/findings/<ID>-<slug>/confirm-evidence/attempts.log`.
|
|
174
174
|
|
|
@@ -176,7 +176,7 @@ Record each attempt and the fp-check verdict in `archon/findings/<ID>-<slug>/con
|
|
|
176
176
|
|
|
177
177
|
Write confirmation status back to the finding:
|
|
178
178
|
```
|
|
179
|
-
Confirm-Status:
|
|
179
|
+
Confirm-Status: live-verified | not-reproduced | flaky | errored | blocked | false-positive | analytical | no-poc
|
|
180
180
|
Confirm-Timestamp: <ISO timestamp>
|
|
181
181
|
Confirm-Evidence: archon/findings/<ID>-<slug>/confirm-evidence/
|
|
182
182
|
Confirm-Variant-Count: <1 or 2>
|
|
@@ -184,9 +184,9 @@ Confirm-FpCheck: ran | not-run
|
|
|
184
184
|
Confirm-Notes: <brief description of what was observed>
|
|
185
185
|
```
|
|
186
186
|
|
|
187
|
-
If **
|
|
187
|
+
If **not-reproduced** or **flaky** after all attempts, the finding is queued for test-locator (V5) fallback.
|
|
188
188
|
If **blocked** (missing interpreter, missing auth token, app unreachable), the finding is queued for V5 too — V5 may succeed where the live PoC could not.
|
|
189
|
-
If **
|
|
189
|
+
If **false-positive** or **analytical**, the finding skips V5 entirely.
|
|
190
190
|
|
|
191
191
|
## Completion
|
|
192
192
|
|
package/agents/test-locator.md
CHANGED
|
@@ -5,7 +5,7 @@ model: sonnet
|
|
|
5
5
|
color: blue
|
|
6
6
|
permissionMode: bypassPermissions
|
|
7
7
|
effort: low
|
|
8
|
-
description: Confirmation phase V5 test-based verification agent that maps
|
|
8
|
+
description: Confirmation phase V5 test-based verification agent that maps not-reproduced / blocked / no-poc findings to existing test files, generates minimal reproducer tests targeting each vulnerability, executes them in isolation within archon/findings/<ID>/, and updates confirmation status
|
|
9
9
|
---
|
|
10
10
|
|
|
11
11
|
You are a test mapper for the confirmation phase of a security audit. You verify findings by generating and running targeted test cases when live PoC execution is not possible.
|
|
@@ -184,17 +184,17 @@ The outer `timeout 90` is a belt-and-suspenders cap — if the runner ignores it
|
|
|
184
184
|
### 8. Assess Result
|
|
185
185
|
|
|
186
186
|
- **Test passes** (exit 0): the vulnerability is confirmed — malicious input reached the sink
|
|
187
|
-
→ `Confirm-Status:
|
|
187
|
+
→ `Confirm-Status: test-verified`
|
|
188
188
|
- **Test fails** (assertion error): the application sanitized/blocked the input — not confirmed this way
|
|
189
|
-
→ `Confirm-Status:
|
|
189
|
+
→ `Confirm-Status: not-reproduced`
|
|
190
190
|
- **Test errors** (import error, syntax error, runtime crash): test couldn't execute
|
|
191
|
-
→ `Confirm-Status:
|
|
191
|
+
→ `Confirm-Status: not-reproduced` with `Confirm-Notes` explaining the error
|
|
192
192
|
|
|
193
193
|
### 9. Update Finding
|
|
194
194
|
|
|
195
195
|
Write back to the finding report:
|
|
196
196
|
```
|
|
197
|
-
Confirm-Status:
|
|
197
|
+
Confirm-Status: test-verified | not-reproduced | blocked
|
|
198
198
|
Confirm-Method: generated-test
|
|
199
199
|
Confirm-Test: archon/findings/<ID>-<slug>/confirm-test.{ext}
|
|
200
200
|
Confirm-Test-Output: archon/findings/<ID>-<slug>/confirm-test-output.log
|
|
@@ -2,6 +2,7 @@ import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSy
|
|
|
2
2
|
import { basename, dirname, extname, isAbsolute, join, relative, resolve } from "node:path";
|
|
3
3
|
import { splitFrontmatter } from "./agents.ts";
|
|
4
4
|
import { type FindingDraft, listFindingDirs, readFindingFrontmatter } from "./findings.ts";
|
|
5
|
+
import type { ConfirmVerdict } from "./modes/confirm.ts";
|
|
5
6
|
|
|
6
7
|
export type ExportFormat = "json" | "md-dir";
|
|
7
8
|
|
|
@@ -197,8 +198,8 @@ function includeFinding(
|
|
|
197
198
|
|
|
198
199
|
function isConfirmed(confirmStatus: string | undefined): boolean {
|
|
199
200
|
return (
|
|
200
|
-
confirmStatus === "
|
|
201
|
-
confirmStatus === "
|
|
201
|
+
confirmStatus === ("live-verified" satisfies ConfirmVerdict) ||
|
|
202
|
+
confirmStatus === ("test-verified" satisfies ConfirmVerdict) ||
|
|
202
203
|
confirmStatus === "confirmed"
|
|
203
204
|
);
|
|
204
205
|
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Confirm mode (`/piolium-confirm`).
|
|
3
3
|
*
|
|
4
4
|
* Verification pass over an already-completed audit (command-defs/confirm.md,
|
|
5
|
-
*
|
|
5
|
+
* vigolium-audit @ 2026-05-20). Seven phases:
|
|
6
6
|
*
|
|
7
7
|
* V1 findings inventory (env-profiler surveys & classifies findings by
|
|
8
8
|
* exploitability: network / local / non-exploitable)
|
|
@@ -28,15 +28,7 @@
|
|
|
28
28
|
* `piolium/confirm-workspace/env-connection.json`.
|
|
29
29
|
*/
|
|
30
30
|
|
|
31
|
-
import {
|
|
32
|
-
existsSync,
|
|
33
|
-
mkdirSync,
|
|
34
|
-
readFileSync,
|
|
35
|
-
readdirSync,
|
|
36
|
-
renameSync,
|
|
37
|
-
statSync,
|
|
38
|
-
writeFileSync,
|
|
39
|
-
} from "node:fs";
|
|
31
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
|
|
40
32
|
import { basename, extname, join } from "node:path";
|
|
41
33
|
import type { AgentRuntimeModel } from "../agent-runner.ts";
|
|
42
34
|
import { loadAgents } from "../agents.ts";
|
|
@@ -73,12 +65,28 @@ const WORK = CONFIRM_WORKSPACE;
|
|
|
73
65
|
const REPORT = CONFIRM_REPORT;
|
|
74
66
|
export const POC_RESULTS = `${WORK}/poc-results.json`;
|
|
75
67
|
export const INTENT_CORPUS = `${WORK}/intent-corpus.json`;
|
|
76
|
-
const FP_RENAMES = `${WORK}/false-positive-renames.json`;
|
|
77
68
|
export const CLEANUP_SUMMARY = `${WORK}/cleanup-summary.json`;
|
|
78
69
|
const MAX_REDACTABLE_BYTES = 5 * 1024 * 1024;
|
|
79
70
|
|
|
80
71
|
export const CONFIRM_AGENT_PHASES = ["V1", "V1.5", "V2", "V3", "V4", "V5", "V6"] as const;
|
|
81
72
|
|
|
73
|
+
export const REPORT_READY_VERDICTS = [
|
|
74
|
+
"live-verified",
|
|
75
|
+
"test-verified",
|
|
76
|
+
"analytical",
|
|
77
|
+
"false-positive",
|
|
78
|
+
] as const;
|
|
79
|
+
export const NEEDS_REVIEW_VERDICTS = [
|
|
80
|
+
"not-reproduced",
|
|
81
|
+
"flaky",
|
|
82
|
+
"blocked",
|
|
83
|
+
"no-poc",
|
|
84
|
+
"errored",
|
|
85
|
+
] as const;
|
|
86
|
+
export type ConfirmVerdict =
|
|
87
|
+
| (typeof REPORT_READY_VERDICTS)[number]
|
|
88
|
+
| (typeof NEEDS_REVIEW_VERDICTS)[number];
|
|
89
|
+
|
|
82
90
|
const TEXT_EXTENSIONS = new Set([
|
|
83
91
|
".csv",
|
|
84
92
|
".curl",
|
|
@@ -159,7 +167,7 @@ const CONFIRMATION_STANDARD = [
|
|
|
159
167
|
"- Write evidence under each finding's `evidence/` directory; include enough detail for replay.",
|
|
160
168
|
"- Do not mark confirmed from code plausibility alone.",
|
|
161
169
|
"- Mark `Confirm-Status: false-positive` only when real execution or a targeted reproducer proves the claimed exploit path is blocked, unreachable, or contradicted by code/runtime behavior.",
|
|
162
|
-
"- If evidence is incomplete, use `blocked`, `
|
|
170
|
+
"- If evidence is incomplete, use `blocked`, `flaky`, or `not-reproduced` instead of false-positive.",
|
|
163
171
|
].join("\n");
|
|
164
172
|
|
|
165
173
|
export function buildConfirmTask(phase: string, target: string | undefined): string {
|
|
@@ -214,12 +222,12 @@ export function buildConfirmTask(phase: string, target: string | undefined): str
|
|
|
214
222
|
case "V4":
|
|
215
223
|
return [
|
|
216
224
|
"You are running V4 (PoC Execution) of /piolium-confirm.",
|
|
217
|
-
"Read findings-inventory.json and env-connection.json. Skip non-exploitable findings as `Confirm-Status: analytical
|
|
225
|
+
"Read findings-inventory.json and env-connection.json. Skip non-exploitable findings as `Confirm-Status: analytical`; route local-only findings to V5.",
|
|
218
226
|
"Before per-finding execution, run one reachability check against base_url with a 5s timeout; if unreachable, mark queued network findings `blocked` and record the reason.",
|
|
219
227
|
"For every network-exploitable finding with a PoC, execute the real PoC against the target. Use a 30s timeout per variant, max 2 variants.",
|
|
220
228
|
"Capture exact command, relevant env, HTTP request/response or stdout/stderr, and observable before/after state to `<finding-dir>/evidence/confirmed-<timestamp>.log`.",
|
|
221
229
|
"Parse structured PoC output if present: final JSON line `{status,evidence,notes}`.",
|
|
222
|
-
"Update each `report.md` with `Confirm-Status:
|
|
230
|
+
"Update each `report.md` with `Confirm-Status: live-verified | not-reproduced | flaky | blocked | analytical | false-positive` and `Confirm-Evidence:` pointing at the evidence file.",
|
|
223
231
|
`Write aggregate results to \`${POC_RESULTS}\`.`,
|
|
224
232
|
CONFIRMATION_STANDARD,
|
|
225
233
|
].join("\n\n");
|
|
@@ -229,20 +237,29 @@ export function buildConfirmTask(phase: string, target: string | undefined): str
|
|
|
229
237
|
"For findings whose live PoC did not confirm, had no PoC, or are local-exploitable, generate the smallest reproducer test in the existing test framework.",
|
|
230
238
|
"Actually run the test with a 60s cap (pytest timeout, jest --testTimeout, go test -timeout, etc.).",
|
|
231
239
|
"Keep reproducer files/evidence under each finding dir and write command/output logs under `evidence/`.",
|
|
232
|
-
"Update `report.md`: `Confirm-Status:
|
|
240
|
+
"Update `report.md`: `Confirm-Status: test-verified | not-reproduced | blocked | false-positive` and `Confirm-Evidence:`.",
|
|
233
241
|
"Only mark `false-positive` when the reproducer proves the claimed vulnerable path is unreachable, patched, protected, or based on an invalid assumption.",
|
|
234
242
|
`Write \`${WORK}/test-mapping.json\` with per-finding verdicts and evidence pointers.`,
|
|
235
243
|
CONFIRMATION_STANDARD,
|
|
236
244
|
].join("\n\n");
|
|
237
|
-
case "V6":
|
|
245
|
+
case "V6": {
|
|
246
|
+
const reportReady = REPORT_READY_VERDICTS.join(", ");
|
|
247
|
+
const needsReview = NEEDS_REVIEW_VERDICTS.join(", ");
|
|
248
|
+
const allVerdicts = [...REPORT_READY_VERDICTS, ...NEEDS_REVIEW_VERDICTS].join(", ");
|
|
238
249
|
return [
|
|
239
250
|
"You are running V6 (Confirmation Report) of /piolium-confirm.",
|
|
240
|
-
"Read `piolium/findings
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
251
|
+
"Read every `report.md` under `piolium/findings/` and treat it as the source of truth.",
|
|
252
|
+
"Stage every finding into one of two derived buckets under `piolium/confirm-workspace/` (regenerated each run, wipe prior staging first):",
|
|
253
|
+
` - \`piolium/confirm-workspace/report-ready/<category>/\` for ${reportReady} (the ship list)`,
|
|
254
|
+
` - \`piolium/confirm-workspace/needs-review/<category>/\` for ${needsReview} (the followup queue)`,
|
|
255
|
+
"Use `cp -R` so each staged entry is self-contained (report.md, PoC scripts, confirm-evidence/, confirm-test*).",
|
|
256
|
+
`Compose \`${REPORT}\` with: a Summary table of all nine verdicts (${allVerdicts}), a Breakdown by Exploitability Class section, and a Pre-Auth Exposure cross-cut index that lists every finding whose \`report.md\` has \`Auth-Required: no\` (omit the section if none).`,
|
|
257
|
+
"For each verdict category that has findings, include a section with one entry per finding (ID — title [severity], vulnerability class, method, evidence pointer, observation).",
|
|
258
|
+
"Confirmation rate denominator excludes `false-positive` and `analytical`.",
|
|
259
|
+
"If `piolium/audit-state.json` exists, append a new entry to `audits[-1].confirmation_history[]` and refresh `audits[-1].confirmation` with the latest run's summary — never overwrite the history array.",
|
|
244
260
|
"Include environment setup notes, target URL/base_url, cleanup result, and methodology.",
|
|
245
261
|
].join("\n\n");
|
|
262
|
+
}
|
|
246
263
|
default:
|
|
247
264
|
return "Unknown V phase.";
|
|
248
265
|
}
|
|
@@ -288,58 +305,11 @@ export function writeRemoteConnection(cwd: string, target: string): void {
|
|
|
288
305
|
);
|
|
289
306
|
}
|
|
290
307
|
|
|
291
|
-
function reportMarksFalsePositive(text: string): boolean {
|
|
292
|
-
return (
|
|
293
|
-
/^(?:Confirm-Status|Confirmation|Confirm-Verdict|Verdict)\s*:\s*(?:false[-_ ]positive|fp)\b/im.test(
|
|
294
|
-
text,
|
|
295
|
-
) || /"confirm_status"\s*:\s*"false[-_ ]positive"/i.test(text)
|
|
296
|
-
);
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
function uniqueDest(root: string, name: string): string {
|
|
300
|
-
let candidate = join(root, name);
|
|
301
|
-
let suffix = 2;
|
|
302
|
-
while (existsSync(candidate)) {
|
|
303
|
-
candidate = join(root, `${name}-${suffix}`);
|
|
304
|
-
suffix++;
|
|
305
|
-
}
|
|
306
|
-
return candidate;
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
export function renameFalsePositiveFindings(cwd: string): string[] {
|
|
310
|
-
const root = join(cwd, "piolium", "findings");
|
|
311
|
-
if (!existsSync(root)) return [];
|
|
312
|
-
const renames: string[] = [];
|
|
313
|
-
for (const entry of readdirSync(root).sort()) {
|
|
314
|
-
if (entry.startsWith("FP-")) continue;
|
|
315
|
-
const dir = join(root, entry);
|
|
316
|
-
try {
|
|
317
|
-
if (!statSync(dir).isDirectory()) continue;
|
|
318
|
-
const reportPath = join(dir, "report.md");
|
|
319
|
-
if (!existsSync(reportPath)) continue;
|
|
320
|
-
if (!reportMarksFalsePositive(readFileSync(reportPath, "utf8"))) continue;
|
|
321
|
-
const destName = `FP-${entry}`;
|
|
322
|
-
const dest = uniqueDest(root, destName);
|
|
323
|
-
renameSync(dir, dest);
|
|
324
|
-
renames.push(`${entry} -> ${basename(dest)}`);
|
|
325
|
-
} catch {
|
|
326
|
-
// Keep confirmation moving; V6 will still report available evidence.
|
|
327
|
-
}
|
|
328
|
-
}
|
|
329
|
-
ensureConfirmWorkdir(cwd);
|
|
330
|
-
writeFileSync(
|
|
331
|
-
join(cwd, FP_RENAMES),
|
|
332
|
-
`${JSON.stringify({ renamed_at: new Date().toISOString(), renames }, null, "\t")}\n`,
|
|
333
|
-
);
|
|
334
|
-
return renames;
|
|
335
|
-
}
|
|
336
|
-
|
|
337
308
|
export interface ConfirmCleanupResult {
|
|
338
309
|
summaryPath: string;
|
|
339
310
|
checkedFindingDirs: string[];
|
|
340
311
|
createdEvidenceDirs: string[];
|
|
341
312
|
formatIssues: string[];
|
|
342
|
-
falsePositiveRenames: string[];
|
|
343
313
|
redactedFiles: Array<{ path: string; replacements: Record<string, number> }>;
|
|
344
314
|
skippedFiles: Array<{ path: string; reason: string }>;
|
|
345
315
|
}
|
|
@@ -546,7 +516,6 @@ function normalizeFindingLayout(
|
|
|
546
516
|
|
|
547
517
|
export function cleanupConfirmArtifacts(cwd: string): ConfirmCleanupResult {
|
|
548
518
|
ensureConfirmWorkdir(cwd);
|
|
549
|
-
const falsePositiveRenames = renameFalsePositiveFindings(cwd);
|
|
550
519
|
const layout = normalizeFindingLayout(cwd);
|
|
551
520
|
const skippedFiles: ConfirmCleanupResult["skippedFiles"] = [];
|
|
552
521
|
const candidates: string[] = [];
|
|
@@ -560,7 +529,6 @@ export function cleanupConfirmArtifacts(cwd: string): ConfirmCleanupResult {
|
|
|
560
529
|
const result: ConfirmCleanupResult = {
|
|
561
530
|
summaryPath: CLEANUP_SUMMARY,
|
|
562
531
|
...layout,
|
|
563
|
-
falsePositiveRenames,
|
|
564
532
|
redactedFiles,
|
|
565
533
|
skippedFiles,
|
|
566
534
|
};
|
|
@@ -618,15 +586,6 @@ export async function runConfirmAudit(opts: RunConfirmOptions): Promise<RunConfi
|
|
|
618
586
|
});
|
|
619
587
|
continue;
|
|
620
588
|
}
|
|
621
|
-
if (name === "V6") {
|
|
622
|
-
const renames = renameFalsePositiveFindings(cwd);
|
|
623
|
-
if (renames.length > 0) {
|
|
624
|
-
ui?.notify?.(
|
|
625
|
-
`Renamed ${renames.length} false-positive finding folder(s) with FP- prefix.`,
|
|
626
|
-
"warning",
|
|
627
|
-
);
|
|
628
|
-
}
|
|
629
|
-
}
|
|
630
589
|
try {
|
|
631
590
|
await runAgentPhase({
|
|
632
591
|
cwd,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vigolium/piolium",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.3",
|
|
4
4
|
"description": "Pi-native port of archon-audit. Multi-phase security audits with specialist sub-agents, isolated context windows, capped concurrency, and resumable state — packaged as a Pi extension.",
|
|
5
5
|
"keywords": ["pi-package", "security", "audit", "subagents", "piolium"],
|
|
6
6
|
"license": "MIT",
|