opencode-swarm 7.62.1 → 7.64.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.opencode/skills/swarm-pr-review/SKILL.md +124 -0
- package/README.md +57 -0
- package/dist/agents/architect.d.ts +1 -1
- package/dist/agents/explorer.d.ts +1 -1
- package/dist/agents/reviewer-directive-compliance.d.ts +43 -0
- package/dist/cli/index.js +3418 -1029
- package/dist/config/constants.d.ts +2 -0
- package/dist/config/evidence-schema.d.ts +44 -44
- package/dist/config/schema.d.ts +201 -0
- package/dist/hooks/delegate-ack-collector.d.ts +51 -0
- package/dist/hooks/delegate-directive-injection.d.ts +33 -0
- package/dist/hooks/knowledge-application.d.ts +4 -3
- package/dist/hooks/knowledge-curator.d.ts +73 -1
- package/dist/hooks/knowledge-escalator.d.ts +50 -0
- package/dist/hooks/knowledge-events.d.ts +63 -3
- package/dist/hooks/knowledge-injector.d.ts +69 -1
- package/dist/hooks/knowledge-types.d.ts +41 -1
- package/dist/hooks/knowledge-validator.d.ts +43 -0
- package/dist/hooks/micro-reflector.d.ts +91 -0
- package/dist/hooks/phase-complete-directive-gate.d.ts +44 -0
- package/dist/hooks/phase-directives.d.ts +29 -0
- package/dist/hooks/reviewer-verdict-parser.d.ts +64 -0
- package/dist/hooks/search-knowledge.d.ts +33 -0
- package/dist/index.js +8600 -3951
- package/dist/memory/schema.d.ts +2 -2
- package/dist/services/directive-predicate-runner.d.ts +72 -0
- package/dist/services/external-skill-store.d.ts +96 -0
- package/dist/services/external-skill-validator.d.ts +160 -0
- package/dist/services/knowledge-diagnostics.d.ts +19 -0
- package/dist/services/skill-improver.d.ts +11 -0
- package/dist/services/status-service.d.ts +3 -0
- package/dist/services/synonym-map.d.ts +136 -0
- package/dist/services/trajectory-cluster.d.ts +49 -0
- package/dist/services/unactionable-hardening.d.ts +47 -0
- package/dist/tools/external-skill-delete.d.ts +16 -0
- package/dist/tools/external-skill-discover.d.ts +21 -0
- package/dist/tools/external-skill-inspect.d.ts +15 -0
- package/dist/tools/external-skill-list.d.ts +15 -0
- package/dist/tools/external-skill-promote.d.ts +20 -0
- package/dist/tools/external-skill-reject.d.ts +15 -0
- package/dist/tools/external-skill-revoke.d.ts +17 -0
- package/dist/tools/index.d.ts +7 -0
- package/dist/tools/manifest.d.ts +7 -0
- package/dist/tools/phase-complete.d.ts +10 -0
- package/dist/tools/tool-metadata.d.ts +28 -0
- package/package.json +1 -1
package/dist/memory/schema.d.ts
CHANGED
|
@@ -213,9 +213,9 @@ export declare const MemoryProposalSchema: z.ZodObject<{
|
|
|
213
213
|
rationale: z.ZodString;
|
|
214
214
|
evidenceRefs: z.ZodArray<z.ZodString>;
|
|
215
215
|
status: z.ZodEnum<{
|
|
216
|
-
approved: "approved";
|
|
217
|
-
rejected: "rejected";
|
|
218
216
|
pending: "pending";
|
|
217
|
+
rejected: "rejected";
|
|
218
|
+
approved: "approved";
|
|
219
219
|
applied: "applied";
|
|
220
220
|
superseded: "superseded";
|
|
221
221
|
}>;
|
|
package/dist/services/directive-predicate-runner.d.ts
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Directive verification predicate runner (Swarm Learning System, Change 2 /
|
|
3
|
+
* Task 2.2).
|
|
4
|
+
*
|
|
5
|
+
* Executes a small, fail-closed predicate DSL attached to a knowledge directive
|
|
6
|
+
* (`verification_predicate`). Handlers:
|
|
7
|
+
*
|
|
8
|
+
* grep:<regex>:<path-glob> PASS when ripgrep finds zero matches in the glob.
|
|
9
|
+
* (A "forbidden pattern" predicate: absence = pass.)
|
|
10
|
+
* tool:<argv> PASS when the command exits 0. Shell-free (argv
|
|
11
|
+
* array), binary must be on a conservative allowlist.
|
|
12
|
+
* file_not_modified:<path> PASS when <path> is unchanged in the working tree.
|
|
13
|
+
* file_modified:<path> PASS when <path> is changed in the working tree.
|
|
14
|
+
*
|
|
15
|
+
* Security posture (the adversarial contract):
|
|
16
|
+
* - No shell, ever. Commands run via argv arrays (`bunSpawn`), so shell
|
|
17
|
+
* metacharacters (; | && $() ` > <) are inert literals.
|
|
18
|
+
* - Path/glob arguments are validated to stay inside the working directory:
|
|
19
|
+
* null bytes, absolute paths, and `..` traversal are rejected.
|
|
20
|
+
* - `tool:` binaries are restricted to a conservative read-only allowlist;
|
|
21
|
+
* code interpreters (node/bun/python/deno/npx) are intentionally excluded.
|
|
22
|
+
* - Hard 15s timeout; the child is killed on timeout.
|
|
23
|
+
* - Fail-closed: any parse error, unknown handler, disallowed path, or
|
|
24
|
+
* unexpected state returns `result:'error'` (never silently `pass`).
|
|
25
|
+
*
|
|
26
|
+
* Residual risk: true network isolation is not available in this runtime. The
|
|
27
|
+
* mitigation is the absence of network-capable binaries from the allowlist plus
|
|
28
|
+
* the hard timeout. Build tools that can run arbitrary scripts (cargo/go) are
|
|
29
|
+
* NOT on the allowlist for this reason.
|
|
30
|
+
*/
|
|
31
|
+
export type PredicateResult = 'pass' | 'fail' | 'error';
|
|
32
|
+
export interface PredicateOutcome {
|
|
33
|
+
result: PredicateResult;
|
|
34
|
+
detail: string;
|
|
35
|
+
}
|
|
36
|
+
/** Hard wall-clock cap for any single predicate execution. */
|
|
37
|
+
export declare const PREDICATE_TIMEOUT_MS = 15000;
|
|
38
|
+
/**
|
|
39
|
+
* Conservative allowlist of `tool:` binaries. Read-only verification/lint tools
|
|
40
|
+
* only. Code interpreters and arbitrary build runners are deliberately excluded
|
|
41
|
+
* because they can execute attacker-influenced code or reach the network.
|
|
42
|
+
*/
|
|
43
|
+
export declare const TOOL_BINARY_ALLOWLIST: ReadonlySet<string>;
|
|
44
|
+
/**
|
|
45
|
+
* Validate that a repo-relative path/glob stays inside `directory`. Returns the
|
|
46
|
+
* trimmed value on success or null when it is unsafe. Globs (`*`, `**`, `?`,
|
|
47
|
+
* `{}`) are permitted; traversal and absolute paths are not.
|
|
48
|
+
*/
|
|
49
|
+
export declare function validateRepoRelativeGlob(directory: string, value: string): string | null;
|
|
50
|
+
interface RunResult {
|
|
51
|
+
exitCode: number | null;
|
|
52
|
+
stdout: string;
|
|
53
|
+
stderr: string;
|
|
54
|
+
timedOut: boolean;
|
|
55
|
+
}
|
|
56
|
+
/** Run an argv array, shell-free, with a hard timeout. Never throws.
|
|
57
|
+
* AGENTS.md invariant 3: stdin is 'ignore' (a never-closed stdin pipe can
|
|
58
|
+
* block the child from exiting under Bun on Windows) and the child is
|
|
59
|
+
* best-effort killed in `finally` so no code path leaks a process. */
|
|
60
|
+
export declare function runArgv(argv: string[], cwd: string): Promise<RunResult>;
|
|
61
|
+
/**
|
|
62
|
+
* Run a single verification predicate. Fail-closed: any parse error, unknown
|
|
63
|
+
* handler, or unexpected state returns `result:'error'`. Never throws.
|
|
64
|
+
*/
|
|
65
|
+
export declare function runDirectivePredicate(predicate: string, directory: string): Promise<PredicateOutcome>;
|
|
66
|
+
export declare const _internals: {
|
|
67
|
+
validateRepoRelativeGlob: typeof validateRepoRelativeGlob;
|
|
68
|
+
runArgv: typeof runArgv;
|
|
69
|
+
runDirectivePredicate: typeof runDirectivePredicate;
|
|
70
|
+
TOOL_BINARY_ALLOWLIST: ReadonlySet<string>;
|
|
71
|
+
};
|
|
72
|
+
export {};
|
|
package/dist/services/external-skill-store.d.ts
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* External skill candidate quarantine store.
|
|
3
|
+
*
|
|
4
|
+
* Manages external skill candidates persisted as individual JSON files under
|
|
5
|
+
* `.swarm/skills/candidates/<uuid>.json`. Each candidate goes through a
|
|
6
|
+
* quarantine lifecycle (pending → in_review → quarantined → passed/rejected →
|
|
7
|
+
* promoted/revoked) before it can be activated as a generated skill.
|
|
8
|
+
*
|
|
9
|
+
* All writes are atomic (temp-file + rename) via `atomicWriteFile` from the
|
|
10
|
+
* evidence subsystem. File-system I/O is funnelled through `_internals` so
|
|
11
|
+
* that tests can replace individual operations without cross-module mock
|
|
12
|
+
* leakage (Bun's `mock.module` is intentionally avoided for I/O seams).
|
|
13
|
+
*
|
|
14
|
+
* Invariants:
|
|
15
|
+
* - Candidate IDs are UUID v4 (cryptographically random), never derived from
|
|
16
|
+
* user input, to prevent path-traversal attacks.
|
|
17
|
+
* - `passed`, `promoted`, and `revoked` candidates are NEVER evicted.
|
|
18
|
+
* - The store directory is derived from the injected `directory` parameter
|
|
19
|
+
* (typically `ctx.directory`); no `process.cwd()` calls.
|
|
20
|
+
*/
|
|
21
|
+
import * as crypto from 'node:crypto';
|
|
22
|
+
import * as fs from 'node:fs/promises';
|
|
23
|
+
import type { ExternalSkillCandidate, ExternalSkillCandidateEvaluationVerdict } from '../config/schema';
|
|
24
|
+
import { atomicWriteFile } from '../evidence/task-file';
|
|
25
|
+
/** Configuration for the store. */
|
|
26
|
+
export interface ExternalSkillStoreConfig {
|
|
27
|
+
/** Maximum number of candidates before FIFO eviction kicks in. */
|
|
28
|
+
max_candidates: number;
|
|
29
|
+
}
|
|
30
|
+
/** Optional filters for listing candidates. */
|
|
31
|
+
export interface ExternalSkillListFilter {
|
|
32
|
+
/** Restrict to candidates with this evaluation verdict. */
|
|
33
|
+
verdict?: ExternalSkillCandidateEvaluationVerdict;
|
|
34
|
+
/** Restrict to candidates from this source type (e.g. 'github'). */
|
|
35
|
+
source_type?: string;
|
|
36
|
+
/** Restrict to candidates with this exact source URL. */
|
|
37
|
+
source_url?: string;
|
|
38
|
+
/** ISO datetime — only return candidates fetched at or after this time. */
|
|
39
|
+
since?: string;
|
|
40
|
+
}
|
|
41
|
+
/** Patch fields accepted by `update`. */
|
|
42
|
+
export type ExternalSkillCandidatePatch = Partial<Pick<ExternalSkillCandidate, 'evaluation_verdict' | 'risk_flags' | 'evaluation_history' | 'skill_name' | 'skill_description'>>;
|
|
43
|
+
/**
|
|
44
|
+
* Public interface returned by the factory function.
|
|
45
|
+
*
|
|
46
|
+
* Every method is scoped to the store directory derived at creation time.
|
|
47
|
+
*/
|
|
48
|
+
export interface ExternalSkillStore {
|
|
49
|
+
/** Create a new candidate and persist it atomically. */
|
|
50
|
+
add(candidate: Omit<ExternalSkillCandidate, 'id'>): Promise<ExternalSkillCandidate>;
|
|
51
|
+
/** Read a single candidate by UUID. Returns `null` if not found. */
|
|
52
|
+
get(id: string): Promise<ExternalSkillCandidate | null>;
|
|
53
|
+
/** List candidates with optional filters, sorted by `fetched_at` descending. */
|
|
54
|
+
list(filter?: ExternalSkillListFilter): Promise<ExternalSkillCandidate[]>;
|
|
55
|
+
/** Patch an existing candidate (read-modify-write). Appends to `evaluation_history`. */
|
|
56
|
+
update(id: string, patch: ExternalSkillCandidatePatch): Promise<ExternalSkillCandidate | null>;
|
|
57
|
+
/** Remove a candidate file. Returns `true` if the file existed and was deleted. */
|
|
58
|
+
delete(id: string): Promise<boolean>;
|
|
59
|
+
/**
|
|
60
|
+
* Evict the oldest `pending` or `rejected` candidates when the store
|
|
61
|
+
* exceeds `max_candidates`. Never evicts `passed`, `promoted`, or
|
|
62
|
+
* `revoked` candidates. Returns the number of evicted files.
|
|
63
|
+
*/
|
|
64
|
+
evictIfNeeded(): Promise<number>;
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Dependency-injection seam for testing.
|
|
68
|
+
*
|
|
69
|
+
* Tests can temporarily replace individual entries to exercise failure paths
|
|
70
|
+
* (e.g. file-not-found, permission errors) without `mock.module` leakage.
|
|
71
|
+
* Restore each entry in `afterEach` via the saved original reference.
|
|
72
|
+
*/
|
|
73
|
+
export declare const _internals: {
|
|
74
|
+
/** UUID generator — default is `crypto.randomUUID()`. */
|
|
75
|
+
randomUUID: typeof crypto.randomUUID;
|
|
76
|
+
/** Async filesystem operations. */
|
|
77
|
+
fs: {
|
|
78
|
+
mkdir: typeof fs.mkdir;
|
|
79
|
+
readFile: typeof fs.readFile;
|
|
80
|
+
readdir: typeof fs.readdir;
|
|
81
|
+
unlink: typeof fs.unlink;
|
|
82
|
+
};
|
|
83
|
+
/** Atomic write primitive (temp-file + rename). */
|
|
84
|
+
atomicWriteFile: typeof atomicWriteFile;
|
|
85
|
+
};
|
|
86
|
+
/**
|
|
87
|
+
* Create an `ExternalSkillStore` scoped to the given project root directory.
|
|
88
|
+
*
|
|
89
|
+
* The store persists candidate files under
|
|
90
|
+
* `<directory>/.swarm/skills/candidates/<uuid>.json`.
|
|
91
|
+
*
|
|
92
|
+
* @param directory — Project root (typically `ctx.directory`). Must NOT contain
|
|
93
|
+
* user-controlled path components.
|
|
94
|
+
* @param config — Store configuration including capacity limits.
|
|
95
|
+
*/
|
|
96
|
+
export declare function createExternalSkillStore(directory: string, config: ExternalSkillStoreConfig): ExternalSkillStore;
|
|
package/dist/services/external-skill-validator.d.ts
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validation gates for external skill candidates.
|
|
3
|
+
*
|
|
4
|
+
* Provides shared types and individual gate functions that scan candidate
|
|
5
|
+
* fields for security threats (prompt injection, unsafe instructions,
|
|
6
|
+
* provenance integrity). Each gate returns a structured `ValidationGateResult`
|
|
7
|
+
* that the curation pipeline can use to block, warn, or pass a candidate.
|
|
8
|
+
*
|
|
9
|
+
* Gate 1 — `scanPromptInjection`: static regex-based detection of prompt-
|
|
10
|
+
* injection patterns, prototype pollution, script injection, and obfuscated
|
|
11
|
+
* content in candidate fields. Severity is modulated by the candidate's trust
|
|
12
|
+
* level (FR-004).
|
|
13
|
+
*
|
|
14
|
+
* Uses an `_internals` DI seam for testability — no `mock.module` leakage.
|
|
15
|
+
*/
|
|
16
|
+
import type { ExternalSkillCandidate } from '../config/schema';
|
|
17
|
+
/** Result from a single validation gate scan. */
|
|
18
|
+
export interface ValidationGateResult {
|
|
19
|
+
/** Which gate produced this result. */
|
|
20
|
+
gate: 'prompt_injection' | 'unsafe_instructions' | 'provenance_integrity';
|
|
21
|
+
/** Overall pass/fail/warn verdict. */
|
|
22
|
+
verdict: 'pass' | 'fail' | 'warn';
|
|
23
|
+
/** Individual findings from the scan. */
|
|
24
|
+
findings: ValidationFinding[];
|
|
25
|
+
/** The candidate fields that were scanned. */
|
|
26
|
+
fields_scanned: string[];
|
|
27
|
+
}
|
|
28
|
+
/** A single finding from a validation gate. */
|
|
29
|
+
export interface ValidationFinding {
|
|
30
|
+
/** What was detected. */
|
|
31
|
+
pattern: string;
|
|
32
|
+
/** Which candidate field triggered the finding. */
|
|
33
|
+
field: string;
|
|
34
|
+
/** Human-readable description. */
|
|
35
|
+
description: string;
|
|
36
|
+
/**
|
|
37
|
+
* Severity: 'error' blocks promotion, 'warning' is advisory
|
|
38
|
+
* (unless trust_level=low).
|
|
39
|
+
*/
|
|
40
|
+
severity: 'error' | 'warning';
|
|
41
|
+
/** The matched text snippet (truncated to 100 chars for safety). */
|
|
42
|
+
match: string;
|
|
43
|
+
}
|
|
44
|
+
/** Result of running all validation gates against a candidate. */
|
|
45
|
+
export interface CandidateEvaluationResult {
|
|
46
|
+
/** Individual gate results. */
|
|
47
|
+
gate_results: ValidationGateResult[];
|
|
48
|
+
/** Aggregated verdict across all gates. */
|
|
49
|
+
overall_verdict: 'passed' | 'quarantined';
|
|
50
|
+
/** All findings from all gates combined. */
|
|
51
|
+
all_findings: ValidationFinding[];
|
|
52
|
+
/** Risk flags derived from findings (unique pattern names). */
|
|
53
|
+
risk_flags: string[];
|
|
54
|
+
}
|
|
55
|
+
/** Describes a single detection pattern used by the prompt-injection gate. */
|
|
56
|
+
export interface PromptInjectionPattern {
|
|
57
|
+
/** Regex to test against field text. */
|
|
58
|
+
pattern: RegExp;
|
|
59
|
+
/** Human-readable name for the pattern. */
|
|
60
|
+
name: string;
|
|
61
|
+
/** Description shown in findings. */
|
|
62
|
+
description: string;
|
|
63
|
+
/** Base severity before trust-level modulation. */
|
|
64
|
+
severity: 'error' | 'warning';
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Static regex patterns for the prompt-injection gate (FR-004).
|
|
68
|
+
*
|
|
69
|
+
* ERROR-severity patterns always block promotion. WARNING-severity patterns
|
|
70
|
+
* are modulated by the candidate's trust level:
|
|
71
|
+
* - trust_level='low' → warnings promoted to errors
|
|
72
|
+
* - trust_level='medium'/'high' → warnings stay warnings
|
|
73
|
+
*/
|
|
74
|
+
export declare const PROMPT_INJECTION_PATTERNS: PromptInjectionPattern[];
|
|
75
|
+
/** Describes a single detection pattern used by the unsafe-instruction gate. */
|
|
76
|
+
export interface UnsafeInstructionPattern {
|
|
77
|
+
/** Regex to test against field text. */
|
|
78
|
+
pattern: RegExp;
|
|
79
|
+
/** Human-readable name for the pattern. */
|
|
80
|
+
name: string;
|
|
81
|
+
/** Description shown in findings. */
|
|
82
|
+
description: string;
|
|
83
|
+
/** Base severity before trust-level modulation. */
|
|
84
|
+
severity: 'error' | 'warning';
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Static regex patterns for the unsafe-instruction gate.
|
|
88
|
+
*
|
|
89
|
+
* Extends the DANGEROUS_COMMAND_PATTERNS and SECURITY_DEGRADING_PATTERNS from
|
|
90
|
+
* knowledge-validator.ts with additional destructive command, privilege
|
|
91
|
+
* escalation, shell execution, security bypass, and data exfiltration patterns.
|
|
92
|
+
*
|
|
93
|
+
* ERROR-severity patterns always block promotion. WARNING-severity patterns
|
|
94
|
+
* are modulated by the candidate's trust level:
|
|
95
|
+
* - trust_level='low' → warnings promoted to errors
|
|
96
|
+
* - trust_level='medium'/'high' → warnings stay warnings
|
|
97
|
+
*/
|
|
98
|
+
export declare const UNSAFE_INSTRUCTION_PATTERNS: UnsafeInstructionPattern[];
|
|
99
|
+
/** Rate limit defaults for validation operations (FR-007). */
|
|
100
|
+
export declare const VALIDATION_RATE_LIMITS: {
|
|
101
|
+
/** Maximum candidates per discovery invocation. */
|
|
102
|
+
readonly max_candidates_per_discovery: 50;
|
|
103
|
+
/** Maximum concurrent fetch operations. */
|
|
104
|
+
readonly max_concurrent_fetches: 5;
|
|
105
|
+
/** Timeout for individual fetch operations in milliseconds. */
|
|
106
|
+
readonly fetch_timeout_ms: 30000;
|
|
107
|
+
};
|
|
108
|
+
/**
|
|
109
|
+
* Scan an external skill candidate for prompt-injection patterns.
|
|
110
|
+
*
|
|
111
|
+
* Returns a `ValidationGateResult` with gate=`'prompt_injection'`.
|
|
112
|
+
* The verdict is modulated by `trustLevel`:
|
|
113
|
+
* - `'low'`: warnings promoted to errors → verdict is `'fail'` if any finding.
|
|
114
|
+
* - `'medium'`/`'high'`: warnings stay warnings → verdict is `'warn'` if only
|
|
115
|
+
* warnings, `'fail'` if any error-severity finding.
|
|
116
|
+
*/
|
|
117
|
+
export declare function scanPromptInjection(candidate: ExternalSkillCandidate, trustLevel?: 'low' | 'medium' | 'high'): ValidationGateResult;
|
|
118
|
+
/**
|
|
119
|
+
* Scan an external skill candidate for unsafe instruction patterns.
|
|
120
|
+
*
|
|
121
|
+
* Covers destructive commands, privilege escalation, shell execution
|
|
122
|
+
* vectors, security bypass instructions, and data exfiltration indicators.
|
|
123
|
+
*
|
|
124
|
+
* Returns a `ValidationGateResult` with gate=`'unsafe_instructions'`.
|
|
125
|
+
* The verdict is modulated by `trustLevel`:
|
|
126
|
+
* - `'low'`: warnings promoted to errors → verdict is `'fail'` if any finding.
|
|
127
|
+
* - `'medium'`/`'high'`: warnings stay warnings → verdict is `'warn'` if only
|
|
128
|
+
* warnings, `'fail'` if any error-severity finding.
|
|
129
|
+
*/
|
|
130
|
+
export declare function scanUnsafeInstructions(candidate: ExternalSkillCandidate, trustLevel?: 'low' | 'medium' | 'high'): ValidationGateResult;
|
|
131
|
+
/**
|
|
132
|
+
* Scan an external skill candidate for provenance field integrity.
|
|
133
|
+
*
|
|
134
|
+
* Validates SHA-256 hash format, fetched_at timing (not in future, not stale),
|
|
135
|
+
* source_url validity, publisher presence, and content-hash verification.
|
|
136
|
+
*
|
|
137
|
+
* Returns a `ValidationGateResult` with gate=`'provenance_integrity'`.
|
|
138
|
+
* The verdict is modulated by `trustLevel`:
|
|
139
|
+
* - `'low'`: warnings promoted to errors → verdict is `'fail'` if any finding.
|
|
140
|
+
* - `'medium'`/`'high'`: warnings stay warnings → verdict is `'warn'` if only
|
|
141
|
+
* warnings, `'fail'` if any error-severity finding.
|
|
142
|
+
*/
|
|
143
|
+
export declare function scanProvenanceIntegrity(candidate: ExternalSkillCandidate, trustLevel?: 'low' | 'medium' | 'high', ttlDays?: number): ValidationGateResult;
|
|
144
|
+
/**
|
|
145
|
+
* Run all three validation gates against a candidate and produce an
|
|
146
|
+
* aggregated evaluation result (FR-004, FR-007).
|
|
147
|
+
*
|
|
148
|
+
* Gates are run sequentially: prompt-injection → unsafe-instructions →
|
|
149
|
+
* provenance-integrity. Any gate that returns `'fail'` causes the overall
|
|
150
|
+
* verdict to be `'quarantined'`. Warnings are advisory unless trust_level
|
|
151
|
+
* is `'low'` (which promotes them to errors inside each gate).
|
|
152
|
+
*/
|
|
153
|
+
export declare function evaluateCandidate(candidate: ExternalSkillCandidate, options?: {
|
|
154
|
+
trust_level?: 'low' | 'medium' | 'high';
|
|
155
|
+
ttl_days?: number;
|
|
156
|
+
}): CandidateEvaluationResult;
|
|
157
|
+
export declare const _internals: {
|
|
158
|
+
getTimestamp: () => string;
|
|
159
|
+
computeSha256: (content: string) => string;
|
|
160
|
+
};
|
|
package/dist/services/knowledge-diagnostics.d.ts
CHANGED
|
@@ -27,6 +27,25 @@ export interface KnowledgeDebugMeta {
|
|
|
27
27
|
event_count: number;
|
|
28
28
|
retrieval_events_7d: number;
|
|
29
29
|
cache_status: 'fresh' | 'stale' | 'unknown';
|
|
30
|
+
/**
|
|
31
|
+
* Learning-loop telemetry (Changes 1–6). Surfaces the health of the
|
|
32
|
+
* self-improvement pipeline: directives awaiting curation, reflection
|
|
33
|
+
* candidates not yet folded in, learned synonyms, and enforcement posture.
|
|
34
|
+
*/
|
|
35
|
+
learning: {
|
|
36
|
+
/** Lessons withheld from the active store pending actionability (Change 4). */
|
|
37
|
+
unactionable_queue_depth: number;
|
|
38
|
+
/** Micro-reflection insight candidates not yet consumed by the curator (Change 6). */
|
|
39
|
+
insight_candidates_pending: number;
|
|
40
|
+
/** Learned tag co-occurrence synonym pairs on disk (Change 5). */
|
|
41
|
+
synonym_pairs: number;
|
|
42
|
+
/** Active directives in `enforce` posture (Change 3). */
|
|
43
|
+
enforced_directives: number;
|
|
44
|
+
/** Active directives that have been auto-escalated at least once (Change 3). */
|
|
45
|
+
escalated_directives: number;
|
|
46
|
+
/** Knowledge-event volume bucketed by type (applied/ignored/violated/...). */
|
|
47
|
+
events_by_type: Record<string, number>;
|
|
48
|
+
};
|
|
30
49
|
}
|
|
31
50
|
/**
|
|
32
51
|
* Compute the debug-metadata block for the knowledge system. Best-effort: never
|
|
package/dist/services/skill-improver.d.ts
CHANGED
|
@@ -62,6 +62,17 @@ export interface SkillImproveResult {
|
|
|
62
62
|
sourceKnowledgeIds: string[];
|
|
63
63
|
}>;
|
|
64
64
|
model?: string;
|
|
65
|
+
/** Change 4 (Task 4.3): outcome of the unactionable-knowledge hardening pass. */
|
|
66
|
+
unactionableHardening?: {
|
|
67
|
+
hardened: number;
|
|
68
|
+
retired: number;
|
|
69
|
+
remaining: number;
|
|
70
|
+
};
|
|
71
|
+
/** Change 6 (Task 5.3): macro trajectory-motif proposals written this run. */
|
|
72
|
+
macroMotifs?: {
|
|
73
|
+
motifs: number;
|
|
74
|
+
proposalsWritten: number;
|
|
75
|
+
};
|
|
65
76
|
}
|
|
66
77
|
interface InventorySnapshot {
|
|
67
78
|
knowledge: {
|
|
package/dist/services/status-service.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { AgentDefinition } from '../agents';
|
|
2
|
+
import { type RecentEscalation } from '../hooks/knowledge-escalator';
|
|
2
3
|
import { hasActiveFullAuto, hasActiveLeanTurbo } from '../state';
|
|
3
4
|
import { loadLeanTurboRunState } from '../turbo/lean/state';
|
|
4
5
|
/**
|
|
@@ -54,6 +55,8 @@ export interface StatusData {
|
|
|
54
55
|
specStaleStoredHash?: string;
|
|
55
56
|
/** Current spec.md hash on disk (null when spec.md is missing) */
|
|
56
57
|
specStaleCurrentHash?: string | null;
|
|
58
|
+
/** Directives auto-escalated in the last 7 days (Change 3). */
|
|
59
|
+
recentEscalations?: RecentEscalation[];
|
|
57
60
|
}
|
|
58
61
|
/**
|
|
59
62
|
* Get status data from the swarm directory.
|
|
package/dist/services/synonym-map.d.ts
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tag co-occurrence synonym map (Change 5 / Task 6.2).
|
|
3
|
+
*
|
|
4
|
+
* Retrieval is brittle when a query phrases a concept differently from the
|
|
5
|
+
* stored directive ("module mocks" vs "dependency seams"). Rather than ship a
|
|
6
|
+
* hand-curated thesaurus or a new NLP dependency, we learn synonyms from the
|
|
7
|
+
* corpus itself: tokens that repeatedly co-occur across an entry's
|
|
8
|
+
* triggers / tags / applies_to_tools / applies_to_agents are treated as
|
|
9
|
+
* related. A pair seen in at least `synonym_min_cooccurrence` distinct entries
|
|
10
|
+
* becomes a synonym edge that retrieval can expand a query along.
|
|
11
|
+
*
|
|
12
|
+
* State file: `.swarm/synonym-map.json` (validated through validateSwarmPath).
|
|
13
|
+
*
|
|
14
|
+
* SECURITY: the map is derived from on-disk knowledge entries, which can be
|
|
15
|
+
* attacker-influenced (auto-enrichment, hive imports). Every token is
|
|
16
|
+
* sanitised against control characters and length-bounded BEFORE it ever
|
|
17
|
+
* reaches the map, and the map is hard-capped (`synonym_map_max_pairs`,
|
|
18
|
+
* LRU-evicted by recency) so a flood of junk pairs cannot grow it without
|
|
19
|
+
* bound. Expansion is therefore bounded and cannot inject paths, regex
|
|
20
|
+
* metacharacters with effect, or arbitrarily long strings into the scorer.
|
|
21
|
+
*/
|
|
22
|
+
export declare const SYNONYM_MAP_FILENAME = "synonym-map.json";
|
|
23
|
+
/** A single learned co-occurrence edge between two distinct tokens. */
|
|
24
|
+
export interface SynonymPair {
|
|
25
|
+
/** Lexicographically-first member (sanitised). */
|
|
26
|
+
a: string;
|
|
27
|
+
/** Lexicographically-second member (sanitised). */
|
|
28
|
+
b: string;
|
|
29
|
+
/** Number of distinct entries in which both tokens co-occurred. */
|
|
30
|
+
count: number;
|
|
31
|
+
/** Monotonic recency marker (for LRU eviction). Higher = more recent. */
|
|
32
|
+
seq: number;
|
|
33
|
+
}
|
|
34
|
+
/** On-disk shape of `.swarm/synonym-map.json`. */
|
|
35
|
+
export interface SynonymMap {
|
|
36
|
+
version: 1;
|
|
37
|
+
/** Monotonic counter; the next recorded/touched pair takes `seq = ++cursor`. */
|
|
38
|
+
cursor: number;
|
|
39
|
+
/** Keyed by `pairKey(a, b)`. */
|
|
40
|
+
pairs: Record<string, SynonymPair>;
|
|
41
|
+
}
|
|
42
|
+
export declare function emptySynonymMap(): SynonymMap;
|
|
43
|
+
export declare function resolveSynonymMapPath(directory: string): string;
|
|
44
|
+
/**
|
|
45
|
+
* Normalise a candidate token to its canonical synonym-map form, or return
|
|
46
|
+
* `null` if it is unusable. Strips control characters (poisoning defence),
|
|
47
|
+
* lowercases, collapses internal whitespace to single spaces, trims, and
|
|
48
|
+
* enforces a length bound. Non-string input yields `null`.
|
|
49
|
+
*/
|
|
50
|
+
export declare function sanitizeToken(raw: unknown): string | null;
|
|
51
|
+
export declare function pairKey(a: string, b: string): string;
|
|
52
|
+
/** A subset of KnowledgeEntry fields relevant to synonym learning. */
|
|
53
|
+
export interface SynonymSourceEntry {
|
|
54
|
+
triggers?: string[];
|
|
55
|
+
tags?: string[];
|
|
56
|
+
applies_to_tools?: string[];
|
|
57
|
+
applies_to_agents?: string[];
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Collect the sanitised, de-duplicated token set that represents one entry for
|
|
61
|
+
* co-occurrence learning. Draws from the entry's triggers, tags,
|
|
62
|
+
* applies_to_tools, and applies_to_agents.
|
|
63
|
+
*/
|
|
64
|
+
export declare function tokensForEntry(entry: SynonymSourceEntry): string[];
|
|
65
|
+
/**
|
|
66
|
+
* Evict the least-recently-touched pairs until `map.pairs` is within
|
|
67
|
+
* `maxPairs`. Mutates `map` in place. Eviction order is by ascending `seq`
|
|
68
|
+
* (oldest first); ties broken by key for determinism.
|
|
69
|
+
*/
|
|
70
|
+
declare function evictToCap(map: SynonymMap, maxPairs: number): void;
|
|
71
|
+
/**
|
|
72
|
+
* In-memory (no I/O; mutates `map`): fold one entry's token set into the map, incrementing the co-occurrence
|
|
73
|
+
* count of every distinct token pair and refreshing its recency. Applies the
|
|
74
|
+
* LRU cap afterward. Returns the same `map` reference (mutated) for chaining.
|
|
75
|
+
*
|
|
76
|
+
* Each entry contributes at most +1 to any given pair (the token set is already
|
|
77
|
+
* de-duplicated), so a single entry repeating a tag cannot inflate a pair.
|
|
78
|
+
*/
|
|
79
|
+
export declare function recordEntryCooccurrences(map: SynonymMap, entry: SynonymSourceEntry, maxPairs?: number): SynonymMap;
|
|
80
|
+
/**
|
|
81
|
+
* Pure: rebuild the synonym map from scratch over a list of entries. Used by the
|
|
82
|
+
* curator after phase_complete so the map reflects the current corpus rather
|
|
83
|
+
* than drifting monotonically. Returns a fresh map.
|
|
84
|
+
*/
|
|
85
|
+
export declare function buildSynonymMap(entries: SynonymSourceEntry[], maxPairs?: number): SynonymMap;
|
|
86
|
+
/**
|
|
87
|
+
* Pure: derive an undirected adjacency index of synonyms from the map, keeping
|
|
88
|
+
* only pairs whose count is at or above `minCooccurrence`. Returns a Map from
|
|
89
|
+
* each token to the set of its synonym tokens.
|
|
90
|
+
*/
|
|
91
|
+
export declare function buildSynonymIndex(map: SynonymMap, minCooccurrence?: number): Map<string, Set<string>>;
|
|
92
|
+
/**
|
|
93
|
+
* Pure: expand a list of query tokens with their learned synonyms. Input tokens
|
|
94
|
+
* are sanitised first so the caller can pass raw query terms. Returns only the
|
|
95
|
+
* NEW synonym tokens (never the originals), de-duplicated, with a per-token cap
|
|
96
|
+
* so one over-connected token cannot dominate the candidate pool. Synonyms are
|
|
97
|
+
* emitted in sorted order and sliced — recency is deliberately ignored so the
|
|
98
|
+
* result is deterministic regardless of insertion order.
|
|
99
|
+
*/
|
|
100
|
+
export declare function expandTokens(index: Map<string, Set<string>>, queryTokens: string[], maxPerToken?: number): string[];
|
|
101
|
+
declare function isSynonymPair(value: unknown): value is SynonymPair;
|
|
102
|
+
/**
|
|
103
|
+
* Coerce arbitrary parsed JSON into a valid SynonymMap, dropping any malformed
|
|
104
|
+
* or unsafe pairs. Re-sanitises every token and re-derives the canonical key so
|
|
105
|
+
* a tampered file (control chars, mismatched key) cannot smuggle a poisoned
|
|
106
|
+
* token into retrieval. Enforces the same `maxPairs` LRU cap on READ that the
|
|
107
|
+
* write path enforces, so a tampered file with a huge pair count cannot make
|
|
108
|
+
* every retrieval pay an unbounded coerce/index cost. Returns a fresh empty map
|
|
109
|
+
* on any structural failure.
|
|
110
|
+
*/
|
|
111
|
+
export declare function coerceSynonymMap(parsed: unknown, maxPairs?: number): SynonymMap;
|
|
112
|
+
/**
|
|
113
|
+
* Read and validate the synonym map. Returns an empty map if absent/invalid.
|
|
114
|
+
* Bounded: a file larger than the `maxPairs`-derived byte ceiling is ignored
|
|
115
|
+
* WITHOUT being parsed, so a tampered/oversized map cannot blow up memory or CPU
|
|
116
|
+
* on the retrieval hot path. `maxPairs` is also enforced as an LRU cap on the
|
|
117
|
+
* coerced result.
|
|
118
|
+
*/
|
|
119
|
+
export declare function readSynonymMap(directory: string, maxPairs?: number): Promise<SynonymMap>;
|
|
120
|
+
declare function writeSynonymMapAtomic(filePath: string, map: SynonymMap): Promise<void>;
|
|
121
|
+
/**
|
|
122
|
+
* Atomically rebuild the synonym map from the supplied entries under a
|
|
123
|
+
* directory lock and persist it. Returns the written map. Intended to be called
|
|
124
|
+
* by the curator after phase_complete. Bounded by `maxPairs`.
|
|
125
|
+
*/
|
|
126
|
+
export declare function rebuildSynonymMap(directory: string, entries: SynonymSourceEntry[], maxPairs?: number): Promise<SynonymMap>;
|
|
127
|
+
/**
* Internal tuning constants and module-private helpers gathered on one exported
* object. NOTE(review): presumably a test/DI seam — confirm with the test suite.
*/
export declare const _internals: {
|
|
128
|
+
MAX_TOKEN_LENGTH: number;
|
|
129
|
+
DEFAULT_MAX_PAIRS: number;
|
|
130
|
+
DEFAULT_MIN_COOCCURRENCE: number;
|
|
131
|
+
DEFAULT_MAX_EXPANSIONS_PER_TOKEN: number;
|
|
132
|
+
evictToCap: typeof evictToCap;
|
|
133
|
+
isSynonymPair: typeof isSynonymPair;
|
|
134
|
+
writeSynonymMapAtomic: typeof writeSynonymMapAtomic;
|
|
135
|
+
};
|
|
136
|
+
export {};
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Macro-reflector trajectory clustering (Swarm Learning System, Change 6 /
|
|
3
|
+
* Task 5.3).
|
|
4
|
+
*
|
|
5
|
+
* On the skill-improver's scheduled (quota-gated) cadence, scan the last N task
|
|
6
|
+
* trajectories (`.swarm/evidence/<taskId>/trajectory.jsonl`), cluster repeated
|
|
7
|
+
* FAILURE motifs by a (tool, kind) signature, and emit one skill PROPOSAL per
|
|
8
|
+
* recurring motif to `.swarm/skills/proposals/`. Each proposal carries full
|
|
9
|
+
* provenance: a draft SKILL.md body, the cluster of source task ids (and any
|
|
10
|
+
* source knowledge ids), a verification predicate, and `applies_to_agents`.
|
|
11
|
+
*
|
|
12
|
+
* Read-only over the knowledge store; writes only proposal markdown (never
|
|
13
|
+
* active skills). Fail-open.
|
|
14
|
+
*/
|
|
15
|
+
/** Trajectories scanned per macro pass (the plan's N=200 window). */
|
|
16
|
+
export declare const MACRO_TRAJECTORY_WINDOW = 200;
|
|
17
|
+
/** A motif must recur across at least this many distinct tasks to propose. */
|
|
18
|
+
export declare const MOTIF_MIN_TASKS = 2;
|
|
19
|
+
/**
* One recurring failure pattern found by clustering task trajectories. Returned
* by `gatherFailureMotifs` and rendered into a proposal by `buildMotifProposal`.
* NOTE(review): `signature` is presumably the (tool, kind) clustering key and
* `taskIds` the distinct tasks exhibiting it — confirm against the implementation.
*/
export interface FailureMotif {
|
|
20
|
+
signature: string;
|
|
21
|
+
tool: string;
|
|
22
|
+
kind: string;
|
|
23
|
+
agent: string;
|
|
24
|
+
taskIds: string[];
|
|
25
|
+
sampleVerdicts: string[];
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Cluster failure motifs across the recent trajectory window. Returns motifs
|
|
29
|
+
* that recur across >= MOTIF_MIN_TASKS distinct tasks, most-frequent first.
|
|
30
|
+
*/
|
|
31
|
+
export declare function gatherFailureMotifs(directory: string, opts?: {
|
|
32
|
+
window?: number;
|
|
33
|
+
minTasks?: number;
|
|
34
|
+
}): Promise<FailureMotif[]>;
|
|
35
|
+
/** Render a draft SKILL.md proposal body for a motif (with full provenance). */
|
|
36
|
+
export declare function buildMotifProposal(motif: FailureMotif): string;
|
|
37
|
+
/**
* Result of `writeMotifProposals`.
* NOTE(review): `motifs` is presumably the number of recurring motifs found and
* `proposalsWritten` the paths of the proposal files written — confirm.
*/
export interface MotifProposalResult {
|
|
38
|
+
motifs: number;
|
|
39
|
+
proposalsWritten: string[];
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Run the macro motif pass and write one proposal per recurring motif. Returns
|
|
43
|
+
* a `MotifProposalResult` listing the written proposal paths. Fail-open; never throws.
|
|
44
|
+
*/
|
|
45
|
+
export declare function writeMotifProposals(directory: string, opts?: {
|
|
46
|
+
window?: number;
|
|
47
|
+
minTasks?: number;
|
|
48
|
+
maxProposals?: number;
|
|
49
|
+
}): Promise<MotifProposalResult>;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unactionable-knowledge hardening loop (Swarm Learning System, Change 4 /
|
|
3
|
+
* Task 4.3).
|
|
4
|
+
*
|
|
5
|
+
* Consumes `.swarm/knowledge-unactionable.jsonl` (entries quarantined by the
|
|
6
|
+
* Layer-5 actionability gate) during the skill-improver macro loop. For each
|
|
7
|
+
* queued entry it attempts to produce a hardened version with predicates +
|
|
8
|
+
* scope tags via the same quota-gated v3 enrichment used by the curator
|
|
9
|
+
* (Task 4.2). Entries that pass Layer 5 after hardening move from quarantined
|
|
10
|
+
* to the active store as candidates; entries that fail are marked
|
|
11
|
+
* `retire_candidate:true` (left in the queue for human review / eventual
|
|
12
|
+
* retirement). Already-marked retire candidates are never re-processed.
|
|
13
|
+
*
|
|
14
|
+
* Quota: every LLM attempt goes through `enrichLessonToV3`, which reserves one
|
|
15
|
+
* skill-improver quota slot per call — the loop can never exceed the shared
|
|
16
|
+
* daily budget. A per-run batch cap bounds worst-case cost further.
|
|
17
|
+
*/
|
|
18
|
+
import type { CuratorLLMDelegate } from '../hooks/curator.js';
|
|
19
|
+
import { type EnrichmentQuotaOptions } from '../hooks/knowledge-curator.js';
|
|
20
|
+
import { type UnactionableRecord } from '../hooks/knowledge-validator.js';
|
|
21
|
+
/** Max queue entries processed per improver run (bounds LLM cost per run). */
|
|
22
|
+
export declare const HARDENING_BATCH_LIMIT = 5;
|
|
23
|
+
/** Queue record shape with the retire flag added by this loop. */
|
|
24
|
+
export interface HardenableRecord extends UnactionableRecord {
|
|
25
|
+
retire_candidate?: boolean;
|
|
26
|
+
}
|
|
27
|
+
/** Counts reported by one `hardenUnactionableEntries` pass. */
export interface HardeningResult {
|
|
28
|
+
/** Entries promoted from the queue to the active store. */
|
|
29
|
+
hardened: number;
|
|
30
|
+
/** Entries newly marked retire_candidate (hardening failed). */
|
|
31
|
+
retired: number;
|
|
32
|
+
/** Entries left in the queue (including pre-existing retire candidates). */
|
|
33
|
+
remaining: number;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Run one hardening pass. Never throws; on any error the queue is left as-is
|
|
37
|
+
* and zeros are reported. No-op (besides counting) when no delegate is
|
|
38
|
+
* available — without an LLM there is no hardening attempt, and auto-retiring
|
|
39
|
+
* without an attempt would be wrong.
|
|
40
|
+
*/
|
|
41
|
+
export declare function hardenUnactionableEntries(params: {
|
|
42
|
+
directory: string;
|
|
43
|
+
llmDelegate?: CuratorLLMDelegate;
|
|
44
|
+
quota?: EnrichmentQuotaOptions;
|
|
45
|
+
batchLimit?: number;
|
|
46
|
+
dedupThreshold?: number;
|
|
47
|
+
}): Promise<HardeningResult>;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* external_skill_delete — Delete an external skill candidate from the quarantine store.
|
|
3
|
+
*
|
|
4
|
+
* Removes a candidate by ID. If the candidate was previously promoted, the
|
|
5
|
+
* promoted skill in `.opencode/skills/generated/` is NOT affected — it must be
|
|
6
|
+
* separately retired or revoked. Returns a disabled message when
|
|
7
|
+
* external_skills.curation_enabled is false.
|
|
8
|
+
*
|
|
9
|
+
* Uses an `_internals` DI seam for testability — no `mock.module` leakage.
|
|
10
|
+
*/
|
|
11
|
+
import type { ExternalSkillsConfig } from '../config/schema.js';
|
|
12
|
+
import { createSwarmTool } from './create-tool.js';
|
|
13
|
+
/**
* Dependency-injection seam for this tool: `loadConfig` takes a directory and
* returns an `ExternalSkillsConfig` or `undefined`.
*/
export declare const _internals: {
|
|
14
|
+
loadConfig: (directory: string) => ExternalSkillsConfig | undefined;
|
|
15
|
+
};
|
|
16
|
+
/** The `external_skill_delete` tool, typed as the return value of `createSwarmTool`. */
export declare const external_skill_delete: ReturnType<typeof createSwarmTool>;
|