open-multi-agent-kit 0.78.1 → 0.78.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. package/CHANGELOG.md +37 -0
  2. package/MATURITY.md +4 -0
  3. package/README.md +70 -1
  4. package/dist/benchmark/contracts.d.ts +116 -0
  5. package/dist/benchmark/contracts.js +6 -0
  6. package/dist/benchmark/fixtures.d.ts +11 -0
  7. package/dist/benchmark/fixtures.js +121 -0
  8. package/dist/benchmark/harness.d.ts +13 -0
  9. package/dist/benchmark/harness.js +191 -0
  10. package/dist/benchmark/shadow-mode.d.ts +17 -0
  11. package/dist/benchmark/shadow-mode.js +96 -0
  12. package/dist/cli/register-spec-agent-goal-commands.js +45 -0
  13. package/dist/cli/release-promotion-gate.d.ts +14 -0
  14. package/dist/cli/release-promotion-gate.js +71 -0
  15. package/dist/cli/v2/release-commands.d.ts +29 -0
  16. package/dist/cli/v2/release-commands.js +95 -0
  17. package/dist/commands/chat/native-root-loop.js +14 -1
  18. package/dist/commands/chat/slash/commands/session.js +19 -1
  19. package/dist/commands/goal-interview.d.ts +18 -0
  20. package/dist/commands/goal-interview.js +396 -0
  21. package/dist/commands/merge.js +102 -56
  22. package/dist/contracts/interview.d.ts +106 -0
  23. package/dist/contracts/interview.js +9 -0
  24. package/dist/contracts/provider-health.d.ts +37 -0
  25. package/dist/contracts/provider-health.js +49 -1
  26. package/dist/evidence/evidence-trust-score.d.ts +101 -0
  27. package/dist/evidence/evidence-trust-score.js +408 -0
  28. package/dist/evidence/index.d.ts +6 -0
  29. package/dist/evidence/index.js +3 -0
  30. package/dist/evidence/proof-trust-cli.d.ts +8 -0
  31. package/dist/evidence/proof-trust-cli.js +27 -0
  32. package/dist/evidence/proof-trust.d.ts +14 -0
  33. package/dist/evidence/proof-trust.js +381 -0
  34. package/dist/evidence/regression-proof-matrix.d.ts +42 -0
  35. package/dist/evidence/regression-proof-matrix.js +72 -0
  36. package/dist/goal/intent-frame.d.ts +6 -0
  37. package/dist/goal/intent-frame.js +21 -9
  38. package/dist/goal/interview-assimilation.d.ts +13 -0
  39. package/dist/goal/interview-assimilation.js +383 -0
  40. package/dist/goal/interview-question-bank.d.ts +11 -0
  41. package/dist/goal/interview-question-bank.js +225 -0
  42. package/dist/goal/interview-scoring.d.ts +31 -0
  43. package/dist/goal/interview-scoring.js +187 -0
  44. package/dist/goal/interview-session.d.ts +25 -0
  45. package/dist/goal/interview-session.js +116 -0
  46. package/dist/input/input-envelope.d.ts +22 -0
  47. package/dist/input/input-envelope.js +1 -0
  48. package/dist/orchestration/merge-arbiter.d.ts +91 -0
  49. package/dist/orchestration/merge-arbiter.js +376 -0
  50. package/dist/providers/health.d.ts +3 -0
  51. package/dist/providers/health.js +46 -0
  52. package/dist/providers/index.d.ts +1 -0
  53. package/dist/providers/index.js +1 -0
  54. package/dist/providers/provider-health.d.ts +8 -1
  55. package/dist/providers/provider-health.js +39 -0
  56. package/dist/providers/provider-task-runner.js +31 -0
  57. package/dist/providers/provider.d.ts +2 -0
  58. package/dist/providers/router.js +87 -3
  59. package/dist/providers/types.d.ts +4 -0
  60. package/dist/runtime/advanced-control-loop.d.ts +60 -0
  61. package/dist/runtime/advanced-control-loop.js +136 -0
  62. package/dist/runtime/agent-runtime.d.ts +10 -0
  63. package/dist/runtime/blast-radius.d.ts +10 -0
  64. package/dist/runtime/blast-radius.js +14 -0
  65. package/dist/runtime/contracts/evidence.d.ts +87 -0
  66. package/dist/runtime/contracts/evidence.js +7 -0
  67. package/dist/runtime/contracts/router-v2.d.ts +44 -0
  68. package/dist/runtime/contracts/router-v2.js +4 -0
  69. package/dist/runtime/contracts/weakness-remediation.d.ts +67 -0
  70. package/dist/runtime/contracts/weakness-remediation.js +36 -0
  71. package/dist/runtime/kimi-api-runtime.js +59 -1
  72. package/dist/runtime/proof-bundle-trust.d.ts +74 -0
  73. package/dist/runtime/proof-bundle-trust.js +100 -0
  74. package/dist/runtime/provider-maturity-gate.d.ts +43 -0
  75. package/dist/runtime/provider-maturity-gate.js +129 -0
  76. package/dist/runtime/public-surface.d.ts +93 -0
  77. package/dist/runtime/public-surface.js +146 -0
  78. package/dist/runtime/router-v2-scoring.d.ts +11 -0
  79. package/dist/runtime/router-v2-scoring.js +151 -0
  80. package/dist/runtime/tool-dispatch-contracts.d.ts +24 -3
  81. package/dist/runtime/tool-dispatch-contracts.js +42 -2
  82. package/dist/runtime/weakness-remediation-index.d.ts +27 -0
  83. package/dist/runtime/weakness-remediation-index.js +37 -0
  84. package/dist/safety/enforcement-engine.d.ts +89 -0
  85. package/dist/safety/enforcement-engine.js +279 -0
  86. package/dist/safety/tool-authority-gate.d.ts +40 -0
  87. package/dist/safety/tool-authority-gate.js +92 -0
  88. package/dist/schema/evidence.schema.d.ts +2 -2
  89. package/dist/schema/proof-bundle.schema.d.ts +28 -28
  90. package/dist/util/clipboard-image.d.ts +49 -0
  91. package/dist/util/clipboard-image.js +263 -0
  92. package/docs/2026-06-09/critical-issues.md +20 -0
  93. package/docs/2026-06-09/improvements.md +14 -0
  94. package/docs/2026-06-09/init-checklist.md +25 -0
  95. package/docs/2026-06-09/plan.md +20 -0
  96. package/docs/benchmark-design.md +122 -0
  97. package/docs/github-organic-promotion.md +127 -0
  98. package/docs/native-root-runtime-algorithms.md +301 -0
  99. package/package.json +8 -4
  100. package/readmeasset/ASSET_INDEX.md +1 -0
  101. package/templates/skills/agents/omk-agent-reach-websearch/SKILL.md +55 -0
  102. package/templates/skills/kimi/omk-agent-reach-websearch/SKILL.md +55 -0
@@ -0,0 +1,263 @@
1
+ /**
2
+ * Cross-platform clipboard image reader.
3
+ *
4
+ * Wraps the platform-specific clipboard reading from screenshot-store patterns
5
+ * into a reusable utility for the chat REPL, goal commands, and any input
6
+ * surface that needs Ctrl+V / paste image support.
7
+ *
8
+ * Platforms:
9
+ * - macOS: `pngpaste -` (brew) or `osascript` with TIFF→PNG conversion
10
+ * - Linux: `xclip -selection clipboard -target image/png -o`, falling back to `wl-paste --type image/png` (Wayland)
11
+ * - Windows: PowerShell System.Windows.Forms.Clipboard
12
+ *
13
+ * Output: PNG Buffer + saved file path under .omk/screenshots/
14
+ */
15
+ import { execFileSync } from "node:child_process";
16
+ import { mkdirSync, writeFileSync, readFileSync, existsSync, unlinkSync } from "node:fs";
17
+ import { createHash } from "node:crypto";
18
+ import { join, relative } from "node:path";
19
/** Directory, relative to the project root, under which pasted screenshots are saved. */
export const SCREENSHOT_DIR = ".omk/screenshots";
/** Largest image (in bytes) accepted by the high-level helpers: 20 MiB. */
export const MAX_IMAGE_BYTES = 20 * 1024 * 1024;
/** Time limit, in milliseconds, applied to every clipboard helper process. */
const CLIPBOARD_TIMEOUT_MS = 5000;
/** stdout cap passed as `maxBuffer` to the helper processes (twice MAX_IMAGE_BYTES). */
const CLIPBOARD_MAX_BUFFER = MAX_IMAGE_BYTES * 2;
/** Leading signature bytes for each supported image format, checked in order. */
const IMAGE_MAGIC = [
    ["png", [0x89, 0x50, 0x4e, 0x47]],
    ["jpg", [0xff, 0xd8, 0xff]],
    ["webp", [0x52, 0x49, 0x46, 0x46]],
    ["gif", [0x47, 0x49, 0x46, 0x38]],
];
/** "WEBP" FourCC that follows the 4-byte "RIFF" tag and 4-byte size field in a WebP file. */
const WEBP_FOURCC = [0x57, 0x45, 0x42, 0x50];
const WEBP_FOURCC_OFFSET = 8;
/** True when `buf` contains exactly `bytes` starting at `offset`. */
function hasBytesAt(buf, offset, bytes) {
    return buf.length >= offset + bytes.length
        && bytes.every((b, i) => buf[offset + i] === b);
}
/**
 * Identify an image format from its leading signature bytes.
 *
 * @param buf Buffer (or byte array) holding the start of the file.
 * @returns "png" | "jpg" | "webp" | "gif", or null when the bytes match none
 *          of them or `buf` is empty/missing.
 */
export function detectImageExt(buf) {
    if (!buf || buf.length === 0) {
        return null;
    }
    for (const [ext, magic] of IMAGE_MAGIC) {
        if (!hasBytesAt(buf, 0, magic)) {
            continue;
        }
        // "RIFF" alone also matches WAV, AVI and other RIFF containers; a WebP
        // file additionally carries the "WEBP" FourCC at byte offset 8.
        if (ext === "webp" && !hasBytesAt(buf, WEBP_FOURCC_OFFSET, WEBP_FOURCC)) {
            continue;
        }
        return ext;
    }
    return null;
}
37
/** MIME type for each image extension produced by detectImageExt. */
const MIME_BY_EXT = new Map([
    ["png", "image/png"],
    ["jpg", "image/jpeg"],
    ["webp", "image/webp"],
    ["gif", "image/gif"],
]);
/**
 * Look up the MIME type for an image extension; anything unrecognized maps to
 * the generic binary type.
 */
function mimeTypeForExt(ext) {
    const known = MIME_BY_EXT.get(ext);
    return known === undefined ? "application/octet-stream" : known;
}
/**
 * Build a `data:` URI from already base64-encoded image bytes.
 *
 * @param base64 Base64 payload (not re-encoded here).
 * @param ext    Image extension used to pick the MIME type.
 */
export function toDataUri(base64, ext) {
    const mime = mimeTypeForExt(ext);
    return "data:" + mime + ";base64," + base64;
}
49
/**
 * Create today's screenshot directory under the project root and pick a
 * unique file path inside it.
 *
 * @param projectRoot Project root directory.
 * @param ext         File extension without the dot (e.g. "png").
 * @returns `fullPath` (projectRoot joined with the screenshot path) and
 *          `relativePath` (relative to projectRoot, forward slashes only).
 */
function generatePath(projectRoot, ext) {
    // Take one timestamp so the date directory and the file name can never
    // disagree when the call straddles a (UTC) midnight boundary.
    const nowIso = new Date().toISOString();
    const dateDir = nowIso.slice(0, 10);
    // "2026-06-09T00:01:07.440Z" -> "2026-06-09T000107". The previous
    // slice(0, 15) stopped at the minutes and dropped the seconds.
    const timestamp = nowIso.replace(/[:.]/g, "").slice(0, 17);
    // Short random suffix keeps names unique within the same second.
    const hash = createHash("sha256").update(`${Date.now()}-${Math.random()}`).digest("hex").slice(0, 8);
    const fileName = `screenshot-${timestamp}-${hash}.${ext}`;
    const dir = join(projectRoot, SCREENSHOT_DIR, dateDir);
    mkdirSync(dir, { recursive: true });
    const fullPath = join(dir, fileName);
    // Normalize Windows separators so the relative path is portable.
    const relativePath = relative(projectRoot, fullPath).replace(/\\/g, "/");
    return { fullPath, relativePath };
}
60
+ // ── Platform readers ────────────────────────────────────────────────────────
61
/**
 * Read a PNG image from the macOS clipboard.
 *
 * Tries `pngpaste -` first, then falls back to an AppleScript that writes the
 * clipboard's PNG data to a temporary file, which is read back and deleted.
 *
 * @returns Buffer with the image bytes, or null when neither method yields data.
 */
function readMacClipboard() {
    // Try pngpaste first (faster, more reliable)
    try {
        const out = execFileSync("pngpaste", ["-"], {
            timeout: CLIPBOARD_TIMEOUT_MS,
            maxBuffer: CLIPBOARD_MAX_BUFFER,
            stdio: ["pipe", "pipe", "pipe"],
        });
        if (out.length > 0)
            return out;
    }
    catch {
        // pngpaste not installed or clipboard empty
    }
    // Fallback: osascript writes the clipboard PNG to a temp file and prints its path
    try {
        const script = `
set theFile to (POSIX path of (path to temporary items) & "omk-clip-" & (random number from 100000 to 999999) & ".png")
set img to the clipboard as «class PNGf»
set fRef to open for access POSIX file theFile with write permission
write img to fRef
close access fRef
return theFile
`;
        const filePath = execFileSync("osascript", ["-e", script], {
            timeout: CLIPBOARD_TIMEOUT_MS,
            encoding: "utf-8",
            // Without an explicit stdio, execFileSync forwards the child's
            // stderr to the parent, so every "no image on clipboard" error
            // from AppleScript would be printed into the user's terminal.
            stdio: ["pipe", "pipe", "pipe"],
        }).trim();
        if (filePath) {
            try {
                const buf = readFileSync(filePath);
                if (buf.length > 0)
                    return buf;
            }
            finally {
                // Always remove the temp file, even when reading it failed.
                try {
                    unlinkSync(filePath);
                }
                catch { /* ignore */ }
            }
        }
    }
    catch {
        // osascript failed
    }
    return null;
}
104
/**
 * Read a PNG image from the Linux clipboard.
 *
 * Runs `xclip` first and `wl-paste` (Wayland) second; the first command that
 * exits successfully with non-empty output wins.
 *
 * @returns Buffer with the image bytes, or null when both commands fail or
 *          produce no output.
 */
function readLinuxClipboard() {
    const candidates = [
        ["xclip", ["-selection", "clipboard", "-target", "image/png", "-o"]],
        ["wl-paste", ["--type", "image/png"]],
    ];
    for (const [command, args] of candidates) {
        try {
            const stdout = execFileSync(command, args, {
                timeout: CLIPBOARD_TIMEOUT_MS,
                maxBuffer: CLIPBOARD_MAX_BUFFER,
                stdio: ["pipe", "pipe", "pipe"],
            });
            if (stdout.length > 0) {
                return stdout;
            }
        }
        catch {
            // command not available or failed; try the next candidate
        }
    }
    return null;
}
132
/**
 * Read an image from the Windows clipboard by piping a PowerShell script to
 * `powershell.exe` on stdin.
 *
 * The script writes raw bytes to stdout and tries, in order: a 'PNG' /
 * 'image/png' clipboard format, a file-drop entry whose name ends in an image
 * extension, and finally `Clipboard.GetImage()` re-encoded as PNG.
 *
 * @returns Buffer with the bytes PowerShell emitted, or null when the process
 *          fails or prints nothing.
 */
function readWindowsClipboard() {
    // NOTE(review): script text is reproduced as it appears in the published
    // diff; leading indentation inside the literal is not significant to PowerShell.
    const script = `
$ErrorActionPreference = 'SilentlyContinue'
Add-Type -AssemblyName System.Windows.Forms
Add-Type -AssemblyName System.Drawing

function Emit-Bytes([byte[]]$Bytes) {
if ($null -eq $Bytes -or $Bytes.Length -eq 0) { exit 1 }
[Console]::OpenStandardOutput().Write($Bytes, 0, $Bytes.Length)
exit 0
}

# Try PNG format first
$data = [System.Windows.Forms.Clipboard]::GetDataObject()
if ($null -ne $data) {
foreach ($format in @('PNG', 'image/png')) {
if ($data.GetDataPresent($format)) {
$raw = $data.GetData($format)
if ($raw -is [System.IO.MemoryStream]) { Emit-Bytes $raw.ToArray() }
if ($raw -is [byte[]]) { Emit-Bytes $raw }
}
}
# File drop (screenshot tool saves to file then copies path)
if ($data.GetDataPresent([System.Windows.Forms.DataFormats]::FileDrop)) {
$files = [string[]]$data.GetData([System.Windows.Forms.DataFormats]::FileDrop)
foreach ($file in $files) {
if ($file -match '[.](png|jpg|jpeg|webp|gif)$' -and [System.IO.File]::Exists($file)) {
Emit-Bytes ([System.IO.File]::ReadAllBytes($file))
}
}
}
}

# Fallback: GetImage
if ([System.Windows.Forms.Clipboard]::ContainsImage()) {
$img = [System.Windows.Forms.Clipboard]::GetImage()
if ($null -ne $img) {
$ms = New-Object System.IO.MemoryStream
try {
$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png)
Emit-Bytes $ms.ToArray()
} finally {
$ms.Dispose()
$img.Dispose()
}
}
}
exit 1
`;
    const powershellArgs = ["-NoProfile", "-Command", "-"];
    const spawnOptions = {
        input: script,
        timeout: CLIPBOARD_TIMEOUT_MS,
        maxBuffer: CLIPBOARD_MAX_BUFFER,
        windowsHide: true,
        stdio: ["pipe", "pipe", "pipe"],
    };
    let imageBytes;
    try {
        imageBytes = execFileSync("powershell.exe", powershellArgs, spawnOptions);
    }
    catch {
        // PowerShell failed (non-zero exit, timeout, or not installed)
        return null;
    }
    return imageBytes.length > 0 ? imageBytes : null;
}
197
/**
 * Fetch image bytes from the system clipboard using the reader for the given
 * platform: "darwin" (pngpaste/osascript), "linux" (xclip/wl-paste) or
 * "win32" (PowerShell).
 *
 * @param platform Platform identifier; defaults to `process.platform`.
 * @returns Whatever the platform reader returns (image bytes or null); null
 *          for any other platform.
 */
export function readClipboardImage(platform = process.platform) {
    if (platform === "darwin") {
        return readMacClipboard();
    }
    if (platform === "linux") {
        return readLinuxClipboard();
    }
    if (platform === "win32") {
        return readWindowsClipboard();
    }
    return null;
}
210
+ // ── High-level API ──────────────────────────────────────────────────────────
211
/**
 * Read the clipboard image, validate it, save it under .omk/screenshots/, and
 * return both the file path and a base64 data URI for wire protocol use.
 *
 * Every failure — no image, oversized image, unrecognized format, or a
 * filesystem error while saving — is reported as `{ ok: false, error }`
 * rather than thrown, so callers only need to check `ok`.
 *
 * @param projectRoot Project root under which the screenshot directory lives.
 * @returns `{ ok: true, path, relativePath, dataUri, base64, ext }` on
 *          success, `{ ok: false, error }` otherwise.
 */
export function pasteClipboardImage(projectRoot) {
    const buf = readClipboardImage();
    if (!buf || buf.length === 0) {
        return { ok: false, error: "No image found in clipboard" };
    }
    if (buf.length > MAX_IMAGE_BYTES) {
        return { ok: false, error: `Clipboard image exceeds ${MAX_IMAGE_BYTES / (1024 * 1024)} MB` };
    }
    const ext = detectImageExt(buf);
    if (!ext) {
        return { ok: false, error: "Clipboard content is not a recognized image format (PNG/JPG/WebP/GIF)" };
    }
    let fullPath;
    let relativePath;
    try {
        // Creating the directory or writing the file can fail (read-only
        // checkout, full disk, bad projectRoot); keep the result-object
        // contract instead of letting the exception escape.
        ({ fullPath, relativePath } = generatePath(projectRoot, ext));
        writeFileSync(fullPath, buf);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return { ok: false, error: `Failed to save clipboard image: ${reason}` };
    }
    const base64 = buf.toString("base64");
    return {
        ok: true,
        path: fullPath,
        relativePath: `./${relativePath}`,
        dataUri: toDataUri(base64, ext),
        base64,
        ext,
    };
}
239
/**
 * Read an image file from disk, validate it, and return a base64 data URI.
 * Used for --image <file> flag support.
 *
 * Every failure — missing path, unreadable path (directory, permissions),
 * oversized file, or unrecognized format — is reported as
 * `{ ok: false, error }` rather than thrown.
 *
 * @param filePath Path of the image file to load.
 * @returns `{ ok: true, path, dataUri, base64, ext }` on success,
 *          `{ ok: false, error }` otherwise.
 */
export function readImageFile(filePath) {
    if (!existsSync(filePath)) {
        return { ok: false, error: `File not found: ${filePath}` };
    }
    let buf;
    try {
        // existsSync is also true for directories and unreadable files, and
        // the file can vanish between the check and the read.
        buf = readFileSync(filePath);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return { ok: false, error: `Cannot read file: ${filePath} (${reason})` };
    }
    if (buf.length > MAX_IMAGE_BYTES) {
        return { ok: false, error: `File exceeds ${MAX_IMAGE_BYTES / (1024 * 1024)} MB` };
    }
    const ext = detectImageExt(buf);
    if (!ext) {
        return { ok: false, error: "File is not a recognized image format (PNG/JPG/WebP/GIF)" };
    }
    const base64 = buf.toString("base64");
    return {
        ok: true,
        path: filePath,
        dataUri: toDataUri(base64, ext),
        base64,
        ext,
    };
}
@@ -0,0 +1,20 @@
1
+ # 2026-06-09 Critical Issues
2
+
3
+ ## Critical Init Status
4
+ - No critical init artifacts were missing when this daily file was generated.
5
+ ### Missing critical artifacts
6
+ - None detected.
7
+
8
+
9
+ ## Critical Artifacts Present
10
+ - ✅ `AGENTS.md` — top-level operating contract
11
+ - ✅ `.kimi/AGENTS.md` — Kimi-specific operating rules
12
+ - ✅ `.omk/config.toml` — OMK project runtime settings
13
+ - ✅ `.omk/agents/root.yaml` — root coordinator agent
14
+ - ✅ `.kimi/mcp.json` — Kimi project MCP registry
15
+ - ✅ `.omk/hooks/pre-shell-guard.sh` — destructive shell guard
16
+ - ✅ `.omk/hooks/protect-secrets.sh` — secret write guard
17
+ - ✅ `.omk/memory/graph-state.json` — local ontology graph database
18
+
19
+ ## Escalation Rule
20
+ - Treat missing shell/secret guards, root agent config, MCP registry, or ontology graph as critical until restored.
@@ -0,0 +1,14 @@
1
+ # 2026-06-09 Improvements
2
+
3
+ ## Current Improvement Backlog
4
+ ### Optional init/support artifacts to add or refresh
5
+ - None detected.
6
+
7
+ ### Critical init artifacts currently blocking reliable chat startup
8
+ - None detected.
9
+
10
+
11
+ ## Suggested Focus
12
+ - Keep `omk chat` startup idempotent and non-destructive.
13
+ - Prefer local graph memory for default ontology state.
14
+ - Keep generated daily docs small, dated, and safe to edit by hand.
@@ -0,0 +1,25 @@
1
+ # 2026-06-09 Required Init Checklist
2
+
3
+ **Run ID:** chat-2026-06-09T00-01-07-440Z-28177
4
+ **Ontology graph:** `.omk/memory/graph-state.json`
5
+
6
+ ## Required Artifacts
7
+ - ✅ `AGENTS.md` — critical; top-level operating contract
8
+ - ✅ `.kimi/AGENTS.md` — critical; Kimi-specific operating rules
9
+ - ✅ `DESIGN.md` — support; design/brand source of truth
10
+ - ✅ `.omk/config.toml` — critical; OMK project runtime settings
11
+ - ✅ `.omk/agents/root.yaml` — critical; root coordinator agent
12
+ - ✅ `.kimi/mcp.json` — critical; Kimi project MCP registry
13
+ - ✅ `.omk/mcp.json` — support; legacy OMK MCP fallback
14
+ - ✅ `.omk/lsp.json` — support; bundled TypeScript/Python LSP config
15
+ - ✅ `.omk/hooks/pre-shell-guard.sh` — critical; destructive shell guard
16
+ - ✅ `.omk/hooks/protect-secrets.sh` — critical; secret write guard
17
+ - ✅ `.omk/memory/graph-state.json` — critical; local ontology graph database
18
+ - ✅ `.kimi/skills` — support; Kimi skill directory
19
+ - ✅ `.agents/skills` — support; portable skill directory
20
+
21
+ ## Recovery Command
22
+ ```bash
23
+ omk init
24
+ omk doctor
25
+ ```
@@ -0,0 +1,20 @@
1
+ # 2026-06-09 OMK Chat Plan
2
+
3
+ **Run ID:** chat-2026-06-09T00-01-07-440Z-28177
4
+ **Generated by:** omk chat bootstrap
5
+
6
+ ## Purpose
7
+ - Start every chat with a dated workspace for planning, issue triage, and verification evidence.
8
+ - Keep ontology-backed memory available before the root coordinator starts.
9
+ - Make required init state visible without overwriting user-authored docs.
10
+
11
+ ## Today Plan
12
+ 1. Review `init-checklist.md` and resolve missing critical init artifacts first.
13
+ 2. Use `improvements.md` as the active improvement backlog.
14
+ 3. Use `critical-issues.md` for blocking defects, safety risks, and verification gaps.
15
+ 4. Record command evidence before claiming work is complete.
16
+
17
+ ## Stop Condition
18
+ - Critical init artifacts are present.
19
+ - Ontology graph exists at `.omk/memory/graph-state.json`.
20
+ - Any new code/docs changes have explicit verification evidence.
@@ -0,0 +1,122 @@
1
+ # OMK Control Plane Replay Benchmark Design
2
+
3
+ ## 1. Purpose
4
+
5
+ Design a reproducible benchmark suite that measures OMK control plane
6
+ performance across 10 representative task categories. The benchmark runs in
7
+ **shadow mode** (recorded traces, no live LLM calls) for baseline
8
+ reproducibility, with optional **live-evaluation mode** for regression
9
+ testing against real providers.
10
+
11
+ ## 2. Task Categories
12
+
13
+ | # | Category | Intent | Description |
14
+ |---|----------|--------|-------------|
15
+ | 1 | read-only repo Q&A | research | Agent answers questions about codebase structure |
16
+ | 2 | small bug fix | debugging | Single-file typo / logic fix |
17
+ | 3 | failing test repair | debugging | Update implementation to satisfy failing test |
18
+ | 4 | multi-file refactor | refactor | Rename/move symbols across 3+ files |
19
+ | 5 | CLI command task | shell-operation | Execute and verify CLI output |
20
+ | 6 | dependency update | coding | Bump package version, fix breaking changes |
21
+ | 7 | merge-conflict task | merge | Resolve git merge conflict automatically |
22
+ | 8 | security-sensitive task | review | Patch vulnerability with audit trail |
23
+ | 9 | provider failure fallback | debugging | Primary provider fails; fallback succeeds |
24
+ | 10 | quota/auth failure fallback | debugging | Quota/auth error triggers provider switch |
25
+
26
+ ## 3. Metrics
27
+
28
+ | Metric | Definition | Source |
29
+ |--------|-----------|--------|
30
+ | solve_rate | passed_tasks / total_tasks | harness result |
31
+ | evidence_trust_score | ETS v2 score per task | evidence-trust-score engine |
32
+ | false_done_rate | tasks claiming success with failing evidence / total_tasks | harness+ETS |
33
+ | fallback_success_rate | fallback attempts that succeed / total fallback attempts | router decision trace |
34
+ | router_regret | best_available_runtime_score − selected_runtime_score | shadow-mode diff |
35
+ | cost_per_solved_task | Σ costUsdEstimated / solved_count | attempt records |
36
+ | p95_latency | 95th percentile of task latencyMs | attempt records |
37
+ | rollback_rate | tasks rolled back / total tasks | decision trace |
38
+ | sandbox_violation_count | tasks with unexpected file writes outside worktree | sandbox audit |
39
+
40
+ **router_regret** is computed in shadow mode by scoring all candidates for
41
+ every decision and comparing the selected runtime’s composite against the
42
+ maximum composite.
43
+
44
+ ## 4. Reproducibility Contract
45
+
46
+ Every benchmark run must pin:
47
+ - **treeHash**: git commit SHA of the repo under test
48
+ - **seed**: PRNG seed for synthetic fixture generation
49
+ - **providerConfigHash**: hash of the runtime provider configuration
50
+ - **omkVersion**: package version
51
+ - **benchmarkSchemaVersion**: `omk.benchmark.v1`
52
+
53
+ Shadow-mode runs use pre-recorded `BenchmarkTrace` fixtures (see
54
+ `src/benchmark/fixtures.ts`). Live-evaluation mode records new traces into
55
+ `.omk/benchmarks/<runId>/`.
56
+
57
+ ## 5. Shadow Mode
58
+
59
+ Shadow mode runs router v1 and v2 side-by-side on identical inputs:
60
+ 1. Load a `BenchmarkTask` fixture.
61
+ 2. Run `createRuntimeRouter` (v1) and `createRouterV2ScoringEngine` (v2).
62
+ 3. Record both decisions into `ShadowModeRecord`.
63
+ 4. Compute `router_regret` for each.
64
+ 5. Diff v1/v2 selections and log disagreements.
65
+
66
+ No LLM API calls are made. Runtime `runNode` is replaced with a stub that
67
+ returns the recorded outcome from the fixture.
68
+
69
+ ## 6. Benchmark Harness Lifecycle
70
+
71
+ ```
72
+ loadConfig() → discoverTasks() → for each task:
73
+ setupWorktree() → runTask() → evaluateEvidence() → teardown()
74
+ → computeSummary() → writeJsonReport()
75
+ ```
76
+
77
+ The harness integrates with `scripts/run-tests.mjs` via:
78
+ ```bash
79
+ node scripts/run-benchmark.mjs --shadow --summary-json .omk/benchmarks/latest.json
80
+ ```
81
+
82
+ ## 7. CI Integration
83
+
84
+ A new `benchmark` job runs after `fast-gate` passes, both on merges to the
85
+ `main` branch and on the nightly cron schedule. It:
86
+ 1. Checks out the repo at the merge commit.
87
+ 2. Runs `npm run benchmark:shadow`.
88
+ 3. Uploads `.omk/benchmarks/latest.json` as a build artifact.
89
+ 4. Fails if `solve_rate < 0.85` or `false_done_rate > 0.05`.
90
+
91
+ ## 8. Directory Layout
92
+
93
+ ```
94
+ src/benchmark/
95
+ contracts.ts # BenchmarkTask, BenchmarkResult, BenchmarkSummary
96
+ harness.ts # runBenchmarkSuite(), runBenchmarkTask()
97
+ shadow-mode.ts # ShadowModeEngine, computeRouterRegret()
98
+ fixtures.ts # generateSyntheticTraces(), loadRecordedTraces()
99
+ scripts/
100
+ run-benchmark.mjs # CLI entrypoint
101
+ test/
102
+ benchmark-harness.test.mjs
103
+ .omk/benchmarks/
104
+ sample-run.json # example output
105
+ ```
106
+
107
+ ## 9. Extending the Benchmark
108
+
109
+ To add a new task category:
110
+ 1. Add intent mapping in `src/benchmark/fixtures.ts`.
111
+ 2. Create a fixture under `test/benchmark-fixtures/`.
112
+ 3. Add an evaluation rule in `src/benchmark/harness.ts`.
113
+ 4. Register the category in `scripts/run-benchmark.mjs`.
114
+
115
+ ## 10. Risks & Mitigations
116
+
117
+ | Risk | Mitigation |
118
+ |------|-----------|
119
+ | Fixture drift (codebase changes) | Pin treeHash; auto-regenerate fixtures in CI if drift detected |
120
+ | Shadow mode not representative of live behavior | Weekly live-evaluation job with small sample |
121
+ | Metrics gaming (fake evidence) | ETS v2 gaming penalty + runner-source requirement |
122
+ | Secret leakage in recorded traces | Redact with `redactTrace()` before persistence |
@@ -0,0 +1,127 @@
1
+ # GitHub organic promotion checklist
2
+
3
+ OMK's strongest GitHub search position is:
4
+
5
+ > Provider-neutral multi-agent control plane for coding agents: route runtimes, scope MCP tools, run DAG workers, verify evidence, and replay agent runs.
6
+
7
+ ## Repository About
8
+
9
+ Recommended GitHub About description:
10
+
11
+ ```txt
12
+ Provider-neutral multi-agent control plane for coding agents. Route runtimes, scope MCP tools, run DAG workers, verify evidence, and replay agent runs from the omk CLI.
13
+ ```
14
+
15
+ Apply after maintainer confirmation:
16
+
17
+ ```bash
18
+ gh repo edit dmae97/open-multi-agent-kit \
19
+ --description "Provider-neutral multi-agent control plane for coding agents. Route runtimes, scope MCP tools, run DAG workers, verify evidence, and replay agent runs from the omk CLI."
20
+ ```
21
+
22
+ ## GitHub Topics
23
+
24
+ Use all 20 topics:
25
+
26
+ ```txt
27
+ ai-agent
28
+ ai-agents
29
+ llm
30
+ coding-agent
31
+ ai-coding
32
+ agentic-coding
33
+ multi-agent
34
+ multi-agent-orchestration
35
+ agent-orchestration
36
+ agent-runtime
37
+ agent-control-plane
38
+ developer-tools
39
+ ai-devtools
40
+ cli
41
+ typescript
42
+ nodejs
43
+ mcp
44
+ model-context-protocol
45
+ workflow-automation
46
+ provider-neutral
47
+ ```
48
+
49
+ Apply after maintainer confirmation:
50
+
51
+ ```bash
52
+ gh repo edit dmae97/open-multi-agent-kit \
53
+ --add-topic ai-agent \
54
+ --add-topic ai-agents \
55
+ --add-topic llm \
56
+ --add-topic coding-agent \
57
+ --add-topic ai-coding \
58
+ --add-topic agentic-coding \
59
+ --add-topic multi-agent \
60
+ --add-topic multi-agent-orchestration \
61
+ --add-topic agent-orchestration \
62
+ --add-topic agent-runtime \
63
+ --add-topic agent-control-plane \
64
+ --add-topic developer-tools \
65
+ --add-topic ai-devtools \
66
+ --add-topic cli \
67
+ --add-topic typescript \
68
+ --add-topic nodejs \
69
+ --add-topic mcp \
70
+ --add-topic model-context-protocol \
71
+ --add-topic workflow-automation \
72
+ --add-topic provider-neutral
73
+ ```
74
+
75
+ ## Social preview
76
+
77
+ Upload this generated image in GitHub repository settings:
78
+
79
+ ```txt
80
+ readmeasset/social-preview.png
81
+ ```
82
+
83
+ It uses the message:
84
+
85
+ ```txt
86
+ OMK
87
+ Provider-Neutral Multi-Agent Control Plane
88
+ Route. Verify. Replay.
89
+ ```
90
+
91
+ ## Awesome-list PR entries
92
+
93
+ ### bradAGI/awesome-cli-coding-agents
94
+
95
+ Preferred section: `Harnesses & orchestration`.
96
+
97
+ ```md
98
+ - **[OMK](https://github.com/dmae97/open-multi-agent-kit)** `⭐ <current>` — Provider-neutral CLI control plane for coding agents: routes runtimes, scopes MCP, runs DAG workers, and verifies evidence before completion. MIT.
99
+ ```
100
+
101
+ ### e2b-dev/awesome-ai-sdks
102
+
103
+ ```md
104
+ ## [OMK](https://github.com/dmae97/open-multi-agent-kit)
105
+
106
+ OMK is a provider-neutral multi-agent control plane for coding workflows. It routes agent runtimes, scopes MCP tools, runs DAG-based workers, verifies evidence, and preserves replayable run artifacts.
107
+
108
+ ### Links
109
+
110
+ - [GitHub](https://github.com/dmae97/open-multi-agent-kit)
111
+ - [npm](https://www.npmjs.com/package/open-multi-agent-kit)
112
+ ```
113
+
114
+ ### punkpeye/awesome-mcp-devtools
115
+
116
+ Preferred section: `Frameworks` or `Development Tools`.
117
+
118
+ ```md
119
+ - [OMK](https://github.com/dmae97/open-multi-agent-kit) - TypeScript CLI control plane for coding agents with scoped MCP injection, provider routing, evidence gates, and replayable run telemetry.
120
+ ```
121
+
122
+ ## Measurement plan
123
+
124
+ - Topics applied + 7 days: inspect GitHub topic/search traffic.
125
+ - First awesome PR merged + 7 days: check `Referring sites` for that awesome repo.
126
+ - 2 weeks: compare visitors, clones, npm downloads, and popular README content.
127
+ - 4 weeks: review visitor-to-star conversion; 1-3% is acceptable for early OSS.