@thispointon/kondi-chat 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +556 -0
- package/bin/kondi-chat +56 -0
- package/bin/kondi-chat.js +72 -0
- package/package.json +55 -0
- package/scripts/demo.tape +49 -0
- package/scripts/postinstall.cjs +103 -0
- package/src/audit/analytics.ts +261 -0
- package/src/audit/ledger.ts +253 -0
- package/src/audit/telemetry.ts +165 -0
- package/src/cli/backend.ts +675 -0
- package/src/cli/commands.ts +419 -0
- package/src/cli/help.ts +182 -0
- package/src/cli/submit-helpers.ts +159 -0
- package/src/cli/submit.ts +539 -0
- package/src/cli/wizard.ts +121 -0
- package/src/context/bootstrap.ts +138 -0
- package/src/context/budget.ts +100 -0
- package/src/context/manager.ts +666 -0
- package/src/context/memory.ts +160 -0
- package/src/context/preflight.ts +176 -0
- package/src/context/project-brain.ts +101 -0
- package/src/context/receipts.ts +108 -0
- package/src/context/skills.ts +154 -0
- package/src/context/symbol-index.ts +240 -0
- package/src/council/profiles.ts +137 -0
- package/src/council/tool.ts +138 -0
- package/src/council-engine/cli/council-artifacts.ts +230 -0
- package/src/council-engine/cli/council-config.ts +178 -0
- package/src/council-engine/cli/council-session-export.ts +116 -0
- package/src/council-engine/cli/kondi.ts +98 -0
- package/src/council-engine/cli/llm-caller.ts +229 -0
- package/src/council-engine/cli/localStorage-shim.ts +119 -0
- package/src/council-engine/cli/node-platform.ts +68 -0
- package/src/council-engine/cli/run-council.ts +481 -0
- package/src/council-engine/cli/run-pipeline.ts +772 -0
- package/src/council-engine/cli/session-export.ts +153 -0
- package/src/council-engine/configs/councils/analysis.json +101 -0
- package/src/council-engine/configs/councils/code-planning.json +86 -0
- package/src/council-engine/configs/councils/coding.json +89 -0
- package/src/council-engine/configs/councils/debate.json +97 -0
- package/src/council-engine/configs/councils/solo-claude.json +34 -0
- package/src/council-engine/configs/councils/solo-gpt.json +34 -0
- package/src/council-engine/council/coding-orchestrator.ts +1205 -0
- package/src/council-engine/council/context-bootstrap.ts +147 -0
- package/src/council-engine/council/context-inspection.ts +42 -0
- package/src/council-engine/council/context-store.ts +763 -0
- package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
- package/src/council-engine/council/factory.ts +164 -0
- package/src/council-engine/council/index.ts +201 -0
- package/src/council-engine/council/ledger-store.ts +438 -0
- package/src/council-engine/council/prompts.ts +1689 -0
- package/src/council-engine/council/storage-cleanup.ts +164 -0
- package/src/council-engine/council/store.ts +1110 -0
- package/src/council-engine/council/synthesis.ts +291 -0
- package/src/council-engine/council/types.ts +845 -0
- package/src/council-engine/council/validation.ts +613 -0
- package/src/council-engine/pipeline/build-detect.ts +73 -0
- package/src/council-engine/pipeline/executor.ts +1048 -0
- package/src/council-engine/pipeline/index.ts +9 -0
- package/src/council-engine/pipeline/install-detect.ts +84 -0
- package/src/council-engine/pipeline/memory-store.ts +182 -0
- package/src/council-engine/pipeline/output-parsers.ts +146 -0
- package/src/council-engine/pipeline/run-output.ts +149 -0
- package/src/council-engine/pipeline/session-import.ts +177 -0
- package/src/council-engine/pipeline/store.ts +753 -0
- package/src/council-engine/pipeline/test-detect.ts +82 -0
- package/src/council-engine/pipeline/types.ts +401 -0
- package/src/council-engine/services/deliberationSummary.ts +114 -0
- package/src/council-engine/tsconfig.json +16 -0
- package/src/council-engine/types/mcp.ts +122 -0
- package/src/council-engine/utils/filterTools.ts +73 -0
- package/src/engine/apply.ts +238 -0
- package/src/engine/checkpoints.ts +237 -0
- package/src/engine/consultants.ts +347 -0
- package/src/engine/diff.ts +171 -0
- package/src/engine/errors.ts +102 -0
- package/src/engine/git-tools.ts +246 -0
- package/src/engine/hooks.ts +181 -0
- package/src/engine/loop-guard.ts +155 -0
- package/src/engine/permissions.ts +293 -0
- package/src/engine/pipeline.ts +376 -0
- package/src/engine/sub-agents.ts +133 -0
- package/src/engine/task-card.ts +185 -0
- package/src/engine/task-router.ts +256 -0
- package/src/engine/task-store.ts +86 -0
- package/src/engine/tools.ts +783 -0
- package/src/engine/verify.ts +111 -0
- package/src/mcp/client.ts +225 -0
- package/src/mcp/config.ts +120 -0
- package/src/mcp/tool-manager.ts +192 -0
- package/src/mcp/types.ts +61 -0
- package/src/providers/llm-caller.ts +943 -0
- package/src/providers/rate-limiter.ts +238 -0
- package/src/router/NOTES.md +28 -0
- package/src/router/collector.ts +474 -0
- package/src/router/embeddings.ts +286 -0
- package/src/router/index.ts +299 -0
- package/src/router/intent-router.ts +225 -0
- package/src/router/nn-router.ts +205 -0
- package/src/router/profiles.ts +309 -0
- package/src/router/registry.ts +565 -0
- package/src/router/rules.ts +274 -0
- package/src/router/train.py +408 -0
- package/src/session/store.ts +211 -0
- package/src/test-utils/mock-llm.ts +39 -0
- package/src/types.ts +322 -0
- package/src/web/manager.ts +311 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rate Limiter — per-provider RPM + TPM buckets with FIFO queueing.
|
|
3
|
+
*
|
|
4
|
+
* Every LLM call goes through `acquire()` before hitting the network;
|
|
5
|
+
* `recordResponse()` reconciles token estimates with actual usage;
|
|
6
|
+
* `recordThrottle()` pauses a bucket for a Retry-After window.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
|
|
10
|
+
import { dirname } from 'node:path';
|
|
11
|
+
import type { ProviderId } from '../types.ts';
|
|
12
|
+
|
|
13
|
+
/**
 * Rate-limit settings for a single provider.
 *
 * `rpm` and `tpm` are both the per-minute refill rate and the capacity of the
 * corresponding bucket. The two optional caps fall back to file-level
 * defaults when omitted.
 */
export interface ProviderLimits {
  /** Requests allowed per minute. */
  rpm: number;

  /** Tokens allowed per minute. */
  tpm: number;

  /** Upper bound on requests that may be in flight at the same time. */
  maxConcurrent?: number;

  /** Upper bound on callers waiting in the queue before `acquire()` rejects. */
  maxQueueLength?: number;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Point-in-time snapshot of one provider's bucket, as reported by
 * `RateLimiter.getStatus()`.
 */
export interface LimiterStatus {
  /** Provider key the bucket is registered under. */
  provider: string;

  /** Request budget currently consumed (capacity minus remaining, rounded). */
  rpmUsed: number;

  /** Request bucket capacity. */
  rpmLimit: number;

  /** Token budget currently consumed (capacity minus remaining, rounded). */
  tpmUsed: number;

  /** Token bucket capacity. */
  tpmLimit: number;

  /** Epoch-millisecond timestamp of the most recent pause deadline; absent if the bucket was never paused. */
  pausedUntil?: number;

  /** Number of callers waiting for capacity. */
  queueLength: number;

  /** Number of requests granted but not yet reported back. */
  activeRequests: number;

  /**
   * `paused` while a pause deadline is in the future, `throttled` while callers
   * are queued, `idle` when the request bucket is full and nothing is in
   * flight, otherwise `ok`.
   */
  status: 'ok' | 'throttled' | 'paused' | 'idle';
}
|
|
31
|
+
|
|
32
|
+
/** Built-in limits per provider, used when the config file does not override them. */
const DEFAULTS: Record<string, ProviderLimits> = {
  anthropic: {
    rpm: 50,
    tpm: 400_000,
    maxConcurrent: 10,
    maxQueueLength: 50,
  },
  openai: {
    rpm: 30,
    tpm: 150_000,
    maxConcurrent: 10,
    maxQueueLength: 50,
  },
  deepseek: {
    rpm: 60,
    tpm: 1_000_000,
    maxConcurrent: 10,
  },
  google: {
    rpm: 60,
    tpm: 1_000_000,
    maxConcurrent: 10,
  },
  xai: {
    rpm: 60,
    tpm: 500_000,
  },
  ollama: {
    rpm: 600,
    tpm: 100_000_000,
    maxConcurrent: 4,
  },
};

/** Fraction by which refill is reduced while a bucket is in its post-throttle slowdown window. */
const POST_THROTTLE_SLOWDOWN_PCT = 0.10;

/** Length of the post-throttle slowdown window (five minutes), in milliseconds. */
const POST_THROTTLE_DURATION_MS = 5 * 60_000;

/** Queue-length cap applied when a provider does not set `maxQueueLength`. */
const MAX_QUEUE_LENGTH = 50;

/** Concurrency cap applied when a provider does not set `maxConcurrent`. */
const MAX_CONCURRENT_DEFAULT = 10;
|
|
45
|
+
|
|
46
|
+
/**
 * Thrown when a provider's wait queue is already at its maximum length and a
 * new caller cannot be queued.
 */
export class RateLimitOverflowError extends Error {
  /** Provider whose queue overflowed. */
  readonly provider: string;

  /**
   * @param provider Provider key whose queue is full.
   */
  constructor(provider: string) {
    super(`Rate limit queue overflow for ${provider}`);
    // Without an explicit name the error prints and logs as a plain "Error".
    this.name = 'RateLimitOverflowError';
    this.provider = provider;
  }
}
|
|
49
|
+
|
|
50
|
+
/** A caller parked in a bucket's queue until capacity becomes available. */
interface Waiter {
  /** Token cost the caller asked to reserve. */
  cost: number;

  /** Settles the caller's pending promise successfully. */
  resolve: () => void;

  /** Settles the caller's pending promise with an error. */
  reject: (e: Error) => void;

  /** `Date.now()` value captured when the caller was queued. */
  queuedAt: number;
}
|
|
51
|
+
|
|
52
|
+
/**
 * Mutable rate-limit state for one provider: a request bucket and a token
 * bucket that refill continuously, plus pause/slowdown deadlines, the
 * in-flight counter and the queue of waiting callers.
 */
class Bucket {
  /** Maximum number of request credits the bucket can hold. */
  rpmCapacity: number;
  /** Maximum number of token credits the bucket can hold. */
  tpmCapacity: number;
  /** Request credits currently available (fractional while refilling). */
  rpmTokens: number;
  /** Token credits currently available. */
  tpmTokens: number;
  /** Timestamp (ms) up to which refill has been applied. */
  lastRefill: number;
  /** While `now` is before this timestamp, no acquisition succeeds. */
  pausedUntil = 0;
  /** While `now` is before this timestamp, refill runs at a reduced rate. */
  slowdownUntil = 0;
  /** Requests granted and not yet reported back. */
  activeRequests = 0;
  /** Callers waiting for capacity, oldest first. */
  queue: Waiter[] = [];
  maxConcurrent: number;
  maxQueueLength: number;

  /**
   * @param limits Provider limits; buckets start full and optional caps fall
   *   back to the file-level defaults.
   */
  constructor(public limits: ProviderLimits) {
    this.rpmCapacity = limits.rpm;
    this.tpmCapacity = limits.tpm;
    this.rpmTokens = limits.rpm;
    this.tpmTokens = limits.tpm;
    this.lastRefill = Date.now();
    this.maxConcurrent = limits.maxConcurrent ?? MAX_CONCURRENT_DEFAULT;
    this.maxQueueLength = limits.maxQueueLength ?? MAX_QUEUE_LENGTH;
  }

  /**
   * Credit both buckets for the time elapsed since the previous refill,
   * capped at capacity. Does nothing if `now` is not after `lastRefill`.
   *
   * @param now Current time in milliseconds.
   */
  refill(now: number): void {
    const elapsed = now - this.lastRefill;
    if (elapsed <= 0) return;
    // Inside the post-throttle window, credits accrue at a reduced rate.
    const rate = now < this.slowdownUntil ? (1 - POST_THROTTLE_SLOWDOWN_PCT) : 1;
    const minutes = (rate * elapsed) / 60_000;
    this.rpmTokens = Math.min(this.rpmCapacity, this.rpmTokens + this.limits.rpm * minutes);
    this.tpmTokens = Math.min(this.tpmCapacity, this.tpmTokens + this.limits.tpm * minutes);
    this.lastRefill = now;
  }

  /**
   * Try to reserve one request and `cost` tokens.
   *
   * @param cost Estimated token cost of the call.
   * @param now Current time in milliseconds.
   * @returns `true` if the reservation was made (and `activeRequests` was
   *   incremented); `false` if the bucket is paused, at its concurrency cap,
   *   or lacks credits.
   */
  tryAcquire(cost: number, now: number): boolean {
    if (now < this.pausedUntil) return false;
    if (this.activeRequests >= this.maxConcurrent) return false;
    this.refill(now);

    // Sanitize the estimate before it touches bucket state:
    //  - NaN would pass the `<` check below and turn tpmTokens into NaN,
    //    silently disabling the token limit for every later call;
    //  - a negative cost would mint credits;
    //  - a cost above capacity could never be satisfied, leaving the caller
    //    (and everyone queued behind it) waiting forever, so it is charged
    //    as a full bucket instead.
    const charge = Number.isNaN(cost) ? 0 : Math.min(Math.max(0, cost), this.tpmCapacity);

    if (this.rpmTokens < 1 || this.tpmTokens < charge) return false;
    this.rpmTokens -= 1;
    this.tpmTokens -= charge;
    this.activeRequests++;
    return true;
  }
}
|
|
95
|
+
|
|
96
|
+
/**
 * Per-provider rate limiter. Callers `acquire()` capacity before a request,
 * then report the outcome with `recordResponse()` (success) or
 * `recordThrottle()` (provider asked us to back off).
 */
export class RateLimiter {
  private buckets = new Map<string, Bucket>();

  /** Pending queue-drain timer per provider; at most one is armed per bucket. */
  private drainTimers = new Map<string, ReturnType<typeof setTimeout>>();

  /**
   * @param limits Limits keyed by provider; a bucket is created for each entry.
   */
  constructor(limits: Record<string, ProviderLimits>) {
    for (const [provider, providerLimits] of Object.entries(limits)) {
      this.buckets.set(provider, new Bucket(providerLimits));
    }
  }

  /** Look up a provider's bucket, creating it from the built-in defaults on first use. */
  private getBucket(provider: string): Bucket {
    let bucket = this.buckets.get(provider);
    if (!bucket) {
      bucket = new Bucket(DEFAULTS[provider] || { rpm: 60, tpm: 1_000_000 });
      this.buckets.set(provider, bucket);
    }
    return bucket;
  }

  /**
   * Reserve capacity for a call. Resolves when a slot is free.
   *
   * @param provider Provider key.
   * @param estimatedTokens Estimated token cost of the call.
   * @throws RateLimitOverflowError (as a rejection) when the call cannot be
   *   granted immediately and the provider's queue is already full.
   */
  async acquire(provider: string, estimatedTokens: number): Promise<void> {
    const bucket = this.getBucket(provider);
    // Only take the fast path when nobody is waiting; otherwise a cheap new
    // call could overtake queued callers and break FIFO ordering.
    if (bucket.queue.length === 0 && bucket.tryAcquire(estimatedTokens, Date.now())) return;
    if (bucket.queue.length >= bucket.maxQueueLength) {
      throw new RateLimitOverflowError(provider);
    }
    return new Promise<void>((resolve, reject) => {
      bucket.queue.push({ cost: estimatedTokens, resolve, reject, queuedAt: Date.now() });
      this.schedule(provider);
    });
  }

  /**
   * Request an immediate drain attempt for a provider's queue. Any timer that
   * is already pending is replaced, so repeated calls never accumulate
   * concurrent polling loops.
   */
  private schedule(provider: string): void {
    if (this.getBucket(provider).queue.length === 0) return;
    this.armDrain(provider, 0);
  }

  /** Arm (or re-arm) the single drain timer for a provider. */
  private armDrain(provider: string, delayMs: number): void {
    const pending = this.drainTimers.get(provider);
    if (pending !== undefined) clearTimeout(pending);
    this.drainTimers.set(provider, setTimeout(() => this.drain(provider), delayMs));
  }

  /**
   * Grant queued callers in FIFO order until the head cannot be satisfied,
   * then re-arm the timer if anyone is still waiting.
   */
  private drain(provider: string): void {
    this.drainTimers.delete(provider);
    const bucket = this.getBucket(provider);
    const now = Date.now();
    while (bucket.queue.length > 0) {
      const head = bucket.queue[0];
      if (!head || !bucket.tryAcquire(head.cost, now)) break;
      bucket.queue.shift();
      head.resolve();
    }
    if (bucket.queue.length > 0) {
      // Sleep until the pause ends if there is one, otherwise poll every 200 ms;
      // never spin faster than every 50 ms.
      const waitMs = Math.max(50, now < bucket.pausedUntil ? bucket.pausedUntil - now : 200);
      this.armDrain(provider, waitMs);
    }
  }

  /** Extend a bucket's pause deadline and start its post-throttle slowdown window. */
  private applyPause(bucket: Bucket, retryAfterMs: number): void {
    // A non-finite delay would turn the deadlines into NaN and disable them.
    const delay = Number.isFinite(retryAfterMs) ? retryAfterMs : 0;
    bucket.pausedUntil = Math.max(bucket.pausedUntil, Date.now() + delay);
    bucket.slowdownUntil = bucket.pausedUntil + POST_THROTTLE_DURATION_MS;
  }

  /**
   * Read a Retry-After header (delta-seconds or an HTTP date) as milliseconds
   * from now. Returns 0 when the header is absent or unparseable.
   */
  private static retryAfterMs(headers: Record<string, string>): number {
    const raw = String(headers['retry-after'] || headers['Retry-After'] || '').trim();
    if (!raw) return 0;
    if (/^\d+$/.test(raw)) return parseInt(raw, 10) * 1000;
    const until = new Date(raw).getTime();
    return Number.isNaN(until) ? 0 : Math.max(0, until - Date.now());
  }

  /**
   * Reconcile estimated vs actual and parse rate-limit headers.
   *
   * Releases the request's concurrency slot, refunds an over-estimate (or
   * charges an under-estimate) against the token bucket, and pauses the
   * bucket if the response carries a Retry-After header.
   *
   * @param provider Provider key.
   * @param actualInputTokens Input tokens actually consumed.
   * @param actualOutputTokens Output tokens actually consumed.
   * @param estimatedTokens The estimate that was passed to `acquire()`.
   * @param headers Optional response headers.
   */
  recordResponse(
    provider: string,
    actualInputTokens: number,
    actualOutputTokens: number,
    estimatedTokens: number,
    headers?: Record<string, string>,
  ): void {
    const bucket = this.getBucket(provider);
    bucket.activeRequests = Math.max(0, bucket.activeRequests - 1);

    // Refund over-estimate, charge under-estimate. Skip a non-finite delta so
    // missing usage data cannot poison the token bucket with NaN.
    const delta = estimatedTokens - (actualInputTokens + actualOutputTokens);
    if (Number.isFinite(delta)) {
      bucket.tpmTokens = Math.min(bucket.tpmCapacity, bucket.tpmTokens + delta);
    }

    if (headers) {
      const ms = RateLimiter.retryAfterMs(headers);
      // Pause directly instead of going through recordThrottle(): that method
      // releases a concurrency slot too, and this request's slot has already
      // been released above.
      if (ms > 0) this.applyPause(bucket, ms);
    }
    this.schedule(provider);
  }

  /**
   * Record that the provider throttled a request: pause the bucket for
   * `retryAfterMs`, start the slowdown window and release the request's
   * concurrency slot.
   *
   * @param provider Provider key.
   * @param retryAfterMs Back-off duration in milliseconds.
   */
  recordThrottle(provider: string, retryAfterMs: number): void {
    const bucket = this.getBucket(provider);
    this.applyPause(bucket, retryAfterMs);
    bucket.activeRequests = Math.max(0, bucket.activeRequests - 1);
    this.schedule(provider);
  }

  /** Snapshot every known bucket after bringing its refill up to date. */
  getStatus(): LimiterStatus[] {
    const now = Date.now();
    const out: LimiterStatus[] = [];
    for (const [provider, b] of this.buckets) {
      b.refill(now);
      let status: LimiterStatus['status'] = 'ok';
      if (now < b.pausedUntil) status = 'paused';
      else if (b.queue.length > 0) status = 'throttled';
      else if (b.rpmTokens === b.rpmCapacity && b.activeRequests === 0) status = 'idle';
      out.push({
        provider,
        rpmUsed: Math.round(b.rpmCapacity - b.rpmTokens),
        rpmLimit: b.rpmCapacity,
        tpmUsed: Math.round(b.tpmCapacity - b.tpmTokens),
        tpmLimit: b.tpmCapacity,
        pausedUntil: b.pausedUntil || undefined,
        queueLength: b.queue.length,
        activeRequests: b.activeRequests,
        status,
      });
    }
    return out;
  }

  /** Render `getStatus()` as a fixed-width text table, one provider per line. */
  format(): string {
    const rows = this.getStatus();
    if (rows.length === 0) return 'No rate limit buckets active.';
    // Pad the header with the same widths as the data columns so they line up.
    const lines = [`${'provider'.padEnd(12)} ${'rpm'.padEnd(13)} ${'tpm'.padEnd(20)} status`];
    for (const r of rows) {
      const rpm = `${r.rpmUsed}/${r.rpmLimit}`.padEnd(13);
      const tpm = `${r.tpmUsed.toLocaleString()}/${r.tpmLimit.toLocaleString()}`.padEnd(20);
      const extra = r.queueLength > 0 ? ` (${r.queueLength} queued)` : '';
      lines.push(`${r.provider.padEnd(12)} ${rpm} ${tpm} ${r.status}${extra}`);
    }
    return lines.join('\n');
  }
}
|
|
214
|
+
|
|
215
|
+
// ── Config loader + global instance ──────────────────────────────────
|
|
216
|
+
|
|
217
|
+
/**
 * Load per-provider limits from `<storageDir>/rate-limits.json`, layered over
 * the built-in defaults.
 *
 * If the file does not exist, a best-effort attempt is made to write the
 * defaults there and the defaults are returned.
 *
 * @param storageDir Directory that holds `rate-limits.json`.
 * @returns Built-in defaults with each provider entry in the file replacing
 *   the default entry of the same name.
 * @throws SyntaxError if the file is not valid JSON.
 * @throws Error if the file's shape or any provider entry is invalid.
 */
export function loadRateLimitConfig(storageDir: string): Record<string, ProviderLimits> {
  const path = `${storageDir}/rate-limits.json`;
  if (!existsSync(path)) {
    try {
      mkdirSync(dirname(path), { recursive: true });
      writeFileSync(path, JSON.stringify({ limits: DEFAULTS }, null, 2));
    } catch { /* non-fatal */ }
    return { ...DEFAULTS };
  }

  const isRecord = (v: unknown): v is Record<string, unknown> =>
    v !== null && typeof v === 'object' && !Array.isArray(v);
  const isCount = (v: unknown): v is number =>
    typeof v === 'number' && Number.isFinite(v) && v >= 0;

  // Fail closed on corrupt config: rate limits guard paid APIs and a silent
  // fallback could leave a user thinking a custom (stricter) config was in
  // effect. Surface the error; the caller decides whether to continue.
  const raw: unknown = JSON.parse(readFileSync(path, 'utf-8'));
  if (!isRecord(raw) || (raw.limits != null && !isRecord(raw.limits))) {
    throw new Error(`rate-limits.json is malformed (expected { limits: {...} })`);
  }

  // Validate every entry as well: a provider with a missing or non-numeric
  // rpm/tpm would otherwise produce a bucket full of NaN that never limits.
  const overrides = isRecord(raw.limits) ? raw.limits : {};
  for (const [provider, entry] of Object.entries(overrides)) {
    if (!isRecord(entry) || !isCount(entry.rpm) || !isCount(entry.tpm)) {
      throw new Error(
        `rate-limits.json is malformed (limits.${provider} needs non-negative numeric rpm and tpm)`,
      );
    }
    for (const key of ['maxConcurrent', 'maxQueueLength'] as const) {
      if (entry[key] != null && !isCount(entry[key])) {
        throw new Error(
          `rate-limits.json is malformed (limits.${provider}.${key} must be a non-negative number)`,
        );
      }
    }
  }

  // Every entry was validated above, so the cast is sound.
  return { ...DEFAULTS, ...(overrides as Record<string, ProviderLimits>) };
}
|
|
235
|
+
|
|
236
|
+
/** Process-wide limiter instance; `undefined` until one is installed. */
let globalRateLimiter: RateLimiter | undefined;

/**
 * Install the process-wide limiter, or clear it by passing `undefined`.
 *
 * @param r Limiter to install.
 */
export function setRateLimiter(r: RateLimiter | undefined): void {
  globalRateLimiter = r;
}

/**
 * @returns The limiter most recently passed to `setRateLimiter`, or
 *   `undefined` if none is installed.
 */
export function getRateLimiter(): RateLimiter | undefined {
  return globalRateLimiter;
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Router Design Notes
|
|
2
|
+
|
|
3
|
+
## From NVIDIA LLM Router v2 (vendor/llm-router)
|
|
4
|
+
|
|
5
|
+
### Threshold Tuning Analysis
|
|
6
|
+
The NVIDIA codebase (`nn_router.py:401-496`) has a systematic threshold tuning
|
|
7
|
+
approach worth replicating as a `/routing tune` command:
|
|
8
|
+
|
|
9
|
+
- Tests multiple confidence threshold configurations per model
|
|
10
|
+
- Measures accuracy vs cost savings tradeoff for each config
|
|
11
|
+
- Uses relative cost weights (frontier=1.0, mid=0.4, cheap=0.1)
|
|
12
|
+
- Outputs a table: threshold config → accuracy, model selection %, cost savings %
|
|
13
|
+
- Recommends configurations at different aggressiveness levels
|
|
14
|
+
|
|
15
|
+
This should be built when we have enough training data to evaluate.
|
|
16
|
+
|
|
17
|
+
### Intent-Based Routing (for cold-start)
|
|
18
|
+
The NVIDIA intent router (`hf_intent_objective_fn.py:53-77`) uses a prompt template
|
|
19
|
+
where model/route descriptions are embedded as XML, and a small LLM classifies
|
|
20
|
+
which route matches the user's intent. This solves the cold-start problem when a
|
|
21
|
+
new model is added but the NN hasn't been trained on it yet.
|
|
22
|
+
|
|
23
|
+
We adapted this as `src/router/intent-router.ts`.
|
|
24
|
+
|
|
25
|
+
## Three-Tier Routing Architecture
|
|
26
|
+
1. **NN Router** — fast, trained, handles known patterns (primary once trained)
|
|
27
|
+
2. **Intent Router** — LLM classifies prompt against model descriptions (cold-start)
|
|
28
|
+
3. **Rule Router** — phase/task-kind fallback (always available)
|