@pleri/olam-cli 0.1.161 → 0.1.162
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/dist/agent-stream/agent-sdk-to-chunks.js +20 -2
- package/dist/commands/bootstrap.d.ts.map +1 -1
- package/dist/commands/bootstrap.js +35 -11
- package/dist/commands/bootstrap.js.map +1 -1
- package/dist/commands/flywheel/migrate-overlays.d.ts +1 -0
- package/dist/commands/flywheel/migrate-overlays.d.ts.map +1 -1
- package/dist/commands/flywheel/migrate-overlays.js +29 -3
- package/dist/commands/flywheel/migrate-overlays.js.map +1 -1
- package/dist/commands/skills-source.d.ts.map +1 -1
- package/dist/commands/skills-source.js +57 -2
- package/dist/commands/skills-source.js.map +1 -1
- package/dist/commands/skills.d.ts.map +1 -1
- package/dist/commands/skills.js +14 -0
- package/dist/commands/skills.js.map +1 -1
- package/dist/image-digests.json +7 -7
- package/dist/index.js +996 -618
- package/dist/lib/bootstrap-kubernetes.d.ts.map +1 -1
- package/dist/lib/bootstrap-kubernetes.js +93 -13
- package/dist/lib/bootstrap-kubernetes.js.map +1 -1
- package/dist/mcp-server.js +568 -368
- package/hermes-bundle/version.json +1 -1
- package/host-cp/k8s/manifests/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
- package/host-cp/observability/grafana-port-forward.sh +12 -2
- package/host-cp/observability/kyverno-cardinality-mutate.sh +12 -2
- package/host-cp/observability/loki-ingest.sh +12 -2
- package/host-cp/observability/prom-no-double-grafana.sh +15 -5
- package/host-cp/peripheral-services/helm-values/grafana-values.yaml +159 -0
- package/host-cp/peripheral-services/helm-values/kube-prom-stack-values.yaml +229 -0
- package/host-cp/peripheral-services/helm-values/kyverno-values.yaml +85 -0
- package/host-cp/peripheral-services/helm-values/loki-values.yaml +166 -0
- package/host-cp/peripheral-services/helm-values/promtail-staging.yaml +92 -0
- package/host-cp/peripheral-services/helm-values/promtail-values.yaml +102 -0
- package/host-cp/peripheral-services/helm-values/traefik-values.yaml +73 -0
- package/host-cp/peripheral-services/manifests/20-namespace.yaml +6 -0
- package/host-cp/peripheral-services/manifests/24-deploy-kg-service.yaml +245 -0
- package/host-cp/peripheral-services/manifests/30-traefik-ingressroute-host-cp.yaml +22 -0
- package/host-cp/peripheral-services/manifests/40-traefik-ingressroute-kg.yaml +29 -0
- package/host-cp/peripheral-services/manifests/50-traefik-ingressroute-agent-memory.yaml +29 -0
- package/host-cp/peripheral-services/manifests/60-networkpolicy-ingress.yaml +80 -0
- package/host-cp/peripheral-services/manifests/65-networkpolicy-loki-prom-deny.yaml +67 -0
- package/host-cp/peripheral-services/manifests/80-grafana-dashboard-configmap.yaml +1349 -0
- package/host-cp/peripheral-services/manifests/90-prom-alert-cardinality.yaml +50 -0
- package/host-cp/peripheral-services/manifests/91-servicemonitor-host-cp.yaml +70 -0
- package/host-cp/peripheral-services/manifests/92-servicemonitor-kg-service.yaml +70 -0
- package/host-cp/peripheral-services/manifests/93-servicemonitor-memory-service.yaml +87 -0
- package/host-cp/peripheral-services/manifests/95-prom-recording-rules.yaml +108 -0
- package/host-cp/peripheral-services/manifests/96-kyverno-cardinality-mutate.yaml +195 -0
- package/host-cp/src/plan-chat-service.mjs +147 -1
- package/package.json +1 -1
|
@@ -53,6 +53,30 @@ const CHUNK_TYPES = new Set(['text', 'tool_use']);
|
|
|
53
53
|
// only if the chunks schema bumps; the two must move together.
|
|
54
54
|
const SCOPE_ID_RE = /^[A-Za-z0-9_.-]+$/;
|
|
55
55
|
|
|
56
|
+
// B5 (plan-chat-context-window-display Phase B): tables allowlisted for
|
|
57
|
+
// /v1/shape. Only these tables have server-side where-rewrite support; any
|
|
58
|
+
// other table=... param gets a 400. Guards against a client enumerating
|
|
59
|
+
// tables the service doesn't own.
|
|
60
|
+
const ALLOWED_SHAPE_TABLES = new Set(['chunks', 'message_usage']);
|
|
61
|
+
|
|
62
|
+
// B6 (plan-chat-context-window-display Phase B): context-window caps per
|
|
63
|
+
// model. Mirrors CONTEXT_CAPS from @olam/intelligence/src/llm-router/providers/claude.ts.
|
|
64
|
+
// Host-cp doesn't depend on intelligence, so this is a local copy. Keep in
|
|
65
|
+
// sync with the canonical table (audit:pricing-coverage checks coverage).
|
|
66
|
+
// Default: 200k. 1M only for claude-opus-4-7[1m].
|
|
67
|
+
const CONTEXT_CAPS_LOCAL = {
|
|
68
|
+
'claude-sonnet-4-6': 200_000,
|
|
69
|
+
'claude-haiku-4-5-20251001': 200_000,
|
|
70
|
+
'claude-opus-4-7': 200_000,
|
|
71
|
+
'claude-opus-4-7[1m]': 1_000_000,
|
|
72
|
+
};
|
|
73
|
+
const DEFAULT_CONTEXT_CAP = 200_000;
|
|
74
|
+
const THRESHOLD_PCT = 80; // emit dedup'd system chunk when ctx ≥ this %
|
|
75
|
+
|
|
76
|
+
// Sentinel prefix on threshold system chunks so the dedup query can find them.
|
|
77
|
+
// Keep this stable — changing it makes old dedup rows invisible to the guard.
|
|
78
|
+
const THRESHOLD_CHUNK_PREFIX = 'Context approaching compaction threshold:';
|
|
79
|
+
|
|
56
80
|
// PB2 — query params the server OWNS on /v1/shape. All three are stripped
|
|
57
81
|
// from what we forward to upstream Electric:
|
|
58
82
|
// - `world_id` + `session_id` are SEEDS for the server-derived `where`
|
|
@@ -204,6 +228,10 @@ export function createHandler({
|
|
|
204
228
|
shapeDebugLog,
|
|
205
229
|
createWorld,
|
|
206
230
|
destroyWorld,
|
|
231
|
+
/** B4 — optional override for tests. When supplied, replaces principalFromBearer
|
|
232
|
+
* so the test harness can inject a hardcoded server-resolved actor_id and verify
|
|
233
|
+
* the mismatch guard. Production callers omit this. */
|
|
234
|
+
resolveActor,
|
|
207
235
|
}) {
|
|
208
236
|
if (!pool) throw new Error('createHandler: { pool } required');
|
|
209
237
|
if (typeof bearer !== 'string' || bearer.length === 0) {
|
|
@@ -236,7 +264,26 @@ export function createHandler({
|
|
|
236
264
|
}
|
|
237
265
|
const invalid = validateChunkInput(body);
|
|
238
266
|
if (invalid) return badRequest(res, invalid);
|
|
239
|
-
const principal =
|
|
267
|
+
const principal = resolveActor
|
|
268
|
+
? resolveActor(bearer, body)
|
|
269
|
+
: principalFromBearer(bearer, body);
|
|
270
|
+
|
|
271
|
+
// B4 — mismatch guard: if the client explicitly supplied body.actor_id
|
|
272
|
+
// and it differs from the server-resolved actor_id, reject with 400.
|
|
273
|
+
// In single-secret G1-demo mode this never fires because principalFromBearer
|
|
274
|
+
// echoes back body.actor_id. When Phase A4 lands multi-secret resolution,
|
|
275
|
+
// this guard prevents a caller from spoofing a different actor.
|
|
276
|
+
if (
|
|
277
|
+
typeof body.actor_id === 'string' &&
|
|
278
|
+
body.actor_id.length > 0 &&
|
|
279
|
+
body.actor_id !== principal.actorId
|
|
280
|
+
) {
|
|
281
|
+
return badRequest(
|
|
282
|
+
res,
|
|
283
|
+
`actor_id mismatch: client supplied '${body.actor_id}' but bearer resolves to '${principal.actorId}'`,
|
|
284
|
+
);
|
|
285
|
+
}
|
|
286
|
+
|
|
240
287
|
try {
|
|
241
288
|
await pool.query(
|
|
242
289
|
`INSERT INTO chunks
|
|
@@ -254,6 +301,93 @@ export function createHandler({
|
|
|
254
301
|
body.chunk_type ?? 'text',
|
|
255
302
|
],
|
|
256
303
|
);
|
|
304
|
+
|
|
305
|
+
// B4 — capture message_usage when the adapter forwarded token usage.
|
|
306
|
+
// body.usage is optional (only present on assistant turns with SDK usage).
|
|
307
|
+
if (body.usage && typeof body.usage === 'object') {
|
|
308
|
+
const usage = body.usage;
|
|
309
|
+
const model = typeof body.model === 'string' && body.model.length > 0
|
|
310
|
+
? body.model
|
|
311
|
+
: 'claude-sonnet-4-6';
|
|
312
|
+
await pool.query(
|
|
313
|
+
`INSERT INTO message_usage
|
|
314
|
+
(world_id, session_id, message_id, actor_id, model,
|
|
315
|
+
input_tokens, output_tokens, cache_read_tokens, cache_create_tokens)
|
|
316
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
|
317
|
+
ON CONFLICT (message_id, actor_id) DO NOTHING`,
|
|
318
|
+
[
|
|
319
|
+
body.world_id,
|
|
320
|
+
body.session_id,
|
|
321
|
+
body.message_id,
|
|
322
|
+
principal.actorId,
|
|
323
|
+
model,
|
|
324
|
+
Number(usage.input_tokens ?? 0),
|
|
325
|
+
Number(usage.output_tokens ?? 0),
|
|
326
|
+
Number(usage.cache_read_input_tokens ?? 0),
|
|
327
|
+
Number(usage.cache_creation_input_tokens ?? 0),
|
|
328
|
+
],
|
|
329
|
+
);
|
|
330
|
+
|
|
331
|
+
// B6 — 80% threshold dedup'd system chunk.
|
|
332
|
+
// After inserting message_usage, check if the per-actor cumulative
|
|
333
|
+
// token count for this session has crossed 80% of the model's context
|
|
334
|
+
// cap. If so, and no prior threshold chunk exists for (session_id,
|
|
335
|
+
// actor_id), emit exactly one dedup'd system chunk.
|
|
336
|
+
//
|
|
337
|
+
// Token math: sum(input_tokens + cache_read_tokens + cache_create_tokens)
|
|
338
|
+
// per (session_id, actor_id), matching Claude Code statusline.
|
|
339
|
+
const contextCap = CONTEXT_CAPS_LOCAL[model] ?? DEFAULT_CONTEXT_CAP;
|
|
340
|
+
const sumResult = await pool.query(
|
|
341
|
+
`SELECT COALESCE(SUM(input_tokens + cache_read_tokens + cache_create_tokens), 0) AS total_used
|
|
342
|
+
FROM message_usage
|
|
343
|
+
WHERE session_id = $1 AND actor_id = $2`,
|
|
344
|
+
[body.session_id, principal.actorId],
|
|
345
|
+
);
|
|
346
|
+
const totalUsed = Number(sumResult.rows[0]?.total_used ?? 0);
|
|
347
|
+
const usedPct = Math.floor((totalUsed / contextCap) * 100);
|
|
348
|
+
|
|
349
|
+
if (usedPct >= THRESHOLD_PCT) {
|
|
350
|
+
// Dedup check: scan chunks for an existing threshold-crossing system
|
|
351
|
+
// chunk for this (session_id, actor_id). Prefix-match is sufficient
|
|
352
|
+
// because THRESHOLD_CHUNK_PREFIX is unique to this purpose.
|
|
353
|
+
const dupResult = await pool.query(
|
|
354
|
+
`SELECT 1 FROM chunks
|
|
355
|
+
WHERE session_id = $1 AND actor_id = $2 AND actor_type = 'system'
|
|
356
|
+
AND chunk LIKE $3
|
|
357
|
+
LIMIT 1`,
|
|
358
|
+
[body.session_id, principal.actorId, `${THRESHOLD_CHUNK_PREFIX}%`],
|
|
359
|
+
);
|
|
360
|
+
const alreadyEmitted = (dupResult.rows ?? []).length > 0;
|
|
361
|
+
|
|
362
|
+
if (!alreadyEmitted) {
|
|
363
|
+
const personaLabel = principal.actorId;
|
|
364
|
+
const thresholdChunk =
|
|
365
|
+
`${THRESHOLD_CHUNK_PREFIX} ${personaLabel} at ${usedPct}%`;
|
|
366
|
+
// Emit the threshold system chunk. Re-use the same message_id +
|
|
367
|
+
// a seq offset from the current seq (see below) to avoid PK conflict.
|
|
368
|
+
// Use body.seq + 1000 as a high-offset seq so it sorts AFTER the
|
|
369
|
+
// triggering chunk but doesn't collide with normal seq values.
|
|
370
|
+
const thresholdSeq = body.seq + 1000;
|
|
371
|
+
await pool.query(
|
|
372
|
+
`INSERT INTO chunks
|
|
373
|
+
(world_id, session_id, message_id, seq, actor_id, actor_type, role, chunk, chunk_type)
|
|
374
|
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
|
|
375
|
+
ON CONFLICT (message_id, seq) DO NOTHING`,
|
|
376
|
+
[
|
|
377
|
+
body.world_id,
|
|
378
|
+
body.session_id,
|
|
379
|
+
body.message_id,
|
|
380
|
+
thresholdSeq,
|
|
381
|
+
principal.actorId,
|
|
382
|
+
'system',
|
|
383
|
+
'system',
|
|
384
|
+
thresholdChunk,
|
|
385
|
+
'text',
|
|
386
|
+
],
|
|
387
|
+
);
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
}
|
|
257
391
|
} catch (err) {
|
|
258
392
|
if (err && typeof err === 'object' && 'code' in err && err.code === '23505') {
|
|
259
393
|
return send(res, 409, { error: 'duplicate', message: '(message_id, seq) already exists' });
|
|
@@ -276,6 +410,17 @@ export function createHandler({
|
|
|
276
410
|
// a constant-time comparison via timingSafeEqual.
|
|
277
411
|
if (!checkAuth(req)) return unauthorized(res);
|
|
278
412
|
|
|
413
|
+
// B5 — table allowlist. Only chunks + message_usage are supported; any
|
|
414
|
+
// other (or missing) table= value is rejected with 400. This guards against clients
|
|
415
|
+
// probing arbitrary tables via the shape proxy.
|
|
416
|
+
const tableParam = url.searchParams.get('table');
|
|
417
|
+
if (!tableParam || !ALLOWED_SHAPE_TABLES.has(tableParam)) {
|
|
418
|
+
return badRequest(
|
|
419
|
+
res,
|
|
420
|
+
`table query param must be one of: ${[...ALLOWED_SHAPE_TABLES].join(', ')}`,
|
|
421
|
+
);
|
|
422
|
+
}
|
|
423
|
+
|
|
279
424
|
// PB2 — server-derived scope. world_id + session_id query params are
|
|
280
425
|
// mandatory; the regex check is the SQL-injection defence for the
|
|
281
426
|
// upstream `where` interpolation below.
|
|
@@ -530,6 +675,7 @@ export async function startService(opts = {}) {
|
|
|
530
675
|
electricUrl,
|
|
531
676
|
shapeDebug: opts.shapeDebug,
|
|
532
677
|
shapeDebugLog: opts.shapeDebugLog,
|
|
678
|
+
resolveActor: opts.resolveActor,
|
|
533
679
|
});
|
|
534
680
|
const server = http.createServer((req, res) => {
|
|
535
681
|
handler(req, res).catch((err) => {
|