@pleri/olam-cli 0.1.157 → 0.1.159
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/doctor.d.ts +21 -10
- package/dist/commands/doctor.d.ts.map +1 -1
- package/dist/commands/doctor.js +95 -39
- package/dist/commands/doctor.js.map +1 -1
- package/dist/image-digests.json +7 -7
- package/dist/index.js +638 -289
- package/dist/index.js.map +1 -1
- package/dist/lib/host-side-proxy.d.ts +67 -0
- package/dist/lib/host-side-proxy.d.ts.map +1 -0
- package/dist/lib/host-side-proxy.js +177 -0
- package/dist/lib/host-side-proxy.js.map +1 -0
- package/dist/lib/k8s-secret-render.d.ts.map +1 -1
- package/dist/lib/k8s-secret-render.js +7 -4
- package/dist/lib/k8s-secret-render.js.map +1 -1
- package/dist/lib/upgrade-kubernetes.d.ts +17 -13
- package/dist/lib/upgrade-kubernetes.d.ts.map +1 -1
- package/dist/lib/upgrade-kubernetes.js +122 -165
- package/dist/lib/upgrade-kubernetes.js.map +1 -1
- package/hermes-bundle/version.json +1 -1
- package/host-cp/k8s/host-side/docker-socket-proxy.compose.yaml +58 -0
- package/host-cp/k8s/manifests/50-deployment.yaml +47 -70
- package/host-cp/k8s/manifests/auth-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/docker-socket-proxy/60-service.yaml +37 -0
- package/host-cp/k8s/manifests/kg-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/mcp-auth-service/50-deployment.yaml +1 -1
- package/host-cp/k8s/manifests/memory-service/50-deployment.yaml +1 -1
- package/host-cp/src/metrics.mjs +281 -0
- package/host-cp/src/server.mjs +31 -2
- package/host-cp/src/tasks-route.mjs +191 -0
- package/package.json +1 -1
|
@@ -18,38 +18,30 @@
|
|
|
18
18
|
# before the main container starts, granting UID-1000 write access on the
|
|
19
19
|
# freshly-provisioned PV. fsGroup alone is insufficient for hostPath volumes.
|
|
20
20
|
#
|
|
21
|
-
# docker
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
21
|
+
# docker access — NO LONGER VIA hostPath (changed in olam-k3d-on-mac-
|
|
22
|
+
# substrate-decision Phase B B2, 2026-05-21). The previous R3-A two-volume
|
|
23
|
+
# hostPath pattern is retracted: round-4 R4-W2-F showed virtiofs returns
|
|
24
|
+
# ENOTSUP on stat/statx of socket files, and that failure is unrecoverable
|
|
25
|
+
# at the containerd OCI runtime layer. host-cp now reaches docker via TCP
|
|
26
|
+
# through the docker-socket-proxy ExternalName Service in the olam
|
|
27
|
+
# namespace (packages/host-cp/k8s/manifests/docker-socket-proxy/60-service.yaml),
|
|
28
|
+
# which kube-dns resolves as a CNAME to host.k3d.internal. The actual
|
|
29
|
+
# proxy container runs on the operator's docker daemon (sibling to k3d),
|
|
30
|
+
# started by `olam upgrade` Step 0.7. See also
|
|
31
|
+
# packages/host-cp/src/lib/docker-request-options.mjs (both substrates now
|
|
32
|
+
# return identical TCP options).
|
|
25
33
|
#
|
|
26
|
-
# k3d cluster create
|
|
27
|
-
#
|
|
28
|
-
#
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
# This mounts the entire colima directory into the k3d node at /host-colima/.
|
|
32
|
-
# The docker socket appears at /host-colima/docker.sock inside the node.
|
|
33
|
-
# The Deployment then uses:
|
|
34
|
-
# - host-colima volume (type: Directory) for the init container chmod
|
|
35
|
-
# - docker-socket volume (type: Socket, source /host-colima/docker.sock)
|
|
36
|
-
# for the main container /var/run/docker.sock mount
|
|
37
|
-
#
|
|
38
|
-
# An init container (socket-perm) runs `chmod 666 /host-colima/docker.sock`
|
|
39
|
-
# as root BEFORE the main container starts. This grants the non-root main
|
|
40
|
-
# container (UID 1000) read+write access to the daemon socket.
|
|
41
|
-
# Deliberate platform-permission concession — see Decision #15.
|
|
42
|
-
# R3-A: init container mounts host-colima (directory) and runs chmod on the
|
|
43
|
-
# socket file inside it. No symlink init container needed — empirically
|
|
44
|
-
# verified in plan pass-2 on kuro-bear 2026-05-20.
|
|
34
|
+
# The operator's k3d cluster create command is therefore simpler — no
|
|
35
|
+
# `--volume $HOME/.colima/default/:/host-colima/@server:*` flag needed.
|
|
36
|
+
# See docs/operator/kubernetes-substrate-beta.md for the current install
|
|
37
|
+
# command.
|
|
45
38
|
#
|
|
46
39
|
# gh-config (/gh-config) and operator-repo (/operator-repo) remain hostPath
|
|
47
40
|
# volumes that resolve to paths inside the k3d node container.
|
|
48
|
-
# OPERATORS MUST pass these volume mounts when creating the k3d cluster
|
|
49
|
-
#
|
|
50
|
-
#
|
|
51
|
-
#
|
|
52
|
-
# guide surfaces this requirement prominently.
|
|
41
|
+
# OPERATORS MUST pass these volume mounts when creating the k3d cluster.
|
|
42
|
+
# Without these flags the gh-config and operator-repo mounts will be empty.
|
|
43
|
+
# The pod will still start — features that depend on GitHub auth or the
|
|
44
|
+
# operator repo will fail gracefully.
|
|
53
45
|
apiVersion: apps/v1
|
|
54
46
|
kind: Deployment
|
|
55
47
|
metadata:
|
|
@@ -108,28 +100,18 @@ spec:
|
|
|
108
100
|
volumeMounts:
|
|
109
101
|
- name: olam-home
|
|
110
102
|
mountPath: /data
|
|
111
|
-
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
image: busybox@sha256:73aaf090f3d85aa34ee199857f03fa3a95c8ede2ffd4cc2cdb5b94e566b11662
|
|
121
|
-
imagePullPolicy: IfNotPresent
|
|
122
|
-
securityContext:
|
|
123
|
-
runAsUser: 0
|
|
124
|
-
runAsNonRoot: false
|
|
125
|
-
allowPrivilegeEscalation: false
|
|
126
|
-
command: ["sh", "-c", "chmod 666 /host-colima/docker.sock"]
|
|
127
|
-
volumeMounts:
|
|
128
|
-
- name: host-colima
|
|
129
|
-
mountPath: /host-colima
|
|
103
|
+
# socket-perm init container REMOVED in olam-k3d-on-mac-substrate-decision
|
|
104
|
+
# Phase B B2 (2026-05-21). The R3-A two-volume hostPath approach for
|
|
105
|
+
# docker.sock has been retracted: round-4 R4-W2-F showed virtiofs
|
|
106
|
+
# ENOTSUP on socket-file stat blocks the mount entirely. host-cp now
|
|
107
|
+
# reaches docker via TCP through the docker-socket-proxy ExternalName
|
|
108
|
+
# Service in the olam namespace (see
|
|
109
|
+
# packages/host-cp/k8s/manifests/docker-socket-proxy/60-service.yaml).
|
|
110
|
+
# The proxy itself runs on the operator's docker daemon (sibling to
|
|
111
|
+
# k3d), started by `olam upgrade` Step 0.7 — not inside this Pod.
|
|
130
112
|
containers:
|
|
131
113
|
- name: olam-host-cp
|
|
132
|
-
image: ghcr.io/pleri/olam-host-cp@sha256:
|
|
114
|
+
image: ghcr.io/pleri/olam-host-cp@sha256:53c6548f6930231a6f905f4a3ae1f49dbc66e52233b64a09e539b4ffa21180db
|
|
133
115
|
imagePullPolicy: IfNotPresent
|
|
134
116
|
securityContext:
|
|
135
117
|
runAsNonRoot: true
|
|
@@ -158,8 +140,13 @@ spec:
|
|
|
158
140
|
readOnly: true
|
|
159
141
|
- name: tmp
|
|
160
142
|
mountPath: /tmp
|
|
161
|
-
-
|
|
162
|
-
|
|
143
|
+
# docker-socket volumeMount REMOVED in olam-k3d-on-mac-substrate-
|
|
144
|
+
# decision Phase B B2. Docker access now goes via TCP to the
|
|
145
|
+
# docker-socket-proxy ExternalName Service in the olam namespace.
|
|
146
|
+
# host-cp's `getDockerRequestOptions('kubernetes')` returns
|
|
147
|
+
# `{ host: 'docker-socket-proxy', port: 2375 }` (collapsed to the
|
|
148
|
+
# same value as the compose substrate's branch — see
|
|
149
|
+
# packages/host-cp/src/lib/docker-request-options.mjs).
|
|
163
150
|
readinessProbe:
|
|
164
151
|
httpGet:
|
|
165
152
|
path: /health
|
|
@@ -197,23 +184,13 @@ spec:
|
|
|
197
184
|
type: DirectoryOrCreate
|
|
198
185
|
- name: tmp
|
|
199
186
|
emptyDir: {}
|
|
200
|
-
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
# R3-A — Socket file within the colima directory (Decision R3-#1).
|
|
211
|
-
# Source is /host-colima/docker.sock — the socket file inside the k3d
|
|
212
|
-
# node's /host-colima directory (set by the colima parent-dir bind).
|
|
213
|
-
# Mounted at /var/run/docker.sock in the main container so host-cp can
|
|
214
|
-
# reach the operator's docker daemon without path changes in app code.
|
|
215
|
-
# The socket-perm init container runs chmod 666 on this path before
|
|
216
|
-
# the main container starts (Decision #15 — same root-init pattern).
|
|
217
|
-
hostPath:
|
|
218
|
-
path: /host-colima/docker.sock
|
|
219
|
-
type: Socket
|
|
187
|
+
# host-colima + docker-socket volumes REMOVED in olam-k3d-on-mac-
|
|
188
|
+
# substrate-decision Phase B B2 (2026-05-21). R3-A's two-volume
|
|
189
|
+
# hostPath approach is fully retracted: round-4 R4-W2-F demonstrated
|
|
190
|
+
# virtiofs ENOTSUP on socket-file stat is unrecoverable at the
|
|
191
|
+
# containerd OCI runtime layer (kubelet bypass via R4-W2-E was
|
|
192
|
+
# necessary-but-not-sufficient). host-cp now reaches docker via TCP
|
|
193
|
+
# through the docker-socket-proxy ExternalName Service — see
|
|
194
|
+
# packages/host-cp/k8s/manifests/docker-socket-proxy/60-service.yaml.
|
|
195
|
+
# The proxy itself runs on the operator's docker daemon (sibling to
|
|
196
|
+
# k3d), started by `olam upgrade` Step 0.7 on macOS.
|
|
@@ -70,7 +70,7 @@ spec:
|
|
|
70
70
|
mountPath: /data
|
|
71
71
|
containers:
|
|
72
72
|
- name: olam-auth-service
|
|
73
|
-
image: ghcr.io/pleri/olam-auth@sha256:
|
|
73
|
+
image: ghcr.io/pleri/olam-auth@sha256:d1b13f12d87d5b119d6495214c26d8c8255deb996d37193f3b4fa47363ab9367
|
|
74
74
|
imagePullPolicy: IfNotPresent
|
|
75
75
|
securityContext:
|
|
76
76
|
runAsNonRoot: true
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# ExternalName Service for the host-side docker-socket-proxy.
|
|
2
|
+
#
|
|
3
|
+
# Provides in-cluster DNS for pods to reach the host-side proxy
|
|
4
|
+
# container (defined in packages/host-cp/k8s/host-side/docker-socket-proxy.compose.yaml).
|
|
5
|
+
# The Service has NO backing Pod — `type: ExternalName` is a kube-dns
|
|
6
|
+
# CNAME alias to `host.k3d.internal`, the gateway address that k3d
|
|
7
|
+
# auto-provisions inside every node container.
|
|
8
|
+
#
|
|
9
|
+
# Decision #7 (round-4 plan pass 2): Universal across all k8s substrates
|
|
10
|
+
# (macOS+colima+virtiofs, Linux native k3d, WSL2). One codepath; the
|
|
11
|
+
# per-Pod cost of running an in-cluster proxy elsewhere is invisible
|
|
12
|
+
# against the maintenance tax of OS-conditional Service generation.
|
|
13
|
+
#
|
|
14
|
+
# Why ExternalName and not in-cluster Pod with hostPath:
|
|
15
|
+
# the in-cluster Pod would itself need to bind /var/run/docker.sock
|
|
16
|
+
# from the lima VM, hitting the same virtiofs ENOTSUP class that
|
|
17
|
+
# R4-W2-F exposed. The proxy must live OUTSIDE the k3d cluster, on the
|
|
18
|
+
# operator's colima docker daemon. ExternalName makes that
|
|
19
|
+
# transparent to consumers: host-cp configures
|
|
20
|
+
# { host: 'docker-socket-proxy', port: 2375 } regardless of where
|
|
21
|
+
# the actual proxy container lives.
|
|
22
|
+
apiVersion: v1
|
|
23
|
+
kind: Service
|
|
24
|
+
metadata:
|
|
25
|
+
name: docker-socket-proxy
|
|
26
|
+
namespace: olam
|
|
27
|
+
labels:
|
|
28
|
+
app: docker-socket-proxy
|
|
29
|
+
olam.io/component: host-stack
|
|
30
|
+
spec:
|
|
31
|
+
type: ExternalName
|
|
32
|
+
externalName: host.k3d.internal
|
|
33
|
+
ports:
|
|
34
|
+
- name: tcp-2375
|
|
35
|
+
port: 2375
|
|
36
|
+
targetPort: 2375
|
|
37
|
+
protocol: TCP
|
|
@@ -61,7 +61,7 @@ spec:
|
|
|
61
61
|
mountPath: /data
|
|
62
62
|
containers:
|
|
63
63
|
- name: olam-kg-service
|
|
64
|
-
image: ghcr.io/pleri/olam-kg-service@sha256:
|
|
64
|
+
image: ghcr.io/pleri/olam-kg-service@sha256:e7276f9ea4d359dcb8a0d623e701e290f51c565fc8b6e3c14bea75b1b780d23d
|
|
65
65
|
imagePullPolicy: IfNotPresent
|
|
66
66
|
securityContext:
|
|
67
67
|
runAsNonRoot: true
|
|
@@ -68,7 +68,7 @@ spec:
|
|
|
68
68
|
mountPath: /data
|
|
69
69
|
containers:
|
|
70
70
|
- name: olam-mcp-auth-service
|
|
71
|
-
image: ghcr.io/pleri/olam-mcp-auth@sha256:
|
|
71
|
+
image: ghcr.io/pleri/olam-mcp-auth@sha256:d53a7538ca405f4d8c0c4be67d3961617304f07623839eb0de75f9cd2b47b914
|
|
72
72
|
imagePullPolicy: IfNotPresent
|
|
73
73
|
securityContext:
|
|
74
74
|
runAsNonRoot: true
|
|
@@ -70,7 +70,7 @@ spec:
|
|
|
70
70
|
# bootstrap-placeholder comment + run `npm run refresh:manifest-digests`
|
|
71
71
|
# once ghcr.io/pleri/olam-memory-service has a real published digest.
|
|
72
72
|
# bootstrap-placeholder: pre-publish; refresh after first release
|
|
73
|
-
image: ghcr.io/pleri/olam-memory-service@sha256:
|
|
73
|
+
image: ghcr.io/pleri/olam-memory-service@sha256:023bde810b0594829c8aa553b88a64cf53b81b6374085c7a92fb6102450fa3ff
|
|
74
74
|
imagePullPolicy: IfNotPresent
|
|
75
75
|
securityContext:
|
|
76
76
|
runAsNonRoot: true
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
// Phase C Task C3 — hand-rolled Prometheus metrics registry for host-cp.
|
|
2
|
+
//
|
|
3
|
+
// Emits exactly two metric families:
|
|
4
|
+
// http_requests_total{service,route,method,status_code} counter
|
|
5
|
+
// http_request_duration_seconds{service,route,method} histogram
|
|
6
|
+
//
|
|
7
|
+
// TAXONOMY COMPLIANCE (NON-NEGOTIABLE):
|
|
8
|
+
// ONLY {service, route, method, status_code} labels allowed.
|
|
9
|
+
// BANNED: world_id, trace_id, user_id, request_id, operator_id.
|
|
10
|
+
// world_id surfaces via Prometheus exemplars in Phase D — NOT labels.
|
|
11
|
+
//
|
|
12
|
+
// No external npm deps — Prometheus text exposition is simple enough to
|
|
13
|
+
// produce with template literals. Avoids the prom-client footprint on a
|
|
14
|
+
// host-side service that has no other dependency on metrics tooling.
|
|
15
|
+
|
|
16
|
+
// ─── Route mapping ────────────────────────────────────────────────────────
|
|
17
|
+
//
|
|
18
|
+
// Raw req.url is a cardinality bomb: every unique URL is a new time series.
|
|
19
|
+
// We normalize dynamic path segments to stable patterns before labelling.
|
|
20
|
+
//
|
|
21
|
+
// RULES (exact paths are matched first, then prefix rules in order):
|
|
22
|
+
// /health → /health
|
|
23
|
+
// /api/bootstrap → /api/bootstrap
|
|
24
|
+
// /metrics → /metrics
|
|
25
|
+
// /api/host-stream → /api/host-stream
|
|
26
|
+
// /api/worlds/{id}/credentials/... → /api/worlds/:id/credentials/:action
|
|
27
|
+
// /api/worlds/{id}/tunnels/... → /api/worlds/:id/tunnels
|
|
28
|
+
// /api/worlds/{id}/pr → /api/worlds/:id/pr
|
|
29
|
+
// /api/worlds/{id}/progress → /api/worlds/:id/progress
|
|
30
|
+
// /api/worlds (no id) → /api/worlds
|
|
31
|
+
// /api/world/{id}/** → /api/world/:id/* (proxy routes)
|
|
32
|
+
// /api/admin/registry/... → /api/admin/registry
|
|
33
|
+
// /api/admin/upgrade → /api/admin/upgrade
|
|
34
|
+
// /api/admin/world-pr → /api/admin/world-pr
|
|
35
|
+
// /api/admin/world-pr/{id} → /api/admin/world-pr/:id
|
|
36
|
+
// /api/auth/credentials/... → /api/auth/credentials
|
|
37
|
+
// /api/auth/... → /api/auth
|
|
38
|
+
// /api/plan/conversations/{id}/... → /api/plan/conversations/:id
|
|
39
|
+
// /api/plan/conversations → /api/plan/conversations
|
|
40
|
+
// /api/plan/** → /api/plan
|
|
41
|
+
// /api/auth/events → /api/auth/events
|
|
42
|
+
// /api/version/status → /api/version/status
|
|
43
|
+
// /api/repos → /api/repos
|
|
44
|
+
// /api/runbooks → /api/runbooks
|
|
45
|
+
// /api/workspaces/match → /api/workspaces/match
|
|
46
|
+
// /api/workspaces → /api/workspaces
|
|
47
|
+
// /api/projects → /api/projects
|
|
48
|
+
// /api/processes/** → /api/processes
|
|
49
|
+
// /v1/chunks/** → /v1/chunks
|
|
50
|
+
// /v1/worlds/** → /v1/worlds
|
|
51
|
+
// /assets/** → /assets (SPA static assets)
|
|
52
|
+
// (other GET to static paths) → /static
|
|
53
|
+
// (unknown) → /unknown
|
|
54
|
+
|
|
55
|
+
/**
 * Paths that are their own route label. Checked before any prefix rule so
 * that e.g. /api/auth/events is not swallowed by the /api/auth/ prefix.
 */
const EXACT_ROUTES = new Set([
  '/health',
  '/api/bootstrap',
  '/metrics',
  '/api/host-stream',
  '/api/auth/events',
  '/api/version/status',
  '/api/repos',
  '/api/runbooks',
  '/api/workspaces/match',
  '/api/workspaces',
  '/api/projects',
  '/api/worlds',
  '/api/plan/conversations',
  '/api/plan/personas',
  '/api/admin/upgrade',
  '/api/admin/world-pr',
  '/api/admin/registry',
  '/api/tasks',
]);

/**
 * Collapse a request pathname into a stable, low-cardinality route pattern
 * suitable for use as a metric label.
 *
 * Exact paths are matched first, then prefix rules in the order written
 * below. Anything unmatched becomes '/unknown'.
 *
 * @param {string} pathname — URL pathname (no query string)
 * @returns {string} normalized route pattern
 */
export function pathToRoute(pathname) {
  // Normalize trailing slash for matching (keep bare / as /)
  const p = pathname.length > 1 ? pathname.replace(/\/$/, '') : pathname;

  if (EXACT_ROUTES.has(p)) return p;

  if (p.startsWith('/api/worlds/')) {
    // Only the part AFTER the world id decides the sub-route. Matching on the
    // whole path would let an id such as "pr" or "tunnels-dev" be mistaken
    // for a sub-route.
    const afterPrefix = p.slice('/api/worlds/'.length);
    const idEnd = afterPrefix.indexOf('/');
    const sub = idEnd === -1 ? '' : afterPrefix.slice(idEnd);

    if (sub.includes('/credentials/')) return '/api/worlds/:id/credentials/:action';
    if (sub.includes('/tunnels')) return '/api/worlds/:id/tunnels';
    if (sub.endsWith('/pr')) return '/api/worlds/:id/pr';
    if (sub.endsWith('/progress')) return '/api/worlds/:id/progress';
    return '/api/worlds/:id';
  }

  if (p.startsWith('/api/world/')) return '/api/world/:id/*';
  if (p.startsWith('/api/admin/registry/')) return '/api/admin/registry';
  if (p.startsWith('/api/admin/world-pr/')) return '/api/admin/world-pr/:id';
  if (p.startsWith('/api/auth/credentials')) return '/api/auth/credentials';
  if (p.startsWith('/api/auth/')) return '/api/auth';
  if (p.startsWith('/api/plan/conversations/')) return '/api/plan/conversations/:id';
  if (p.startsWith('/api/plan/')) return '/api/plan';
  if (p.startsWith('/api/processes') || p.startsWith('/api/servers')) return '/api/processes';
  // Everything under /api/tasks collapses to one label; the sub-route shape
  // is not visible here, so no finer pattern is attempted.
  if (p.startsWith('/api/tasks/')) return '/api/tasks';
  if (p.startsWith('/v1/chunks')) return '/v1/chunks';
  if (p.startsWith('/v1/worlds')) return '/v1/worlds';
  if (p.startsWith('/assets/')) return '/assets';

  // SPA HTML fallback routes (GET / and SPA sub-routes like /worlds, /plan/...)
  if (p === '/' || p.startsWith('/worlds') || p.startsWith('/plan') || p.startsWith('/workspaces')) {
    return '/static';
  }
  return '/unknown';
}
|
|
98
|
+
|
|
99
|
+
// ─── In-memory registry ───────────────────────────────────────────────────

/**
 * Inclusive upper bounds (seconds) of the finite histogram buckets, ascending.
 * The +Inf bucket is not stored; it is rendered from the observation count.
 */
const HISTOGRAM_BUCKETS = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5];

/** @type {Map<string, number>} labelSet → count */
const _counters = new Map();

/**
 * Per label-set histogram state.
 *
 * `buckets[i]` holds the NON-cumulative number of observations that fell in
 * (HISTOGRAM_BUCKETS[i-1], HISTOGRAM_BUCKETS[i]]; renderMetrics() accumulates
 * them into the cumulative form the exposition format requires.
 *
 * @type {Map<string, {buckets: number[], sum: number, count: number}>}
 */
const _histograms = new Map();

/**
 * Build the Map key for a label set. Values are joined with NUL, which
 * renderMetrics() splits on to recover the individual label values.
 *
 * @param {string[]} parts label values in canonical order
 * @returns {string}
 */
function _labelKey(parts) {
  return parts.join('\x00');
}

/**
 * Reset all metrics. FOR TESTS ONLY — never call in production code.
 * Clears both the counter and the histogram registries.
 */
export function _resetForTest() {
  _counters.clear();
  _histograms.clear();
}

/**
 * Increment http_requests_total counter.
 *
 * @param {string} service
 * @param {string} route — MUST be a normalized route pattern
 * @param {string} method
 * @param {string} statusCode
 */
export function incRequest(service, route, method, statusCode) {
  const key = _labelKey([service, route, method, statusCode]);
  _counters.set(key, (_counters.get(key) ?? 0) + 1);
}

/**
 * Observe http_request_duration_seconds.
 *
 * Non-finite durations (NaN, ±Infinity) are dropped: adding one to `sum`
 * would make the rendered `_sum` sample NaN/Infinity for the lifetime of the
 * process.
 *
 * @param {string} service
 * @param {string} route
 * @param {string} method
 * @param {number} seconds
 */
export function observeDuration(service, route, method, seconds) {
  if (!Number.isFinite(seconds)) return;

  const key = _labelKey([service, route, method]);
  let h = _histograms.get(key);
  if (!h) {
    h = { buckets: new Array(HISTOGRAM_BUCKETS.length).fill(0), sum: 0, count: 0 };
    _histograms.set(key, h);
  }

  // Increment only the first (smallest) bucket that can hold the observation.
  // Observations larger than the last bound touch no finite bucket; they are
  // still reflected in h.count, which is what the +Inf bucket renders.
  const slot = HISTOGRAM_BUCKETS.findIndex((upperBound) => seconds <= upperBound);
  if (slot !== -1) {
    h.buckets[slot] += 1;
  }

  h.sum += seconds;
  h.count += 1;
}

// ─── Prometheus text exposition ───────────────────────────────────────────

/**
 * Escape label value per Prometheus text format (backslash, newline, quote).
 * Backslashes are escaped first so the escapes added for newline and quote
 * are not themselves re-escaped.
 *
 * @param {unknown} v — coerced with String()
 * @returns {string}
 */
function escapeLabelValue(v) {
  return String(v).replace(/\\/g, '\\\\').replace(/\n/g, '\\n').replace(/"/g, '\\"');
}

/**
 * Build the `{k1="v1",k2="v2",...}` label-set string.
 * Labels are emitted in the object's own key order.
 *
 * @param {Record<string, string>} labels
 * @returns {string}
 */
function labelSet(labels) {
  const parts = Object.entries(labels).map(
    ([k, v]) => `${k}="${escapeLabelValue(v)}"`,
  );
  return `{${parts.join(',')}}`;
}

/**
 * Render the complete Prometheus text exposition.
 *
 * HELP/TYPE headers for both metric families are always emitted, even when
 * no samples have been recorded yet. The output ends with a newline.
 *
 * @returns {string}
 */
export function renderMetrics() {
  const lines = [];

  // ── http_requests_total ─────────────────────────────────────────────
  lines.push('# HELP http_requests_total Total number of HTTP requests handled.');
  lines.push('# TYPE http_requests_total counter');
  for (const [key, count] of _counters) {
    const [service, route, method, status_code] = key.split('\x00');
    lines.push(
      `http_requests_total${labelSet({ service, route, method, status_code })} ${count}`,
    );
  }

  // ── http_request_duration_seconds ───────────────────────────────────
  lines.push('# HELP http_request_duration_seconds HTTP request duration in seconds (histogram).');
  lines.push('# TYPE http_request_duration_seconds histogram');
  for (const [key, h] of _histograms) {
    const [service, route, method] = key.split('\x00');
    const base = { service, route, method };
    // Stored buckets are per-range; the exposition needs running totals, so
    // each le=X sample is the sum of every range up to and including X.
    let cumulative = 0;
    for (let i = 0; i < HISTOGRAM_BUCKETS.length; i++) {
      cumulative += h.buckets[i];
      lines.push(
        `http_request_duration_seconds_bucket${labelSet({ ...base, le: String(HISTOGRAM_BUCKETS[i]) })} ${cumulative}`,
      );
    }
    lines.push(
      `http_request_duration_seconds_bucket${labelSet({ ...base, le: '+Inf' })} ${h.count}`,
    );
    lines.push(`http_request_duration_seconds_sum${labelSet(base)} ${h.sum}`);
    lines.push(`http_request_duration_seconds_count${labelSet(base)} ${h.count}`);
  }

  lines.push(''); // trailing newline
  return lines.join('\n');
}
|
|
236
|
+
|
|
237
|
+
// ─── Request instrumentation wrapper ─────────────────────────────────────
|
|
238
|
+
|
|
239
|
+
/**
 * Wrap an async request handler so every request is instrumented.
 *
 * The wrapper:
 *   1. Starts a high-resolution timer.
 *   2. Calls the original handler.
 *   3. Once the handler settles (resolves OR rejects), derives a stable route
 *      pattern from req.url and records counter + histogram.
 *
 * Status code capture: res.writeHead is replaced with a thin wrapper that
 * remembers the status passed to it. When the handler never calls writeHead,
 * res.statusCode is used instead, and 200 if that is unset too.
 *
 * The recorded duration is the time until the handler's promise settles.
 * NOTE(review): for handlers that return while the response is still
 * streaming this is presumably shorter than the full response time — confirm
 * whether that matters for long-lived routes.
 *
 * A handler rejection is re-raised unchanged after the metrics are recorded.
 *
 * @param {string} serviceName — emitted as the `service` label
 * @param {(req: import('node:http').IncomingMessage, res: import('node:http').ServerResponse) => Promise<void>} handler
 * @returns {(req: import('node:http').IncomingMessage, res: import('node:http').ServerResponse) => Promise<void>}
 */
export function instrumentHandler(serviceName, handler) {
  return async (req, res) => {
    const start = performance.now();

    // Intercept status code by wrapping writeHead.
    let capturedStatus = null;
    const origWriteHead = res.writeHead.bind(res);
    res.writeHead = (status, ...rest) => {
      capturedStatus = status;
      return origWriteHead(status, ...rest);
    };

    try {
      await handler(req, res);
    } finally {
      const durationSec = (performance.now() - start) / 1000;

      // new URL() throws on a malformed request target. An exception escaping
      // this finally block would replace the handler's own outcome and skip
      // the metrics, so an unparseable URL is labelled '/unknown' instead.
      let pathname = null;
      try {
        pathname = new URL(req.url ?? '/', 'http://localhost').pathname;
      } catch {
        pathname = null;
      }
      const route = pathname === null ? '/unknown' : pathToRoute(pathname);

      const method = (req.method ?? 'GET').toUpperCase();
      const statusCode = String(capturedStatus ?? res.statusCode ?? 200);

      incRequest(serviceName, route, method, statusCode);
      observeDuration(serviceName, route, method, durationSec);
    }
  };
}
|
package/host-cp/src/server.mjs
CHANGED
|
@@ -53,6 +53,7 @@ import {
|
|
|
53
53
|
normalizeName,
|
|
54
54
|
} from './world-names-store.mjs';
|
|
55
55
|
import { createLocalWorldsSource } from './local-worlds-source.mjs';
|
|
56
|
+
import { dispatchTasksRoute } from './tasks-route.mjs';
|
|
56
57
|
import { createPylonWorldsSource } from './pylon-worlds-source.mjs';
|
|
57
58
|
import { composeWorldsSources } from './compose-worlds-sources.mjs';
|
|
58
59
|
import { createWorldPrStateStore } from './world-pr-state.mjs';
|
|
@@ -71,6 +72,7 @@ import {
|
|
|
71
72
|
handleListServers,
|
|
72
73
|
handleServerBridges,
|
|
73
74
|
} from './routes/process-port.mjs';
|
|
75
|
+
import { instrumentHandler, renderMetrics } from './metrics.mjs';
|
|
74
76
|
|
|
75
77
|
// ── Deployment-mode detection ─────────────────────────────────────
|
|
76
78
|
//
|
|
@@ -680,7 +682,10 @@ async function getSecret(worldId) {
|
|
|
680
682
|
|
|
681
683
|
// ── HTTP server ──────────────────────────────────────────────────────
|
|
682
684
|
|
|
683
|
-
|
|
685
|
+
// Phase C Task C3: wrap the raw handler with the Prometheus instrumentation
|
|
686
|
+
// wrapper. Every request increments http_requests_total and observes
|
|
687
|
+
// http_request_duration_seconds once the handler has settled.
|
|
688
|
+
const server = http.createServer(instrumentHandler('host-cp', async (req, res) => {
|
|
684
689
|
const url = new URL(req.url ?? '/', `http://${req.headers.host}`);
|
|
685
690
|
|
|
686
691
|
// /health: fast diagnostics, no auth, no proxying. Docker healthcheck
|
|
@@ -717,6 +722,22 @@ const server = http.createServer(async (req, res) => {
|
|
|
717
722
|
});
|
|
718
723
|
}
|
|
719
724
|
|
|
725
|
+
// /metrics: Prometheus text exposition (Phase C Task C3).
|
|
726
|
+
// Unauthenticated — same rationale as /health: the Prometheus scraper
|
|
727
|
+
// in-cluster cannot carry the operator's session token.
|
|
728
|
+
// Returns only the 4 taxonomy-compliant labels {service,route,method,status_code}.
|
|
729
|
+
// BANNED labels (world_id, trace_id, user_id, request_id, operator_id)
|
|
730
|
+
// are never emitted here; layer-2 enforcement is the ServiceMonitor labeldrop.
|
|
731
|
+
if (url.pathname === '/metrics') {
|
|
732
|
+
const body = renderMetrics();
|
|
733
|
+
res.writeHead(200, {
|
|
734
|
+
'Content-Type': 'text/plain; version=0.0.4; charset=utf-8',
|
|
735
|
+
'Cache-Control': 'no-cache, no-store',
|
|
736
|
+
});
|
|
737
|
+
res.end(body);
|
|
738
|
+
return;
|
|
739
|
+
}
|
|
740
|
+
|
|
720
741
|
// /api/bootstrap: SPA reads the token at load time. Unauthed because
|
|
721
742
|
// anything local that can hit 127.0.0.1:19000 can also read the token
|
|
722
743
|
// file directly (same OS-level privilege boundary). Single-user-only;
|
|
@@ -773,6 +794,14 @@ const server = http.createServer(async (req, res) => {
|
|
|
773
794
|
}));
|
|
774
795
|
}
|
|
775
796
|
|
|
797
|
+
// /api/tasks/* — B2.2: @olam/tasks-write-api mount via pgPoolExecutor
|
|
798
|
+
// adapter. Bearer-auth already applied above; per-request scopes + olamNodeId
|
|
799
|
+
// come from X-Olam-* headers; RLS enforced server-side per D-B-23.
|
|
800
|
+
if (url.pathname.startsWith('/api/tasks')) {
|
|
801
|
+
const handled = await dispatchTasksRoute(req, res, url);
|
|
802
|
+
if (handled) return;
|
|
803
|
+
}
|
|
804
|
+
|
|
776
805
|
// /api/version/status: returns the current version snapshot (baked SHA
|
|
777
806
|
// vs operator's local HEAD). No auth required beyond the existing gate
|
|
778
807
|
// (already applied above). Phase 1 only — detection, no auto-upgrade.
|
|
@@ -2178,7 +2207,7 @@ const server = http.createServer(async (req, res) => {
|
|
|
2178
2207
|
pathname: url.pathname,
|
|
2179
2208
|
message: 'B3 ships /health + /api/world/<id>/*. B4-B9 ship the rest.',
|
|
2180
2209
|
});
|
|
2181
|
-
});
|
|
2210
|
+
}));
|
|
2182
2211
|
|
|
2183
2212
|
/**
|
|
2184
2213
|
* @param {import('node:http').ServerResponse} res
|