@trillboards/edge-sdk 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +147 -2
- package/deploy/docker/Dockerfile.cpu +132 -0
- package/deploy/docker/Dockerfile.cuda +134 -0
- package/deploy/docker/Dockerfile.openvino +131 -0
- package/deploy/docker/README.md +358 -0
- package/deploy/helm/README.md +508 -0
- package/deploy/helm/trillboards-edge/Chart.yaml +19 -0
- package/deploy/helm/trillboards-edge/templates/_helpers.tpl +40 -0
- package/deploy/helm/trillboards-edge/templates/daemonset.yaml +120 -0
- package/deploy/helm/trillboards-edge/templates/service.yaml +15 -0
- package/deploy/helm/trillboards-edge/values.yaml +95 -0
- package/deploy/k8s/daemonset.yaml +144 -0
- package/dist/CommandRouter.d.ts +113 -0
- package/dist/CommandRouter.d.ts.map +1 -0
- package/dist/CommandRouter.js +392 -0
- package/dist/CommandRouter.js.map +1 -0
- package/dist/EdgeAgent.d.ts +6 -1
- package/dist/EdgeAgent.d.ts.map +1 -1
- package/dist/EdgeAgent.js +277 -10
- package/dist/EdgeAgent.js.map +1 -1
- package/dist/cli.js +60 -8
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +1 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js.map +1 -1
- package/dist/demo.d.ts +111 -0
- package/dist/demo.d.ts.map +1 -0
- package/dist/demo.js +483 -0
- package/dist/demo.js.map +1 -0
- package/dist/diagnose.d.ts +59 -0
- package/dist/diagnose.d.ts.map +1 -0
- package/dist/diagnose.js +651 -0
- package/dist/diagnose.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/init.d.ts +19 -0
- package/dist/init.d.ts.map +1 -0
- package/dist/init.js +364 -0
- package/dist/init.js.map +1 -0
- package/dist/mcp-server.d.ts +27 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +1264 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/status.d.ts +11 -0
- package/dist/status.d.ts.map +1 -0
- package/dist/status.js +343 -0
- package/dist/status.js.map +1 -0
- package/package.json +5 -4
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
# Kubernetes / Helm Deployment -- Trillboards Edge SDK
|
|
2
|
+
|
|
3
|
+
This guide covers deploying the Trillboards Edge AI agent across a fleet of Kubernetes nodes using Helm or plain kubectl manifests. The agent runs as a DaemonSet, placing one instance on each labeled edge node.
|
|
4
|
+
|
|
5
|
+
## Prerequisites
|
|
6
|
+
|
|
7
|
+
- Kubernetes 1.24 or later
|
|
8
|
+
- Helm 3.0 or later (for Helm-based deployment)
|
|
9
|
+
- `kubectl` configured to communicate with your cluster
|
|
10
|
+
- A Trillboards device token (obtain from the Trillboards dashboard under **Devices > Add Device**)
|
|
11
|
+
- For GPU nodes: NVIDIA device plugin for Kubernetes (`k8s-device-plugin`) installed
|
|
12
|
+
|
|
13
|
+
## Quick Start with Helm
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# 1. Create the device token secret
|
|
17
|
+
kubectl create secret generic trillboards-edge \
|
|
18
|
+
--from-literal=device-token=YOUR_DEVICE_TOKEN_HERE
|
|
19
|
+
|
|
20
|
+
# 2. Add the Helm chart (local path -- chart is bundled in the SDK)
|
|
21
|
+
helm install trillboards-edge \
|
|
22
|
+
./packages/edge-sdk/deploy/helm/trillboards-edge
|
|
23
|
+
|
|
24
|
+
# 3. Label your edge nodes so the DaemonSet schedules onto them
|
|
25
|
+
kubectl label node my-edge-node-01 trillboards.com/edge-device=true
|
|
26
|
+
kubectl label node my-edge-node-02 trillboards.com/edge-device=true
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
The agent pod will start on every node carrying the `trillboards.com/edge-device=true` label.
|
|
30
|
+
|
|
31
|
+
## Chart Information
|
|
32
|
+
|
|
33
|
+
| Field | Value |
|
|
34
|
+
|-------|-------|
|
|
35
|
+
| Chart name | `trillboards-edge` |
|
|
36
|
+
| Chart version | `0.2.2` |
|
|
37
|
+
| App version | `0.2.2` |
|
|
38
|
+
| Type | `application` |
|
|
39
|
+
| Source | `https://github.com/trillboards/packages` |
|
|
40
|
+
|
|
41
|
+
## Creating the Device Token Secret
|
|
42
|
+
|
|
43
|
+
The chart expects a Kubernetes Secret containing the device token. This secret is referenced by all pods in the DaemonSet.
|
|
44
|
+
|
|
45
|
+
**Single token for the entire fleet:**
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
kubectl create secret generic trillboards-edge \
|
|
49
|
+
--from-literal=device-token=YOUR_TOKEN
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
**From a file:**
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
echo -n 'YOUR_TOKEN' > device-token.txt
|
|
56
|
+
kubectl create secret generic trillboards-edge \
|
|
57
|
+
--from-file=device-token=device-token.txt
|
|
58
|
+
rm device-token.txt
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
To use a different secret name or key, set `deviceToken.existingSecret` and `deviceToken.secretKey` in your values override.
|
|
62
|
+
|
|
63
|
+
## values.yaml Reference
|
|
64
|
+
|
|
65
|
+
Below is the complete set of configurable values with defaults.
|
|
66
|
+
|
|
67
|
+
### Image
|
|
68
|
+
|
|
69
|
+
```yaml
|
|
70
|
+
image:
|
|
71
|
+
repository: trillboards/edge-sdk # Container image repository
|
|
72
|
+
tag: "latest" # Image tag: latest (CPU), cuda, openvino
|
|
73
|
+
pullPolicy: Always # Image pull policy
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
For GPU nodes, override the tag:
|
|
77
|
+
|
|
78
|
+
```yaml
|
|
79
|
+
# NVIDIA GPU nodes
|
|
80
|
+
image:
|
|
81
|
+
tag: "cuda"
|
|
82
|
+
|
|
83
|
+
# Intel OpenVINO nodes
|
|
84
|
+
image:
|
|
85
|
+
tag: "openvino"
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### API Configuration
|
|
89
|
+
|
|
90
|
+
```yaml
|
|
91
|
+
api:
|
|
92
|
+
baseUrl: "https://api.trillboards.com" # Backend API URL
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Device Token
|
|
96
|
+
|
|
97
|
+
```yaml
|
|
98
|
+
deviceToken:
|
|
99
|
+
existingSecret: "trillboards-edge" # Name of the Kubernetes Secret
|
|
100
|
+
secretKey: "device-token" # Key within the Secret
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Sensing Configuration
|
|
104
|
+
|
|
105
|
+
```yaml
|
|
106
|
+
sensing:
|
|
107
|
+
camera:
|
|
108
|
+
enabled: true # Enable camera capture for face detection
|
|
109
|
+
audio:
|
|
110
|
+
enabled: true # Enable audio capture for ambient classification
|
|
111
|
+
kiosk:
|
|
112
|
+
enabled: false # Launch headless Chromium for screen player
|
|
113
|
+
url: "https://screen.trillboards.com" # Kiosk URL when enabled
|
|
114
|
+
executionProvider: "cpu" # ONNX execution provider: cpu, openvino, directml, cuda
|
|
115
|
+
logLevel: "info" # Log level: debug, info, warn, error
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Resource Limits
|
|
119
|
+
|
|
120
|
+
```yaml
|
|
121
|
+
resources:
|
|
122
|
+
requests:
|
|
123
|
+
cpu: "250m" # CPU request per pod
|
|
124
|
+
memory: "256Mi" # Memory request per pod
|
|
125
|
+
limits:
|
|
126
|
+
cpu: "1000m" # CPU limit per pod
|
|
127
|
+
memory: "1Gi" # Memory limit per pod
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### GPU Resources
|
|
131
|
+
|
|
132
|
+
```yaml
|
|
133
|
+
gpu:
|
|
134
|
+
enabled: false # Set to true and add nvidia.com/gpu limits for GPU pods
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
When `gpu.enabled` is true, add GPU resource limits to the `resources` block in your values override:
|
|
138
|
+
|
|
139
|
+
```yaml
|
|
140
|
+
gpu:
|
|
141
|
+
enabled: true
|
|
142
|
+
resources:
|
|
143
|
+
limits:
|
|
144
|
+
cpu: "2000m"
|
|
145
|
+
memory: "2Gi"
|
|
146
|
+
nvidia.com/gpu: 1
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Status Server
|
|
150
|
+
|
|
151
|
+
```yaml
|
|
152
|
+
statusServer:
|
|
153
|
+
port: 9090 # HTTP port for /status, /health, /metrics endpoints
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Prometheus Integration
|
|
157
|
+
|
|
158
|
+
```yaml
|
|
159
|
+
prometheus:
|
|
160
|
+
enabled: true # Add Prometheus scrape annotations to pods
|
|
161
|
+
port: 9090 # Metrics port (matches statusServer.port)
|
|
162
|
+
path: "/metrics" # Metrics endpoint path
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
When `prometheus.enabled` is `true`, pods receive these annotations automatically:
|
|
166
|
+
|
|
167
|
+
```yaml
|
|
168
|
+
prometheus.io/scrape: "true"
|
|
169
|
+
prometheus.io/port: "9090"
|
|
170
|
+
prometheus.io/path: "/metrics"
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
No additional ServiceMonitor or PodMonitor CRDs are required. Prometheus discovers the pods via annotation-based scraping.
|
|
174
|
+
|
|
175
|
+
### Health Probes
|
|
176
|
+
|
|
177
|
+
```yaml
|
|
178
|
+
probes:
|
|
179
|
+
liveness:
|
|
180
|
+
initialDelaySeconds: 30 # Wait for agent startup before first liveness check
|
|
181
|
+
periodSeconds: 30 # Check interval
|
|
182
|
+
readiness:
|
|
183
|
+
initialDelaySeconds: 10 # Wait before first readiness check
|
|
184
|
+
periodSeconds: 10 # Check interval
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
Both probes hit the StatusServer's `/health` endpoint:
|
|
188
|
+
- **200 OK** -- agent is healthy; the readiness probe passes and the pod is marked Ready
|
|
189
|
+
- **503 Service Unavailable** -- one or more subsystems degraded; the readiness probe marks the pod NotReady, and repeated liveness failures restart the container
|
|
190
|
+
|
|
191
|
+
### Storage
|
|
192
|
+
|
|
193
|
+
```yaml
|
|
194
|
+
models:
|
|
195
|
+
hostPath: "/opt/trillboards/models" # Host path for ONNX model persistence
|
|
196
|
+
|
|
197
|
+
data:
|
|
198
|
+
hostPath: "/opt/trillboards/data" # Host path for signal buffer and device identity
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Both volumes use `DirectoryOrCreate` type, so the directories are created automatically on first pod start.
|
|
202
|
+
|
|
203
|
+
### Node Scheduling
|
|
204
|
+
|
|
205
|
+
```yaml
|
|
206
|
+
nodeSelector:
|
|
207
|
+
trillboards.com/edge-device: "true" # Only schedule on labeled nodes
|
|
208
|
+
|
|
209
|
+
tolerations:
|
|
210
|
+
- key: "edge"
|
|
211
|
+
operator: "Exists"
|
|
212
|
+
effect: "NoSchedule" # Tolerate edge-tainted nodes
|
|
213
|
+
|
|
214
|
+
affinity: {} # Additional affinity rules (optional)
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Update Strategy
|
|
218
|
+
|
|
219
|
+
```yaml
|
|
220
|
+
updateStrategy:
|
|
221
|
+
type: RollingUpdate
|
|
222
|
+
rollingUpdate:
|
|
223
|
+
maxUnavailable: 1 # Roll one pod at a time during upgrades
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### Security Context
|
|
227
|
+
|
|
228
|
+
```yaml
|
|
229
|
+
securityContext:
|
|
230
|
+
privileged: false # No privileged mode by default
|
|
231
|
+
readOnlyRootFilesystem: false # Agent writes to /app/data at runtime
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## GPU Node Deployment
|
|
235
|
+
|
|
236
|
+
For nodes with NVIDIA GPUs, install the NVIDIA device plugin and deploy with the CUDA image.
|
|
237
|
+
|
|
238
|
+
### 1. Install NVIDIA Device Plugin
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.3/nvidia-device-plugin.yml
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### 2. Label and Taint GPU Nodes
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
kubectl label node gpu-node-01 trillboards.com/edge-device=true
|
|
248
|
+
kubectl taint node gpu-node-01 nvidia.com/gpu=present:NoSchedule
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### 3. Deploy with GPU Values
|
|
252
|
+
|
|
253
|
+
Create a `values-gpu.yaml`:
|
|
254
|
+
|
|
255
|
+
```yaml
|
|
256
|
+
image:
|
|
257
|
+
tag: "cuda"
|
|
258
|
+
|
|
259
|
+
sensing:
|
|
260
|
+
executionProvider: "cuda"
|
|
261
|
+
|
|
262
|
+
gpu:
|
|
263
|
+
enabled: true
|
|
264
|
+
|
|
265
|
+
resources:
|
|
266
|
+
requests:
|
|
267
|
+
cpu: "500m"
|
|
268
|
+
memory: "512Mi"
|
|
269
|
+
limits:
|
|
270
|
+
cpu: "2000m"
|
|
271
|
+
memory: "2Gi"
|
|
272
|
+
nvidia.com/gpu: 1
|
|
273
|
+
|
|
274
|
+
tolerations:
|
|
275
|
+
- key: "edge"
|
|
276
|
+
operator: "Exists"
|
|
277
|
+
effect: "NoSchedule"
|
|
278
|
+
- key: "nvidia.com/gpu"
|
|
279
|
+
operator: "Exists"
|
|
280
|
+
effect: "NoSchedule"
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
helm install trillboards-edge-gpu \
|
|
285
|
+
./packages/edge-sdk/deploy/helm/trillboards-edge \
|
|
286
|
+
-f values-gpu.yaml
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
## Heterogeneous Fleet (Mixed CPU + GPU Nodes)
|
|
290
|
+
|
|
291
|
+
For fleets with both CPU-only and GPU nodes, deploy two Helm releases with different node selectors.
|
|
292
|
+
|
|
293
|
+
```bash
|
|
294
|
+
# CPU nodes
|
|
295
|
+
kubectl label node cpu-node-01 trillboards.com/edge-device=true trillboards.com/gpu=false
|
|
296
|
+
kubectl label node cpu-node-02 trillboards.com/edge-device=true trillboards.com/gpu=false
|
|
297
|
+
|
|
298
|
+
# GPU nodes
|
|
299
|
+
kubectl label node gpu-node-01 trillboards.com/edge-device=true trillboards.com/gpu=true
|
|
300
|
+
kubectl label node gpu-node-02 trillboards.com/edge-device=true trillboards.com/gpu=true
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
```bash
|
|
304
|
+
# Deploy CPU fleet
|
|
305
|
+
helm install trillboards-cpu \
|
|
306
|
+
./packages/edge-sdk/deploy/helm/trillboards-edge \
|
|
307
|
+
--set image.tag=latest \
|
|
308
|
+
--set sensing.executionProvider=cpu \
|
|
309
|
+
--set-string nodeSelector."trillboards\.com/gpu"=false
|
|
310
|
+
|
|
311
|
+
# Deploy GPU fleet
|
|
312
|
+
helm install trillboards-gpu \
|
|
313
|
+
./packages/edge-sdk/deploy/helm/trillboards-edge \
|
|
314
|
+
-f values-gpu.yaml \
|
|
315
|
+
--set-string nodeSelector."trillboards\.com/gpu"=true
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
## Node Labeling for Edge Devices
|
|
319
|
+
|
|
320
|
+
The DaemonSet schedules only onto nodes with the `trillboards.com/edge-device=true` label. This prevents the agent from running on control plane nodes or non-edge workers.
|
|
321
|
+
|
|
322
|
+
**Label a node:**
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
kubectl label node <node-name> trillboards.com/edge-device=true
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
**Remove the label (stops the agent on that node):**
|
|
329
|
+
|
|
330
|
+
```bash
|
|
331
|
+
kubectl label node <node-name> trillboards.com/edge-device-
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
**List all edge nodes:**
|
|
335
|
+
|
|
336
|
+
```bash
|
|
337
|
+
kubectl get nodes -l trillboards.com/edge-device=true
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
## Scaling Strategy
|
|
341
|
+
|
|
342
|
+
The chart deploys a **DaemonSet**, not a Deployment. This means:
|
|
343
|
+
|
|
344
|
+
- **One agent per labeled node** -- adding a new edge node and labeling it automatically starts an agent pod
|
|
345
|
+
- **Removing a label** stops the agent on that node
|
|
346
|
+
- There is no replica count to configure; the fleet scales with node count
|
|
347
|
+
- Rolling updates proceed one node at a time (`maxUnavailable: 1`)
|
|
348
|
+
|
|
349
|
+
This is the correct scaling model for edge sensing: each physical device needs exactly one agent reading its camera and audio inputs.
|
|
350
|
+
|
|
351
|
+
## Upgrading
|
|
352
|
+
|
|
353
|
+
```bash
|
|
354
|
+
# Update to a new image tag
|
|
355
|
+
helm upgrade trillboards-edge \
|
|
356
|
+
./packages/edge-sdk/deploy/helm/trillboards-edge \
|
|
357
|
+
--set image.tag=0.3.0
|
|
358
|
+
|
|
359
|
+
# Or apply a full values file
|
|
360
|
+
helm upgrade trillboards-edge \
|
|
361
|
+
./packages/edge-sdk/deploy/helm/trillboards-edge \
|
|
362
|
+
-f my-values.yaml
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
The DaemonSet uses `RollingUpdate` strategy with `maxUnavailable: 1`, so pods are restarted one at a time across the fleet.
|
|
366
|
+
|
|
367
|
+
## Rollback
|
|
368
|
+
|
|
369
|
+
```bash
|
|
370
|
+
# View release history
|
|
371
|
+
helm history trillboards-edge
|
|
372
|
+
|
|
373
|
+
# Roll back to a specific revision
|
|
374
|
+
helm rollback trillboards-edge 2
|
|
375
|
+
|
|
376
|
+
# Roll back to the previous release
|
|
377
|
+
helm rollback trillboards-edge
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
## Prometheus Integration
|
|
381
|
+
|
|
382
|
+
When `prometheus.enabled` is `true` (the default), pod annotations are added for Prometheus auto-discovery. The agent exposes Prometheus-format metrics at `GET /metrics` on the status server port.
|
|
383
|
+
|
|
384
|
+
Exported metrics:
|
|
385
|
+
|
|
386
|
+
| Metric | Type | Description |
|
|
387
|
+
|--------|------|-------------|
|
|
388
|
+
| `trillboards_face_count` | gauge | Current detected face count |
|
|
389
|
+
| `trillboards_attention_score` | gauge | Average attention score (0-1) |
|
|
390
|
+
| `trillboards_vas_score` | gauge | Weighted VAS score (0-1) |
|
|
391
|
+
| `trillboards_signal_buffer_count` | gauge | Buffered signals waiting to send |
|
|
392
|
+
| `trillboards_signal_buffer_capacity` | gauge | Maximum signal buffer capacity |
|
|
393
|
+
| `trillboards_uptime_seconds` | gauge | Agent uptime in seconds |
|
|
394
|
+
| `trillboards_heap_used_mb` | gauge | V8 heap used in MB |
|
|
395
|
+
| `trillboards_memory_rss_mb` | gauge | Process RSS memory in MB |
|
|
396
|
+
| `trillboards_socket_connected` | gauge | Socket.io connection status (1/0) |
|
|
397
|
+
| `trillboards_federated_enabled` | gauge | Federated learning enabled (1/0) |
|
|
398
|
+
| `trillboards_tier` | gauge | Device capability tier (1-4) |
|
|
399
|
+
| `trillboards_models_loaded_count` | gauge | Number of loaded ML models |
|
|
400
|
+
|
|
401
|
+
**Prometheus scrape config** (if not using annotation-based auto-discovery):
|
|
402
|
+
|
|
403
|
+
```yaml
|
|
404
|
+
scrape_configs:
|
|
405
|
+
- job_name: 'trillboards-edge'
|
|
406
|
+
kubernetes_sd_configs:
|
|
407
|
+
- role: pod
|
|
408
|
+
relabel_configs:
|
|
409
|
+
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
|
410
|
+
action: keep
|
|
411
|
+
regex: true
|
|
412
|
+
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
|
|
413
|
+
action: replace
|
|
414
|
+
target_label: __address__
|
|
415
|
+
regex: ([^:]+)(?::\d+)?;(\d+)
|
|
416
|
+
replacement: $1:$2
|
|
417
|
+
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
|
418
|
+
action: replace
|
|
419
|
+
target_label: __metrics_path__
|
|
420
|
+
regex: (.+)
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
## Plain kubectl Deployment (Without Helm)
|
|
424
|
+
|
|
425
|
+
If you prefer not to use Helm, a standalone Kubernetes manifest is provided at `deploy/k8s/daemonset.yaml`.
|
|
426
|
+
|
|
427
|
+
### 1. Create the Secret
|
|
428
|
+
|
|
429
|
+
```bash
|
|
430
|
+
kubectl create secret generic trillboards-edge \
|
|
431
|
+
--from-literal=device-token=YOUR_TOKEN
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
### 2. Apply the Manifest
|
|
435
|
+
|
|
436
|
+
```bash
|
|
437
|
+
kubectl apply -f packages/edge-sdk/deploy/k8s/daemonset.yaml
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
This creates:
|
|
441
|
+
- A `DaemonSet` named `trillboards-edge-agent` with all environment variables, volume mounts, probes, and Prometheus annotations pre-configured
|
|
442
|
+
- A headless `Service` named `trillboards-edge-status` for per-pod addressability
|
|
443
|
+
|
|
444
|
+
### 3. Label Nodes
|
|
445
|
+
|
|
446
|
+
```bash
|
|
447
|
+
kubectl label node <node-name> trillboards.com/edge-device=true
|
|
448
|
+
```
|
|
449
|
+
|
|
450
|
+
### 4. Customizing the kubectl Manifest
|
|
451
|
+
|
|
452
|
+
Edit `daemonset.yaml` directly to change:
|
|
453
|
+
- Image tag (line: `image: trillboards/edge-sdk:latest`)
|
|
454
|
+
- Environment variable values
|
|
455
|
+
- Resource requests and limits
|
|
456
|
+
- Node selector labels
|
|
457
|
+
- Tolerations
|
|
458
|
+
|
|
459
|
+
## Service Topology
|
|
460
|
+
|
|
461
|
+
The Helm chart creates a **headless Service** (`clusterIP: None`). This means:
|
|
462
|
+
|
|
463
|
+
- DNS lookups of the service name return the individual pod IPs directly (per-pod DNS names of the form `<pod-name>.trillboards-edge.<namespace>.svc.cluster.local` additionally require the pod's `hostname` and `subdomain` fields to be set)
|
|
464
|
+
- There is no load balancing across pods (each pod serves a unique physical device)
|
|
465
|
+
- The service enables Prometheus service discovery and direct pod access for debugging
|
|
466
|
+
|
|
467
|
+
To query a specific device's status from within the cluster:
|
|
468
|
+
|
|
469
|
+
```bash
|
|
470
|
+
kubectl exec -it debug-pod -- curl http://$(kubectl get pod <pod-name> -o jsonpath='{.status.podIP}'):9090/status
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
## Volume Architecture
|
|
474
|
+
|
|
475
|
+
The DaemonSet mounts three volumes:
|
|
476
|
+
|
|
477
|
+
| Volume | Type | Mount Path | Purpose |
|
|
478
|
+
|--------|------|-----------|---------|
|
|
479
|
+
| `models` | `hostPath` (DirectoryOrCreate) | `/app/models` | ONNX model files. Persisted across pod restarts. |
|
|
480
|
+
| `data` | `hostPath` (DirectoryOrCreate) | `/app/data` | Signal buffer (SQLite), device identity, federated gradients. |
|
|
481
|
+
| `video-devices` | `hostPath` (Directory) | `/dev` (read-only) | Camera device passthrough (`/dev/video*`). |
|
|
482
|
+
|
|
483
|
+
Host paths default to `/opt/trillboards/models` and `/opt/trillboards/data`. Override in values if your nodes use a different layout.
|
|
484
|
+
|
|
485
|
+
## Troubleshooting
|
|
486
|
+
|
|
487
|
+
**Pods stuck in Pending:**
|
|
488
|
+
- Verify nodes are labeled: `kubectl get nodes -l trillboards.com/edge-device=true`
|
|
489
|
+
- Check for taint issues: `kubectl describe pod <pod-name>` and look for taint-related scheduling failures
|
|
490
|
+
- Verify resource availability: `kubectl describe node <node-name>`
|
|
491
|
+
|
|
492
|
+
**Pods in CrashLoopBackOff:**
|
|
493
|
+
- Check logs: `kubectl logs <pod-name>`
|
|
494
|
+
- The most common cause is a missing or invalid `DEVICE_TOKEN`. Verify the secret exists: `kubectl get secret trillboards-edge -o yaml`
|
|
495
|
+
|
|
496
|
+
**Health check failures (pod NotReady):**
|
|
497
|
+
- The readiness probe starts after 10 seconds and marks the pod NotReady on failure; the liveness probe starts after 30 seconds and restarts the container on repeated failures. If the agent takes longer to initialize (first-time model download), increase `probes.readiness.initialDelaySeconds` and `probes.liveness.initialDelaySeconds`.
|
|
498
|
+
- Check the status endpoint directly: `kubectl exec <pod-name> -- curl -s http://localhost:9090/health`
|
|
499
|
+
|
|
500
|
+
**Camera not detected inside pod:**
|
|
501
|
+
- Verify `/dev/video*` exists on the host node
|
|
502
|
+
- The `video-devices` volume mounts `/dev` read-only. If your device requires write access, set `securityContext.privileged: true` (not recommended for production).
|
|
503
|
+
- Some USB cameras need `--privileged` or specific device cgroup rules
|
|
504
|
+
|
|
505
|
+
**No metrics in Prometheus:**
|
|
506
|
+
- Verify annotations: `kubectl get pod <pod-name> -o jsonpath='{.metadata.annotations}'`
|
|
507
|
+
- Check that Prometheus is configured for annotation-based pod discovery
|
|
508
|
+
- Test metrics endpoint: `kubectl exec <pod-name> -- curl -s http://localhost:9090/metrics`
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
apiVersion: v2
|
|
2
|
+
name: trillboards-edge
|
|
3
|
+
description: Trillboards Edge AI SDK — fleet-wide audience sensing for DOOH devices
|
|
4
|
+
type: application
|
|
5
|
+
version: 0.2.2
|
|
6
|
+
appVersion: "0.2.2"
|
|
7
|
+
keywords:
|
|
8
|
+
- dooh
|
|
9
|
+
- edge-ai
|
|
10
|
+
- audience-sensing
|
|
11
|
+
- digital-signage
|
|
12
|
+
- onnx
|
|
13
|
+
- ctv
|
|
14
|
+
home: https://trillboards.com
|
|
15
|
+
sources:
|
|
16
|
+
- https://github.com/trillboards/packages
|
|
17
|
+
maintainers:
|
|
18
|
+
- name: Trillboards Engineering
|
|
19
|
+
email: engineering@trillboards.com
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{{/*
|
|
2
|
+
Expand the name of the chart.
|
|
3
|
+
*/}}
|
|
4
|
+
{{- define "trillboards-edge.name" -}}
|
|
5
|
+
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
|
6
|
+
{{- end }}
|
|
7
|
+
|
|
8
|
+
{{/*
|
|
9
|
+
Create a default fully qualified app name.
|
|
10
|
+
*/}}
|
|
11
|
+
{{- define "trillboards-edge.fullname" -}}
|
|
12
|
+
{{- if .Values.fullnameOverride }}
|
|
13
|
+
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
|
14
|
+
{{- else }}
|
|
15
|
+
{{- $name := default .Chart.Name .Values.nameOverride }}
|
|
16
|
+
{{- if contains $name .Release.Name }}
|
|
17
|
+
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
|
18
|
+
{{- else }}
|
|
19
|
+
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
|
20
|
+
{{- end }}
|
|
21
|
+
{{- end }}
|
|
22
|
+
{{- end }}
|
|
23
|
+
|
|
24
|
+
{{/*
|
|
25
|
+
Common labels
|
|
26
|
+
*/}}
|
|
27
|
+
{{- define "trillboards-edge.labels" -}}
|
|
28
|
+
helm.sh/chart: {{ include "trillboards-edge.name" . }}-{{ .Chart.Version }}
|
|
29
|
+
{{ include "trillboards-edge.selectorLabels" . }}
|
|
30
|
+
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
|
31
|
+
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
|
32
|
+
{{- end }}
|
|
33
|
+
|
|
34
|
+
{{/*
|
|
35
|
+
Selector labels
|
|
36
|
+
*/}}
|
|
37
|
+
{{- define "trillboards-edge.selectorLabels" -}}
|
|
38
|
+
app.kubernetes.io/name: {{ include "trillboards-edge.name" . }}
|
|
39
|
+
app.kubernetes.io/instance: {{ .Release.Name }}
|
|
40
|
+
{{- end }}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
apiVersion: apps/v1
|
|
2
|
+
kind: DaemonSet
|
|
3
|
+
metadata:
|
|
4
|
+
name: {{ include "trillboards-edge.fullname" . }}
|
|
5
|
+
labels:
|
|
6
|
+
{{- include "trillboards-edge.labels" . | nindent 4 }}
|
|
7
|
+
spec:
|
|
8
|
+
selector:
|
|
9
|
+
matchLabels:
|
|
10
|
+
{{- include "trillboards-edge.selectorLabels" . | nindent 6 }}
|
|
11
|
+
updateStrategy:
|
|
12
|
+
{{- toYaml .Values.updateStrategy | nindent 4 }}
|
|
13
|
+
template:
|
|
14
|
+
metadata:
|
|
15
|
+
labels:
|
|
16
|
+
{{- include "trillboards-edge.selectorLabels" . | nindent 8 }}
|
|
17
|
+
annotations:
|
|
18
|
+
{{- if .Values.prometheus.enabled }}
|
|
19
|
+
prometheus.io/scrape: "true"
|
|
20
|
+
prometheus.io/port: {{ .Values.prometheus.port | quote }}
|
|
21
|
+
prometheus.io/path: {{ .Values.prometheus.path | quote }}
|
|
22
|
+
{{- end }}
|
|
23
|
+
spec:
|
|
24
|
+
terminationGracePeriodSeconds: 30
|
|
25
|
+
containers:
|
|
26
|
+
- name: edge-agent
|
|
27
|
+
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
|
28
|
+
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
|
29
|
+
ports:
|
|
30
|
+
- name: status
|
|
31
|
+
containerPort: {{ .Values.statusServer.port }}
|
|
32
|
+
protocol: TCP
|
|
33
|
+
env:
|
|
34
|
+
- name: DEVICE_TOKEN
|
|
35
|
+
valueFrom:
|
|
36
|
+
secretKeyRef:
|
|
37
|
+
name: {{ .Values.deviceToken.existingSecret }}
|
|
38
|
+
key: {{ .Values.deviceToken.secretKey }}
|
|
39
|
+
- name: SCREEN_ID
|
|
40
|
+
valueFrom:
|
|
41
|
+
fieldRef:
|
|
42
|
+
fieldPath: spec.nodeName
|
|
43
|
+
- name: API_BASE_URL
|
|
44
|
+
value: {{ .Values.api.baseUrl | quote }}
|
|
45
|
+
- name: CAMERA_ENABLED
|
|
46
|
+
value: {{ .Values.sensing.camera.enabled | quote }}
|
|
47
|
+
- name: AUDIO_ENABLED
|
|
48
|
+
value: {{ .Values.sensing.audio.enabled | quote }}
|
|
49
|
+
- name: KIOSK_ENABLED
|
|
50
|
+
value: {{ .Values.sensing.kiosk.enabled | quote }}
|
|
51
|
+
{{- if .Values.sensing.kiosk.enabled }}
|
|
52
|
+
- name: KIOSK_URL
|
|
53
|
+
value: {{ .Values.sensing.kiosk.url | quote }}
|
|
54
|
+
{{- end }}
|
|
55
|
+
- name: EXECUTION_PROVIDER
|
|
56
|
+
value: {{ .Values.sensing.executionProvider | quote }}
|
|
57
|
+
- name: LOG_LEVEL
|
|
58
|
+
value: {{ .Values.sensing.logLevel | quote }}
|
|
59
|
+
- name: STATUS_PORT
|
|
60
|
+
value: {{ .Values.statusServer.port | quote }}
|
|
61
|
+
- name: NODE_NAME
|
|
62
|
+
valueFrom:
|
|
63
|
+
fieldRef:
|
|
64
|
+
fieldPath: spec.nodeName
|
|
65
|
+
resources:
|
|
66
|
+
{{- toYaml .Values.resources | nindent 12 }}
|
|
67
|
+
{{- if .Values.gpu.enabled }}
|
|
68
|
+
# GPU resource requests are added to limits
|
|
69
|
+
{{- end }}
|
|
70
|
+
livenessProbe:
|
|
71
|
+
httpGet:
|
|
72
|
+
path: /health
|
|
73
|
+
port: status
|
|
74
|
+
initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }}
|
|
75
|
+
periodSeconds: {{ .Values.probes.liveness.periodSeconds }}
|
|
76
|
+
timeoutSeconds: 5
|
|
77
|
+
failureThreshold: 3
|
|
78
|
+
readinessProbe:
|
|
79
|
+
httpGet:
|
|
80
|
+
path: /health
|
|
81
|
+
port: status
|
|
82
|
+
initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }}
|
|
83
|
+
periodSeconds: {{ .Values.probes.readiness.periodSeconds }}
|
|
84
|
+
timeoutSeconds: 3
|
|
85
|
+
failureThreshold: 2
|
|
86
|
+
volumeMounts:
|
|
87
|
+
- name: models
|
|
88
|
+
mountPath: /app/models
|
|
89
|
+
- name: data
|
|
90
|
+
mountPath: /app/data
|
|
91
|
+
- name: video-devices
|
|
92
|
+
mountPath: /dev
|
|
93
|
+
readOnly: true
|
|
94
|
+
securityContext:
|
|
95
|
+
{{- toYaml .Values.securityContext | nindent 12 }}
|
|
96
|
+
volumes:
|
|
97
|
+
- name: models
|
|
98
|
+
hostPath:
|
|
99
|
+
path: {{ .Values.models.hostPath }}
|
|
100
|
+
type: DirectoryOrCreate
|
|
101
|
+
- name: data
|
|
102
|
+
hostPath:
|
|
103
|
+
path: {{ .Values.data.hostPath }}
|
|
104
|
+
type: DirectoryOrCreate
|
|
105
|
+
- name: video-devices
|
|
106
|
+
hostPath:
|
|
107
|
+
path: /dev
|
|
108
|
+
type: Directory
|
|
109
|
+
{{- with .Values.nodeSelector }}
|
|
110
|
+
nodeSelector:
|
|
111
|
+
{{- toYaml . | nindent 8 }}
|
|
112
|
+
{{- end }}
|
|
113
|
+
{{- with .Values.tolerations }}
|
|
114
|
+
tolerations:
|
|
115
|
+
{{- toYaml . | nindent 8 }}
|
|
116
|
+
{{- end }}
|
|
117
|
+
{{- with .Values.affinity }}
|
|
118
|
+
affinity:
|
|
119
|
+
{{- toYaml . | nindent 8 }}
|
|
120
|
+
{{- end }}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
apiVersion: v1
|
|
2
|
+
kind: Service
|
|
3
|
+
metadata:
|
|
4
|
+
name: {{ include "trillboards-edge.fullname" . }}
|
|
5
|
+
labels:
|
|
6
|
+
{{- include "trillboards-edge.labels" . | nindent 4 }}
|
|
7
|
+
spec:
|
|
8
|
+
selector:
|
|
9
|
+
{{- include "trillboards-edge.selectorLabels" . | nindent 4 }}
|
|
10
|
+
ports:
|
|
11
|
+
- name: status
|
|
12
|
+
port: {{ .Values.statusServer.port }}
|
|
13
|
+
targetPort: status
|
|
14
|
+
protocol: TCP
|
|
15
|
+
clusterIP: None # Headless — each pod addressable individually
|