elsabro 2.3.0 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +668 -20
- package/bin/install.js +0 -0
- package/flows/development-flow.json +452 -0
- package/flows/quick-flow.json +118 -0
- package/package.json +3 -2
- package/references/SYSTEM_INDEX.md +379 -5
- package/references/agent-marketplace.md +2274 -0
- package/references/agent-protocol.md +1126 -0
- package/references/ai-code-suggestions.md +2413 -0
- package/references/checkpointing.md +595 -0
- package/references/collaboration-patterns.md +851 -0
- package/references/collaborative-sessions.md +1081 -0
- package/references/configuration-management.md +1810 -0
- package/references/cost-tracking.md +1095 -0
- package/references/enterprise-sso.md +2001 -0
- package/references/error-contracts-v2.md +968 -0
- package/references/event-driven.md +1031 -0
- package/references/flow-orchestration.md +940 -0
- package/references/flow-visualization.md +1557 -0
- package/references/ide-integrations.md +3513 -0
- package/references/interrupt-system.md +681 -0
- package/references/kubernetes-deployment.md +3099 -0
- package/references/memory-system.md +683 -0
- package/references/mobile-companion.md +3236 -0
- package/references/multi-llm-providers.md +2494 -0
- package/references/multi-project-memory.md +1182 -0
- package/references/observability.md +793 -0
- package/references/output-schemas.md +858 -0
- package/references/performance-profiler.md +955 -0
- package/references/plugin-system.md +1526 -0
- package/references/prompt-management.md +292 -0
- package/references/sandbox-execution.md +303 -0
- package/references/security-system.md +1253 -0
- package/references/streaming.md +696 -0
- package/references/testing-framework.md +1151 -0
- package/references/time-travel.md +802 -0
- package/references/tool-registry.md +886 -0
- package/references/voice-commands.md +3296 -0
- package/templates/agent-marketplace-config.json +220 -0
- package/templates/agent-protocol-config.json +136 -0
- package/templates/ai-suggestions-config.json +100 -0
- package/templates/checkpoint-state.json +61 -0
- package/templates/collaboration-config.json +157 -0
- package/templates/collaborative-sessions-config.json +153 -0
- package/templates/configuration-config.json +245 -0
- package/templates/cost-tracking-config.json +148 -0
- package/templates/enterprise-sso-config.json +438 -0
- package/templates/events-config.json +148 -0
- package/templates/flow-visualization-config.json +196 -0
- package/templates/ide-integrations-config.json +442 -0
- package/templates/kubernetes-config.json +764 -0
- package/templates/memory-state.json +84 -0
- package/templates/mobile-companion-config.json +600 -0
- package/templates/multi-llm-config.json +544 -0
- package/templates/multi-project-memory-config.json +145 -0
- package/templates/observability-config.json +109 -0
- package/templates/performance-profiler-config.json +125 -0
- package/templates/plugin-config.json +170 -0
- package/templates/prompt-management-config.json +86 -0
- package/templates/sandbox-config.json +185 -0
- package/templates/schemas-config.json +65 -0
- package/templates/security-config.json +120 -0
- package/templates/streaming-config.json +72 -0
- package/templates/testing-config.json +81 -0
- package/templates/timetravel-config.json +62 -0
- package/templates/tool-registry-config.json +109 -0
- package/templates/voice-commands-config.json +658 -0
|
@@ -0,0 +1,793 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: observability
|
|
3
|
+
description: Sistema de observabilidad con OpenTelemetry para ELSABRO
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# ELSABRO Observability System
|
|
8
|
+
|
|
9
|
+
## Visión General
|
|
10
|
+
|
|
11
|
+
El sistema de observabilidad proporciona visibilidad completa de la ejecución de agentes, permitiendo debugging, optimización y monitoreo en tiempo real.
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
┌──────────────────────────────────────────────────────────────────────────┐
|
|
15
|
+
│ OBSERVABILITY STACK │
|
|
16
|
+
├──────────────────────────────────────────────────────────────────────────┤
|
|
17
|
+
│ │
|
|
18
|
+
│ ┌────────────────────────────────────────────────────────────────┐ │
|
|
19
|
+
│ │ COLLECTION LAYER │ │
|
|
20
|
+
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
|
21
|
+
│ │ │ TRACES │ │ METRICS │ │ LOGS │ │ EVENTS │ │ │
|
|
22
|
+
│ │ │ (Spans) │ │(Counters)│ │(Struct.) │ │(Actions) │ │ │
|
|
23
|
+
│ │ └────┬─────┘ └────┬─────┘ └────┬─────┘ └────┬─────┘ │ │
|
|
24
|
+
│ │ │ │ │ │ │ │
|
|
25
|
+
│ └───────┼─────────────┼─────────────┼─────────────┼──────────────┘ │
|
|
26
|
+
│ │ │ │ │ │
|
|
27
|
+
│ └─────────────┴─────────────┴─────────────┘ │
|
|
28
|
+
│ │ │
|
|
29
|
+
│ ▼ │
|
|
30
|
+
│ ┌────────────────────────────────────────────────────────────────┐ │
|
|
31
|
+
│ │ PROCESSING LAYER │ │
|
|
32
|
+
│ │ ┌────────────────┐ ┌────────────────┐ ┌────────────────┐ │ │
|
|
33
|
+
│ │ │ Aggregation │ │ Filtering │ │ Enrichment │ │ │
|
|
34
|
+
│ │ │ (by agent) │ │ (by severity) │ │ (context) │ │ │
|
|
35
|
+
│ │ └────────────────┘ └────────────────┘ └────────────────┘ │ │
|
|
36
|
+
│ └────────────────────────────────────────────────────────────────┘ │
|
|
37
|
+
│ │ │
|
|
38
|
+
│ ▼ │
|
|
39
|
+
│ ┌────────────────────────────────────────────────────────────────┐ │
|
|
40
|
+
│ │ EXPORT LAYER │ │
|
|
41
|
+
│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │
|
|
42
|
+
│ │ │ Console │ │ File │ │ Jaeger │ │ Langtrace│ │ │
|
|
43
|
+
│ │ │ (local) │ │ (JSON) │ │ (remote) │ │ (SaaS) │ │ │
|
|
44
|
+
│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │
|
|
45
|
+
│ └────────────────────────────────────────────────────────────────┘ │
|
|
46
|
+
│ │
|
|
47
|
+
└──────────────────────────────────────────────────────────────────────────┘
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Componentes de Telemetría
|
|
53
|
+
|
|
54
|
+
### 1. Traces (Distributed Tracing)
|
|
55
|
+
|
|
56
|
+
Rastreo de ejecución a través de múltiples agentes y operaciones.
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
Trace: flow_execution_abc123
|
|
60
|
+
│
|
|
61
|
+
├── Span: flow.start
|
|
62
|
+
│ ├── duration: 50ms
|
|
63
|
+
│ └── attributes: { flowId, inputs }
|
|
64
|
+
│
|
|
65
|
+
├── Span: node.analyze (parallel)
|
|
66
|
+
│ ├── Span: agent.Explore
|
|
67
|
+
│ │ ├── duration: 12s
|
|
68
|
+
│ │ └── attributes: { model: haiku, tokens: 2500 }
|
|
69
|
+
│ ├── Span: agent.Plan
|
|
70
|
+
│ │ ├── duration: 15s
|
|
71
|
+
│ │ └── attributes: { model: haiku, tokens: 3200 }
|
|
72
|
+
│ └── Span: agent.code-explorer
|
|
73
|
+
│ ├── duration: 18s
|
|
74
|
+
│ └── attributes: { model: haiku, tokens: 4100 }
|
|
75
|
+
│
|
|
76
|
+
├── Span: node.implement (parallel)
|
|
77
|
+
│ ├── Span: agent.elsabro-executor
|
|
78
|
+
│ │ ├── duration: 45s
|
|
79
|
+
│ │ └── attributes: { model: opus, tokens: 15000 }
|
|
80
|
+
│ └── Span: agent.elsabro-qa
|
|
81
|
+
│ ├── duration: 30s
|
|
82
|
+
│ └── attributes: { model: opus, tokens: 8000 }
|
|
83
|
+
│
|
|
84
|
+
└── Span: flow.complete
|
|
85
|
+
├── duration: 2m 15s
|
|
86
|
+
└── attributes: { success: true, filesModified: 5 }
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### 2. Metrics (Counters & Gauges)
|
|
90
|
+
|
|
91
|
+
Métricas agregadas para monitoreo y alerting.
|
|
92
|
+
|
|
93
|
+
```javascript
|
|
94
|
+
// Counters
|
|
95
|
+
elsabro.agents.invocations_total // Total de invocaciones por agente
|
|
96
|
+
elsabro.agents.errors_total // Total de errores por agente
|
|
97
|
+
elsabro.flows.executions_total // Total de flows ejecutados
|
|
98
|
+
elsabro.checkpoints.saved_total // Total de checkpoints guardados
|
|
99
|
+
|
|
100
|
+
// Gauges
|
|
101
|
+
elsabro.agents.active // Agentes actualmente ejecutando
|
|
102
|
+
elsabro.flows.active // Flows actualmente en progreso
|
|
103
|
+
elsabro.memory.size_bytes // Tamaño de memoria utilizada
|
|
104
|
+
|
|
105
|
+
// Histograms
|
|
106
|
+
elsabro.agents.duration_seconds // Distribución de duración
|
|
107
|
+
elsabro.agents.tokens_used // Distribución de tokens usados
|
|
108
|
+
elsabro.flows.duration_seconds // Duración total de flows
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### 3. Logs (Structured Logging)
|
|
112
|
+
|
|
113
|
+
Logs estructurados con contexto para debugging.
|
|
114
|
+
|
|
115
|
+
```json
|
|
116
|
+
{
|
|
117
|
+
"timestamp": "2024-01-20T15:30:45.123Z",
|
|
118
|
+
"level": "INFO",
|
|
119
|
+
"message": "Agent execution completed",
|
|
120
|
+
"traceId": "abc123",
|
|
121
|
+
"spanId": "def456",
|
|
122
|
+
"agent": "elsabro-executor",
|
|
123
|
+
"model": "opus",
|
|
124
|
+
"duration_ms": 45000,
|
|
125
|
+
"tokens": {
|
|
126
|
+
"input": 5000,
|
|
127
|
+
"output": 10000,
|
|
128
|
+
"total": 15000
|
|
129
|
+
},
|
|
130
|
+
"result": "success",
|
|
131
|
+
"filesModified": ["src/auth/login.ts", "src/auth/login.test.ts"],
|
|
132
|
+
"context": {
|
|
133
|
+
"flowId": "development_flow",
|
|
134
|
+
"nodeId": "implement",
|
|
135
|
+
"sessionId": "ses_xyz"
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### 4. Events (Action Tracking)
|
|
141
|
+
|
|
142
|
+
Eventos discretos para auditoría y analytics.
|
|
143
|
+
|
|
144
|
+
```json
|
|
145
|
+
{
|
|
146
|
+
"type": "agent.started",
|
|
147
|
+
"timestamp": "2024-01-20T15:30:00.000Z",
|
|
148
|
+
"data": {
|
|
149
|
+
"agentId": "elsabro-executor",
|
|
150
|
+
"nodeId": "implement",
|
|
151
|
+
"inputs": { "task": "Implement auth" }
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
{
|
|
156
|
+
"type": "checkpoint.saved",
|
|
157
|
+
"timestamp": "2024-01-20T15:30:45.000Z",
|
|
158
|
+
"data": {
|
|
159
|
+
"checkpointId": "chk_abc123",
|
|
160
|
+
"trigger": "PHASE_COMPLETE",
|
|
161
|
+
"phase": "implementation"
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
{
|
|
166
|
+
"type": "interrupt.triggered",
|
|
167
|
+
"timestamp": "2024-01-20T15:31:00.000Z",
|
|
168
|
+
"data": {
|
|
169
|
+
"interruptId": "int_xyz",
|
|
170
|
+
"type": "approval",
|
|
171
|
+
"reason": "Production deployment"
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## API de Observability
|
|
179
|
+
|
|
180
|
+
### TelemetryManager
|
|
181
|
+
|
|
182
|
+
```javascript
|
|
183
|
+
/**
 * TelemetryManager
 *
 * In-memory telemetry facade for ELSABRO. It bundles four pieces, each built
 * by an `init*` factory and exposed as a public property:
 *   - `tracer`  : span bookkeeping (active and completed spans)
 *   - `meter`   : factories and registries for counters, gauges, histograms
 *   - `logger`  : structured, level-filtered logging (echoed to the console)
 *   - `metrics` : the predefined ELSABRO instruments created on `meter`
 *
 * All data lives in arrays/Maps on the instance until `export()` is called;
 * no method of this class sends anything to an external backend.
 */
class TelemetryManager {
  /**
   * @param {object} [options]
   * @param {string} [options.serviceName='elsabro'] Service name stamped on logs and events.
   * @param {string} [options.environment='development'] Environment stamped on events.
   * @param {string[]} [options.exporters=['console']] Stored in `config`; not read by this class.
   * @param {number} [options.samplingRate=1.0] Stored in `config`; not read by this class.
   * @param {number} [options.metricsInterval=60000] Stored in `config`; not read by this class.
   */
  constructor(options = {}) {
    this.config = {
      serviceName: options.serviceName || 'elsabro',
      environment: options.environment || 'development',
      exporters: options.exporters || ['console'],
      // `??` rather than `||`: an explicit samplingRate of 0 must not be
      // silently replaced by the default.
      samplingRate: options.samplingRate ?? 1.0,
      metricsInterval: options.metricsInterval || 60000,
    };

    this.tracer = this.initTracer();
    this.meter = this.initMeter();
    this.logger = this.initLogger();

    // Predefined metrics (requires `this.meter` to exist already).
    this.metrics = this.initMetrics();
  }

  // ==================== Tracing ====================

  /**
   * Builds the tracer: a plain object that tracks spans in memory.
   *
   * @returns {object} Tracer with `activeSpans` (Map keyed by span id),
   *   `completedSpans` (array, in completion order) and the methods
   *   `startSpan`, `endSpan`, `addEvent`, `setAttribute`.
   */
  initTracer() {
    return {
      activeSpans: new Map(),
      completedSpans: [],
      // Monotonic counter mixed into every generated id so two ids created
      // within the same millisecond can never be equal.
      idSequence: 0,

      /** Generates a unique id of the form `<prefix>_<ms>_<seq>_<random>`. */
      nextId(prefix) {
        this.idSequence += 1;
        const random = Math.random().toString(36).slice(2, 8);
        return `${prefix}_${Date.now()}_${this.idSequence.toString(36)}_${random}`;
      },

      /**
       * Creates a span in the RUNNING state and registers it as active.
       *
       * @param {string} name Span name.
       * @param {object} [options]
       * @param {string} [options.traceId] Trace to attach to; a new id is generated when omitted.
       * @param {string} [options.parentSpanId] Parent span id, `null` when omitted.
       * @param {object} [options.attributes] Initial attributes (shallow-copied).
       * @returns {object} The live span object; `endSpan` mutates it in place.
       */
      startSpan(name, options = {}) {
        const spanId = this.nextId('span');
        const span = {
          spanId,
          name,
          traceId: options.traceId || this.nextId('trace'),
          parentSpanId: options.parentSpanId || null,
          startTime: Date.now(),
          endTime: null,
          duration: null,
          status: 'RUNNING',
          // Copy so later setAttribute() calls never mutate the caller's object.
          attributes: { ...options.attributes },
          events: [],
        };

        this.activeSpans.set(spanId, span);
        return span;
      },

      /**
       * Ends an active span: stamps `endTime`, `duration` (ms) and `status`,
       * then moves it from `activeSpans` to `completedSpans`.
       *
       * @param {string} spanId
       * @param {string} [status='OK']
       * @returns {object|undefined} The finished span, or `undefined` when
       *   `spanId` is not currently active (unknown or already ended).
       */
      endSpan(spanId, status = 'OK') {
        const span = this.activeSpans.get(spanId);
        if (span) {
          span.endTime = Date.now();
          span.duration = span.endTime - span.startTime;
          span.status = status;
          this.activeSpans.delete(spanId);
          this.completedSpans.push(span);
        }
        return span;
      },

      /** Appends a timestamped event to an active span; no-op for inactive ids. */
      addEvent(spanId, name, attributes = {}) {
        const span = this.activeSpans.get(spanId);
        if (span) {
          span.events.push({
            name,
            timestamp: Date.now(),
            attributes,
          });
        }
      },

      /** Sets one attribute on an active span; no-op for inactive ids. */
      setAttribute(spanId, key, value) {
        const span = this.activeSpans.get(spanId);
        if (span) {
          span.attributes[key] = value;
        }
      },
    };
  }

  /**
   * Starts a span named `agent.<agentId>` and records the invocation.
   *
   * Side effects: increments the `agentInvocations` counter (labelled by
   * agent) and the `agentsActive` gauge.
   *
   * @param {string} agentId
   * @param {object} [options] `traceId`, `parentSpanId`, `model`, `nodeId`, `flowId`.
   * @returns {object} The live span.
   */
  startAgentSpan(agentId, options = {}) {
    const span = this.tracer.startSpan(`agent.${agentId}`, {
      traceId: options.traceId,
      parentSpanId: options.parentSpanId,
      attributes: {
        'agent.id': agentId,
        'agent.model': options.model || 'unknown',
        'agent.node_id': options.nodeId,
        'flow.id': options.flowId,
      },
    });

    this.metrics.agentInvocations.add(1, { agent: agentId });
    // Mirror of the flowsActive handling; endAgentSpan() decrements again.
    this.metrics.agentsActive.add(1);

    return span;
  }

  /**
   * Ends an agent span and records its outcome.
   *
   * Writes token/success/error attributes, ends the span with status `OK` or
   * `ERROR`, decrements `agentsActive`, bumps `agentErrors` when
   * `result.error` is truthy, and records duration (seconds) and total
   * tokens in the histograms.
   *
   * @param {object} span Span returned by `startAgentSpan`.
   * @param {object} result `{ success, tokensInput?, tokensOutput?, error? }`.
   * @returns {object|undefined} The completed span, or `undefined` when the
   *   span is not active (already ended or unknown); nothing is recorded in
   *   that case.
   */
  endAgentSpan(span, result) {
    // Guard first so a double end cannot double-count errors, push the
    // active gauge below zero, or crash on a missing completed span.
    if (!this.tracer.activeSpans.has(span.spanId)) {
      this.logger.warn('endAgentSpan called for a span that is not active', {
        spanId: span.spanId,
      });
      return undefined;
    }

    const agentId = span.attributes['agent.id'];
    const tokensInput = result.tokensInput || 0;
    const tokensOutput = result.tokensOutput || 0;

    this.tracer.setAttribute(span.spanId, 'agent.tokens.input', tokensInput);
    this.tracer.setAttribute(span.spanId, 'agent.tokens.output', tokensOutput);
    this.tracer.setAttribute(span.spanId, 'agent.success', result.success);

    if (result.error) {
      this.tracer.setAttribute(span.spanId, 'agent.error', result.error);
      this.metrics.agentErrors.add(1, { agent: agentId });
    }

    const status = result.success ? 'OK' : 'ERROR';
    const completedSpan = this.tracer.endSpan(span.spanId, status);

    this.metrics.agentsActive.add(-1);
    // Span durations are in milliseconds; the histogram is in seconds.
    this.metrics.agentDuration.record(completedSpan.duration / 1000, { agent: agentId });
    this.metrics.agentTokens.record(tokensInput + tokensOutput, { agent: agentId });

    return completedSpan;
  }

  /**
   * Starts a span named `flow.<flowId>` and records the execution.
   *
   * Side effects: increments the `flowsActive` gauge and the
   * `flowExecutions` counter (labelled by flow).
   *
   * @param {string} flowId
   * @param {object} [options] `version`, `sessionId`, and optionally
   *   `traceId` / `parentSpanId` to attach the flow to an existing trace.
   * @returns {object} The live span.
   */
  startFlowSpan(flowId, options = {}) {
    const span = this.tracer.startSpan(`flow.${flowId}`, {
      traceId: options.traceId,
      parentSpanId: options.parentSpanId,
      attributes: {
        'flow.id': flowId,
        'flow.version': options.version,
        'session.id': options.sessionId,
      },
    });

    this.metrics.flowsActive.add(1);
    this.metrics.flowExecutions.add(1, { flow: flowId });

    return span;
  }

  /**
   * Ends a flow span and records its outcome.
   *
   * @param {object} span Span returned by `startFlowSpan`.
   * @param {object} result `{ success, nodesExecuted?, filesModified? }`.
   * @returns {object|undefined} The completed span, or `undefined` when the
   *   span is not active; nothing is recorded in that case.
   */
  endFlowSpan(span, result) {
    // Same guard as endAgentSpan: never decrement the gauge twice.
    if (!this.tracer.activeSpans.has(span.spanId)) {
      this.logger.warn('endFlowSpan called for a span that is not active', {
        spanId: span.spanId,
      });
      return undefined;
    }

    this.tracer.setAttribute(span.spanId, 'flow.success', result.success);
    this.tracer.setAttribute(span.spanId, 'flow.nodes_executed', result.nodesExecuted || 0);
    this.tracer.setAttribute(span.spanId, 'flow.files_modified', result.filesModified?.length || 0);

    const status = result.success ? 'OK' : 'ERROR';
    const completedSpan = this.tracer.endSpan(span.spanId, status);

    this.metrics.flowsActive.add(-1);
    this.metrics.flowDuration.record(completedSpan.duration / 1000, {
      flow: span.attributes['flow.id'],
    });

    return completedSpan;
  }

  // ==================== Metrics ====================

  /**
   * Builds the meter: registries (`counters`, `gauges`, `histograms`, all
   * Maps keyed by instrument name) plus `createCounter`, `createGauge` and
   * `createHistogram` factories.
   *
   * @returns {object} The meter.
   */
  initMeter() {
    // Serialises a label set with its keys sorted, so { a, b } and { b, a }
    // address the same series.
    const labelKey = (labels = {}) => {
      const sortedEntries = Object.entries(labels).sort(([a], [b]) => {
        if (a < b) return -1;
        return a > b ? 1 : 0;
      });
      return JSON.stringify(Object.fromEntries(sortedEntries));
    };

    return {
      counters: new Map(),
      gauges: new Map(),
      histograms: new Map(),

      /**
       * Creates and registers a counter. `value` is the grand total;
       * `labels` maps each serialised label set to its own running total.
       */
      createCounter(name, options = {}) {
        const counter = {
          name,
          description: options.description,
          value: 0,
          labels: new Map(),

          add(value, labels = {}) {
            const key = labelKey(labels);
            const current = this.labels.get(key) || 0;
            this.labels.set(key, current + value);
            this.value += value;
          },
        };
        this.counters.set(name, counter);
        return counter;
      },

      /** Creates and registers a gauge supporting `set(value)` and `add(delta)`. */
      createGauge(name, options = {}) {
        const gauge = {
          name,
          description: options.description,
          value: 0,

          set(value) {
            this.value = value;
          },

          add(delta) {
            this.value += delta;
          },
        };
        this.gauges.set(name, gauge);
        return gauge;
      },

      /**
       * Creates and registers a histogram. Every recorded value is kept in
       * `values` (all series) and in `labels` under its serialised label
       * set. `buckets` is stored as given; `getStats` does not use it.
       */
      createHistogram(name, options = {}) {
        const histogram = {
          name,
          description: options.description,
          buckets: options.buckets || [0.1, 0.5, 1, 2, 5, 10, 30, 60],
          values: [],
          labels: new Map(),

          record(value, labels = {}) {
            const key = labelKey(labels);
            if (!this.labels.has(key)) {
              this.labels.set(key, []);
            }
            this.labels.get(key).push(value);
            this.values.push(value);
          },

          /**
           * Summary statistics.
           *
           * @param {object} [labels] Omitted or empty: aggregate over every
           *   recorded value. Non-empty: only the series with exactly that
           *   label set.
           * @returns {object|null} `{ count, sum, min, max, avg, p50, p90,
           *   p99 }`, or `null` when the selection holds no values.
           */
          getStats(labels) {
            const hasFilter = labels != null && Object.keys(labels).length > 0;
            // An unknown label set yields "no data" rather than silently
            // falling back to the all-series aggregate.
            const values = hasFilter ? (this.labels.get(labelKey(labels)) ?? []) : this.values;

            if (values.length === 0) return null;

            const sorted = [...values].sort((a, b) => a - b);
            const sum = values.reduce((acc, v) => acc + v, 0);
            // floor(n * q) is always < n for q < 1, so the index is in range.
            const percentile = (q) => sorted[Math.floor(values.length * q)];

            return {
              count: values.length,
              sum,
              min: sorted[0],
              max: sorted[sorted.length - 1],
              avg: sum / values.length,
              p50: percentile(0.5),
              p90: percentile(0.9),
              p99: percentile(0.99),
            };
          },
        };
        this.histograms.set(name, histogram);
        return histogram;
      },
    };
  }

  /**
   * Creates the predefined ELSABRO instruments on `this.meter`.
   *
   * @returns {object} Instruments keyed by short name (`agentInvocations`,
   *   `agentErrors`, `flowExecutions`, `checkpointsSaved`,
   *   `interruptsTriggered`, `agentsActive`, `flowsActive`, `agentDuration`,
   *   `agentTokens`, `flowDuration`).
   */
  initMetrics() {
    return {
      // Counters
      agentInvocations: this.meter.createCounter('elsabro.agents.invocations_total', {
        description: 'Total agent invocations',
      }),
      agentErrors: this.meter.createCounter('elsabro.agents.errors_total', {
        description: 'Total agent errors',
      }),
      flowExecutions: this.meter.createCounter('elsabro.flows.executions_total', {
        description: 'Total flow executions',
      }),
      checkpointsSaved: this.meter.createCounter('elsabro.checkpoints.saved_total', {
        description: 'Total checkpoints saved',
      }),
      interruptsTriggered: this.meter.createCounter('elsabro.interrupts.triggered_total', {
        description: 'Total interrupts triggered',
      }),

      // Gauges
      agentsActive: this.meter.createGauge('elsabro.agents.active', {
        description: 'Currently active agents',
      }),
      flowsActive: this.meter.createGauge('elsabro.flows.active', {
        description: 'Currently active flows',
      }),

      // Histograms
      agentDuration: this.meter.createHistogram('elsabro.agents.duration_seconds', {
        description: 'Agent execution duration',
        buckets: [1, 5, 10, 30, 60, 120, 300],
      }),
      agentTokens: this.meter.createHistogram('elsabro.agents.tokens_used', {
        description: 'Tokens used per agent',
        buckets: [100, 500, 1000, 5000, 10000, 50000],
      }),
      flowDuration: this.meter.createHistogram('elsabro.flows.duration_seconds', {
        description: 'Flow execution duration',
        buckets: [10, 30, 60, 120, 300, 600, 1800],
      }),
    };
  }

  // ==================== Logging ====================

  /**
   * Builds the structured logger.
   *
   * Entries below `logger.level` (default `INFO`) are dropped. Accepted
   * entries are appended to `logger.logs` and echoed with `console.log`.
   * Context keys are spread into the entry after timestamp/level/message,
   * so a context key with one of those names overrides it.
   *
   * @returns {object} Logger with `log`, `debug`, `info`, `warn`, `error`.
   */
  initLogger() {
    // Use the configured service name so log entries agree with emit().
    const serviceName = this.config.serviceName;
    const colors = {
      DEBUG: '\x1b[36m',
      INFO: '\x1b[32m',
      WARN: '\x1b[33m',
      ERROR: '\x1b[31m',
    };

    return {
      logs: [],
      level: 'INFO',
      levels: { DEBUG: 0, INFO: 1, WARN: 2, ERROR: 3 },

      /**
       * @returns {object|undefined} The stored entry, or `undefined` when
       *   the entry was filtered out by level.
       */
      log(level, message, context = {}) {
        if (this.levels[level] < this.levels[this.level]) return;

        const entry = {
          timestamp: new Date().toISOString(),
          level,
          message,
          ...context,
          service: serviceName,
        };

        this.logs.push(entry);

        const color = colors[level] || '';
        console.log(`${color}[${level}]\x1b[0m ${message}`, context);

        return entry;
      },

      debug(message, context) { return this.log('DEBUG', message, context); },
      info(message, context) { return this.log('INFO', message, context); },
      warn(message, context) { return this.log('WARN', message, context); },
      error(message, context) { return this.log('ERROR', message, context); },
    };
  }

  // ==================== Events ====================

  /**
   * Builds an event record and logs it at INFO level. The event itself is
   * only returned, not stored on the instance.
   *
   * @param {string} eventType e.g. `agent.started`.
   * @param {object} [data] Event payload; also used as the log context.
   * @returns {object} `{ type, timestamp, data, service, environment }`.
   */
  emit(eventType, data = {}) {
    const event = {
      type: eventType,
      timestamp: new Date().toISOString(),
      data,
      service: this.config.serviceName,
      environment: this.config.environment,
    };

    this.logger.info(`Event: ${eventType}`, data);

    return event;
  }

  // ==================== Dashboard ====================

  /**
   * Produces a snapshot of the headline numbers.
   *
   * @returns {object} Summary with `agents`, `flows`, `checkpoints`,
   *   `interrupts` and `traces` sections. Durations are formatted strings in
   *   seconds (e.g. `"1.50s"`); a `duration` section is `null` while its
   *   histogram is empty.
   */
  getSummary() {
    const agentStats = this.metrics.agentDuration.getStats();
    const flowStats = this.metrics.flowDuration.getStats();
    const seconds = (value) => `${value.toFixed(2)}s`;

    const invocations = this.metrics.agentInvocations.value;
    const errors = this.metrics.agentErrors.value;

    return {
      timestamp: new Date().toISOString(),
      agents: {
        totalInvocations: invocations,
        totalErrors: errors,
        currentlyActive: this.metrics.agentsActive.value,
        // Guard against division by zero before the first invocation.
        errorRate: invocations > 0
          ? (errors / invocations * 100).toFixed(2) + '%'
          : '0%',
        duration: agentStats ? {
          avg: seconds(agentStats.avg),
          p50: seconds(agentStats.p50),
          p90: seconds(agentStats.p90),
          p99: seconds(agentStats.p99),
        } : null,
      },
      flows: {
        totalExecutions: this.metrics.flowExecutions.value,
        currentlyActive: this.metrics.flowsActive.value,
        duration: flowStats ? {
          avg: seconds(flowStats.avg),
          p50: seconds(flowStats.p50),
          p90: seconds(flowStats.p90),
        } : null,
      },
      checkpoints: {
        total: this.metrics.checkpointsSaved.value,
      },
      interrupts: {
        total: this.metrics.interruptsTriggered.value,
      },
      traces: {
        active: this.tracer.activeSpans.size,
        completed: this.tracer.completedSpans.length,
      },
    };
  }

  /**
   * Renders the summary as a fixed-width box-drawing dashboard.
   *
   * Every row is padded (or truncated) to the same inner width so the right
   * border stays aligned whatever the values are.
   *
   * @returns {string} Multi-line string with a leading and trailing newline.
   */
  renderDashboard() {
    const summary = this.getSummary();
    const WIDTH = 74;

    const border = (left, right) => `${left}${'═'.repeat(WIDTH)}${right}`;
    const row = (text = '') => {
      const content = `  ${text}`.slice(0, WIDTH).padEnd(WIDTH);
      return `║${content}║`;
    };

    const title = 'ELSABRO OBSERVABILITY DASHBOARD';
    const titleIndent = ' '.repeat(Math.floor((WIDTH - title.length) / 2));
    const titleRow = `║${`${titleIndent}${title}`.padEnd(WIDTH)}║`;

    const agentDuration = summary.agents.duration
      ? `${summary.agents.duration.p50} / ${summary.agents.duration.p90}`
      : 'N/A';
    const flowDuration = summary.flows.duration ? summary.flows.duration.avg : 'N/A';
    const cell = (value, width) => String(value).padEnd(width);

    const lines = [
      border('╔', '╗'),
      titleRow,
      border('╠', '╣'),
      row(summary.timestamp),
      border('╠', '╣'),
      row('AGENTS'),
      row(`├─ Total Invocations: ${cell(summary.agents.totalInvocations, 10)} Active: ${cell(summary.agents.currentlyActive, 5)}`),
      row(`├─ Total Errors: ${cell(summary.agents.totalErrors, 10)} Error Rate: ${cell(summary.agents.errorRate, 8)}`),
      row(`└─ Duration (p50/p90): ${agentDuration}`),
      row(),
      row('FLOWS'),
      row(`├─ Total Executions: ${cell(summary.flows.totalExecutions, 10)} Active: ${cell(summary.flows.currentlyActive, 5)}`),
      row(`└─ Duration (avg): ${flowDuration}`),
      row(),
      row('SYSTEM'),
      row(`├─ Checkpoints Saved: ${cell(summary.checkpoints.total, 10)}`),
      row(`├─ Interrupts: ${cell(summary.interrupts.total, 10)}`),
      row(`└─ Active Traces: ${cell(summary.traces.active, 10)}`),
      border('╚', '╝'),
    ];

    return `\n${lines.join('\n')}\n`;
  }

  // ==================== Export ====================

  /**
   * Collects summary, completed spans, logs and metric values.
   *
   * @param {string} [format='json'] `'json'` returns a pretty-printed JSON
   *   string; any other value returns the raw data object.
   * @returns {string|object}
   */
  export(format = 'json') {
    const data = {
      summary: this.getSummary(),
      traces: this.tracer.completedSpans,
      logs: this.logger.logs,
      metrics: {
        counters: Object.fromEntries(
          [...this.meter.counters].map(([name, counter]) => [
            name,
            { value: counter.value, labels: Object.fromEntries(counter.labels) },
          ])
        ),
        gauges: Object.fromEntries(
          [...this.meter.gauges].map(([name, gauge]) => [name, gauge.value])
        ),
        histograms: Object.fromEntries(
          [...this.meter.histograms].map(([name, histogram]) => [name, histogram.getStats()])
        ),
      },
    };

    if (format === 'json') {
      return JSON.stringify(data, null, 2);
    }

    return data;
  }
}
|
|
661
|
+
|
|
662
|
+
// Module-level holder for the shared TelemetryManager; stays null until
// getTelemetry() is called for the first time.
let telemetryInstance = null;

/**
 * Returns the shared TelemetryManager, constructing it on first use.
 *
 * `options` are passed to the constructor only by the call that creates the
 * instance; on every later call they are ignored and the existing instance
 * is returned unchanged.
 *
 * @param {object} [options] Constructor options for the first call.
 * @returns {TelemetryManager} The shared instance.
 */
function getTelemetry(options) {
  if (telemetryInstance) {
    return telemetryInstance;
  }

  telemetryInstance = new TelemetryManager(options);
  return telemetryInstance;
}
|
|
671
|
+
|
|
672
|
+
module.exports = { TelemetryManager, getTelemetry };
|
|
673
|
+
```
|
|
674
|
+
|
|
675
|
+
---
|
|
676
|
+
|
|
677
|
+
## Integración con Agentes
|
|
678
|
+
|
|
679
|
+
### Instrumentación Automática
|
|
680
|
+
|
|
681
|
+
```javascript
|
|
682
|
+
/**
 * Runs an agent wrapped in telemetry: opens an agent span, logs start and
 * completion/failure, and closes the span with the outcome.
 *
 * Only the agent call itself is inside the `try`. If closing the span or
 * logging fails after a successful run, that error propagates as-is instead
 * of being recorded as an agent failure and ending the span a second time.
 *
 * @param {object} agent Agent descriptor; `id` and `model` are read here.
 * @param {*} inputs Passed through to `executeAgent` untouched.
 * @param {object} [context] Optional `traceId`, `parentSpanId`, `nodeId`, `flowId`.
 * @returns {Promise<*>} Whatever `executeAgent` resolves to.
 * @throws Rethrows, unchanged, whatever `executeAgent` rejects with.
 */
async function instrumentedAgentExecution(agent, inputs, context = {}) {
  const telemetry = getTelemetry();

  const span = telemetry.startAgentSpan(agent.id, {
    traceId: context.traceId,
    parentSpanId: context.parentSpanId,
    model: agent.model,
    nodeId: context.nodeId,
    flowId: context.flowId,
  });

  telemetry.logger.info(`Agent started: ${agent.id}`, {
    model: agent.model,
    nodeId: context.nodeId,
  });

  let result;
  try {
    result = await executeAgent(agent, inputs);
  } catch (error) {
    // Non-Error throwables (strings, plain objects) have no `.message`;
    // fall back to their string form so the failure is still counted.
    const errorMessage = error?.message ?? String(error);

    telemetry.endAgentSpan(span, {
      success: false,
      error: errorMessage,
    });

    telemetry.logger.error(`Agent failed: ${agent.id}`, {
      error: errorMessage,
      duration_ms: Date.now() - span.startTime,
    });

    throw error;
  }

  // The agent may legitimately return nothing; treat usage as optional.
  const usage = result?.usage;

  telemetry.endAgentSpan(span, {
    success: true,
    tokensInput: usage?.input_tokens,
    tokensOutput: usage?.output_tokens,
  });

  telemetry.logger.info(`Agent completed: ${agent.id}`, {
    duration_ms: span.duration,
    tokens: usage?.total_tokens,
  });

  return result;
}
|
|
733
|
+
```
|
|
734
|
+
|
|
735
|
+
---
|
|
736
|
+
|
|
737
|
+
## Comandos de Usuario
|
|
738
|
+
|
|
739
|
+
### /elsabro:telemetry
|
|
740
|
+
|
|
741
|
+
```bash
|
|
742
|
+
/elsabro:telemetry dashboard # Mostrar dashboard en tiempo real
|
|
743
|
+
/elsabro:telemetry summary # Resumen de métricas
|
|
744
|
+
/elsabro:telemetry traces # Ver traces recientes
|
|
745
|
+
/elsabro:telemetry export # Exportar telemetría a archivo
|
|
746
|
+
/elsabro:telemetry reset # Resetear métricas
|
|
747
|
+
```
|
|
748
|
+
|
|
749
|
+
---
|
|
750
|
+
|
|
751
|
+
## Configuración
|
|
752
|
+
|
|
753
|
+
### .planning/observability-config.json
|
|
754
|
+
|
|
755
|
+
```json
|
|
756
|
+
{
|
|
757
|
+
"observability": {
|
|
758
|
+
"enabled": true,
|
|
759
|
+
"serviceName": "elsabro",
|
|
760
|
+
"environment": "development",
|
|
761
|
+
"tracing": {
|
|
762
|
+
"enabled": true,
|
|
763
|
+
"samplingRate": 1.0,
|
|
764
|
+
"exporters": ["console", "file"]
|
|
765
|
+
},
|
|
766
|
+
"metrics": {
|
|
767
|
+
"enabled": true,
|
|
768
|
+
"interval": 60000,
|
|
769
|
+
"exporters": ["console"]
|
|
770
|
+
},
|
|
771
|
+
"logging": {
|
|
772
|
+
"level": "INFO",
|
|
773
|
+
"structured": true,
|
|
774
|
+
"exporters": ["console", "file"]
|
|
775
|
+
},
|
|
776
|
+
"exporters": {
|
|
777
|
+
"file": {
|
|
778
|
+
"path": ".planning/telemetry/",
|
|
779
|
+
"format": "json",
|
|
780
|
+
"rotation": "daily"
|
|
781
|
+
},
|
|
782
|
+
"jaeger": {
|
|
783
|
+
"enabled": false,
|
|
784
|
+
"endpoint": "http://localhost:14268/api/traces"
|
|
785
|
+
},
|
|
786
|
+
"langtrace": {
|
|
787
|
+
"enabled": false,
|
|
788
|
+
"apiKey": ""
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
```
|