agentic-qe 1.9.3 → 1.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/README.md +30 -5
- package/config/.env.otel.example +25 -0
- package/config/OTEL-QUICK-REFERENCE.md +137 -0
- package/config/README-OTEL.md +222 -0
- package/config/alerting-rules.yml +518 -0
- package/config/docker-compose.otel.yml +187 -0
- package/config/grafana/dashboards/agentic-qe-overview.json +286 -0
- package/config/grafana/provisioning/dashboards/dashboards.yml +19 -0
- package/config/grafana/provisioning/datasources/datasources.yml +53 -0
- package/config/otel-collector-config.yaml.example +145 -0
- package/config/prometheus.yml.example +106 -0
- package/dist/alerting/AlertManager.d.ts +120 -0
- package/dist/alerting/AlertManager.d.ts.map +1 -0
- package/dist/alerting/AlertManager.js +345 -0
- package/dist/alerting/AlertManager.js.map +1 -0
- package/dist/alerting/FeedbackRouter.d.ts +98 -0
- package/dist/alerting/FeedbackRouter.d.ts.map +1 -0
- package/dist/alerting/FeedbackRouter.js +331 -0
- package/dist/alerting/FeedbackRouter.js.map +1 -0
- package/dist/alerting/StrategyApplicator.d.ts +120 -0
- package/dist/alerting/StrategyApplicator.d.ts.map +1 -0
- package/dist/alerting/StrategyApplicator.js +299 -0
- package/dist/alerting/StrategyApplicator.js.map +1 -0
- package/dist/alerting/index.d.ts +68 -0
- package/dist/alerting/index.d.ts.map +1 -0
- package/dist/alerting/index.js +112 -0
- package/dist/alerting/index.js.map +1 -0
- package/dist/alerting/types.d.ts +118 -0
- package/dist/alerting/types.d.ts.map +1 -0
- package/dist/alerting/types.js +11 -0
- package/dist/alerting/types.js.map +1 -0
- package/dist/cli/init/claude-config.d.ts.map +1 -1
- package/dist/cli/init/claude-config.js +12 -7
- package/dist/cli/init/claude-config.js.map +1 -1
- package/dist/core/memory/IPatternStore.d.ts +209 -0
- package/dist/core/memory/IPatternStore.d.ts.map +1 -0
- package/dist/core/memory/IPatternStore.js +15 -0
- package/dist/core/memory/IPatternStore.js.map +1 -0
- package/dist/core/memory/MigrationTools.d.ts +192 -0
- package/dist/core/memory/MigrationTools.d.ts.map +1 -0
- package/dist/core/memory/MigrationTools.js +615 -0
- package/dist/core/memory/MigrationTools.js.map +1 -0
- package/dist/core/memory/NeuralEnhancement.d.ts +154 -0
- package/dist/core/memory/NeuralEnhancement.d.ts.map +1 -0
- package/dist/core/memory/NeuralEnhancement.js +598 -0
- package/dist/core/memory/NeuralEnhancement.js.map +1 -0
- package/dist/core/memory/PatternStoreFactory.d.ts +143 -0
- package/dist/core/memory/PatternStoreFactory.d.ts.map +1 -0
- package/dist/core/memory/PatternStoreFactory.js +370 -0
- package/dist/core/memory/PatternStoreFactory.js.map +1 -0
- package/dist/core/memory/RealAgentDBAdapter.d.ts +1 -0
- package/dist/core/memory/RealAgentDBAdapter.d.ts.map +1 -1
- package/dist/core/memory/RealAgentDBAdapter.js +28 -20
- package/dist/core/memory/RealAgentDBAdapter.js.map +1 -1
- package/dist/core/memory/RuVectorPatternStore.d.ts +198 -0
- package/dist/core/memory/RuVectorPatternStore.d.ts.map +1 -0
- package/dist/core/memory/RuVectorPatternStore.js +605 -0
- package/dist/core/memory/RuVectorPatternStore.js.map +1 -0
- package/dist/core/memory/SelfHealingMonitor.d.ts +186 -0
- package/dist/core/memory/SelfHealingMonitor.d.ts.map +1 -0
- package/dist/core/memory/SelfHealingMonitor.js +451 -0
- package/dist/core/memory/SelfHealingMonitor.js.map +1 -0
- package/dist/core/memory/SwarmMemoryManager.d.ts +62 -0
- package/dist/core/memory/SwarmMemoryManager.d.ts.map +1 -1
- package/dist/core/memory/SwarmMemoryManager.js +97 -0
- package/dist/core/memory/SwarmMemoryManager.js.map +1 -1
- package/dist/core/memory/index.d.ts +11 -0
- package/dist/core/memory/index.d.ts.map +1 -1
- package/dist/core/memory/index.js +36 -1
- package/dist/core/memory/index.js.map +1 -1
- package/dist/reasoning/RuVectorReasoningAdapter.d.ts +232 -0
- package/dist/reasoning/RuVectorReasoningAdapter.d.ts.map +1 -0
- package/dist/reasoning/RuVectorReasoningAdapter.js +585 -0
- package/dist/reasoning/RuVectorReasoningAdapter.js.map +1 -0
- package/dist/reasoning/index.d.ts +2 -0
- package/dist/reasoning/index.d.ts.map +1 -1
- package/dist/reasoning/index.js +6 -1
- package/dist/reasoning/index.js.map +1 -1
- package/dist/reporting/ResultAggregator.d.ts +107 -0
- package/dist/reporting/ResultAggregator.d.ts.map +1 -0
- package/dist/reporting/ResultAggregator.js +435 -0
- package/dist/reporting/ResultAggregator.js.map +1 -0
- package/dist/reporting/index.d.ts +48 -0
- package/dist/reporting/index.d.ts.map +1 -0
- package/dist/reporting/index.js +154 -0
- package/dist/reporting/index.js.map +1 -0
- package/dist/reporting/reporters/ControlLoopReporter.d.ts +128 -0
- package/dist/reporting/reporters/ControlLoopReporter.d.ts.map +1 -0
- package/dist/reporting/reporters/ControlLoopReporter.js +417 -0
- package/dist/reporting/reporters/ControlLoopReporter.js.map +1 -0
- package/dist/reporting/reporters/HumanReadableReporter.d.ts +140 -0
- package/dist/reporting/reporters/HumanReadableReporter.d.ts.map +1 -0
- package/dist/reporting/reporters/HumanReadableReporter.js +524 -0
- package/dist/reporting/reporters/HumanReadableReporter.js.map +1 -0
- package/dist/reporting/reporters/JSONReporter.d.ts +193 -0
- package/dist/reporting/reporters/JSONReporter.d.ts.map +1 -0
- package/dist/reporting/reporters/JSONReporter.js +324 -0
- package/dist/reporting/reporters/JSONReporter.js.map +1 -0
- package/dist/reporting/reporters/index.d.ts +14 -0
- package/dist/reporting/reporters/index.d.ts.map +1 -0
- package/dist/reporting/reporters/index.js +19 -0
- package/dist/reporting/reporters/index.js.map +1 -0
- package/dist/reporting/types.d.ts +427 -0
- package/dist/reporting/types.d.ts.map +1 -0
- package/dist/reporting/types.js +12 -0
- package/dist/reporting/types.js.map +1 -0
- package/package.json +9 -1
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
{
|
|
2
|
+
"annotations": {
|
|
3
|
+
"list": [
|
|
4
|
+
{
|
|
5
|
+
"builtIn": 1,
|
|
6
|
+
"datasource": {
|
|
7
|
+
"type": "grafana",
|
|
8
|
+
"uid": "-- Grafana --"
|
|
9
|
+
},
|
|
10
|
+
"enable": true,
|
|
11
|
+
"hide": true,
|
|
12
|
+
"iconColor": "rgba(0, 211, 255, 1)",
|
|
13
|
+
"name": "Annotations & Alerts",
|
|
14
|
+
"type": "dashboard"
|
|
15
|
+
}
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
"editable": true,
|
|
19
|
+
"fiscalYearStartMonth": 0,
|
|
20
|
+
"graphTooltip": 0,
|
|
21
|
+
"id": null,
|
|
22
|
+
"links": [],
|
|
23
|
+
"liveNow": false,
|
|
24
|
+
"panels": [
|
|
25
|
+
{
|
|
26
|
+
"datasource": {
|
|
27
|
+
"type": "prometheus",
|
|
28
|
+
"uid": "prometheus"
|
|
29
|
+
},
|
|
30
|
+
"fieldConfig": {
|
|
31
|
+
"defaults": {
|
|
32
|
+
"color": {
|
|
33
|
+
"mode": "palette-classic"
|
|
34
|
+
},
|
|
35
|
+
"custom": {
|
|
36
|
+
"axisCenteredZero": false,
|
|
37
|
+
"axisColorMode": "text",
|
|
38
|
+
"axisLabel": "",
|
|
39
|
+
"axisPlacement": "auto",
|
|
40
|
+
"barAlignment": 0,
|
|
41
|
+
"drawStyle": "line",
|
|
42
|
+
"fillOpacity": 10,
|
|
43
|
+
"gradientMode": "none",
|
|
44
|
+
"hideFrom": {
|
|
45
|
+
"tooltip": false,
|
|
46
|
+
"viz": false,
|
|
47
|
+
"legend": false
|
|
48
|
+
},
|
|
49
|
+
"lineInterpolation": "linear",
|
|
50
|
+
"lineWidth": 1,
|
|
51
|
+
"pointSize": 5,
|
|
52
|
+
"scaleDistribution": {
|
|
53
|
+
"type": "linear"
|
|
54
|
+
},
|
|
55
|
+
"showPoints": "never",
|
|
56
|
+
"spanNulls": false,
|
|
57
|
+
"stacking": {
|
|
58
|
+
"group": "A",
|
|
59
|
+
"mode": "none"
|
|
60
|
+
},
|
|
61
|
+
"thresholdsStyle": {
|
|
62
|
+
"mode": "off"
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"mappings": [],
|
|
66
|
+
"thresholds": {
|
|
67
|
+
"mode": "absolute",
|
|
68
|
+
"steps": [
|
|
69
|
+
{
|
|
70
|
+
"color": "green",
|
|
71
|
+
"value": null
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"color": "red",
|
|
75
|
+
"value": 80
|
|
76
|
+
}
|
|
77
|
+
]
|
|
78
|
+
},
|
|
79
|
+
"unit": "short"
|
|
80
|
+
},
|
|
81
|
+
"overrides": []
|
|
82
|
+
},
|
|
83
|
+
"gridPos": {
|
|
84
|
+
"h": 8,
|
|
85
|
+
"w": 12,
|
|
86
|
+
"x": 0,
|
|
87
|
+
"y": 0
|
|
88
|
+
},
|
|
89
|
+
"id": 1,
|
|
90
|
+
"options": {
|
|
91
|
+
"legend": {
|
|
92
|
+
"calcs": [],
|
|
93
|
+
"displayMode": "list",
|
|
94
|
+
"placement": "bottom",
|
|
95
|
+
"showLegend": true
|
|
96
|
+
},
|
|
97
|
+
"tooltip": {
|
|
98
|
+
"mode": "single",
|
|
99
|
+
"sort": "none"
|
|
100
|
+
}
|
|
101
|
+
},
|
|
102
|
+
"targets": [
|
|
103
|
+
{
|
|
104
|
+
"datasource": {
|
|
105
|
+
"type": "prometheus",
|
|
106
|
+
"uid": "prometheus"
|
|
107
|
+
},
|
|
108
|
+
"expr": "rate(aqe_requests_total[5m])",
|
|
109
|
+
"refId": "A",
|
|
110
|
+
"legendFormat": "Request Rate"
|
|
111
|
+
}
|
|
112
|
+
],
|
|
113
|
+
"title": "Request Rate",
|
|
114
|
+
"type": "timeseries"
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
"datasource": {
|
|
118
|
+
"type": "prometheus",
|
|
119
|
+
"uid": "prometheus"
|
|
120
|
+
},
|
|
121
|
+
"fieldConfig": {
|
|
122
|
+
"defaults": {
|
|
123
|
+
"color": {
|
|
124
|
+
"mode": "thresholds"
|
|
125
|
+
},
|
|
126
|
+
"mappings": [],
|
|
127
|
+
"thresholds": {
|
|
128
|
+
"mode": "absolute",
|
|
129
|
+
"steps": [
|
|
130
|
+
{
|
|
131
|
+
"color": "green",
|
|
132
|
+
"value": null
|
|
133
|
+
},
|
|
134
|
+
{
|
|
135
|
+
"color": "yellow",
|
|
136
|
+
"value": 500
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
"color": "red",
|
|
140
|
+
"value": 1000
|
|
141
|
+
}
|
|
142
|
+
]
|
|
143
|
+
},
|
|
144
|
+
"unit": "ms"
|
|
145
|
+
},
|
|
146
|
+
"overrides": []
|
|
147
|
+
},
|
|
148
|
+
"gridPos": {
|
|
149
|
+
"h": 8,
|
|
150
|
+
"w": 12,
|
|
151
|
+
"x": 12,
|
|
152
|
+
"y": 0
|
|
153
|
+
},
|
|
154
|
+
"id": 2,
|
|
155
|
+
"options": {
|
|
156
|
+
"orientation": "auto",
|
|
157
|
+
"reduceOptions": {
|
|
158
|
+
"values": false,
|
|
159
|
+
"calcs": [
|
|
160
|
+
"lastNotNull"
|
|
161
|
+
],
|
|
162
|
+
"fields": ""
|
|
163
|
+
},
|
|
164
|
+
"showThresholdLabels": false,
|
|
165
|
+
"showThresholdMarkers": true
|
|
166
|
+
},
|
|
167
|
+
"pluginVersion": "10.0.0",
|
|
168
|
+
"targets": [
|
|
169
|
+
{
  "datasource": {
    "type": "prometheus",
    "uid": "prometheus"
  },
  "expr": "histogram_quantile(0.95, sum by (le) (rate(aqe_request_duration_bucket[5m])))",
  "refId": "A",
  "legendFormat": "P95 Latency"
}
|
|
178
|
+
],
|
|
179
|
+
"title": "P95 Response Time",
|
|
180
|
+
"type": "gauge"
|
|
181
|
+
},
|
|
182
|
+
{
|
|
183
|
+
"datasource": {
|
|
184
|
+
"type": "prometheus",
|
|
185
|
+
"uid": "prometheus"
|
|
186
|
+
},
|
|
187
|
+
"fieldConfig": {
|
|
188
|
+
"defaults": {
|
|
189
|
+
"color": {
|
|
190
|
+
"mode": "palette-classic"
|
|
191
|
+
},
|
|
192
|
+
"custom": {
|
|
193
|
+
"axisCenteredZero": false,
|
|
194
|
+
"axisColorMode": "text",
|
|
195
|
+
"axisLabel": "",
|
|
196
|
+
"axisPlacement": "auto",
|
|
197
|
+
"barAlignment": 0,
|
|
198
|
+
"drawStyle": "line",
|
|
199
|
+
"fillOpacity": 10,
|
|
200
|
+
"gradientMode": "none",
|
|
201
|
+
"hideFrom": {
|
|
202
|
+
"tooltip": false,
|
|
203
|
+
"viz": false,
|
|
204
|
+
"legend": false
|
|
205
|
+
},
|
|
206
|
+
"lineInterpolation": "linear",
|
|
207
|
+
"lineWidth": 1,
|
|
208
|
+
"pointSize": 5,
|
|
209
|
+
"scaleDistribution": {
|
|
210
|
+
"type": "linear"
|
|
211
|
+
},
|
|
212
|
+
"showPoints": "never",
|
|
213
|
+
"spanNulls": false,
|
|
214
|
+
"stacking": {
|
|
215
|
+
"group": "A",
|
|
216
|
+
"mode": "none"
|
|
217
|
+
},
|
|
218
|
+
"thresholdsStyle": {
|
|
219
|
+
"mode": "off"
|
|
220
|
+
}
|
|
221
|
+
},
|
|
222
|
+
"mappings": [],
|
|
223
|
+
"thresholds": {
|
|
224
|
+
"mode": "absolute",
|
|
225
|
+
"steps": [
|
|
226
|
+
{
|
|
227
|
+
"color": "green",
|
|
228
|
+
"value": null
|
|
229
|
+
}
|
|
230
|
+
]
|
|
231
|
+
},
|
|
232
|
+
"unit": "short"
|
|
233
|
+
},
|
|
234
|
+
"overrides": []
|
|
235
|
+
},
|
|
236
|
+
"gridPos": {
|
|
237
|
+
"h": 8,
|
|
238
|
+
"w": 24,
|
|
239
|
+
"x": 0,
|
|
240
|
+
"y": 8
|
|
241
|
+
},
|
|
242
|
+
"id": 3,
|
|
243
|
+
"options": {
|
|
244
|
+
"legend": {
|
|
245
|
+
"calcs": [],
|
|
246
|
+
"displayMode": "list",
|
|
247
|
+
"placement": "bottom",
|
|
248
|
+
"showLegend": true
|
|
249
|
+
},
|
|
250
|
+
"tooltip": {
|
|
251
|
+
"mode": "single",
|
|
252
|
+
"sort": "none"
|
|
253
|
+
}
|
|
254
|
+
},
|
|
255
|
+
"targets": [
|
|
256
|
+
{
|
|
257
|
+
"datasource": {
|
|
258
|
+
"type": "prometheus",
|
|
259
|
+
"uid": "prometheus"
|
|
260
|
+
},
|
|
261
|
+
"expr": "sum by (agent_type) (rate(aqe_agent_tasks_total[5m]))",
|
|
262
|
+
"refId": "A",
|
|
263
|
+
"legendFormat": "{{agent_type}}"
|
|
264
|
+
}
|
|
265
|
+
],
|
|
266
|
+
"title": "Agent Activity by Type",
|
|
267
|
+
"type": "timeseries"
|
|
268
|
+
}
|
|
269
|
+
],
|
|
270
|
+
"schemaVersion": 38,
|
|
271
|
+
"style": "dark",
|
|
272
|
+
"tags": ["agentic-qe", "overview"],
|
|
273
|
+
"templating": {
|
|
274
|
+
"list": []
|
|
275
|
+
},
|
|
276
|
+
"time": {
|
|
277
|
+
"from": "now-1h",
|
|
278
|
+
"to": "now"
|
|
279
|
+
},
|
|
280
|
+
"timepicker": {},
|
|
281
|
+
"timezone": "",
|
|
282
|
+
"title": "Agentic QE Fleet - Overview",
|
|
283
|
+
"uid": "aqe-overview",
|
|
284
|
+
"version": 1,
|
|
285
|
+
"weekStart": ""
|
|
286
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Grafana Dashboard Provisioning
|
|
2
|
+
# Agentic QE Fleet - Issue #71
|
|
3
|
+
#
|
|
4
|
+
# This file configures where Grafana should look for dashboard JSON files
|
|
5
|
+
|
|
6
|
+
apiVersion: 1
|
|
7
|
+
|
|
8
|
+
providers:
# Dashboard provider for Agentic QE Fleet.
# Loads dashboard JSON files from `options.path` into the Grafana folder
# named by `folder`.
- name: 'Agentic QE Fleet'
  orgId: 1
  folder: 'Agentic QE Fleet'
  type: file
  disableDeletion: false
  # Interval, in seconds, at which the dashboard directory is re-scanned.
  updateIntervalSeconds: 10
  allowUiUpdates: true
  options:
    path: /var/lib/grafana/dashboards
    # Kept false on purpose: per the Grafana provisioning docs, `folder`
    # must be empty or missing for foldersFromFilesStructure to work, so
    # enabling it here would conflict with the explicit `folder` above.
    foldersFromFilesStructure: false
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Grafana Datasource Provisioning
|
|
2
|
+
# Agentic QE Fleet - Issue #71
|
|
3
|
+
#
|
|
4
|
+
# This file configures datasources for Prometheus, Jaeger, and the OTEL Collector metrics endpoint
|
|
5
|
+
# Grafana will automatically import these on startup
|
|
6
|
+
|
|
7
|
+
apiVersion: 1
|
|
8
|
+
|
|
9
|
+
datasources:
|
|
10
|
+
# Prometheus datasource for metrics
- name: Prometheus
  # Explicit uid: the bundled dashboard
  # (grafana/dashboards/agentic-qe-overview.json) selects its datasource
  # with "uid": "prometheus". Without a fixed uid here that reference
  # would not resolve to this datasource.
  uid: prometheus
  type: prometheus
  access: proxy
  url: http://prometheus:9090
  isDefault: true
  editable: false
  jsonData:
    timeInterval: 15s
    queryTimeout: 60s
    httpMethod: POST
  version: 1
|
|
22
|
+
|
|
23
|
+
# Jaeger datasource for distributed tracing
- name: Jaeger
  type: jaeger
  access: proxy
  url: http://jaeger:16686
  editable: false
  version: 1
  jsonData:
    # Node graph configuration
    nodeGraph:
      enabled: true
    # Trace query configuration (trace-to-logs linking)
    # NOTE(review): `datasourceUid` points at a datasource with uid 'loki',
    # which is not provisioned in this file - confirm it exists elsewhere.
    tracesToLogs:
      datasourceUid: 'loki'
      mapTagNamesEnabled: true
      tags:
        - 'job'
        - 'instance'
        - 'pod'
        - 'namespace'
      mappedTags:
        - key: 'service.name'
          value: 'service'
      spanStartTimeShift: '1h'
      spanEndTimeShift: '1h'
|
|
42
|
+
|
|
43
|
+
# OTEL Collector Prometheus exporter endpoint (port 8889)
# NOTE(review): this URL is the collector's Prometheus exporter, i.e. a
# scrape endpoint. Confirm that a `prometheus`-type datasource can query
# it directly; otherwise use the Prometheus datasource instead.
- name: OTEL Collector Metrics
  type: prometheus
  access: proxy
  url: http://otel-collector:8889
  editable: false
  version: 1
  jsonData:
    httpMethod: POST
    queryTimeout: 60s
    timeInterval: 15s
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# OpenTelemetry Collector Configuration
|
|
2
|
+
# Agentic QE Fleet - Issues #69 & #71
|
|
3
|
+
#
|
|
4
|
+
# This configuration defines the OTEL Collector pipeline for receiving,
|
|
5
|
+
# processing, and exporting telemetry data from the Agentic QE Fleet.
|
|
6
|
+
#
|
|
7
|
+
# Usage: Copy to otel-collector-config.yaml and customize as needed
|
|
8
|
+
|
|
9
|
+
receivers:
  # OTLP receiver: listens for telemetry on both gRPC and HTTP.
  otlp:
    protocols:
      # gRPC listener, all interfaces, port 4317
      grpc:
        endpoint: "0.0.0.0:4317"
        # TLS configuration for production
        # tls:
        #   cert_file: /etc/otel/certs/server.crt
        #   key_file: /etc/otel/certs/server.key

      # HTTP listener, all interfaces, port 4318
      http:
        endpoint: "0.0.0.0:4318"
        # CORS configuration if needed
        # cors:
        #   allowed_origins:
        #     - http://localhost:3000
        #     - https://*.example.com
|
|
27
|
+
|
|
28
|
+
processors:
  # Memory limiter - prevents OOM issues
  memory_limiter:
    limit_mib: 512
    spike_limit_mib: 128
    check_interval: 1s

  # Batch processor - groups spans/metrics for efficient export
  batch:
    send_batch_size: 1024
    send_batch_max_size: 2048
    timeout: 10s

  # Resource processor - adds service metadata
  resource:
    attributes:
      - action: upsert
        key: service.namespace
        value: agentic-qe
      # NOTE(review): source and target are the same attribute here, so
      # this action copies deployment.environment onto itself - confirm
      # that this is intended.
      - action: upsert
        key: deployment.environment
        from_attribute: deployment.environment

  # Attributes processor - enriches telemetry data
  attributes:
    actions:
      # Mirror deployment.environment into a shorter `environment` key
      - action: upsert
        key: environment
        from_attribute: deployment.environment
      # Sanitize sensitive data
      - action: delete
        key: db.password
      - action: delete
        key: api.key
      - action: delete
        key: jwt.token

  # Transform processor - modify spans/metrics (optional)
  # transform:
  #   traces:
  #     statements:
  #       - set(status.code, 1) where attributes["http.status_code"] < 400
|
|
70
|
+
|
|
71
|
+
exporters:
  # Prometheus exporter - serves a metrics endpoint on port 8889
  prometheus:
    endpoint: "0.0.0.0:8889"
    namespace: aqe
    # Metric resource to telemetry conversion
    resource_to_telemetry_conversion:
      enabled: true
    const_labels:
      service: agentic-qe-fleet

  # OTLP exporter - sends traces to Jaeger at jaeger:4317
  otlp/jaeger:
    endpoint: jaeger:4317
    # Transport security is switched off here; see the commented block
    # below for the production variant.
    tls:
      insecure: true
    # For production with TLS
    # tls:
    #   insecure: false
    #   cert_file: /etc/otel/certs/client.crt
    #   key_file: /etc/otel/certs/client.key
    #   ca_file: /etc/otel/certs/ca.crt

  # Logging exporter - debug output (disable in production)
  # NOTE(review): upstream has deprecated the `logging` exporter in favor
  # of `debug` - confirm against the collector image version in use.
  logging:
    verbosity: detailed
    sampling_initial: 5
    sampling_thereafter: 200

  # File exporter - local backup (optional)
  # file:
  #   path: /var/log/otel/telemetry.json
|
|
103
|
+
|
|
104
|
+
service:
  # Extensions enabled for this collector instance
  extensions:
    - health_check
    - pprof
    - zpages

  # Telemetry pipelines
  pipelines:
    # Trace pipeline: OTLP in, Jaeger (via OTLP) and log output out
    traces:
      receivers:
        - otlp
      processors:
        - memory_limiter
        - batch
        - resource
        - attributes
      exporters:
        - otlp/jaeger
        - logging

    # Metrics pipeline: OTLP in, Prometheus endpoint and log output out
    metrics:
      receivers:
        - otlp
      processors:
        - memory_limiter
        - batch
        - resource
        - attributes
      exporters:
        - prometheus
        - logging

  # Collector telemetry (self-monitoring)
  telemetry:
    logs:
      level: info
      # Development: debug, Production: info
      # level: debug

    metrics:
      # Expose collector's own metrics
      address: "0.0.0.0:8888"
|
|
132
|
+
|
|
133
|
+
# Extensions configuration
|
|
134
|
+
extensions:
  # Health check endpoint (port 13133)
  health_check:
    endpoint: "0.0.0.0:13133"

  # Performance profiling (development only, port 1777)
  pprof:
    endpoint: "0.0.0.0:1777"

  # ZPages debug interface (development only, port 55679)
  zpages:
    endpoint: "0.0.0.0:55679"
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# Prometheus Configuration
|
|
2
|
+
# Agentic QE Fleet - Issues #69 & #71
|
|
3
|
+
#
|
|
4
|
+
# This configuration defines scrape targets, Alertmanager endpoints, and rule-file locations
|
|
5
|
+
# for the Agentic QE Fleet observability stack.
|
|
6
|
+
#
|
|
7
|
+
# Usage: Copy to prometheus.yml and customize as needed
|
|
8
|
+
|
|
9
|
+
global:
  # Defaults applied to every scrape job and to rule evaluation
  scrape_interval: "15s"
  scrape_timeout: "10s"
  evaluation_interval: "15s"

  # External labels attached to all metrics
  external_labels:
    cluster: "agentic-qe-fleet"
    environment: "development"
    region: "us-east-1"
|
|
19
|
+
|
|
20
|
+
# Scrape configurations
|
|
21
|
+
scrape_configs:
  # OTEL Collector metrics (from Prometheus exporter, port 8889)
  - job_name: "otel-collector"
    static_configs:
      - targets:
          - "otel-collector:8889"
        labels:
          service: "otel-collector"
          component: "telemetry"

  # OTEL Collector self-monitoring (port 8888)
  - job_name: "otel-collector-internal"
    static_configs:
      - targets:
          - "otel-collector:8888"
        labels:
          service: "otel-collector-internal"
          component: "telemetry"

  # Agentic QE Fleet application metrics (if exposed directly)
  - job_name: "agentic-qe-fleet"
    metrics_path: "/metrics"
    # Per-job override of the global scrape interval
    scrape_interval: "30s"
    static_configs:
      - targets:
          - "agentic-qe-fleet:3000"
        labels:
          service: "agentic-qe-fleet"
          component: "application"

  # Prometheus self-monitoring
  - job_name: "prometheus"
    static_configs:
      - targets:
          - "localhost:9090"
        labels:
          service: "prometheus"
          component: "telemetry"

  # Jaeger metrics (optional)
  - job_name: "jaeger"
    metrics_path: "/metrics"
    static_configs:
      - targets:
          - "jaeger:14269"
        labels:
          service: "jaeger"
          component: "telemetry"

  # PostgreSQL metrics (optional, requires postgres_exporter)
  # - job_name: 'postgres'
  #   static_configs:
  #     - targets: ['postgres-exporter:9187']
  #       labels:
  #         service: 'postgres'
  #         component: 'database'

  # Redis metrics (optional, requires redis_exporter)
  # - job_name: 'redis'
  #   static_configs:
  #     - targets: ['redis-exporter:9121']
  #       labels:
  #         service: 'redis'
  #         component: 'cache'
|
|
80
|
+
|
|
81
|
+
# Alerting configuration
|
|
82
|
+
alerting:
  alertmanagers:
    # The target list is empty, so no Alertmanager is addressed yet.
    - static_configs:
        - targets: []
          # Uncomment when Alertmanager is deployed
          # - targets: ['alertmanager:9093']
|
|
88
|
+
|
|
89
|
+
# Rule files for recording and alerting rules
|
|
90
|
+
rule_files:
  # Glob: every .yml file in the rules directory is loaded
  - "/etc/prometheus/rules/*.yml"
|
|
92
|
+
|
|
93
|
+
# Storage configuration
#
# `retention.time` and `retention.size` are not fields of the
# `storage.tsdb` section of prometheus.yml; they are the names of
# command-line flags. Prometheus rejects unknown fields when it loads this
# file, so the former `storage.tsdb.retention.*` keys stopped it from
# starting. Pass the limits as flags on the Prometheus command line
# instead (for example in the container's `command:` list):
#
#   --storage.tsdb.retention.time=15d    # Retention period
#   --storage.tsdb.retention.size=10GB   # Maximum storage size
|
|
100
|
+
|
|
101
|
+
# Remote write configuration (optional, for long-term storage)
|
|
102
|
+
# remote_write:
|
|
103
|
+
# - url: "https://prometheus-remote-write.example.com/api/v1/write"
|
|
104
|
+
# basic_auth:
|
|
105
|
+
# username: "user"
|
|
106
|
+
# password: "password"
|