agentacta 2026.3.12 → 2026.3.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -2
- package/config.js +2 -0
- package/db.js +12 -0
- package/delta-attribution-context.js +57 -0
- package/index.js +93 -13
- package/indexer.js +31 -4
- package/insights.js +260 -0
- package/package.json +4 -1
- package/project-attribution.js +443 -0
- package/public/app.js +313 -22
- package/public/index.html +13 -10
- package/public/style.css +197 -16
package/insights.js
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Flat per-signal weights from the earlier scoring scheme.
// analyzeSession scores each known signal on a severity scale instead; this
// table is only read there as a fallback for a signal type that has no
// severity rule of its own. Frozen so the shared table cannot be mutated by
// accident.
const SIGNAL_WEIGHTS = Object.freeze({
  tool_retry_loop: 30,
  session_bail: 25,
  high_error_rate: 20,
  long_prompt_short_session: 15,
  no_completion: 10,
});
|
|
12
|
+
|
|
13
|
+
/**
 * Linear interpolation between `min` and `max`, with `t` clamped to [0, 1].
 */
function lerpClamped(t, min, max) {
  const bounded = Math.max(0, Math.min(1, t));
  return min + bounded * (max - min);
}

/**
 * Find runs of 3 or more consecutive calls to the same tool.
 *
 * A Map is used instead of a plain object so that tool names such as
 * "constructor" or "__proto__" are tracked like any other name.
 *
 * @param {Array<{tool_name: *}>} toolCalls - tool_call events in order.
 * @returns {Map<*, number>} Longest qualifying streak per tool name.
 */
function findRetryStreaks(toolCalls) {
  const longestByTool = new Map();
  let streak = 0;
  for (let i = 0; i < toolCalls.length; i++) {
    const name = toolCalls[i].tool_name;
    streak = i > 0 && toolCalls[i - 1].tool_name === name ? streak + 1 : 1;
    const runEnds = i === toolCalls.length - 1 || toolCalls[i + 1].tool_name !== name;
    if (runEnds && streak >= 3 && streak > (longestByTool.get(name) || 0)) {
      longestByTool.set(name, streak);
    }
  }
  return longestByTool;
}

/**
 * Heuristic: does a tool_result event's text mention an error?
 * The text is coerced with String() so non-string payloads do not throw.
 */
function looksLikeErrorResult(event) {
  const text = String(event.content || event.tool_result || '').toLowerCase();
  return text.includes('error') || text.includes('failed') || text.includes('exception');
}

/**
 * Severity-scaled score contribution of a single signal.
 */
function scoreSignal(sig) {
  switch (sig.type) {
    case 'tool_retry_loop':
      // streak 3 = base 20, streak 10+ = 40
      return Math.round(lerpClamped((sig.count - 3) / 7, 20, 40));
    case 'session_bail':
      // 20 tool calls = base 15, 60+ = 30
      return Math.round(lerpClamped((sig.tool_calls - 20) / 40, 15, 30));
    case 'high_error_rate':
      // 31% error rate = base 10, 100% = 35
      return Math.round(lerpClamped((sig.rate - 30) / 70, 10, 35));
    case 'long_prompt_short_session':
      // 30 tool calls = base 10, 80+ = 20
      return Math.round(lerpClamped((sig.tool_calls - 30) / 50, 10, 20));
    case 'no_completion':
      return 10;
    default:
      // No severity rule for this type: fall back to the flat weight table.
      return SIGNAL_WEIGHTS[sig.type] || 0;
  }
}

/**
 * Combine signals into a 0-100 confusion score.
 *
 * Each signal type contributes once. When a type occurs several times
 * (retry loops are reported once per tool), the most severe occurrence
 * decides the contribution, not whichever happened to be listed first.
 */
function computeConfusionScore(signals) {
  const worstByType = new Map();
  for (const sig of signals) {
    const points = scoreSignal(sig);
    if (!worstByType.has(sig.type) || points > worstByType.get(sig.type)) {
      worstByType.set(sig.type, points);
    }
  }
  let total = 0;
  for (const points of worstByType.values()) {
    total += points;
  }
  return Math.min(total, 100);
}

/**
 * Analyze one session's events for signs that the agent struggled.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`; the returned
 *   statements are used via `.get(...)` and `.all(...)`.
 * @param {*} sessionId - Value matched against `sessions.id` and
 *   `events.session_id`.
 * @returns {null|{session_id: *, signals: object[], confusion_score: number,
 *   flagged: boolean, computed_at: string}} `null` when no session row exists.
 */
function analyzeSession(db, sessionId) {
  const session = db.prepare('SELECT * FROM sessions WHERE id = ?').get(sessionId);
  if (!session) return null;

  const events = db.prepare(
    'SELECT * FROM events WHERE session_id = ? ORDER BY timestamp ASC'
  ).all(sessionId);

  const signals = [];
  const toolCalls = events.filter((e) => e.type === 'tool_call');

  // 1. tool_retry_loop: same tool called 3+ times consecutively (one signal
  //    per tool, carrying that tool's longest streak).
  for (const [tool, count] of findRetryStreaks(toolCalls)) {
    signals.push({ type: 'tool_retry_loop', tool, count });
  }

  // 2. session_bail: >20 tool calls but none whose name mentions write/edit.
  if (toolCalls.length > 20) {
    const wroteFile = toolCalls.some((e) => {
      if (typeof e.tool_name !== 'string') return false;
      const name = e.tool_name.toLowerCase();
      return name.includes('write') || name.includes('edit');
    });
    if (!wroteFile) {
      signals.push({ type: 'session_bail', tool_calls: toolCalls.length });
    }
  }

  // 3. high_error_rate: >30% of tool results look like errors.
  const toolResults = events.filter((e) => e.type === 'tool_result');
  if (toolResults.length > 0) {
    const errorCount = toolResults.filter(looksLikeErrorResult).length;
    const errorRate = errorCount / toolResults.length;
    if (errorRate > 0.3) {
      signals.push({
        type: 'high_error_rate',
        error_count: errorCount,
        total: toolResults.length,
        rate: Math.round(errorRate * 100),
      });
    }
  }

  // 4. long_prompt_short_session: initial prompt <15 words but >30 tool calls.
  if (session.initial_prompt && toolCalls.length > 30) {
    // filter(Boolean) drops the empty token a whitespace-only prompt yields,
    // so such a prompt counts as 0 words rather than 1.
    const wordCount = session.initial_prompt.trim().split(/\s+/).filter(Boolean).length;
    if (wordCount < 15) {
      signals.push({
        type: 'long_prompt_short_session',
        prompt_words: wordCount,
        tool_calls: toolCalls.length,
      });
    }
  }

  // 5. no_completion: the session ends on a tool call or tool result.
  if (events.length > 0) {
    const lastEvent = events[events.length - 1];
    if (lastEvent.type === 'tool_call' || lastEvent.type === 'tool_result') {
      signals.push({
        type: 'no_completion',
        last_event_type: lastEvent.type,
        last_tool: lastEvent.tool_name || null,
      });
    }
  }

  const confusionScore = computeConfusionScore(signals);

  return {
    session_id: sessionId,
    signals,
    confusion_score: confusionScore,
    flagged: confusionScore >= 30,
    computed_at: new Date().toISOString(),
  };
}
|
|
157
|
+
|
|
158
|
+
/**
 * Run analyzeSession for every row in `sessions` and upsert each result into
 * `session_insights`, all inside one `db.transaction(...)` call.
 *
 * @param {object} db - Database handle exposing `prepare(sql)` and
 *   `transaction(fn)` (looks like the better-sqlite3 API; confirm against the
 *   caller).
 * @returns {object[]} The analysis results that were written, in the order the
 *   sessions query returned them.
 */
function analyzeAll(db) {
  const sessionRows = db.prepare('SELECT id FROM sessions').all();
  const analyzed = [];

  const upsertSql = [
    '',
    'INSERT OR REPLACE INTO session_insights',
    '(session_id, signals, confusion_score, flagged, computed_at)',
    'VALUES (?, ?, ?, ?, ?)',
    '',
  ].join('\n');
  const saveInsight = db.prepare(upsertSql);

  const persistEach = db.transaction(() => {
    sessionRows.forEach(({ id }) => {
      const insight = analyzeSession(db, id);
      // analyzeSession yields null when the session row is gone; skip it.
      if (!insight) return;

      const flaggedAsInt = insight.flagged ? 1 : 0;
      saveInsight.run(
        insight.session_id,
        JSON.stringify(insight.signals),
        insight.confusion_score,
        flaggedAsInt,
        insight.computed_at
      );
      analyzed.push(insight);
    });
  });

  persistEach();
  return analyzed;
}
|
|
186
|
+
|
|
187
|
+
/**
 * Decode the `signals` column of a session_insights row.
 *
 * A missing, malformed, or non-array value is treated as "no signals" so one
 * bad row cannot take down the whole summary.
 *
 * @param {string|null|undefined} raw - JSON text as stored in the row.
 * @returns {Array} The decoded signal list, or an empty array.
 */
function parseStoredSignals(raw) {
  if (!raw) return [];
  try {
    const parsed = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed : [];
  } catch (err) {
    // Deliberate best-effort: an unreadable row simply contributes no signals.
    return [];
  }
}

/**
 * Aggregate stored per-session insights into a summary.
 *
 * @param {object} db - Database handle exposing `prepare(sql)`; the returned
 *   statement is read with `.all()`.
 * @returns {{total_sessions: number, flagged_count: number,
 *   flagged_percentage: number, avg_confusion_score: number,
 *   signal_counts: Object<string, number>, by_agent: Object<string, object>,
 *   top_flagged: object[]}} Totals, the number of sessions showing each
 *   signal type, per-agent stats, and up to 20 flagged sessions ordered by
 *   descending confusion score.
 */
function getInsightsSummary(db) {
  const rows = db.prepare(
    'SELECT si.*, s.summary, s.model, s.agent, s.start_time, s.tool_count, s.message_count FROM session_insights si JOIN sessions s ON s.id = si.session_id'
  ).all();

  if (!rows.length) {
    return {
      total_sessions: 0,
      flagged_count: 0,
      flagged_percentage: 0,
      avg_confusion_score: 0,
      signal_counts: {},
      by_agent: {},
      top_flagged: [],
    };
  }

  let totalScore = 0;
  let flaggedCount = 0;
  // Maps, not plain objects: keys come from stored data, and names such as
  // "constructor" or "__proto__" must behave like any other key.
  const signalCounts = new Map();
  const byAgent = new Map();
  const flaggedRows = [];

  for (const row of rows) {
    totalScore += row.confusion_score;

    // Parse once per row; the decoded list is reused for top_flagged.
    const signals = parseStoredSignals(row.signals);

    // Count each signal type at most once per session.
    const typesInSession = new Set();
    for (const sig of signals) {
      if (sig && typeof sig === 'object') typesInSession.add(sig.type);
    }
    for (const type of typesInSession) {
      signalCounts.set(type, (signalCounts.get(type) || 0) + 1);
    }

    const agent = row.agent || 'unknown';
    let stats = byAgent.get(agent);
    if (!stats) {
      stats = { count: 0, flagged: 0, total_score: 0 };
      byAgent.set(agent, stats);
    }
    stats.count++;
    stats.total_score += row.confusion_score;

    if (row.flagged) {
      flaggedCount++;
      stats.flagged++;
      flaggedRows.push({ row, signals });
    }
  }

  for (const stats of byAgent.values()) {
    stats.avg_score = Math.round(stats.total_score / stats.count);
  }

  const topFlagged = flaggedRows
    .sort((a, b) => b.row.confusion_score - a.row.confusion_score)
    .slice(0, 20)
    .map(({ row, signals }) => ({
      session_id: row.session_id,
      summary: row.summary,
      model: row.model,
      agent: row.agent,
      start_time: row.start_time,
      tool_count: row.tool_count,
      message_count: row.message_count,
      confusion_score: row.confusion_score,
      signals,
    }));

  return {
    total_sessions: rows.length,
    flagged_count: flaggedCount,
    flagged_percentage: Math.round((flaggedCount / rows.length) * 100),
    avg_confusion_score: Math.round(totalScore / rows.length),
    signal_counts: Object.fromEntries(signalCounts),
    by_agent: Object.fromEntries(byAgent),
    top_flagged: topFlagged,
  };
}
|
|
259
|
+
|
|
260
|
+
// Public API: single-session analysis, analyze-and-store for all sessions, and the aggregate summary.
module.exports = { analyzeSession, analyzeAll, getInsightsSummary };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentacta",
|
|
3
|
-
"version": "2026.3.
|
|
3
|
+
"version": "2026.3.27",
|
|
4
4
|
"description": "Audit trail and search engine for AI agent sessions",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -11,6 +11,9 @@
|
|
|
11
11
|
"indexer.js",
|
|
12
12
|
"db.js",
|
|
13
13
|
"config.js",
|
|
14
|
+
"project-attribution.js",
|
|
15
|
+
"delta-attribution-context.js",
|
|
16
|
+
"insights.js",
|
|
14
17
|
"public/",
|
|
15
18
|
"LICENSE",
|
|
16
19
|
"README.md"
|