wotann 0.5.87 → 0.5.88
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/commands/insights.d.ts +18 -0
- package/dist/cli/commands/insights.js +64 -0
- package/dist/index.js +22 -0
- package/dist/learning/insights.d.ts +82 -0
- package/dist/learning/insights.js +180 -0
- package/package.json +1 -1
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `wotann insights` — render deterministic metrics from saved sessions.
|
|
3
|
+
*
|
|
4
|
+
* Thin CLI wrapper around src/learning/insights.ts. JSON mode emits the
|
|
5
|
+
* full structured report; text mode formats a Hermes-style readout
|
|
6
|
+
* with top tools / common query prefixes / error patterns.
|
|
7
|
+
*/
|
|
8
|
+
export interface RunInsightsOptions {
|
|
9
|
+
/** Defaults to `process.cwd()`. */
|
|
10
|
+
readonly workingDir?: string;
|
|
11
|
+
/** Day-window; must be a finite number greater than 0 (other values are ignored). Omit for all-time. */
|
|
12
|
+
readonly days?: number;
|
|
13
|
+
/** Emit the full report as JSON instead of the formatted text. */
|
|
14
|
+
readonly json?: boolean;
|
|
15
|
+
/** Injectable writer for tests. Defaults to console.log. */
|
|
16
|
+
readonly write?: (line: string) => void;
|
|
17
|
+
}
|
|
18
|
+
export declare function runInsights(opts?: RunInsightsOptions): number;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `wotann insights` — render deterministic metrics from saved sessions.
|
|
3
|
+
*
|
|
4
|
+
* Thin CLI wrapper around src/learning/insights.ts. JSON mode emits the
|
|
5
|
+
* full structured report; text mode formats a Hermes-style readout
|
|
6
|
+
* with top tools / common query prefixes / error patterns.
|
|
7
|
+
*/
|
|
8
|
+
import chalk from "chalk";
|
|
9
|
+
import { generateInsights } from "../../learning/insights.js";
|
|
10
|
+
export function runInsights(opts = {}) {
|
|
11
|
+
const write = opts.write ?? ((line) => console.log(line));
|
|
12
|
+
const report = generateInsights({
|
|
13
|
+
workingDir: opts.workingDir ?? process.cwd(),
|
|
14
|
+
...(opts.days !== undefined && Number.isFinite(opts.days) && opts.days > 0
|
|
15
|
+
? { days: opts.days }
|
|
16
|
+
: {}),
|
|
17
|
+
});
|
|
18
|
+
if (opts.json === true) {
|
|
19
|
+
write(JSON.stringify(report, null, 2));
|
|
20
|
+
return 0;
|
|
21
|
+
}
|
|
22
|
+
renderTextReport(write, report);
|
|
23
|
+
return 0;
|
|
24
|
+
}
|
|
25
|
+
function renderTextReport(write, r) {
|
|
26
|
+
const windowLabel = r.windowDays !== null ? `last ${r.windowDays} days` : "all time";
|
|
27
|
+
write("");
|
|
28
|
+
write(chalk.bold(`WOTANN insights — ${windowLabel}`));
|
|
29
|
+
write("");
|
|
30
|
+
if (r.sessionsTotal === 0) {
|
|
31
|
+
write(chalk.dim(" no sessions found in .wotann/sessions/"));
|
|
32
|
+
write("");
|
|
33
|
+
return;
|
|
34
|
+
}
|
|
35
|
+
write(` sessions in window: ${r.sessionsInWindow} of ${r.sessionsTotal} on disk`);
|
|
36
|
+
if (r.sessionsCorrupt > 0) {
|
|
37
|
+
write(chalk.yellow(` sessions corrupt: ${r.sessionsCorrupt} (skipped)`));
|
|
38
|
+
}
|
|
39
|
+
if (r.sessionsIncognito > 0) {
|
|
40
|
+
write(chalk.dim(` sessions incognito: ${r.sessionsIncognito} (excluded from query histograms)`));
|
|
41
|
+
}
|
|
42
|
+
write(` messages: ${r.messagesTotal}`);
|
|
43
|
+
if (r.sessionsInWindow > 0) {
|
|
44
|
+
write(` avg msgs/session: ${r.avgMessagesPerSession.toFixed(1)}`);
|
|
45
|
+
}
|
|
46
|
+
write(` tokens: ${r.tokensTotal.toLocaleString()}`);
|
|
47
|
+
write(` cost: $${r.costTotal.toFixed(4)}`);
|
|
48
|
+
write(` tool calls: ${r.toolCallsTotal}`);
|
|
49
|
+
write(` providers: ${r.providersUsed.join(", ") || chalk.dim("(none)")}`);
|
|
50
|
+
write(` models: ${r.modelsUsed.join(", ") || chalk.dim("(none)")}`);
|
|
51
|
+
renderHistogram(write, "Top tools", r.topTools, 30);
|
|
52
|
+
renderHistogram(write, "Common query prefixes", r.topUserPrefixes, 40);
|
|
53
|
+
renderHistogram(write, "Error patterns", r.errors, 60);
|
|
54
|
+
write("");
|
|
55
|
+
}
|
|
56
|
+
function renderHistogram(write, title, rows, keyWidth) {
|
|
57
|
+
if (rows.length === 0)
|
|
58
|
+
return;
|
|
59
|
+
write("");
|
|
60
|
+
write(chalk.bold(` ${title}:`));
|
|
61
|
+
for (const row of rows) {
|
|
62
|
+
write(` ${row.key.padEnd(keyWidth)} ${row.count}`);
|
|
63
|
+
}
|
|
64
|
+
}
|
package/dist/index.js
CHANGED
|
@@ -3630,6 +3630,28 @@ curatorCmd
|
|
|
3630
3630
|
const mod = await import("./cli/commands/skills-curator.js");
|
|
3631
3631
|
process.exit(mod.runUnpin(name));
|
|
3632
3632
|
});
|
|
3633
|
+
// ── wotann insights ──────────────────────────────────────────
|
|
3634
|
+
//
|
|
3635
|
+
// Hermes Gap 3 port — deterministic metrics over saved sessions:
|
|
3636
|
+
// top tools, common query prefixes, error patterns. Pure-data MVP;
|
|
3637
|
+
// LLM-graded review deferred to a follow-up (aux-cred pattern).
|
|
3638
|
+
// Module lives at src/learning/insights.ts; CLI shell in
|
|
3639
|
+
// src/cli/commands/insights.ts.
|
|
3640
|
+
program
    .command("insights")
    .description("Show session metrics — top tools, common query prefixes, error patterns")
    .option("--days <n>", "Only include sessions started within the last N days (default: all time)", (v) => parseInt(v, 10))
    .option("--json", "Emit the full report as JSON instead of formatted text")
    .action(async (opts) => {
        // Loaded on demand, like the neighbouring command handlers.
        const mod = await import("./cli/commands/insights.js");
        const cliOpts = {};
        if (opts.days !== undefined) {
            if (Number.isFinite(opts.days) && opts.days > 0) {
                cliOpts.days = opts.days;
            }
            else {
                // parseInt yields NaN for non-numeric input; zero and negatives are
                // unusable too. The all-time fallback is kept, but the user is told.
                // stderr keeps `--json` output on stdout parseable.
                console.error("warning: --days expects a positive integer; showing all-time insights");
            }
        }
        if (opts.json === true)
            cliOpts.json = true;
        // runInsights returns the exit code.
        process.exit(mod.runInsights(cliOpts));
    });
|
|
3633
3655
|
// ── wotann cost ──────────────────────────────────────────────
|
|
3634
3656
|
//
|
|
3635
3657
|
// Wave 4G: the cost command now accepts an optional `period` argument
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Insights — deterministic metrics over saved WOTANN sessions.
|
|
3
|
+
*
|
|
4
|
+
* Hermes Gap 3 port: surface the "what worked / what failed / repeating
|
|
5
|
+
* patterns" data WOTANN already stores. The Hermes implementation adds
|
|
6
|
+
* an LLM-graded pass on top — that's deferred to a follow-up ticket
|
|
7
|
+
* (lands with the same aux-cred pattern the Skill Curator is waiting on).
|
|
8
|
+
* The data layer below is the foundation: pure aggregation, zero LLM,
|
|
9
|
+
* works fully offline.
|
|
10
|
+
*
|
|
11
|
+
* Inputs: every `.json` file in `<workingDir>/.wotann/sessions/`.
|
|
12
|
+
* Each file is a {@link SessionState} round-trip (see
|
|
13
|
+
* `src/core/session.ts`).
|
|
14
|
+
*
|
|
15
|
+
* Output: a structured {@link InsightsReport} with totals + top-N
|
|
16
|
+
* histograms for tools, user query prefixes, and error patterns.
|
|
17
|
+
*
|
|
18
|
+
* Strict invariants:
|
|
19
|
+
* - Pure read-only — never writes back to the sessions directory.
|
|
20
|
+
* - Skips sessions that fail to parse (corrupt JSON) rather than
|
|
21
|
+
* crashing the whole report. Honest count via {@link
|
|
22
|
+
* InsightsReport.sessionsCorrupt}.
|
|
23
|
+
* - Excludes incognito sessions from all totals and histograms (those
|
|
24
|
+
* are explicitly off-the-record).
|
|
25
|
+
* - Stable sort — top-N lists are deterministic across runs.
|
|
26
|
+
*/
|
|
27
|
+
export interface InsightsOptions {
|
|
28
|
+
/** Working directory containing `.wotann/sessions/`. */
|
|
29
|
+
readonly workingDir: string;
|
|
30
|
+
/** Limit to sessions started within the last N days (omit = all time). */
|
|
31
|
+
readonly days?: number;
|
|
32
|
+
}
|
|
33
|
+
export interface TopCount {
|
|
34
|
+
readonly key: string;
|
|
35
|
+
readonly count: number;
|
|
36
|
+
}
|
|
37
|
+
export interface InsightsReport {
|
|
38
|
+
/** The day-window applied, or null when no window was requested. */
|
|
39
|
+
readonly windowDays: number | null;
|
|
40
|
+
/** Sessions successfully loaded from disk, regardless of window (corrupt files are counted in sessionsCorrupt instead). */
|
|
41
|
+
readonly sessionsTotal: number;
|
|
42
|
+
/** Sessions that fell within the configured day-window. */
|
|
43
|
+
readonly sessionsInWindow: number;
|
|
44
|
+
/** Sessions skipped because the JSON failed to parse. */
|
|
45
|
+
readonly sessionsCorrupt: number;
|
|
46
|
+
/** In-window sessions left out of the aggregates because they were incognito. */
|
|
47
|
+
readonly sessionsIncognito: number;
|
|
48
|
+
/** Aggregate message count across the in-window non-incognito set. */
|
|
49
|
+
readonly messagesTotal: number;
|
|
50
|
+
readonly avgMessagesPerSession: number;
|
|
51
|
+
readonly tokensTotal: number;
|
|
52
|
+
readonly costTotal: number;
|
|
53
|
+
readonly toolCallsTotal: number;
|
|
54
|
+
/** Top tools by invocation count (descending). */
|
|
55
|
+
readonly topTools: readonly TopCount[];
|
|
56
|
+
/** Top user-query prefixes (first 5 words, lowercased) — descending. */
|
|
57
|
+
readonly topUserPrefixes: readonly TopCount[];
|
|
58
|
+
/** Top error patterns (Runtime error / [Guardrail] prefix). */
|
|
59
|
+
readonly errors: readonly TopCount[];
|
|
60
|
+
/** Distinct models observed in-window — sorted alphabetically. */
|
|
61
|
+
readonly modelsUsed: readonly string[];
|
|
62
|
+
/** Distinct providers observed in-window — sorted alphabetically. */
|
|
63
|
+
readonly providersUsed: readonly string[];
|
|
64
|
+
}
|
|
65
|
+
export declare function generateInsights(opts: InsightsOptions): InsightsReport;
|
|
66
|
+
/**
|
|
67
|
+
* Extract a 5-word, lowercased prefix from a user message. Returns null
|
|
68
|
+
* when the content is empty or whitespace-only so {@link bump} doesn't
|
|
69
|
+
* inflate a phantom "" bucket.
|
|
70
|
+
*/
|
|
71
|
+
export declare function extractPrefix(content: string): string | null;
|
|
72
|
+
/**
|
|
73
|
+
* Recognize the two error shapes WOTANN's runtime emits as system
|
|
74
|
+
* messages: `Runtime error: <reason>` (from AppV3.tsx) and
|
|
75
|
+
* `[Guardrail] <reason>` (from runtime-agent-loop.ts). Anything else
|
|
76
|
+
* is treated as informational and ignored.
|
|
77
|
+
*
|
|
78
|
+
* The returned string is truncated to 80 chars to keep the histogram
|
|
79
|
+
* cardinality bounded (different stack traces / file paths shouldn't
|
|
80
|
+
* each get their own bucket).
|
|
81
|
+
*/
|
|
82
|
+
export declare function normalizeErrorMessage(content: string): string | null;
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Insights — deterministic metrics over saved WOTANN sessions.
|
|
3
|
+
*
|
|
4
|
+
* Hermes Gap 3 port: surface the "what worked / what failed / repeating
|
|
5
|
+
* patterns" data WOTANN already stores. The Hermes implementation adds
|
|
6
|
+
* an LLM-graded pass on top — that's deferred to a follow-up ticket
|
|
7
|
+
* (lands with the same aux-cred pattern the Skill Curator is waiting on).
|
|
8
|
+
* The data layer below is the foundation: pure aggregation, zero LLM,
|
|
9
|
+
* works fully offline.
|
|
10
|
+
*
|
|
11
|
+
* Inputs: every `.json` file in `<workingDir>/.wotann/sessions/`.
|
|
12
|
+
* Each file is a {@link SessionState} round-trip (see
|
|
13
|
+
* `src/core/session.ts`).
|
|
14
|
+
*
|
|
15
|
+
* Output: a structured {@link InsightsReport} with totals + top-N
|
|
16
|
+
* histograms for tools, user query prefixes, and error patterns.
|
|
17
|
+
*
|
|
18
|
+
* Strict invariants:
|
|
19
|
+
* - Pure read-only — never writes back to the sessions directory.
|
|
20
|
+
* - Skips sessions that fail to parse (corrupt JSON) rather than
|
|
21
|
+
* crashing the whole report. Honest count via {@link
|
|
22
|
+
* InsightsReport.sessionsCorrupt}.
|
|
23
|
+
* - Excludes incognito sessions from all totals and histograms (those
|
|
24
|
+
* are explicitly off-the-record).
|
|
25
|
+
* - Stable sort — top-N lists are deterministic across runs.
|
|
26
|
+
*/
|
|
27
|
+
import { existsSync, readdirSync } from "node:fs";
|
|
28
|
+
import { join } from "node:path";
|
|
29
|
+
import { restoreSession } from "../core/session.js";
|
|
30
|
+
// ── Public API ────────────────────────────────────────────────────
|
|
31
|
+
export function generateInsights(opts) {
|
|
32
|
+
const sessionsDir = join(opts.workingDir, ".wotann", "sessions");
|
|
33
|
+
if (!existsSync(sessionsDir)) {
|
|
34
|
+
return emptyReport(opts.days ?? null);
|
|
35
|
+
}
|
|
36
|
+
const files = readdirSync(sessionsDir).filter((name) => name.endsWith(".json"));
|
|
37
|
+
const parsed = [];
|
|
38
|
+
let corrupt = 0;
|
|
39
|
+
for (const name of files) {
|
|
40
|
+
const session = restoreSession(join(sessionsDir, name));
|
|
41
|
+
if (session === null) {
|
|
42
|
+
corrupt++;
|
|
43
|
+
continue;
|
|
44
|
+
}
|
|
45
|
+
parsed.push(session);
|
|
46
|
+
}
|
|
47
|
+
return buildReport(parsed, opts.days, corrupt);
|
|
48
|
+
}
|
|
49
|
+
// ── Internal helpers ──────────────────────────────────────────────
|
|
50
|
+
function buildReport(sessions, days, corruptCount) {
|
|
51
|
+
const cutoffMs = days !== undefined ? Date.now() - days * 86_400_000 : null;
|
|
52
|
+
const inWindow = cutoffMs !== null
|
|
53
|
+
? sessions.filter((s) => {
|
|
54
|
+
const startMs = s.startedAt instanceof Date ? s.startedAt.getTime() : new Date(s.startedAt).getTime();
|
|
55
|
+
return Number.isFinite(startMs) && startMs >= cutoffMs;
|
|
56
|
+
})
|
|
57
|
+
: sessions;
|
|
58
|
+
const incognito = inWindow.filter((s) => s.incognito);
|
|
59
|
+
const live = inWindow.filter((s) => !s.incognito);
|
|
60
|
+
let messagesTotal = 0;
|
|
61
|
+
let tokensTotal = 0;
|
|
62
|
+
let costTotal = 0;
|
|
63
|
+
let toolCallsTotal = 0;
|
|
64
|
+
const toolCounts = new Map();
|
|
65
|
+
const prefixCounts = new Map();
|
|
66
|
+
const errorCounts = new Map();
|
|
67
|
+
const models = new Set();
|
|
68
|
+
const providers = new Set();
|
|
69
|
+
for (const session of live) {
|
|
70
|
+
messagesTotal += session.messages.length;
|
|
71
|
+
tokensTotal += session.totalTokens;
|
|
72
|
+
costTotal += session.totalCost;
|
|
73
|
+
toolCallsTotal += session.toolCalls;
|
|
74
|
+
if (typeof session.model === "string" && session.model.length > 0) {
|
|
75
|
+
models.add(session.model);
|
|
76
|
+
}
|
|
77
|
+
if (typeof session.provider === "string" && session.provider.length > 0) {
|
|
78
|
+
providers.add(session.provider);
|
|
79
|
+
}
|
|
80
|
+
for (const msg of session.messages) {
|
|
81
|
+
if (msg.role === "tool" && typeof msg.toolName === "string" && msg.toolName.length > 0) {
|
|
82
|
+
bump(toolCounts, msg.toolName);
|
|
83
|
+
}
|
|
84
|
+
if (msg.role === "user" && typeof msg.content === "string") {
|
|
85
|
+
const prefix = extractPrefix(msg.content);
|
|
86
|
+
if (prefix !== null)
|
|
87
|
+
bump(prefixCounts, prefix);
|
|
88
|
+
}
|
|
89
|
+
if (msg.role === "system" && typeof msg.content === "string") {
|
|
90
|
+
const normalized = normalizeErrorMessage(msg.content);
|
|
91
|
+
if (normalized !== null)
|
|
92
|
+
bump(errorCounts, normalized);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
return {
|
|
97
|
+
windowDays: days ?? null,
|
|
98
|
+
sessionsTotal: sessions.length,
|
|
99
|
+
sessionsInWindow: inWindow.length,
|
|
100
|
+
sessionsCorrupt: corruptCount,
|
|
101
|
+
sessionsIncognito: incognito.length,
|
|
102
|
+
messagesTotal,
|
|
103
|
+
avgMessagesPerSession: live.length > 0 ? messagesTotal / live.length : 0,
|
|
104
|
+
tokensTotal,
|
|
105
|
+
costTotal,
|
|
106
|
+
toolCallsTotal,
|
|
107
|
+
topTools: topN(toolCounts, 10),
|
|
108
|
+
topUserPrefixes: topN(prefixCounts, 10),
|
|
109
|
+
errors: topN(errorCounts, 10),
|
|
110
|
+
modelsUsed: [...models].sort(),
|
|
111
|
+
providersUsed: [...providers].sort(),
|
|
112
|
+
};
|
|
113
|
+
}
|
|
114
|
+
/**
 * Reduce a user message to its first five whitespace-separated words,
 * joined by single spaces and lowercased.
 *
 * @param content - Raw message text.
 * @returns The prefix, or null when the text has no non-whitespace
 *   characters (so callers never count an empty-string bucket).
 */
export function extractPrefix(content) {
    // Each match is a maximal run of non-whitespace, i.e. one word.
    const tokens = content.match(/\S+/g);
    if (tokens === null) {
        return null;
    }
    return tokens.slice(0, 5).join(" ").toLowerCase();
}
|
|
129
|
+
/**
 * Recognize the two error shapes WOTANN's runtime emits as system
 * messages: `Runtime error: <reason>` and `[Guardrail] <reason>`.
 * Anything else is treated as informational and ignored.
 *
 * Only the first line of the message is kept, truncated to 80 chars. This
 * keeps histogram cardinality bounded: differing stack-trace lines or file
 * paths after the headline do not each create a bucket. It also keeps every
 * key on a single output row.
 *
 * @param content - Raw system-message text.
 * @returns The bucket key, or null when the text is not a recognized error.
 */
export function normalizeErrorMessage(content) {
    const isError = content.startsWith("Runtime error:") || content.startsWith("[Guardrail]");
    if (!isError) {
        return null;
    }
    // A limit of 1 stops splitting after the first line break.
    const [headline] = content.split(/\r?\n/, 1);
    return headline.slice(0, 80);
}
|
|
145
|
+
function bump(counts, key) {
|
|
146
|
+
counts.set(key, (counts.get(key) ?? 0) + 1);
|
|
147
|
+
}
|
|
148
|
+
/**
|
|
149
|
+
* Top-N by count, descending. Ties resolve by key lexicographic order
|
|
150
|
+
* so output is deterministic across runs.
|
|
151
|
+
*/
|
|
152
|
+
function topN(counts, n) {
|
|
153
|
+
return [...counts.entries()]
|
|
154
|
+
.map(([key, count]) => ({ key, count }))
|
|
155
|
+
.sort((a, b) => {
|
|
156
|
+
if (b.count !== a.count)
|
|
157
|
+
return b.count - a.count;
|
|
158
|
+
return a.key.localeCompare(b.key);
|
|
159
|
+
})
|
|
160
|
+
.slice(0, n);
|
|
161
|
+
}
|
|
162
|
+
function emptyReport(windowDays) {
|
|
163
|
+
return {
|
|
164
|
+
windowDays,
|
|
165
|
+
sessionsTotal: 0,
|
|
166
|
+
sessionsInWindow: 0,
|
|
167
|
+
sessionsCorrupt: 0,
|
|
168
|
+
sessionsIncognito: 0,
|
|
169
|
+
messagesTotal: 0,
|
|
170
|
+
avgMessagesPerSession: 0,
|
|
171
|
+
tokensTotal: 0,
|
|
172
|
+
costTotal: 0,
|
|
173
|
+
toolCallsTotal: 0,
|
|
174
|
+
topTools: [],
|
|
175
|
+
topUserPrefixes: [],
|
|
176
|
+
errors: [],
|
|
177
|
+
modelsUsed: [],
|
|
178
|
+
providersUsed: [],
|
|
179
|
+
};
|
|
180
|
+
}
|