@voidwire/llm-summarize 2.0.0 → 3.0.0

Files changed (4)
  1. package/README.md +76 -19
  2. package/cli.ts +35 -25
  3. package/index.ts +132 -13
  4. package/package.json +6 -6
package/README.md CHANGED
@@ -1,12 +1,12 @@
  # llm-summarize
 
- Fast LLM-powered text summarization for observability and logging.
+ Structured session insight extraction for knowledge systems.
 
  ## Philosophy
 
  - **Config-driven** - No hardcoded defaults, specify exact provider/model
  - **Prismis pattern** - Secrets in .env, references in config.toml via `env:VAR_NAME`
- - **Fast and cheap** - Designed for high-volume summarization (haiku/gpt-4.1-mini)
+ - **Knowledge-focused** - Extracts decisions, patterns, preferences, not just summaries
  - **Composable** - JSON output pipes to jq and other tools
 
  ## Installation
@@ -22,10 +22,10 @@ cd llmcli-tools
 
  ```toml
  [llm]
- provider = "openai"
- model = "gpt-4.1-mini"
- api_key = "env:OPENAI_API_KEY"
- max_tokens = 50
+ provider = "ollama"
+ model = "Qwen2.5:3b"
+ api_base = "https://ollama.example.com"
+ max_tokens = 1024
  ```
 
  ### Secrets file: `~/.config/llm/.env`
@@ -38,9 +38,14 @@ ANTHROPIC_API_KEY=sk-ant-...
  ## Usage
 
  ```bash
- llm-summarize <text>
- llm-summarize --stdin
- echo "text" | llm-summarize --stdin
+ # From stdin (typical usage)
+ cat session.txt | llm-summarize --stdin
+
+ # From clipboard
+ pbpaste | llm-summarize --stdin
+
+ # Direct text
+ llm-summarize "session transcript text"
  ```
 
  ## Options
@@ -48,7 +53,7 @@ echo "text" | llm-summarize --stdin
  | Flag | Description |
  |------|-------------|
  | `--model <name>` | Override model from config |
- | `--max-tokens <n>` | Max output tokens |
+ | `--max-tokens <n>` | Max output tokens (default: 1024) |
  | `--stdin` | Read text from stdin |
  | `-h, --help` | Show help |
 
@@ -56,28 +61,80 @@ echo "text" | llm-summarize --stdin
 
  ```json
  {
-   "summary": "User saved form data to PostgreSQL.",
-   "model": "gpt-4.1-mini",
-   "tokens_used": 12
+   "insights": {
+     "summary": "Implemented Redis caching layer with TTL and tag-based invalidation.",
+     "decisions": [
+       "Chose Redis over in-memory caching for persistence across restarts"
+     ],
+     "patterns_used": [
+       "Tag-based cache invalidation"
+     ],
+     "problems_solved": [
+       "Added caching to reduce database load with automatic invalidation on writes"
+     ],
+     "tools_heavy": [
+       "Redis",
+       "CacheService"
+     ]
+   },
+   "model": "Qwen2.5:3b",
+   "tokens_used": 126
  }
  ```
 
+ ### SessionInsights Fields
+
+ | Field | Description |
+ |-------|-------------|
+ | `summary` | One sentence: what was accomplished (always present) |
+ | `decisions` | Specific decisions with reasoning |
+ | `patterns_used` | Development patterns observed |
+ | `preferences_expressed` | User preferences revealed |
+ | `problems_solved` | Problems addressed and how |
+ | `tools_heavy` | Tools used notably |
+
+ Fields are omitted when no clear evidence exists in the transcript.
+
  ## Supported Providers
 
  | Provider | Models | API Key |
  |----------|--------|---------|
+ | `ollama` | Qwen2.5:3b, llama3.2:3b, etc. | Not needed |
  | `anthropic` | claude-3-5-haiku-latest, claude-sonnet-4-20250514 | Required |
- | `openai` | gpt-4.1-mini, gpt-4o | Required |
- | `ollama` | llama3, mistral, gemma3, etc. | Not needed |
+ | `openai` | gpt-4o-mini, gpt-4o | Required |
 
  ### Ollama Configuration
 
  ```toml
  [llm]
  provider = "ollama"
- model = "llama3"
- api_base = "http://localhost:11434/api/generate" # optional, this is default
- max_tokens = 50
+ model = "Qwen2.5:3b"
+ api_base = "https://ollama.example.com"
+ max_tokens = 1024
+ ```
+
+ ### Cloud Provider Configuration
+
+ ```toml
+ [llm]
+ provider = "anthropic"
+ model = "claude-3-5-haiku-latest"
+ api_key = "env:ANTHROPIC_API_KEY"
+ max_tokens = 1024
+ ```
+
+ ## Library Usage
+
+ ```typescript
+ import { summarize, loadConfig, type SessionInsights } from "@voidwire/llm-summarize";
+
+ const config = loadConfig();
+ const result = await summarize("session transcript", config);
+
+ if (result.insights) {
+   console.log(result.insights.summary);
+   console.log(result.insights.decisions);
+ }
  ```
 
  ## Exit Codes
@@ -85,5 +142,5 @@ max_tokens = 50
  | Code | Meaning |
  |------|---------|
  | 0 | Success |
- | 1 | API error (rate limit, auth, network) |
+ | 1 | API error (rate limit, auth, network, parse failure) |
  | 2 | Client error (missing args, invalid config) |
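
Because every `SessionInsights` field except `summary` is optional and omitted when the transcript lacks evidence, library consumers should default the arrays before iterating. A minimal sketch using only the exports shown in the README above (the transcript string is a placeholder):

```typescript
// Sketch: consume insights defensively; array fields may be absent entirely.
import { summarize, loadConfig } from "@voidwire/llm-summarize";

const config = loadConfig();
const result = await summarize("session transcript text", config);

if (result.insights) {
  // summary is the only field guaranteed to be present
  const { summary, decisions = [], tools_heavy = [] } = result.insights;
  console.log(summary);
  decisions.forEach((d) => console.log(`decision: ${d}`));
  tools_heavy.forEach((t) => console.log(`tool: ${t}`));
} else {
  console.error(result.error); // API failure or unparseable model output
}
```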
package/cli.ts CHANGED
@@ -3,7 +3,7 @@
   * llm-summarize CLI
   *
   * Philosophy:
-  * - Fast summaries for observability and logging
+  * - Structured session insight extraction for knowledge systems
   * - Multi-provider support (Anthropic, OpenAI, Ollama)
   * - Deterministic JSON output for tooling integration
   * - Config-driven - no hardcoded defaults
@@ -15,10 +15,10 @@
   *
   * Config: ~/.config/llm/config.toml
   *   [llm]
-  *   provider = "anthropic"
-  *   model = "claude-3-5-haiku-latest"
-  *   api_key = "env:ANTHROPIC_API_KEY"
-  *   max_tokens = 50
+  *   provider = "ollama"
+  *   model = "Qwen2.5:3b"
+  *   api_base = "https://ollama.example.com"
+  *   max_tokens = 1024
   *
   * Secrets: ~/.config/llm/.env
   *   ANTHROPIC_API_KEY=sk-ant-...
@@ -49,10 +49,10 @@ async function readStdin(): Promise<string> {
   */
  function printUsage(): void {
    console.error(`
- llm-summarize - Summarize text using LLM APIs
+ llm-summarize - Extract structured insights from session transcripts
 
  Philosophy:
-   Fast, cheap summaries for observability events.
+   Structured session insight extraction for knowledge systems.
    Config-driven - specify exact provider/model.
    JSON output for tooling integration.
 
@@ -61,16 +61,16 @@ Usage: llm-summarize [options] <text>
 
  Options:
    --model <name>      Override model from config
-   --max-tokens <n>    Max output tokens (default: from config or 50)
+   --max-tokens <n>    Max output tokens (default: from config or 1024)
    --stdin             Read text from stdin
    -h, --help          Show this help
 
  Config file: ~/.config/llm/config.toml
    [llm]
-   provider = "anthropic"
-   model = "claude-3-5-haiku-latest"
-   api_key = "env:ANTHROPIC_API_KEY"
-   max_tokens = 50
+   provider = "ollama"
+   model = "Qwen2.5:3b"
+   api_base = "https://ollama.example.com"
+   max_tokens = 1024
 
  Secrets file: ~/.config/llm/.env
    ANTHROPIC_API_KEY=sk-ant-...
@@ -84,20 +84,28 @@ Environment overrides:
  Supported providers:
    anthropic - Claude models (claude-3-5-haiku-latest, claude-sonnet-4-20250514)
    openai    - GPT models (gpt-4.1-mini, gpt-4o)
-   ollama    - Local models (llama3, mistral, gemma3, etc.) - no API key needed
+   ollama    - Local models (Qwen2.5:3b, llama3.2:3b, etc.) - no API key needed
+
+ Output format:
+   {
+     "insights": {
+       "summary": "One sentence: what was accomplished",
+       "decisions": ["Specific decisions with reasoning"],
+       "patterns_used": ["Development patterns observed"],
+       "preferences_expressed": ["User preferences revealed"],
+       "problems_solved": ["Problems addressed and how"],
+       "tools_heavy": ["Tools used notably"]
+     },
+     "model": "qwen2.5:3b",
+     "tokens_used": 150
+   }
 
  Examples:
-   # Simple summarization
-   llm-summarize "User requested fix for post-password-reset login failure"
-
-   # With options
-   llm-summarize --max-tokens 30 "Long event description..."
-
-   # From stdin (for piping)
-   echo "Tool: Edit, File: auth.ts, Result: added JWT validation" | llm-summarize --stdin
+   # Extract insights from session transcript
+   cat session.txt | llm-summarize --stdin
 
-   # Pipe from another tool
-   cat event.json | jq -r '.description' | llm-summarize --stdin
+   # From clipboard
+   pbpaste | llm-summarize --stdin
  `);
  }
 
@@ -175,8 +183,10 @@ async function main(): Promise<void> {
    console.log(JSON.stringify(result, null, 2));
 
    // Diagnostic
-   if (result.summary) {
-     console.error(`✅ Summarized (${result.tokens_used || "?"} tokens)`);
+   if (result.insights) {
+     console.error(
+       `✅ Extracted insights (${result.tokens_used || "?"} tokens)`,
+     );
      process.exit(0);
    } else {
      console.error(`❌ ${result.error}`);
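
As the diagnostic block shows, the CLI prints the JSON result to stdout and the ✅/❌ line to stderr, so the two streams can be handled separately. A hedged sketch of a programmatic caller (the `spawnSync` wiring is an assumption; the package only documents the shell interface):

```typescript
// Sketch (hypothetical consumer): stdout carries JSON, stderr the diagnostic.
// Exit codes per the README: 0 success, 1 API/parse error, 2 client error.
import { spawnSync } from "node:child_process";

const proc = spawnSync("llm-summarize", ["--stdin"], {
  input: "session transcript text",
  encoding: "utf8",
});

if (proc.status === 0) {
  const result = JSON.parse(proc.stdout);
  console.log(result.insights.summary);
} else {
  // On exit 1 the JSON (with an `error` field) is still printed first;
  // on exit 2 expect usage text on stderr instead.
  console.error(proc.stderr.trim());
}
```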
package/index.ts CHANGED
@@ -1,13 +1,14 @@
  /**
   * llm-summarize - Library exports
   *
-  * Fast LLM-powered text summarization for observability and logging.
+  * Structured session insight extraction for knowledge systems.
   * Pure functions, no process.exit, no stderr output.
   *
   * Usage:
   *   import { summarize, loadConfig } from "llm-summarize";
   *   const config = loadConfig();
-  *   const result = await summarize("text to summarize", config);
+  *   const result = await summarize("session transcript", config);
+  *   // result.insights.summary, result.insights.decisions, etc.
   */
 
  import { readFileSync, existsSync } from "fs";
@@ -17,8 +18,17 @@ import { join } from "path";
  // Types
  // ============================================================================
 
+ export interface SessionInsights {
+   summary: string;
+   decisions?: string[];
+   patterns_used?: string[];
+   preferences_expressed?: string[];
+   problems_solved?: string[];
+   tools_heavy?: string[];
+ }
+
  export interface SummarizeResult {
-   summary?: string;
+   insights?: SessionInsights;
    error?: string;
    model?: string;
    tokens_used?: number;
@@ -39,6 +49,80 @@ export interface SummarizeOptions {
 
  export type ProviderType = "anthropic" | "openai" | "ollama";
 
+ // ============================================================================
+ // System Prompt
+ // ============================================================================
+
+ const SYSTEM_PROMPT = `You are an experienced engineering manager reviewing session transcripts to extract actionable team insights.
+
+ Analyze the development session conversation and extract structured observations.
+
+ <output_schema>
+ {
+   "summary": "One sentence: what was accomplished or decided",
+   "decisions": ["Specific decision and its reasoning"],
+   "patterns_used": ["Development pattern or approach observed"],
+   "preferences_expressed": ["User preference revealed through actions or statements"],
+   "problems_solved": ["Problem that was addressed and how"],
+   "tools_heavy": ["Tool used repeatedly or in notable ways"]
+ }
+ </output_schema>
+
+ <rules>
+ - Include a field ONLY when the conversation provides clear evidence
+ - Extract specifics: "Chose SQLite over Postgres for single-user simplicity" not "Made a database decision"
+ - Omit empty arrays entirely
+ </rules>
+
+ Output valid JSON only. No markdown code blocks, no explanation.`;
+
+ // ============================================================================
+ // Response Parsing
+ // ============================================================================
+
+ /**
+  * Extract JSON from LLM response that may contain:
+  * - Markdown code blocks (```json ... ```)
+  * - MLX end tokens (<|im_end|>, <|end|>)
+  * - Thinking blocks (<think>...</think>)
+  * - Raw JSON
+  */
+ function extractJson(raw: string): SessionInsights | null {
+   let text = raw.trim();
+
+   // Remove thinking blocks
+   text = text.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
+
+   // Remove MLX end tokens
+   text = text
+     .replace(/<\|im_end\|>/g, "")
+     .replace(/<\|end\|>/g, "")
+     .trim();
+
+   // Extract from markdown code block if present
+   const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
+   if (codeBlockMatch) {
+     text = codeBlockMatch[1].trim();
+   }
+
+   // Find JSON object in text (handle leading/trailing garbage)
+   const jsonMatch = text.match(/\{[\s\S]*\}/);
+   if (!jsonMatch) {
+     return null;
+   }
+
+   try {
+     const parsed = JSON.parse(jsonMatch[0]);
+     // Validate required field
+     if (typeof parsed.summary !== "string") {
+       return null;
+     }
+     return parsed as SessionInsights;
+   } catch {
+     return null;
+   }
+ }
+
  // ============================================================================
  // Config Loading
  // ============================================================================
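
The cleanup order in `extractJson` matters: thinking blocks and end tokens are stripped before fence unwrapping and brace matching, so stray wrapper text never reaches the parser. A standalone sketch of the same steps (the helper itself is not exported), with representative raw outputs from small local models:

```typescript
// Standalone sketch mirroring extractJson's cleanup pipeline (illustrative).
const samples: string[] = [
  '```json\n{"summary": "Added caching"}\n```',                      // markdown fence
  '<think>weighing options...</think> {"summary": "Added caching"}', // thinking block
  '{"summary": "Added caching"}<|im_end|>',                          // MLX end token
];

function cleanAndParse(raw: string): { summary: string } | null {
  let text = raw
    .replace(/<think>[\s\S]*?<\/think>/gi, "") // 1. strip thinking blocks
    .replace(/<\|im_end\|>/g, "")              // 2. strip MLX end tokens
    .replace(/<\|end\|>/g, "")
    .trim();
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/); // 3. unwrap fence
  if (fenced) text = fenced[1].trim();
  const obj = text.match(/\{[\s\S]*\}/); // 4. widest-brace match
  if (!obj) return null;
  try {
    const parsed = JSON.parse(obj[0]);
    return typeof parsed.summary === "string" ? parsed : null; // 5. validate
  } catch {
    return null;
  }
}

samples.forEach((s) => console.log(cleanAndParse(s))); // all three parse
```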
@@ -115,7 +199,7 @@ export function loadConfig(): LLMConfig {
    model: null,
    apiKey: null,
    apiBase: null,
-   maxTokens: 50,
+   maxTokens: 1024,
  };
 
  if (!existsSync(configPath)) {
@@ -190,10 +274,11 @@ async function callAnthropic(
    model,
    max_tokens: maxTokens,
    temperature: 0.3,
+   system: SYSTEM_PROMPT,
    messages: [
      {
        role: "user",
-       content: `What was accomplished or decided? One sentence, past tense, focus on actions and outcomes:\n\n${text}`,
+       content: text,
      },
    ],
  }),
@@ -208,9 +293,16 @@
 
    const result = await response.json();
    const content = result.content?.[0]?.text || "";
+   const insights = extractJson(content);
+
+   if (!insights) {
+     return {
+       error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+     };
+   }
 
    return {
-     summary: content.trim(),
+     insights,
      model,
      tokens_used: result.usage?.output_tokens,
    };
@@ -245,9 +337,13 @@ async function callOpenAI(
    max_tokens: maxTokens,
    temperature: 0.3,
    messages: [
+     {
+       role: "system",
+       content: SYSTEM_PROMPT,
+     },
      {
        role: "user",
-       content: `What was accomplished or decided? One sentence, past tense, focus on actions and outcomes:\n\n${text}`,
+       content: text,
      },
    ],
  }),
@@ -262,9 +358,16 @@ async function callOpenAI(
 
    const result = await response.json();
    const content = result.choices?.[0]?.message?.content || "";
+   const insights = extractJson(content);
+
+   if (!insights) {
+     return {
+       error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+     };
+   }
 
    return {
-     summary: content.trim(),
+     insights,
      model,
      tokens_used: result.usage?.completion_tokens,
    };
@@ -276,7 +379,7 @@ async function callOpenAI(
  }
 
  /**
-  * Call Ollama API
+  * Call Ollama API (chat endpoint for system prompt support)
   */
  async function callOllama(
    text: string,
@@ -284,7 +387,7 @@ async function callOllama(
    maxTokens: number,
    apiBase: string,
  ): Promise<SummarizeResult> {
-   const endpoint = `${apiBase}/api/generate`;
+   const endpoint = `${apiBase}/api/chat`;
 
    try {
      const response = await fetch(endpoint, {
@@ -294,7 +397,16 @@ async function callOllama(
    },
    body: JSON.stringify({
      model,
-     prompt: `What was accomplished or decided? One sentence, past tense, focus on actions and outcomes:\n\n${text}`,
+     messages: [
+       {
+         role: "system",
+         content: SYSTEM_PROMPT,
+       },
+       {
+         role: "user",
+         content: text,
+       },
+     ],
      stream: false,
      options: {
        num_predict: maxTokens,
@@ -311,10 +423,17 @@ async function callOllama(
    }
 
    const result = await response.json();
-   const content = result.response || "";
+   const content = result.message?.content || "";
+   const insights = extractJson(content);
+
+   if (!insights) {
+     return {
+       error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+     };
+   }
 
    return {
-     summary: content.trim(),
+     insights,
      model,
      tokens_used: result.eval_count,
    };
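
Switching Ollama from `/api/generate` to `/api/chat` is what makes the system prompt possible, and it also changes the response shape: text now arrives under `message.content` instead of `response`. A sketch of the request the new `callOllama` sends, with the body shape taken from the diff above (the localhost base URL, the POST method and header, and the placeholder strings are assumptions; config normally supplies `api_base`):

```typescript
// Sketch: the /api/chat request shape used by callOllama above.
const response = await fetch("http://localhost:11434/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "Qwen2.5:3b",
    messages: [
      { role: "system", content: "<system prompt>" },       // SYSTEM_PROMPT in the package
      { role: "user", content: "<session transcript>" },
    ],
    stream: false,                 // one JSON body rather than chunked lines
    options: { num_predict: 1024 }, // maps from max_tokens in config
  }),
});

const result = await response.json();
console.log(result.message?.content); // /api/chat nests the text under message
console.log(result.eval_count);       // output token count
```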
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "@voidwire/llm-summarize",
-   "version": "2.0.0",
-   "description": "Fast LLM-powered text summarization for observability and logging",
+   "version": "3.0.0",
+   "description": "Structured session insight extraction for knowledge systems",
    "type": "module",
    "main": "./index.ts",
    "bin": {
@@ -18,9 +18,6 @@
      "README.md",
      "LICENSE"
    ],
-   "scripts": {
-     "test": "bun test"
-   },
    "keywords": [
      "llm",
      "summarize",
@@ -42,5 +39,8 @@
    },
    "engines": {
      "bun": ">=1.0.0"
+   },
+   "scripts": {
+     "test": "bun test"
    }
- }
+ }