@voidwire/llm-summarize 2.0.0 → 3.1.0
- package/README.md +76 -19
- package/cli.ts +56 -26
- package/index.ts +167 -16
- package/package.json +2 -2
package/README.md
CHANGED
@@ -1,12 +1,12 @@
 # llm-summarize
 
-
+Structured session insight extraction for knowledge systems.
 
 ## Philosophy
 
 - **Config-driven** - No hardcoded defaults, specify exact provider/model
 - **Prismis pattern** - Secrets in .env, references in config.toml via `env:VAR_NAME`
-- **
+- **Knowledge-focused** - Extracts decisions, patterns, preferences, not just summaries
 - **Composable** - JSON output pipes to jq and other tools
 
 ## Installation
@@ -22,10 +22,10 @@ cd llmcli-tools
 
 ```toml
 [llm]
-provider = "
-model = "
-
-max_tokens =
+provider = "ollama"
+model = "Qwen2.5:3b"
+api_base = "https://ollama.example.com"
+max_tokens = 1024
 ```
 
 ### Secrets file: `~/.config/llm/.env`
@@ -38,9 +38,14 @@ ANTHROPIC_API_KEY=sk-ant-...
 ## Usage
 
 ```bash
-
-llm-summarize --stdin
-
+# From stdin (typical usage)
+cat session.txt | llm-summarize --stdin
+
+# From clipboard
+pbpaste | llm-summarize --stdin
+
+# Direct text
+llm-summarize "session transcript text"
 ```
 
 ## Options
@@ -48,7 +53,7 @@ echo "text" | llm-summarize --stdin
 | Flag | Description |
 |------|-------------|
 | `--model <name>` | Override model from config |
-| `--max-tokens <n>` | Max output tokens |
+| `--max-tokens <n>` | Max output tokens (default: 1024) |
 | `--stdin` | Read text from stdin |
 | `-h, --help` | Show help |
 
@@ -56,28 +61,80 @@ echo "text" | llm-summarize --stdin
 
 ```json
 {
-  "
-
-
+  "insights": {
+    "summary": "Implemented Redis caching layer with TTL and tag-based invalidation.",
+    "decisions": [
+      "Chose Redis over in-memory caching for persistence across restarts"
+    ],
+    "patterns_used": [
+      "Tag-based cache invalidation"
+    ],
+    "problems_solved": [
+      "Added caching to reduce database load with automatic invalidation on writes"
+    ],
+    "tools_heavy": [
+      "Redis",
+      "CacheService"
+    ]
+  },
+  "model": "Qwen2.5:3b",
+  "tokens_used": 126
 }
 ```
 
+### SessionInsights Fields
+
+| Field | Description |
+|-------|-------------|
+| `summary` | One sentence: what was accomplished (always present) |
+| `decisions` | Specific decisions with reasoning |
+| `patterns_used` | Development patterns observed |
+| `preferences_expressed` | User preferences revealed |
+| `problems_solved` | Problems addressed and how |
+| `tools_heavy` | Tools used notably |
+
+Fields are omitted when no clear evidence exists in the transcript.
+
 ## Supported Providers
 
 | Provider | Models | API Key |
 |----------|--------|---------|
+| `ollama` | Qwen2.5:3b, llama3.2:3b, etc. | Not needed |
 | `anthropic` | claude-3-5-haiku-latest, claude-sonnet-4-20250514 | Required |
-| `openai` | gpt-
-| `ollama` | llama3, mistral, gemma3, etc. | Not needed |
+| `openai` | gpt-4o-mini, gpt-4o | Required |
 
 ### Ollama Configuration
 
 ```toml
 [llm]
 provider = "ollama"
-model = "
-api_base = "
-max_tokens =
+model = "Qwen2.5:3b"
+api_base = "https://ollama.example.com"
+max_tokens = 1024
+```
+
+### Cloud Provider Configuration
+
+```toml
+[llm]
+provider = "anthropic"
+model = "claude-3-5-haiku-latest"
+api_key = "env:ANTHROPIC_API_KEY"
+max_tokens = 1024
+```
+
+## Library Usage
+
+```typescript
+import { summarize, loadConfig, type SessionInsights } from "@voidwire/llm-summarize";
+
+const config = loadConfig();
+const result = await summarize("session transcript", config);
+
+if (result.insights) {
+  console.log(result.insights.summary);
+  console.log(result.insights.decisions);
+}
 ```
 
 ## Exit Codes
@@ -85,5 +142,5 @@ max_tokens = 50
 | Code | Meaning |
 |------|---------|
 | 0 | Success |
-| 1 | API error (rate limit, auth, network) |
+| 1 | API error (rate limit, auth, network, parse failure) |
 | 2 | Client error (missing args, invalid config) |
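The updated Library Usage section covers only the default insights mode; the `mode` option added to `SummarizeOptions` (see the index.ts changes below) is reachable from the library as well. A minimal sketch, assuming only the types and prompt behavior shown in this diff:

```typescript
import { summarize, loadConfig } from "@voidwire/llm-summarize";

// "quick" mode instructs the model to return only {"summary": "..."},
// so insights carries just the one-sentence summary field.
const config = loadConfig();
const result = await summarize("session transcript", config, { mode: "quick" });

if (result.insights) {
  console.log(result.insights.summary);
} else {
  // API failures and JSON parse failures both surface here (exit code 1 in the CLI).
  console.error(result.error);
}
```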
package/cli.ts
CHANGED
@@ -3,7 +3,7 @@
  * llm-summarize CLI
  *
  * Philosophy:
- * -
+ * - Structured session insight extraction for knowledge systems
  * - Multi-provider support (Anthropic, OpenAI, Ollama)
  * - Deterministic JSON output for tooling integration
  * - Config-driven - no hardcoded defaults
@@ -15,10 +15,10 @@
  *
  * Config: ~/.config/llm/config.toml
  *   [llm]
- *   provider = "
- *   model = "
- *
- *   max_tokens =
+ *   provider = "ollama"
+ *   model = "Qwen2.5:3b"
+ *   api_base = "https://ollama.example.com"
+ *   max_tokens = 1024
  *
  * Secrets: ~/.config/llm/.env
  *   ANTHROPIC_API_KEY=sk-ant-...
@@ -29,7 +29,12 @@
  *   2 - Client error (missing args, invalid config)
  */
 
-import {
+import {
+  summarize,
+  loadConfig,
+  type SummarizeOptions,
+  type SummarizeMode,
+} from "./index";
 
 /**
  * Read text from stdin
@@ -49,10 +54,10 @@ async function readStdin(): Promise<string> {
  */
 function printUsage(): void {
   console.error(`
-llm-summarize -
+llm-summarize - Extract structured insights from session transcripts
 
 Philosophy:
-
+  Structured session insight extraction for knowledge systems.
   Config-driven - specify exact provider/model.
   JSON output for tooling integration.
 
@@ -60,17 +65,22 @@ Usage: llm-summarize [options] <text>
        llm-summarize --stdin
 
 Options:
+  --mode <mode>     Summarization mode: quick or insights (default: insights)
   --model <name>    Override model from config
-  --max-tokens <n>  Max output tokens (default: from config or
+  --max-tokens <n>  Max output tokens (default: from config or 1024)
   --stdin           Read text from stdin
   -h, --help        Show this help
 
+Modes:
+  quick    - Fast one-liner summary (for user prompts)
+  insights - Full SessionInsights extraction (for responses)
+
 Config file: ~/.config/llm/config.toml
   [llm]
-  provider = "
-  model = "
-
-  max_tokens =
+  provider = "ollama"
+  model = "Qwen2.5:3b"
+  api_base = "https://ollama.example.com"
+  max_tokens = 1024
 
 Secrets file: ~/.config/llm/.env
   ANTHROPIC_API_KEY=sk-ant-...
@@ -84,20 +94,28 @@ Environment overrides:
 Supported providers:
   anthropic - Claude models (claude-3-5-haiku-latest, claude-sonnet-4-20250514)
   openai    - GPT models (gpt-4.1-mini, gpt-4o)
-  ollama    - Local models (
+  ollama    - Local models (Qwen2.5:3b, llama3.2:3b, etc.) - no API key needed
+
+Output format:
+  {
+    "insights": {
+      "summary": "One sentence: what was accomplished",
+      "decisions": ["Specific decisions with reasoning"],
+      "patterns_used": ["Development patterns observed"],
+      "preferences_expressed": ["User preferences revealed"],
+      "problems_solved": ["Problems addressed and how"],
+      "tools_heavy": ["Tools used notably"]
+    },
+    "model": "qwen2.5:3b",
+    "tokens_used": 150
+  }
 
 Examples:
-  #
-
-
-  # With options
-  llm-summarize --max-tokens 30 "Long event description..."
-
-  # From stdin (for piping)
-  echo "Tool: Edit, File: auth.ts, Result: added JWT validation" | llm-summarize --stdin
+  # Extract insights from session transcript
+  cat session.txt | llm-summarize --stdin
 
-  #
-
+  # From clipboard
+  pbpaste | llm-summarize --stdin
 `);
 }
 
@@ -119,6 +137,7 @@ async function parseArgs(argv: string[]): Promise<ParsedArgs | null> {
 
   let modelOverride: string | undefined;
   let maxTokensOverride: number | undefined;
+  let modeOverride: SummarizeMode | undefined;
   let useStdin = false;
   let text = "";
 
@@ -129,6 +148,14 @@ async function parseArgs(argv: string[]): Promise<ParsedArgs | null> {
       modelOverride = args[++i];
     } else if (arg === "--max-tokens" && i + 1 < args.length) {
       maxTokensOverride = parseInt(args[++i], 10);
+    } else if (arg === "--mode" && i + 1 < args.length) {
+      const mode = args[++i];
+      if (mode === "quick" || mode === "insights") {
+        modeOverride = mode;
+      } else {
+        console.error(`Invalid mode: ${mode}. Use 'quick' or 'insights'.`);
+        process.exit(2);
+      }
     } else if (arg === "--stdin") {
       useStdin = true;
     } else if (!arg.startsWith("-")) {
@@ -146,6 +173,7 @@ async function parseArgs(argv: string[]): Promise<ParsedArgs | null> {
     options: {
       model: modelOverride,
       maxTokens: maxTokensOverride,
+      mode: modeOverride,
     },
   };
 }
@@ -175,8 +203,10 @@ async function main(): Promise<void> {
   console.log(JSON.stringify(result, null, 2));
 
   // Diagnostic
-  if (result.
-    console.error(
+  if (result.insights) {
+    console.error(
+      `✅ Extracted insights (${result.tokens_used || "?"} tokens)`,
+    );
     process.exit(0);
   } else {
     console.error(`❌ ${result.error}`);
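The CLI prints the full result JSON to stdout and keeps diagnostics on stderr, so scripts can rely on the exit-code contract alone. A minimal consumer sketch, assuming the `llm-summarize` binary is on PATH:

```typescript
// Minimal consumer sketch; relies on the exit-code contract documented above:
// 0 success, 1 API error (including parse failure), 2 client error.
import { spawnSync } from "node:child_process";

const proc = spawnSync("llm-summarize", ["--mode", "quick", "--stdin"], {
  input: "Rudy asked how to configure Ollama.",
  encoding: "utf8",
});

if (proc.status === 0) {
  const result = JSON.parse(proc.stdout);
  console.log(result.insights.summary);
} else {
  // stderr carries the ❌ diagnostic line; stdout may still hold error JSON
  console.error(`llm-summarize exited with code ${proc.status}: ${proc.stderr}`);
}
```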
package/index.ts
CHANGED
@@ -1,13 +1,14 @@
 /**
  * llm-summarize - Library exports
  *
- *
+ * Structured session insight extraction for knowledge systems.
  * Pure functions, no process.exit, no stderr output.
  *
  * Usage:
  *   import { summarize, loadConfig } from "llm-summarize";
  *   const config = loadConfig();
- *   const result = await summarize("
+ *   const result = await summarize("session transcript", config);
+ *   // result.insights.summary, result.insights.decisions, etc.
  */
 
 import { readFileSync, existsSync } from "fs";
@@ -17,8 +18,17 @@ import { join } from "path";
 // Types
 // ============================================================================
 
+export interface SessionInsights {
+  summary: string;
+  decisions?: string[];
+  patterns_used?: string[];
+  preferences_expressed?: string[];
+  problems_solved?: string[];
+  tools_heavy?: string[];
+}
+
 export interface SummarizeResult {
-
+  insights?: SessionInsights;
   error?: string;
   model?: string;
   tokens_used?: number;
@@ -35,9 +45,104 @@ export interface LLMConfig {
 export interface SummarizeOptions {
   model?: string;
   maxTokens?: number;
+  mode?: "quick" | "insights";
 }
 
 export type ProviderType = "anthropic" | "openai" | "ollama";
+export type SummarizeMode = "quick" | "insights";
+
+// ============================================================================
+// System Prompts
+// ============================================================================
+
+/**
+ * Quick mode: Fast one-liner summary for user prompts
+ */
+const QUICK_PROMPT = `Summarize what the user is asking or doing in one sentence.
+Use the user's name from the context in your summary (e.g., "Rudy asked about...").
+
+Output JSON only: {"summary": "One sentence summary"}`;
+
+/**
+ * Insights mode: Full SessionInsights extraction for responses
+ */
+const INSIGHTS_PROMPT = `You are an experienced engineering manager reviewing session transcripts to extract actionable team insights.
+
+Analyze the development session conversation and extract structured observations.
+
+<output_schema>
+{
+  "summary": "One sentence: what was accomplished or decided",
+  "decisions": ["Specific decision and its reasoning"],
+  "patterns_used": ["Development pattern or approach observed"],
+  "preferences_expressed": ["User preference revealed through actions or statements"],
+  "problems_solved": ["Problem that was addressed and how"],
+  "tools_heavy": ["Tool used repeatedly or in notable ways"]
+}
+</output_schema>
+
+<rules>
+- Use the user's name from the context in the summary field (e.g., "Rudy implemented...")
+- Include a field ONLY when the conversation provides clear evidence
+- Extract specifics: "Chose SQLite over Postgres for single-user simplicity" not "Made a database decision"
+- Omit empty arrays entirely
+</rules>
+
+Output valid JSON only. No markdown code blocks, no explanation.`;
+
+/**
+ * Get prompt for the specified mode
+ */
+function getPromptForMode(mode: SummarizeMode): string {
+  return mode === "quick" ? QUICK_PROMPT : INSIGHTS_PROMPT;
+}
+
+// ============================================================================
+// Response Parsing
+// ============================================================================
+
+/**
+ * Extract JSON from LLM response that may contain:
+ * - Markdown code blocks (```json ... ```)
+ * - MLX end tokens (<|im_end|>, <|end|>)
+ * - Thinking blocks (<think>...</think>)
+ * - Raw JSON
+ */
+function extractJson(raw: string): SessionInsights | null {
+  let text = raw.trim();
+
+  // Remove thinking blocks
+  text = text.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
+
+  // Remove MLX end tokens
+  text = text
+    .replace(/<\|im_end\|>/g, "")
+    .replace(/<\|end\|>/g, "")
+    .trim();
+
+  // Extract from markdown code block if present
+  const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
+  if (codeBlockMatch) {
+    text = codeBlockMatch[1].trim();
+  }
+
+  // Find JSON object in text (handle leading/trailing garbage)
+  const jsonMatch = text.match(/\{[\s\S]*\}/);
+  if (!jsonMatch) {
+    return null;
+  }
+
+  try {
+    const parsed = JSON.parse(jsonMatch[0]);
+    // Validate required field
+    if (typeof parsed.summary !== "string") {
+      return null;
+    }
+    return parsed as SessionInsights;
+  } catch {
+    return null;
+  }
+}
 
 // ============================================================================
 // Config Loading
@@ -115,7 +220,7 @@ export function loadConfig(): LLMConfig {
     model: null,
     apiKey: null,
     apiBase: null,
-    maxTokens:
+    maxTokens: 1024,
   };
 
   if (!existsSync(configPath)) {
@@ -174,6 +279,7 @@ async function callAnthropic(
   model: string,
   maxTokens: number,
   apiKey: string,
+  systemPrompt: string,
   apiBase?: string,
 ): Promise<SummarizeResult> {
   const endpoint = apiBase || "https://api.anthropic.com/v1/messages";
@@ -190,10 +296,11 @@ async function callAnthropic(
       model,
       max_tokens: maxTokens,
       temperature: 0.3,
+      system: systemPrompt,
       messages: [
         {
           role: "user",
-          content:
+          content: text,
         },
       ],
     }),
@@ -208,9 +315,16 @@ async function callAnthropic(
 
   const result = await response.json();
   const content = result.content?.[0]?.text || "";
+  const insights = extractJson(content);
+
+  if (!insights) {
+    return {
+      error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+    };
+  }
 
   return {
-
+    insights,
     model,
     tokens_used: result.usage?.output_tokens,
   };
@@ -229,6 +343,7 @@ async function callOpenAI(
   model: string,
   maxTokens: number,
   apiKey: string,
+  systemPrompt: string,
   apiBase?: string,
 ): Promise<SummarizeResult> {
   const endpoint = apiBase || "https://api.openai.com/v1/chat/completions";
@@ -245,9 +360,13 @@ async function callOpenAI(
       max_tokens: maxTokens,
       temperature: 0.3,
       messages: [
+        {
+          role: "system",
+          content: systemPrompt,
+        },
         {
           role: "user",
-          content:
+          content: text,
         },
       ],
     }),
@@ -262,9 +381,16 @@ async function callOpenAI(
 
   const result = await response.json();
   const content = result.choices?.[0]?.message?.content || "";
+  const insights = extractJson(content);
+
+  if (!insights) {
+    return {
+      error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+    };
+  }
 
   return {
-
+    insights,
     model,
     tokens_used: result.usage?.completion_tokens,
   };
@@ -276,15 +402,16 @@ async function callOpenAI(
 }
 
 /**
- * Call Ollama API
+ * Call Ollama API (chat endpoint for system prompt support)
  */
 async function callOllama(
   text: string,
   model: string,
   maxTokens: number,
   apiBase: string,
+  systemPrompt: string,
 ): Promise<SummarizeResult> {
-  const endpoint = `${apiBase}/api/
+  const endpoint = `${apiBase}/api/chat`;
 
   try {
     const response = await fetch(endpoint, {
@@ -294,7 +421,16 @@ async function callOllama(
       },
       body: JSON.stringify({
         model,
-
+        messages: [
+          {
+            role: "system",
+            content: systemPrompt,
+          },
+          {
+            role: "user",
+            content: text,
+          },
+        ],
         stream: false,
         options: {
           num_predict: maxTokens,
@@ -311,10 +447,17 @@ async function callOllama(
     }
 
     const result = await response.json();
-    const content = result.
+    const content = result.message?.content || "";
+    const insights = extractJson(content);
+
+    if (!insights) {
+      return {
+        error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+      };
+    }
 
     return {
-
+      insights,
       model,
       tokens_used: result.eval_count,
     };
@@ -334,8 +477,12 @@ async function callOllama(
  *
  * @param text - Text to summarize
 * @param config - LLM configuration (from loadConfig())
- * @param options - Optional overrides for model and
- * @returns SummarizeResult with
+ * @param options - Optional overrides for model, maxTokens, and mode
+ * @returns SummarizeResult with insights or error
+ *
+ * Modes:
+ * - "quick": Fast one-liner summary (for user prompts)
+ * - "insights": Full SessionInsights extraction (for responses, default)
  */
 export async function summarize(
   text: string,
@@ -346,6 +493,8 @@ export async function summarize(
   const model = options?.model || config.model;
   const maxTokens = options?.maxTokens || config.maxTokens;
   const apiKey = config.apiKey;
+  const mode: SummarizeMode = options?.mode || "insights";
+  const systemPrompt = getPromptForMode(mode);
 
   // Validate config
   if (!provider) {
@@ -374,6 +523,7 @@ export async function summarize(
       model,
       maxTokens,
       apiKey!,
+      systemPrompt,
       config.apiBase || undefined,
     );
   } else if (provider === "openai") {
@@ -382,6 +532,7 @@ export async function summarize(
       model,
       maxTokens,
       apiKey!,
+      systemPrompt,
       config.apiBase || undefined,
     );
   } else if (provider === "ollama") {
@@ -390,7 +541,7 @@ export async function summarize(
        error: `No api_base configured for ollama. Set api_base in ~/.config/llm/config.toml`,
      };
    }
-    return callOllama(text, model, maxTokens, config.apiBase);
+    return callOllama(text, model, maxTokens, config.apiBase, systemPrompt);
   } else {
     return {
       error: `Unknown provider: ${provider}. Supported: anthropic, openai, ollama`,
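The tolerant parsing in the new `extractJson` is the core robustness change. Since the function is module-private (not exported), the sketch below re-states the same cleanup pipeline standalone on a mock messy local-model response; the sample text and variable names are illustrative only:

```typescript
// Standalone re-statement of the extractJson cleanup pipeline, for illustration.
// The sample mimics a small local model wrapping its JSON in a <think> block,
// a markdown code fence, and an MLX end token.
const fence = "`".repeat(3); // avoids a literal fence inside this example
const raw = `<think>listing the session facts...</think>
${fence}json
{"summary": "Rudy wired the Ollama chat endpoint", "tools_heavy": ["Ollama"]}
${fence}<|im_end|>`;

let text = raw
  .replace(/<think>[\s\S]*?<\/think>/gi, "") // strip thinking blocks
  .replace(/<\|im_end\|>/g, "")              // strip MLX end tokens
  .replace(/<\|end\|>/g, "")
  .trim();

const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/); // unwrap code fence
if (fenced) text = fenced[1].trim();

const obj = text.match(/\{[\s\S]*\}/); // tolerate leading/trailing garbage
console.log(obj ? JSON.parse(obj[0]).summary : null);
// → "Rudy wired the Ollama chat endpoint"
```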
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@voidwire/llm-summarize",
-  "version": "
-  "description": "
+  "version": "3.1.0",
+  "description": "Structured session insight extraction for knowledge systems",
   "type": "module",
   "main": "./index.ts",
   "bin": {