@voidwire/llm-summarize 2.0.0 → 3.1.0
- package/README.md +76 -19
- package/cli.ts +56 -26
- package/index.ts +167 -16
- package/package.json +2 -2
package/README.md
CHANGED
@@ -1,12 +1,12 @@
 # llm-summarize
 
-
+Structured session insight extraction for knowledge systems.
 
 ## Philosophy
 
 - **Config-driven** - No hardcoded defaults, specify exact provider/model
 - **Prismis pattern** - Secrets in .env, references in config.toml via `env:VAR_NAME`
-- **
+- **Knowledge-focused** - Extracts decisions, patterns, preferences, not just summaries
 - **Composable** - JSON output pipes to jq and other tools
 
 ## Installation
@@ -22,10 +22,10 @@ cd llmcli-tools
 
 ```toml
 [llm]
-provider = "
-model = "
-
-max_tokens =
+provider = "ollama"
+model = "Qwen2.5:3b"
+api_base = "https://ollama.example.com"
+max_tokens = 1024
 ```
 
 ### Secrets file: `~/.config/llm/.env`
@@ -38,9 +38,14 @@ ANTHROPIC_API_KEY=sk-ant-...
 ## Usage
 
 ```bash
-
-llm-summarize --stdin
-
+# From stdin (typical usage)
+cat session.txt | llm-summarize --stdin
+
+# From clipboard
+pbpaste | llm-summarize --stdin
+
+# Direct text
+llm-summarize "session transcript text"
 ```
 
 ## Options
@@ -48,7 +53,7 @@ echo "text" | llm-summarize --stdin
 | Flag | Description |
 |------|-------------|
 | `--model <name>` | Override model from config |
-| `--max-tokens <n>` | Max output tokens |
+| `--max-tokens <n>` | Max output tokens (default: 1024) |
 | `--stdin` | Read text from stdin |
 | `-h, --help` | Show help |
 
@@ -56,28 +61,80 @@ echo "text" | llm-summarize --stdin
 
 ```json
 {
-  "
-
-
+  "insights": {
+    "summary": "Implemented Redis caching layer with TTL and tag-based invalidation.",
+    "decisions": [
+      "Chose Redis over in-memory caching for persistence across restarts"
+    ],
+    "patterns_used": [
+      "Tag-based cache invalidation"
+    ],
+    "problems_solved": [
+      "Added caching to reduce database load with automatic invalidation on writes"
+    ],
+    "tools_heavy": [
+      "Redis",
+      "CacheService"
+    ]
+  },
+  "model": "Qwen2.5:3b",
+  "tokens_used": 126
 }
 ```
 
+### SessionInsights Fields
+
+| Field | Description |
+|-------|-------------|
+| `summary` | One sentence: what was accomplished (always present) |
+| `decisions` | Specific decisions with reasoning |
+| `patterns_used` | Development patterns observed |
+| `preferences_expressed` | User preferences revealed |
+| `problems_solved` | Problems addressed and how |
+| `tools_heavy` | Tools used notably |
+
+Fields are omitted when no clear evidence exists in the transcript.
+
 ## Supported Providers
 
 | Provider | Models | API Key |
 |----------|--------|---------|
+| `ollama` | Qwen2.5:3b, llama3.2:3b, etc. | Not needed |
 | `anthropic` | claude-3-5-haiku-latest, claude-sonnet-4-20250514 | Required |
-| `openai` | gpt-
-| `ollama` | llama3, mistral, gemma3, etc. | Not needed |
+| `openai` | gpt-4o-mini, gpt-4o | Required |
 
 ### Ollama Configuration
 
 ```toml
 [llm]
 provider = "ollama"
-model = "
-api_base = "
-max_tokens =
+model = "Qwen2.5:3b"
+api_base = "https://ollama.example.com"
+max_tokens = 1024
+```
+
+### Cloud Provider Configuration
+
+```toml
+[llm]
+provider = "anthropic"
+model = "claude-3-5-haiku-latest"
+api_key = "env:ANTHROPIC_API_KEY"
+max_tokens = 1024
+```
+
+## Library Usage
+
+```typescript
+import { summarize, loadConfig, type SessionInsights } from "@voidwire/llm-summarize";
+
+const config = loadConfig();
+const result = await summarize("session transcript", config);
+
+if (result.insights) {
+  console.log(result.insights.summary);
+  console.log(result.insights.decisions);
+}
 ```
 
 ## Exit Codes
@@ -85,5 +142,5 @@ max_tokens = 50
 | Code | Meaning |
 |------|---------|
 | 0 | Success |
-| 1 | API error (rate limit, auth, network) |
+| 1 | API error (rate limit, auth, network, parse failure) |
 | 2 | Client error (missing args, invalid config) |
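The updated Library Usage section covers only the default insights mode; the `mode` option added to `SummarizeOptions` (see the index.ts changes below) is reachable from the library as well. A minimal sketch, assuming only the types and prompt behavior shown in this diff:

```typescript
import { summarize, loadConfig } from "@voidwire/llm-summarize";

// "quick" mode instructs the model to return only {"summary": "..."},
// so insights carries just the one-sentence summary field.
const config = loadConfig();
const result = await summarize("session transcript", config, { mode: "quick" });

if (result.insights) {
  console.log(result.insights.summary);
} else {
  // API failures and JSON parse failures both surface here (exit code 1 in the CLI).
  console.error(result.error);
}
```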
package/cli.ts
CHANGED
@@ -3,7 +3,7 @@
  * llm-summarize CLI
  *
  * Philosophy:
- * -
+ * - Structured session insight extraction for knowledge systems
  * - Multi-provider support (Anthropic, OpenAI, Ollama)
  * - Deterministic JSON output for tooling integration
  * - Config-driven - no hardcoded defaults
@@ -15,10 +15,10 @@
  *
  * Config: ~/.config/llm/config.toml
  *   [llm]
- *   provider = "
- *   model = "
- *
- *   max_tokens =
+ *   provider = "ollama"
+ *   model = "Qwen2.5:3b"
+ *   api_base = "https://ollama.example.com"
+ *   max_tokens = 1024
  *
  * Secrets: ~/.config/llm/.env
  *   ANTHROPIC_API_KEY=sk-ant-...
@@ -29,7 +29,12 @@
  *   2 - Client error (missing args, invalid config)
  */
 
-import {
+import {
+  summarize,
+  loadConfig,
+  type SummarizeOptions,
+  type SummarizeMode,
+} from "./index";
 
 /**
  * Read text from stdin
@@ -49,10 +54,10 @@ async function readStdin(): Promise<string> {
  */
 function printUsage(): void {
   console.error(`
-llm-summarize -
+llm-summarize - Extract structured insights from session transcripts
 
 Philosophy:
-
+  Structured session insight extraction for knowledge systems.
   Config-driven - specify exact provider/model.
   JSON output for tooling integration.
 
@@ -60,17 +65,22 @@ Usage: llm-summarize [options] <text>
        llm-summarize --stdin
 
 Options:
+  --mode <mode>     Summarization mode: quick or insights (default: insights)
   --model <name>    Override model from config
-  --max-tokens <n>  Max output tokens (default: from config or
+  --max-tokens <n>  Max output tokens (default: from config or 1024)
   --stdin           Read text from stdin
   -h, --help        Show this help
 
+Modes:
+  quick    - Fast one-liner summary (for user prompts)
+  insights - Full SessionInsights extraction (for responses)
+
 Config file: ~/.config/llm/config.toml
   [llm]
-  provider = "
-  model = "
-
-  max_tokens =
+  provider = "ollama"
+  model = "Qwen2.5:3b"
+  api_base = "https://ollama.example.com"
+  max_tokens = 1024
 
 Secrets file: ~/.config/llm/.env
   ANTHROPIC_API_KEY=sk-ant-...
@@ -84,20 +94,28 @@ Environment overrides:
 Supported providers:
   anthropic - Claude models (claude-3-5-haiku-latest, claude-sonnet-4-20250514)
   openai    - GPT models (gpt-4.1-mini, gpt-4o)
-  ollama    - Local models (
+  ollama    - Local models (Qwen2.5:3b, llama3.2:3b, etc.) - no API key needed
+
+Output format:
+  {
+    "insights": {
+      "summary": "One sentence: what was accomplished",
+      "decisions": ["Specific decisions with reasoning"],
+      "patterns_used": ["Development patterns observed"],
+      "preferences_expressed": ["User preferences revealed"],
+      "problems_solved": ["Problems addressed and how"],
+      "tools_heavy": ["Tools used notably"]
+    },
+    "model": "qwen2.5:3b",
+    "tokens_used": 150
+  }
 
 Examples:
-  #
-
-
-  # With options
-  llm-summarize --max-tokens 30 "Long event description..."
-
-  # From stdin (for piping)
-  echo "Tool: Edit, File: auth.ts, Result: added JWT validation" | llm-summarize --stdin
+  # Extract insights from session transcript
+  cat session.txt | llm-summarize --stdin
 
-  #
-
+  # From clipboard
+  pbpaste | llm-summarize --stdin
 `);
 }
 
@@ -119,6 +137,7 @@ async function parseArgs(argv: string[]): Promise<ParsedArgs | null> {
 
   let modelOverride: string | undefined;
   let maxTokensOverride: number | undefined;
+  let modeOverride: SummarizeMode | undefined;
   let useStdin = false;
   let text = "";
 
@@ -129,6 +148,14 @@ async function parseArgs(argv: string[]): Promise<ParsedArgs | null> {
       modelOverride = args[++i];
     } else if (arg === "--max-tokens" && i + 1 < args.length) {
       maxTokensOverride = parseInt(args[++i], 10);
+    } else if (arg === "--mode" && i + 1 < args.length) {
+      const mode = args[++i];
+      if (mode === "quick" || mode === "insights") {
+        modeOverride = mode;
+      } else {
+        console.error(`Invalid mode: ${mode}. Use 'quick' or 'insights'.`);
+        process.exit(2);
+      }
     } else if (arg === "--stdin") {
       useStdin = true;
     } else if (!arg.startsWith("-")) {
@@ -146,6 +173,7 @@ async function parseArgs(argv: string[]): Promise<ParsedArgs | null> {
     options: {
       model: modelOverride,
       maxTokens: maxTokensOverride,
+      mode: modeOverride,
     },
   };
 }
@@ -175,8 +203,10 @@ async function main(): Promise<void> {
   console.log(JSON.stringify(result, null, 2));
 
   // Diagnostic
-  if (result.
-    console.error(
+  if (result.insights) {
+    console.error(
+      `✅ Extracted insights (${result.tokens_used || "?"} tokens)`,
+    );
     process.exit(0);
   } else {
     console.error(`❌ ${result.error}`);
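The CLI prints the full result JSON to stdout and keeps diagnostics on stderr, so scripts can rely on the exit-code contract alone. A minimal consumer sketch, assuming the `llm-summarize` binary is on PATH:

```typescript
// Minimal consumer sketch; relies on the exit-code contract documented above:
// 0 success, 1 API error (including parse failure), 2 client error.
import { spawnSync } from "node:child_process";

const proc = spawnSync("llm-summarize", ["--mode", "quick", "--stdin"], {
  input: "Rudy asked how to configure Ollama.",
  encoding: "utf8",
});

if (proc.status === 0) {
  const result = JSON.parse(proc.stdout);
  console.log(result.insights.summary);
} else {
  // stderr carries the ❌ diagnostic line; stdout may still hold error JSON
  console.error(`llm-summarize exited with code ${proc.status}: ${proc.stderr}`);
}
```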
package/index.ts
CHANGED
@@ -1,13 +1,14 @@
 /**
  * llm-summarize - Library exports
  *
- *
+ * Structured session insight extraction for knowledge systems.
  * Pure functions, no process.exit, no stderr output.
  *
  * Usage:
  *   import { summarize, loadConfig } from "llm-summarize";
  *   const config = loadConfig();
- *   const result = await summarize("
+ *   const result = await summarize("session transcript", config);
+ *   // result.insights.summary, result.insights.decisions, etc.
  */
 
 import { readFileSync, existsSync } from "fs";
@@ -17,8 +18,17 @@ import { join } from "path";
 // Types
 // ============================================================================
 
+export interface SessionInsights {
+  summary: string;
+  decisions?: string[];
+  patterns_used?: string[];
+  preferences_expressed?: string[];
+  problems_solved?: string[];
+  tools_heavy?: string[];
+}
+
 export interface SummarizeResult {
-
+  insights?: SessionInsights;
   error?: string;
   model?: string;
   tokens_used?: number;
@@ -35,9 +45,104 @@ export interface LLMConfig {
 export interface SummarizeOptions {
   model?: string;
   maxTokens?: number;
+  mode?: "quick" | "insights";
 }
 
 export type ProviderType = "anthropic" | "openai" | "ollama";
+export type SummarizeMode = "quick" | "insights";
+
+// ============================================================================
+// System Prompts
+// ============================================================================
+
+/**
+ * Quick mode: Fast one-liner summary for user prompts
+ */
+const QUICK_PROMPT = `Summarize what the user is asking or doing in one sentence.
+Use the user's name from the context in your summary (e.g., "Rudy asked about...").
+
+Output JSON only: {"summary": "One sentence summary"}`;
+
+/**
+ * Insights mode: Full SessionInsights extraction for responses
+ */
+const INSIGHTS_PROMPT = `You are an experienced engineering manager reviewing session transcripts to extract actionable team insights.
+
+Analyze the development session conversation and extract structured observations.
+
+<output_schema>
+{
+  "summary": "One sentence: what was accomplished or decided",
+  "decisions": ["Specific decision and its reasoning"],
+  "patterns_used": ["Development pattern or approach observed"],
+  "preferences_expressed": ["User preference revealed through actions or statements"],
+  "problems_solved": ["Problem that was addressed and how"],
+  "tools_heavy": ["Tool used repeatedly or in notable ways"]
+}
+</output_schema>
+
+<rules>
+- Use the user's name from the context in the summary field (e.g., "Rudy implemented...")
+- Include a field ONLY when the conversation provides clear evidence
+- Extract specifics: "Chose SQLite over Postgres for single-user simplicity" not "Made a database decision"
+- Omit empty arrays entirely
+</rules>
+
+Output valid JSON only. No markdown code blocks, no explanation.`;
+
+/**
+ * Get prompt for the specified mode
+ */
+function getPromptForMode(mode: SummarizeMode): string {
+  return mode === "quick" ? QUICK_PROMPT : INSIGHTS_PROMPT;
+}
+
+// ============================================================================
+// Response Parsing
+// ============================================================================
+
+/**
+ * Extract JSON from LLM response that may contain:
+ * - Markdown code blocks (```json ... ```)
+ * - MLX end tokens (<|im_end|>, <|end|>)
+ * - Thinking blocks (<think>...</think>)
+ * - Raw JSON
+ */
+function extractJson(raw: string): SessionInsights | null {
+  let text = raw.trim();
+
+  // Remove thinking blocks
+  text = text.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
+
+  // Remove MLX end tokens
+  text = text
+    .replace(/<\|im_end\|>/g, "")
+    .replace(/<\|end\|>/g, "")
+    .trim();
+
+  // Extract from markdown code block if present
+  const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
+  if (codeBlockMatch) {
+    text = codeBlockMatch[1].trim();
+  }
+
+  // Find JSON object in text (handle leading/trailing garbage)
+  const jsonMatch = text.match(/\{[\s\S]*\}/);
+  if (!jsonMatch) {
+    return null;
+  }
+
+  try {
+    const parsed = JSON.parse(jsonMatch[0]);
+    // Validate required field
+    if (typeof parsed.summary !== "string") {
+      return null;
+    }
+    return parsed as SessionInsights;
+  } catch {
+    return null;
+  }
+}
 
 // ============================================================================
 // Config Loading
@@ -115,7 +220,7 @@ export function loadConfig(): LLMConfig {
     model: null,
     apiKey: null,
     apiBase: null,
-    maxTokens:
+    maxTokens: 1024,
   };
 
   if (!existsSync(configPath)) {
@@ -174,6 +279,7 @@ async function callAnthropic(
   model: string,
   maxTokens: number,
   apiKey: string,
+  systemPrompt: string,
   apiBase?: string,
 ): Promise<SummarizeResult> {
   const endpoint = apiBase || "https://api.anthropic.com/v1/messages";
@@ -190,10 +296,11 @@ async function callAnthropic(
       model,
       max_tokens: maxTokens,
       temperature: 0.3,
+      system: systemPrompt,
       messages: [
         {
           role: "user",
-          content:
+          content: text,
         },
       ],
     }),
@@ -208,9 +315,16 @@ async function callAnthropic(
 
   const result = await response.json();
   const content = result.content?.[0]?.text || "";
+  const insights = extractJson(content);
+
+  if (!insights) {
+    return {
+      error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+    };
+  }
 
   return {
-
+    insights,
     model,
     tokens_used: result.usage?.output_tokens,
   };
@@ -229,6 +343,7 @@ async function callOpenAI(
   model: string,
   maxTokens: number,
   apiKey: string,
+  systemPrompt: string,
   apiBase?: string,
 ): Promise<SummarizeResult> {
   const endpoint = apiBase || "https://api.openai.com/v1/chat/completions";
@@ -245,9 +360,13 @@ async function callOpenAI(
       max_tokens: maxTokens,
       temperature: 0.3,
       messages: [
+        {
+          role: "system",
+          content: systemPrompt,
+        },
         {
           role: "user",
-          content:
+          content: text,
         },
       ],
     }),
@@ -262,9 +381,16 @@ async function callOpenAI(
 
   const result = await response.json();
   const content = result.choices?.[0]?.message?.content || "";
+  const insights = extractJson(content);
+
+  if (!insights) {
+    return {
+      error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+    };
+  }
 
   return {
-
+    insights,
     model,
     tokens_used: result.usage?.completion_tokens,
   };
@@ -276,15 +402,16 @@ async function callOpenAI(
 }
 
 /**
- * Call Ollama API
+ * Call Ollama API (chat endpoint for system prompt support)
  */
 async function callOllama(
   text: string,
   model: string,
   maxTokens: number,
   apiBase: string,
+  systemPrompt: string,
 ): Promise<SummarizeResult> {
-  const endpoint = `${apiBase}/api/
+  const endpoint = `${apiBase}/api/chat`;
 
   try {
     const response = await fetch(endpoint, {
@@ -294,7 +421,16 @@ async function callOllama(
       },
       body: JSON.stringify({
         model,
-
+        messages: [
+          {
+            role: "system",
+            content: systemPrompt,
+          },
+          {
+            role: "user",
+            content: text,
+          },
+        ],
         stream: false,
         options: {
           num_predict: maxTokens,
@@ -311,10 +447,17 @@ async function callOllama(
     }
 
     const result = await response.json();
-    const content = result.
+    const content = result.message?.content || "";
+    const insights = extractJson(content);
+
+    if (!insights) {
+      return {
+        error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+      };
+    }
 
     return {
-
+      insights,
       model,
       tokens_used: result.eval_count,
     };
@@ -334,8 +477,12 @@ async function callOllama(
  *
  * @param text - Text to summarize
 * @param config - LLM configuration (from loadConfig())
- * @param options - Optional overrides for model and
- * @returns SummarizeResult with
+ * @param options - Optional overrides for model, maxTokens, and mode
+ * @returns SummarizeResult with insights or error
+ *
+ * Modes:
+ * - "quick": Fast one-liner summary (for user prompts)
+ * - "insights": Full SessionInsights extraction (for responses, default)
  */
 export async function summarize(
   text: string,
@@ -346,6 +493,8 @@ export async function summarize(
   const model = options?.model || config.model;
   const maxTokens = options?.maxTokens || config.maxTokens;
   const apiKey = config.apiKey;
+  const mode: SummarizeMode = options?.mode || "insights";
+  const systemPrompt = getPromptForMode(mode);
 
   // Validate config
   if (!provider) {
@@ -374,6 +523,7 @@ export async function summarize(
       model,
       maxTokens,
       apiKey!,
+      systemPrompt,
       config.apiBase || undefined,
     );
   } else if (provider === "openai") {
@@ -382,6 +532,7 @@ export async function summarize(
       model,
       maxTokens,
       apiKey!,
+      systemPrompt,
       config.apiBase || undefined,
     );
   } else if (provider === "ollama") {
@@ -390,7 +541,7 @@ export async function summarize(
        error: `No api_base configured for ollama. Set api_base in ~/.config/llm/config.toml`,
      };
    }
-    return callOllama(text, model, maxTokens, config.apiBase);
+    return callOllama(text, model, maxTokens, config.apiBase, systemPrompt);
   } else {
     return {
       error: `Unknown provider: ${provider}. Supported: anthropic, openai, ollama`,
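The tolerant parsing in the new `extractJson` is the core robustness change. Since the function is module-private (not exported), the sketch below re-states the same cleanup pipeline standalone on a mock messy local-model response; the sample text and variable names are illustrative only:

```typescript
// Standalone re-statement of the extractJson cleanup pipeline, for illustration.
// The sample mimics a small local model wrapping its JSON in a <think> block,
// a markdown code fence, and an MLX end token.
const fence = "`".repeat(3); // avoids a literal fence inside this example
const raw = `<think>listing the session facts...</think>
${fence}json
{"summary": "Rudy wired the Ollama chat endpoint", "tools_heavy": ["Ollama"]}
${fence}<|im_end|>`;

let text = raw
  .replace(/<think>[\s\S]*?<\/think>/gi, "") // strip thinking blocks
  .replace(/<\|im_end\|>/g, "")              // strip MLX end tokens
  .replace(/<\|end\|>/g, "")
  .trim();

const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/); // unwrap code fence
if (fenced) text = fenced[1].trim();

const obj = text.match(/\{[\s\S]*\}/); // tolerate leading/trailing garbage
console.log(obj ? JSON.parse(obj[0]).summary : null);
// → "Rudy wired the Ollama chat endpoint"
```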
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@voidwire/llm-summarize",
-  "version": "
-  "description": "
+  "version": "3.1.0",
+  "description": "Structured session insight extraction for knowledge systems",
   "type": "module",
   "main": "./index.ts",
   "bin": {