@voidwire/llm-summarize 2.0.0 → 3.0.0

Files changed (4)
  1. package/README.md +76 -19
  2. package/cli.ts +35 -25
  3. package/index.ts +132 -13
  4. package/package.json +6 -6
package/README.md CHANGED
@@ -1,12 +1,12 @@
  # llm-summarize
 
- Fast LLM-powered text summarization for observability and logging.
+ Structured session insight extraction for knowledge systems.
 
  ## Philosophy
 
  - **Config-driven** - No hardcoded defaults, specify exact provider/model
  - **Prismis pattern** - Secrets in .env, references in config.toml via `env:VAR_NAME`
- - **Fast and cheap** - Designed for high-volume summarization (haiku/gpt-4.1-mini)
+ - **Knowledge-focused** - Extracts decisions, patterns, preferences, not just summaries
  - **Composable** - JSON output pipes to jq and other tools
 
  ## Installation
@@ -22,10 +22,10 @@ cd llmcli-tools
 
  ```toml
  [llm]
- provider = "openai"
- model = "gpt-4.1-mini"
- api_key = "env:OPENAI_API_KEY"
- max_tokens = 50
+ provider = "ollama"
+ model = "Qwen2.5:3b"
+ api_base = "https://ollama.example.com"
+ max_tokens = 1024
  ```
 
  ### Secrets file: `~/.config/llm/.env`
@@ -38,9 +38,14 @@ ANTHROPIC_API_KEY=sk-ant-...
  ## Usage
 
  ```bash
- llm-summarize <text>
- llm-summarize --stdin
- echo "text" | llm-summarize --stdin
+ # From stdin (typical usage)
+ cat session.txt | llm-summarize --stdin
+
+ # From clipboard
+ pbpaste | llm-summarize --stdin
+
+ # Direct text
+ llm-summarize "session transcript text"
  ```
 
  ## Options
@@ -48,7 +53,7 @@ echo "text" | llm-summarize --stdin
  | Flag | Description |
  |------|-------------|
  | `--model <name>` | Override model from config |
- | `--max-tokens <n>` | Max output tokens |
+ | `--max-tokens <n>` | Max output tokens (default: 1024) |
  | `--stdin` | Read text from stdin |
  | `-h, --help` | Show help |
 
@@ -56,28 +61,80 @@ echo "text" | llm-summarize --stdin
 
  ```json
  {
-   "summary": "User saved form data to PostgreSQL.",
-   "model": "gpt-4.1-mini",
-   "tokens_used": 12
+   "insights": {
+     "summary": "Implemented Redis caching layer with TTL and tag-based invalidation.",
+     "decisions": [
+       "Chose Redis over in-memory caching for persistence across restarts"
+     ],
+     "patterns_used": [
+       "Tag-based cache invalidation"
+     ],
+     "problems_solved": [
+       "Added caching to reduce database load with automatic invalidation on writes"
+     ],
+     "tools_heavy": [
+       "Redis",
+       "CacheService"
+     ]
+   },
+   "model": "Qwen2.5:3b",
+   "tokens_used": 126
  }
  ```
 
+ ### SessionInsights Fields
+
+ | Field | Description |
+ |-------|-------------|
+ | `summary` | One sentence: what was accomplished (always present) |
+ | `decisions` | Specific decisions with reasoning |
+ | `patterns_used` | Development patterns observed |
+ | `preferences_expressed` | User preferences revealed |
+ | `problems_solved` | Problems addressed and how |
+ | `tools_heavy` | Tools used notably |
+
+ Fields are omitted when no clear evidence exists in the transcript.
+
  ## Supported Providers
 
  | Provider | Models | API Key |
  |----------|--------|---------|
+ | `ollama` | Qwen2.5:3b, llama3.2:3b, etc. | Not needed |
  | `anthropic` | claude-3-5-haiku-latest, claude-sonnet-4-20250514 | Required |
- | `openai` | gpt-4.1-mini, gpt-4o | Required |
- | `ollama` | llama3, mistral, gemma3, etc. | Not needed |
+ | `openai` | gpt-4o-mini, gpt-4o | Required |
 
  ### Ollama Configuration
 
  ```toml
  [llm]
  provider = "ollama"
- model = "llama3"
- api_base = "http://localhost:11434/api/generate" # optional, this is default
- max_tokens = 50
+ model = "Qwen2.5:3b"
+ api_base = "https://ollama.example.com"
+ max_tokens = 1024
+ ```
+
+ ### Cloud Provider Configuration
+
+ ```toml
+ [llm]
+ provider = "anthropic"
+ model = "claude-3-5-haiku-latest"
+ api_key = "env:ANTHROPIC_API_KEY"
+ max_tokens = 1024
+ ```
+
+ ## Library Usage
+
+ ```typescript
+ import { summarize, loadConfig, type SessionInsights } from "@voidwire/llm-summarize";
+
+ const config = loadConfig();
+ const result = await summarize("session transcript", config);
+
+ if (result.insights) {
+   console.log(result.insights.summary);
+   console.log(result.insights.decisions);
+ }
  ```
 
  ## Exit Codes
@@ -85,5 +142,5 @@ max_tokens = 50
  | Code | Meaning |
  |------|---------|
  | 0 | Success |
- | 1 | API error (rate limit, auth, network) |
+ | 1 | API error (rate limit, auth, network, parse failure) |
  | 2 | Client error (missing args, invalid config) |
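
Because every `SessionInsights` field except `summary` is optional and omitted when the transcript lacks evidence, library consumers should default the arrays before iterating. A minimal sketch using only the exports shown in the README above (the transcript string is a placeholder):

```typescript
// Sketch: consume insights defensively; array fields may be absent entirely.
import { summarize, loadConfig } from "@voidwire/llm-summarize";

const config = loadConfig();
const result = await summarize("session transcript text", config);

if (result.insights) {
  // summary is the only field guaranteed to be present
  const { summary, decisions = [], tools_heavy = [] } = result.insights;
  console.log(summary);
  decisions.forEach((d) => console.log(`decision: ${d}`));
  tools_heavy.forEach((t) => console.log(`tool: ${t}`));
} else {
  console.error(result.error); // API failure or unparseable model output
}
```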
package/cli.ts CHANGED
@@ -3,7 +3,7 @@
   * llm-summarize CLI
   *
   * Philosophy:
-  * - Fast summaries for observability and logging
+  * - Structured session insight extraction for knowledge systems
   * - Multi-provider support (Anthropic, OpenAI, Ollama)
   * - Deterministic JSON output for tooling integration
   * - Config-driven - no hardcoded defaults
@@ -15,10 +15,10 @@
   *
   * Config: ~/.config/llm/config.toml
   *   [llm]
-  *   provider = "anthropic"
-  *   model = "claude-3-5-haiku-latest"
-  *   api_key = "env:ANTHROPIC_API_KEY"
-  *   max_tokens = 50
+  *   provider = "ollama"
+  *   model = "Qwen2.5:3b"
+  *   api_base = "https://ollama.example.com"
+  *   max_tokens = 1024
   *
   * Secrets: ~/.config/llm/.env
   *   ANTHROPIC_API_KEY=sk-ant-...
@@ -49,10 +49,10 @@ async function readStdin(): Promise<string> {
   */
  function printUsage(): void {
    console.error(`
- llm-summarize - Summarize text using LLM APIs
+ llm-summarize - Extract structured insights from session transcripts
 
  Philosophy:
-   Fast, cheap summaries for observability events.
+   Structured session insight extraction for knowledge systems.
    Config-driven - specify exact provider/model.
    JSON output for tooling integration.
 
@@ -61,16 +61,16 @@ Usage: llm-summarize [options] <text>
 
  Options:
    --model <name>      Override model from config
-   --max-tokens <n>    Max output tokens (default: from config or 50)
+   --max-tokens <n>    Max output tokens (default: from config or 1024)
    --stdin             Read text from stdin
    -h, --help          Show this help
 
  Config file: ~/.config/llm/config.toml
    [llm]
-   provider = "anthropic"
-   model = "claude-3-5-haiku-latest"
-   api_key = "env:ANTHROPIC_API_KEY"
-   max_tokens = 50
+   provider = "ollama"
+   model = "Qwen2.5:3b"
+   api_base = "https://ollama.example.com"
+   max_tokens = 1024
 
  Secrets file: ~/.config/llm/.env
    ANTHROPIC_API_KEY=sk-ant-...
@@ -84,20 +84,28 @@ Environment overrides:
  Supported providers:
    anthropic - Claude models (claude-3-5-haiku-latest, claude-sonnet-4-20250514)
    openai    - GPT models (gpt-4.1-mini, gpt-4o)
-   ollama    - Local models (llama3, mistral, gemma3, etc.) - no API key needed
+   ollama    - Local models (Qwen2.5:3b, llama3.2:3b, etc.) - no API key needed
+
+ Output format:
+   {
+     "insights": {
+       "summary": "One sentence: what was accomplished",
+       "decisions": ["Specific decisions with reasoning"],
+       "patterns_used": ["Development patterns observed"],
+       "preferences_expressed": ["User preferences revealed"],
+       "problems_solved": ["Problems addressed and how"],
+       "tools_heavy": ["Tools used notably"]
+     },
+     "model": "qwen2.5:3b",
+     "tokens_used": 150
+   }
 
  Examples:
-   # Simple summarization
-   llm-summarize "User requested fix for post-password-reset login failure"
-
-   # With options
-   llm-summarize --max-tokens 30 "Long event description..."
-
-   # From stdin (for piping)
-   echo "Tool: Edit, File: auth.ts, Result: added JWT validation" | llm-summarize --stdin
+   # Extract insights from session transcript
+   cat session.txt | llm-summarize --stdin
 
-   # Pipe from another tool
-   cat event.json | jq -r '.description' | llm-summarize --stdin
+   # From clipboard
+   pbpaste | llm-summarize --stdin
  `);
  }
 
@@ -175,8 +183,10 @@ async function main(): Promise<void> {
    console.log(JSON.stringify(result, null, 2));
 
    // Diagnostic
-   if (result.summary) {
-     console.error(`✅ Summarized (${result.tokens_used || "?"} tokens)`);
+   if (result.insights) {
+     console.error(
+       `✅ Extracted insights (${result.tokens_used || "?"} tokens)`,
+     );
      process.exit(0);
    } else {
      console.error(`❌ ${result.error}`);
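
As the diagnostic block shows, the CLI prints the JSON result to stdout and the ✅/❌ line to stderr, so the two streams can be handled separately. A hedged sketch of a programmatic caller (the `spawnSync` wiring is an assumption; the package only documents the shell interface):

```typescript
// Sketch (hypothetical consumer): stdout carries JSON, stderr the diagnostic.
// Exit codes per the README: 0 success, 1 API/parse error, 2 client error.
import { spawnSync } from "node:child_process";

const proc = spawnSync("llm-summarize", ["--stdin"], {
  input: "session transcript text",
  encoding: "utf8",
});

if (proc.status === 0) {
  const result = JSON.parse(proc.stdout);
  console.log(result.insights.summary);
} else {
  // On exit 1 the JSON (with an `error` field) is still printed first;
  // on exit 2 expect usage text on stderr instead.
  console.error(proc.stderr.trim());
}
```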
package/index.ts CHANGED
@@ -1,13 +1,14 @@
  /**
   * llm-summarize - Library exports
   *
-  * Fast LLM-powered text summarization for observability and logging.
+  * Structured session insight extraction for knowledge systems.
   * Pure functions, no process.exit, no stderr output.
   *
   * Usage:
   *   import { summarize, loadConfig } from "llm-summarize";
   *   const config = loadConfig();
-  *   const result = await summarize("text to summarize", config);
+  *   const result = await summarize("session transcript", config);
+  *   // result.insights.summary, result.insights.decisions, etc.
   */
 
  import { readFileSync, existsSync } from "fs";
@@ -17,8 +18,17 @@ import { join } from "path";
  // Types
  // ============================================================================
 
+ export interface SessionInsights {
+   summary: string;
+   decisions?: string[];
+   patterns_used?: string[];
+   preferences_expressed?: string[];
+   problems_solved?: string[];
+   tools_heavy?: string[];
+ }
+
  export interface SummarizeResult {
-   summary?: string;
+   insights?: SessionInsights;
    error?: string;
    model?: string;
    tokens_used?: number;
@@ -39,6 +49,80 @@ export interface SummarizeOptions {
 
  export type ProviderType = "anthropic" | "openai" | "ollama";
 
+ // ============================================================================
+ // System Prompt
+ // ============================================================================
+
+ const SYSTEM_PROMPT = `You are an experienced engineering manager reviewing session transcripts to extract actionable team insights.
+
+ Analyze the development session conversation and extract structured observations.
+
+ <output_schema>
+ {
+   "summary": "One sentence: what was accomplished or decided",
+   "decisions": ["Specific decision and its reasoning"],
+   "patterns_used": ["Development pattern or approach observed"],
+   "preferences_expressed": ["User preference revealed through actions or statements"],
+   "problems_solved": ["Problem that was addressed and how"],
+   "tools_heavy": ["Tool used repeatedly or in notable ways"]
+ }
+ </output_schema>
+
+ <rules>
+ - Include a field ONLY when the conversation provides clear evidence
+ - Extract specifics: "Chose SQLite over Postgres for single-user simplicity" not "Made a database decision"
+ - Omit empty arrays entirely
+ </rules>
+
+ Output valid JSON only. No markdown code blocks, no explanation.`;
+
+ // ============================================================================
+ // Response Parsing
+ // ============================================================================
+
+ /**
+  * Extract JSON from LLM response that may contain:
+  * - Markdown code blocks (```json ... ```)
+  * - MLX end tokens (<|im_end|>, <|end|>)
+  * - Thinking blocks (<think>...</think>)
+  * - Raw JSON
+  */
+ function extractJson(raw: string): SessionInsights | null {
+   let text = raw.trim();
+
+   // Remove thinking blocks
+   text = text.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
+
+   // Remove MLX end tokens
+   text = text
+     .replace(/<\|im_end\|>/g, "")
+     .replace(/<\|end\|>/g, "")
+     .trim();
+
+   // Extract from markdown code block if present
+   const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
+   if (codeBlockMatch) {
+     text = codeBlockMatch[1].trim();
+   }
+
+   // Find JSON object in text (handle leading/trailing garbage)
+   const jsonMatch = text.match(/\{[\s\S]*\}/);
+   if (!jsonMatch) {
+     return null;
+   }
+
+   try {
+     const parsed = JSON.parse(jsonMatch[0]);
+     // Validate required field
+     if (typeof parsed.summary !== "string") {
+       return null;
+     }
+     return parsed as SessionInsights;
+   } catch {
+     return null;
+   }
+ }
+
  // ============================================================================
  // Config Loading
  // ============================================================================
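
The cleanup order in `extractJson` matters: thinking blocks and end tokens are stripped before fence unwrapping and brace matching, so stray wrapper text never reaches the parser. A standalone sketch of the same steps (the helper itself is not exported), with representative raw outputs from small local models:

```typescript
// Standalone sketch mirroring extractJson's cleanup pipeline (illustrative).
const samples: string[] = [
  '```json\n{"summary": "Added caching"}\n```',                      // markdown fence
  '<think>weighing options...</think> {"summary": "Added caching"}', // thinking block
  '{"summary": "Added caching"}<|im_end|>',                          // MLX end token
];

function cleanAndParse(raw: string): { summary: string } | null {
  let text = raw
    .replace(/<think>[\s\S]*?<\/think>/gi, "") // 1. strip thinking blocks
    .replace(/<\|im_end\|>/g, "")              // 2. strip MLX end tokens
    .replace(/<\|end\|>/g, "")
    .trim();
  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/); // 3. unwrap fence
  if (fenced) text = fenced[1].trim();
  const obj = text.match(/\{[\s\S]*\}/); // 4. widest-brace match
  if (!obj) return null;
  try {
    const parsed = JSON.parse(obj[0]);
    return typeof parsed.summary === "string" ? parsed : null; // 5. validate
  } catch {
    return null;
  }
}

samples.forEach((s) => console.log(cleanAndParse(s))); // all three parse
```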
@@ -115,7 +199,7 @@ export function loadConfig(): LLMConfig {
    model: null,
    apiKey: null,
    apiBase: null,
-   maxTokens: 50,
+   maxTokens: 1024,
  };
 
  if (!existsSync(configPath)) {
@@ -190,10 +274,11 @@ async function callAnthropic(
    model,
    max_tokens: maxTokens,
    temperature: 0.3,
+   system: SYSTEM_PROMPT,
    messages: [
      {
        role: "user",
-       content: `What was accomplished or decided? One sentence, past tense, focus on actions and outcomes:\n\n${text}`,
+       content: text,
      },
    ],
  }),
@@ -208,9 +293,16 @@
 
    const result = await response.json();
    const content = result.content?.[0]?.text || "";
+   const insights = extractJson(content);
+
+   if (!insights) {
+     return {
+       error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+     };
+   }
 
    return {
-     summary: content.trim(),
+     insights,
      model,
      tokens_used: result.usage?.output_tokens,
    };
@@ -245,9 +337,13 @@ async function callOpenAI(
    max_tokens: maxTokens,
    temperature: 0.3,
    messages: [
+     {
+       role: "system",
+       content: SYSTEM_PROMPT,
+     },
      {
        role: "user",
-       content: `What was accomplished or decided? One sentence, past tense, focus on actions and outcomes:\n\n${text}`,
+       content: text,
      },
    ],
  }),
@@ -262,9 +358,16 @@ async function callOpenAI(
 
    const result = await response.json();
    const content = result.choices?.[0]?.message?.content || "";
+   const insights = extractJson(content);
+
+   if (!insights) {
+     return {
+       error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+     };
+   }
 
    return {
-     summary: content.trim(),
+     insights,
      model,
      tokens_used: result.usage?.completion_tokens,
    };
@@ -276,7 +379,7 @@ async function callOpenAI(
  }
 
  /**
-  * Call Ollama API
+  * Call Ollama API (chat endpoint for system prompt support)
   */
  async function callOllama(
    text: string,
@@ -284,7 +387,7 @@ async function callOllama(
    maxTokens: number,
    apiBase: string,
  ): Promise<SummarizeResult> {
-   const endpoint = `${apiBase}/api/generate`;
+   const endpoint = `${apiBase}/api/chat`;
 
    try {
      const response = await fetch(endpoint, {
@@ -294,7 +397,16 @@ async function callOllama(
    },
    body: JSON.stringify({
      model,
-     prompt: `What was accomplished or decided? One sentence, past tense, focus on actions and outcomes:\n\n${text}`,
+     messages: [
+       {
+         role: "system",
+         content: SYSTEM_PROMPT,
+       },
+       {
+         role: "user",
+         content: text,
+       },
+     ],
      stream: false,
      options: {
        num_predict: maxTokens,
@@ -311,10 +423,17 @@ async function callOllama(
    }
 
    const result = await response.json();
-   const content = result.response || "";
+   const content = result.message?.content || "";
+   const insights = extractJson(content);
+
+   if (!insights) {
+     return {
+       error: `Failed to parse response as JSON: ${content.slice(0, 200)}`,
+     };
+   }
 
    return {
-     summary: content.trim(),
+     insights,
      model,
      tokens_used: result.eval_count,
    };
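
Switching Ollama from `/api/generate` to `/api/chat` is what makes the system prompt possible, and it also changes the response shape: text now arrives under `message.content` instead of `response`. A sketch of the request the new `callOllama` sends, with the body shape taken from the diff above (the localhost base URL, the POST method and header, and the placeholder strings are assumptions; config normally supplies `api_base`):

```typescript
// Sketch: the /api/chat request shape used by callOllama above.
const response = await fetch("http://localhost:11434/api/chat", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "Qwen2.5:3b",
    messages: [
      { role: "system", content: "<system prompt>" },       // SYSTEM_PROMPT in the package
      { role: "user", content: "<session transcript>" },
    ],
    stream: false,                 // one JSON body rather than chunked lines
    options: { num_predict: 1024 }, // maps from max_tokens in config
  }),
});

const result = await response.json();
console.log(result.message?.content); // /api/chat nests the text under message
console.log(result.eval_count);       // output token count
```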
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "@voidwire/llm-summarize",
-   "version": "2.0.0",
-   "description": "Fast LLM-powered text summarization for observability and logging",
+   "version": "3.0.0",
+   "description": "Structured session insight extraction for knowledge systems",
    "type": "module",
    "main": "./index.ts",
    "bin": {
@@ -18,9 +18,6 @@
      "README.md",
      "LICENSE"
    ],
-   "scripts": {
-     "test": "bun test"
-   },
    "keywords": [
      "llm",
      "summarize",
@@ -42,5 +39,8 @@
    },
    "engines": {
      "bun": ">=1.0.0"
+   },
+   "scripts": {
+     "test": "bun test"
    }
- }
+ }