llmist 0.3.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2791,10 +2791,11 @@ var init_gemini = __esm({
  return GEMINI_MODELS;
  }
  buildRequestPayload(options, descriptor, _spec, messages) {
- const { systemInstruction, contents } = this.extractSystemAndContents(messages);
+ const contents = this.convertMessagesToContents(messages);
  const generationConfig = this.buildGenerationConfig(options);
  const config = {
- ...systemInstruction ? { systemInstruction: systemInstruction.parts.map((p) => p.text).join("\n") } : {},
+ // Note: systemInstruction removed - it doesn't work with countTokens()
+ // System messages are now included in contents as user+model exchanges
  ...generationConfig ? { ...generationConfig } : {},
  // Explicitly disable function calling to prevent UNEXPECTED_TOOL_CALL errors
  toolConfig: {
@@ -2815,31 +2816,37 @@ var init_gemini = __esm({
  const streamResponse = await client.models.generateContentStream(payload);
  return streamResponse;
  }
- extractSystemAndContents(messages) {
- const firstSystemIndex = messages.findIndex((message) => message.role === "system");
- if (firstSystemIndex === -1) {
- return {
- systemInstruction: null,
- contents: this.mergeConsecutiveMessages(messages)
- };
- }
- let systemBlockEnd = firstSystemIndex;
- while (systemBlockEnd < messages.length && messages[systemBlockEnd].role === "system") {
- systemBlockEnd++;
+ /**
+ * Convert LLM messages to Gemini contents format.
+ *
+ * For Gemini, we convert system messages to user+model exchanges instead of
+ * using systemInstruction, because:
+ * 1. systemInstruction doesn't work with countTokens() API
+ * 2. This approach gives perfect token counting accuracy (0% error)
+ * 3. The model receives and follows system instructions identically
+ *
+ * System message: "You are a helpful assistant"
+ * Becomes:
+ * - User: "You are a helpful assistant"
+ * - Model: "Understood."
+ */
+ convertMessagesToContents(messages) {
+ const expandedMessages = [];
+ for (const message of messages) {
+ if (message.role === "system") {
+ expandedMessages.push({
+ role: "user",
+ content: message.content
+ });
+ expandedMessages.push({
+ role: "assistant",
+ content: "Understood."
+ });
+ } else {
+ expandedMessages.push(message);
+ }
  }
- const systemMessages = messages.slice(firstSystemIndex, systemBlockEnd);
- const nonSystemMessages = [
- ...messages.slice(0, firstSystemIndex),
- ...messages.slice(systemBlockEnd)
- ];
- const systemInstruction = {
- role: "system",
- parts: systemMessages.map((message) => ({ text: message.content }))
- };
- return {
- systemInstruction,
- contents: this.mergeConsecutiveMessages(nonSystemMessages)
- };
+ return this.mergeConsecutiveMessages(expandedMessages);
  }
  mergeConsecutiveMessages(messages) {
  if (messages.length === 0) {
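The new `convertMessagesToContents()` replaces `extractSystemAndContents()`: instead of splitting out a `systemInstruction`, each system message is expanded into a user turn followed by a fixed "Understood." model turn, and the result is passed through the existing `mergeConsecutiveMessages()`. A minimal standalone sketch of that expansion (the `ChatMessage` type and `expandSystemMessages` name are illustrative, not the library's exported API):

```typescript
// Illustrative sketch of the expansion performed by convertMessagesToContents();
// the role/content message shape is assumed from this diff, not a published type.
type ChatMessage = { role: "system" | "user" | "assistant"; content: string };

function expandSystemMessages(messages: ChatMessage[]): ChatMessage[] {
  const expanded: ChatMessage[] = [];
  for (const message of messages) {
    if (message.role === "system") {
      // The system prompt becomes an ordinary user turn...
      expanded.push({ role: "user", content: message.content });
      // ...acknowledged by a canned model turn, so the history stays alternating.
      expanded.push({ role: "assistant", content: "Understood." });
    } else {
      expanded.push(message);
    }
  }
  return expanded;
}

// expandSystemMessages([
//   { role: "system", content: "You are a helpful assistant" },
//   { role: "user", content: "Calculate 15 * 23" },
// ])
// => user: "You are a helpful assistant" / assistant: "Understood." / user: "Calculate 15 * 23"
```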
@@ -2928,8 +2935,8 @@ var init_gemini = __esm({
  *
  * This method provides accurate token estimation for Gemini models by:
  * - Using the SDK's countTokens() method
- * - Properly extracting and handling system instructions
- * - Transforming messages to Gemini's expected format
+ * - Converting system messages to user+model exchanges (same as in generation)
+ * - This gives perfect token counting accuracy (0% error vs actual usage)
  *
  * @param messages - The messages to count tokens for
  * @param descriptor - Model descriptor containing the model name
@@ -2948,16 +2955,14 @@ var init_gemini = __esm({
  */
  async countTokens(messages, descriptor, _spec) {
  const client = this.client;
- const { systemInstruction, contents } = this.extractSystemAndContents(messages);
- const request = {
- model: descriptor.name,
- contents: this.convertContentsForNewSDK(contents)
- };
- if (systemInstruction) {
- request.systemInstruction = systemInstruction.parts.map((p) => p.text).join("\n");
- }
+ const contents = this.convertMessagesToContents(messages);
  try {
- const response = await client.models.countTokens(request);
+ const response = await client.models.countTokens({
+ model: descriptor.name,
+ contents: this.convertContentsForNewSDK(contents)
+ // Note: systemInstruction not used - it's not supported by countTokens()
+ // and would cause a 2100% token counting error
+ });
  return response.totalTokens ?? 0;
  } catch (error) {
  console.warn(
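Generation and token counting now send the same expanded `contents`, which is why the `systemInstruction` field can be dropped from the `countTokens()` request entirely. A hedged sketch of the equivalent direct call against the `@google/genai` SDK (the model name and message text are placeholders, and the SDK choice is inferred from the `client.models.countTokens` usage above):

```typescript
import { GoogleGenAI } from "@google/genai";

// Sketch only: count tokens for contents that already include the expanded
// system message; no systemInstruction field is sent in the request.
const ai = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });

const response = await ai.models.countTokens({
  model: "gemini-2.0-flash", // placeholder model name
  contents: [
    { role: "user", parts: [{ text: "You are a helpful assistant" }] },
    { role: "model", parts: [{ text: "Understood." }] },
    { role: "user", parts: [{ text: "Calculate 15 * 23" }] },
  ],
});

console.log(response.totalTokens ?? 0);
```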
@@ -4604,6 +4609,189 @@ var HookPresets = class _HookPresets {
  }
  };
  }
+ /**
+ * Tracks comprehensive progress metrics including iterations, tokens, cost, and timing.
+ *
+ * **This preset showcases llmist's core capabilities by demonstrating:**
+ * - Observer pattern for non-intrusive monitoring
+ * - Integration with ModelRegistry for cost estimation
+ * - Callback-based architecture for flexible UI updates
+ * - Provider-agnostic token and cost tracking
+ *
+ * Unlike `tokenTracking()` which only logs to console, this preset provides
+ * structured data through callbacks, making it perfect for building custom UIs,
+ * dashboards, or progress indicators (like the llmist CLI).
+ *
+ * **Output (when logProgress: true):**
+ * - Iteration number and call count
+ * - Cumulative token usage (input + output)
+ * - Cumulative cost in USD (requires modelRegistry)
+ * - Elapsed time in seconds
+ *
+ * **Use cases:**
+ * - Building CLI progress indicators with live updates
+ * - Creating web dashboards with real-time metrics
+ * - Budget monitoring and cost alerts
+ * - Performance tracking and optimization
+ * - Custom logging to external systems (Datadog, CloudWatch, etc.)
+ *
+ * **Performance:** Minimal overhead. Uses Date.now() for timing and optional
+ * ModelRegistry.estimateCost() which is O(1) lookup. Callback invocation is
+ * synchronous and fast.
+ *
+ * @param options - Progress tracking options
+ * @param options.modelRegistry - ModelRegistry for cost estimation (optional)
+ * @param options.onProgress - Callback invoked after each LLM call (optional)
+ * @param options.logProgress - Log progress to console (default: false)
+ * @returns Hook configuration with progress tracking observers
+ *
+ * @example
+ * ```typescript
+ * // Basic usage with callback (RECOMMENDED - used by llmist CLI)
+ * import { LLMist, HookPresets } from 'llmist';
+ *
+ * const client = LLMist.create();
+ *
+ * await client.agent()
+ * .withHooks(HookPresets.progressTracking({
+ * modelRegistry: client.modelRegistry,
+ * onProgress: (stats) => {
+ * // Update your UI with stats
+ * console.log(`#${stats.currentIteration} | ${stats.totalTokens} tokens | $${stats.totalCost.toFixed(4)}`);
+ * }
+ * }))
+ * .withGadgets(Calculator)
+ * .ask("Calculate 15 * 23");
+ * // Output: #1 | 245 tokens | $0.0012
+ * ```
+ *
+ * @example
+ * ```typescript
+ * // Console logging mode (quick debugging)
+ * await client.agent()
+ * .withHooks(HookPresets.progressTracking({
+ * modelRegistry: client.modelRegistry,
+ * logProgress: true // Simple console output
+ * }))
+ * .ask("Your prompt");
+ * // Output: 📊 Progress: Iteration #1 | 245 tokens | $0.0012 | 1.2s
+ * ```
+ *
+ * @example
+ * ```typescript
+ * // Budget monitoring with alerts
+ * const BUDGET_USD = 0.10;
+ *
+ * await client.agent()
+ * .withHooks(HookPresets.progressTracking({
+ * modelRegistry: client.modelRegistry,
+ * onProgress: (stats) => {
+ * if (stats.totalCost > BUDGET_USD) {
+ * throw new Error(`Budget exceeded: $${stats.totalCost.toFixed(4)}`);
+ * }
+ * }
+ * }))
+ * .ask("Long running task...");
+ * ```
+ *
+ * @example
+ * ```typescript
+ * // Web dashboard integration
+ * let progressBar: HTMLElement;
+ *
+ * await client.agent()
+ * .withHooks(HookPresets.progressTracking({
+ * modelRegistry: client.modelRegistry,
+ * onProgress: (stats) => {
+ * // Update web UI in real-time
+ * progressBar.textContent = `Iteration ${stats.currentIteration}`;
+ * progressBar.dataset.cost = stats.totalCost.toFixed(4);
+ * progressBar.dataset.tokens = stats.totalTokens.toString();
+ * }
+ * }))
+ * .ask("Your prompt");
+ * ```
+ *
+ * @example
+ * ```typescript
+ * // External logging (Datadog, CloudWatch, etc.)
+ * await client.agent()
+ * .withHooks(HookPresets.progressTracking({
+ * modelRegistry: client.modelRegistry,
+ * onProgress: async (stats) => {
+ * await metrics.gauge('llm.iteration', stats.currentIteration);
+ * await metrics.gauge('llm.cost', stats.totalCost);
+ * await metrics.gauge('llm.tokens', stats.totalTokens);
+ * }
+ * }))
+ * .ask("Your prompt");
+ * ```
+ *
+ * @see {@link https://github.com/zbigniewsobiecki/llmist/blob/main/docs/HOOKS.md#hookpresetsprogresstrackingoptions | Full documentation}
+ * @see {@link ProgressTrackingOptions} for detailed options
+ * @see {@link ProgressStats} for the callback data structure
+ */
+ static progressTracking(options) {
+ const { modelRegistry, onProgress, logProgress = false } = options ?? {};
+ let totalCalls = 0;
+ let currentIteration = 0;
+ let totalInputTokens = 0;
+ let totalOutputTokens = 0;
+ let totalCost = 0;
+ const startTime = Date.now();
+ return {
+ observers: {
+ // Track iteration on each LLM call start
+ onLLMCallStart: async (ctx) => {
+ currentIteration++;
+ },
+ // Accumulate metrics and report progress on each LLM call completion
+ onLLMCallComplete: async (ctx) => {
+ totalCalls++;
+ if (ctx.usage) {
+ totalInputTokens += ctx.usage.inputTokens;
+ totalOutputTokens += ctx.usage.outputTokens;
+ if (modelRegistry) {
+ try {
+ const modelName = ctx.options.model.includes(":") ? ctx.options.model.split(":")[1] : ctx.options.model;
+ const costEstimate = modelRegistry.estimateCost(
+ modelName,
+ ctx.usage.inputTokens,
+ ctx.usage.outputTokens
+ );
+ if (costEstimate) {
+ totalCost += costEstimate.totalCost;
+ }
+ } catch (error) {
+ if (logProgress) {
+ console.warn(`\u26A0\uFE0F Cost estimation failed:`, error);
+ }
+ }
+ }
+ }
+ const stats = {
+ currentIteration,
+ totalCalls,
+ totalInputTokens,
+ totalOutputTokens,
+ totalTokens: totalInputTokens + totalOutputTokens,
+ totalCost,
+ elapsedSeconds: Number(((Date.now() - startTime) / 1e3).toFixed(1))
+ };
+ if (onProgress) {
+ onProgress(stats);
+ }
+ if (logProgress) {
+ const formattedTokens = stats.totalTokens >= 1e3 ? `${(stats.totalTokens / 1e3).toFixed(1)}k` : `${stats.totalTokens}`;
+ const formattedCost = stats.totalCost > 0 ? `$${stats.totalCost.toFixed(4)}` : "$0";
+ console.log(
+ `\u{1F4CA} Progress: Iteration #${stats.currentIteration} | ${formattedTokens} tokens | ${formattedCost} | ${stats.elapsedSeconds}s`
+ );
+ }
+ }
+ }
+ };
+ }
  /**
  * Logs detailed error information for debugging and troubleshooting.
  *
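The `stats` object handed to `onProgress` (documented via the `@see {@link ProgressStats}` tag) carries the fields assembled in `onLLMCallComplete` above. A reconstructed sketch of that shape, inferred from this diff rather than the package's exported type declaration:

```typescript
// Inferred from the stats object built in onLLMCallComplete; the published
// ProgressStats type may differ in naming or optionality.
interface ProgressStats {
  currentIteration: number;  // incremented on each onLLMCallStart
  totalCalls: number;        // completed LLM calls so far
  totalInputTokens: number;  // cumulative prompt tokens
  totalOutputTokens: number; // cumulative completion tokens
  totalTokens: number;       // totalInputTokens + totalOutputTokens
  totalCost: number;         // cumulative estimated USD cost (0 without modelRegistry)
  elapsedSeconds: number;    // seconds since the preset was created, to one decimal
}
```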