npm - llmist - Versions diffs - 0.4.0 → 0.4.1 - Mend

llmist 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/dist/{chunk-VRTKJK2X.js → chunk-A4GRCCXF.js} +2 -2
package/dist/{chunk-VYBRYR2S.js → chunk-LQE7TKKW.js} +43 -38
package/dist/chunk-LQE7TKKW.js.map +1 -0
package/dist/{chunk-I55AV3WV.js → chunk-QVDGTUQN.js} +2 -2
package/dist/cli.cjs +43 -38
package/dist/cli.cjs.map +1 -1
package/dist/cli.js +3 -3
package/dist/cli.js.map +1 -1
package/dist/index.cjs +42 -37
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +17 -3
package/dist/index.d.ts +17 -3
package/dist/index.js +3 -3
package/dist/testing/index.cjs +42 -37
package/dist/testing/index.cjs.map +1 -1
package/dist/testing/index.js +2 -2
package/package.json +1 -1
package/dist/chunk-VYBRYR2S.js.map +0 -1
/package/dist/{chunk-VRTKJK2X.js.map → chunk-A4GRCCXF.js.map} +0 -0
/package/dist/{chunk-I55AV3WV.js.map → chunk-QVDGTUQN.js.map} +0 -0

package/dist/index.cjs CHANGED Viewed

@@ -2791,10 +2791,11 @@ var init_gemini = __esm({
         return GEMINI_MODELS;
       }
       buildRequestPayload(options, descriptor, _spec, messages) {
-        const { systemInstruction, contents } = this.extractSystemAndContents(messages);
+        const contents = this.convertMessagesToContents(messages);
         const generationConfig = this.buildGenerationConfig(options);
         const config = {
-          ...systemInstruction ? { systemInstruction: systemInstruction.parts.map((p) => p.text).join("\n") } : {},
+          // Note: systemInstruction removed - it doesn't work with countTokens()
+          // System messages are now included in contents as user+model exchanges
           ...generationConfig ? { ...generationConfig } : {},
           // Explicitly disable function calling to prevent UNEXPECTED_TOOL_CALL errors
           toolConfig: {
@@ -2815,31 +2816,37 @@ var init_gemini = __esm({
         const streamResponse = await client.models.generateContentStream(payload);
         return streamResponse;
       }
-      extractSystemAndContents(messages) {
-        const firstSystemIndex = messages.findIndex((message) => message.role === "system");
-        if (firstSystemIndex === -1) {
-          return {
-            systemInstruction: null,
-            contents: this.mergeConsecutiveMessages(messages)
-          };
-        }
-        let systemBlockEnd = firstSystemIndex;
-        while (systemBlockEnd < messages.length && messages[systemBlockEnd].role === "system") {
-          systemBlockEnd++;
+      /**
+       * Convert LLM messages to Gemini contents format.
+       *
+       * For Gemini, we convert system messages to user+model exchanges instead of
+       * using systemInstruction, because:
+       * 1. systemInstruction doesn't work with countTokens() API
+       * 2. This approach gives perfect token counting accuracy (0% error)
+       * 3. The model receives and follows system instructions identically
+       *
+       * System message: "You are a helpful assistant"
+       * Becomes:
+       * - User: "You are a helpful assistant"
+       * - Model: "Understood."
+       */
+      convertMessagesToContents(messages) {
+        const expandedMessages = [];
+        for (const message of messages) {
+          if (message.role === "system") {
+            expandedMessages.push({
+              role: "user",
+              content: message.content
+            });
+            expandedMessages.push({
+              role: "assistant",
+              content: "Understood."
+            });
+          } else {
+            expandedMessages.push(message);
+          }
         }
-        const systemMessages = messages.slice(firstSystemIndex, systemBlockEnd);
-        const nonSystemMessages = [
-          ...messages.slice(0, firstSystemIndex),
-          ...messages.slice(systemBlockEnd)
-        ];
-        const systemInstruction = {
-          role: "system",
-          parts: systemMessages.map((message) => ({ text: message.content }))
-        };
-        return {
-          systemInstruction,
-          contents: this.mergeConsecutiveMessages(nonSystemMessages)
-        };
+        return this.mergeConsecutiveMessages(expandedMessages);
       }
       mergeConsecutiveMessages(messages) {
         if (messages.length === 0) {
@@ -2928,8 +2935,8 @@ var init_gemini = __esm({
        *
        * This method provides accurate token estimation for Gemini models by:
        * - Using the SDK's countTokens() method
-       * - Properly extracting and handling system instructions
-       * - Transforming messages to Gemini's expected format
+       * - Converting system messages to user+model exchanges (same as in generation)
+       * - This gives perfect token counting accuracy (0% error vs actual usage)
        *
        * @param messages - The messages to count tokens for
        * @param descriptor - Model descriptor containing the model name
@@ -2948,16 +2955,14 @@ var init_gemini = __esm({
        */
       async countTokens(messages, descriptor, _spec) {
         const client = this.client;
-        const { systemInstruction, contents } = this.extractSystemAndContents(messages);
-        const request = {
-          model: descriptor.name,
-          contents: this.convertContentsForNewSDK(contents)
-        };
-        if (systemInstruction) {
-          request.systemInstruction = systemInstruction.parts.map((p) => p.text).join("\n");
-        }
+        const contents = this.convertMessagesToContents(messages);
         try {
-          const response = await client.models.countTokens(request);
+          const response = await client.models.countTokens({
+            model: descriptor.name,
+            contents: this.convertContentsForNewSDK(contents)
+            // Note: systemInstruction not used - it's not supported by countTokens()
+            // and would cause a 2100% token counting error
+          });
           return response.totalTokens ?? 0;
         } catch (error) {
           console.warn(