npm - @blank-utils/llm - Versions diffs - 0.4.18 → 0.4.20 - Mend

@blank-utils/llm 0.4.18 → 0.4.20

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/{chunk-SB7Q5JMU.js → chunk-T4VK6BPC.js} +45 -56
package/dist/index.js +1 -1
package/dist/react/index.js +1 -1
package/package.json +1 -1

package/dist/{chunk-SB7Q5JMU.js → chunk-T4VK6BPC.js} RENAMED Viewed

@@ -915,31 +915,6 @@ function ChatInput({
   const textareaRef = useRef2(null);
   const fileInputRef = useRef2(null);
   const [isDragging, setIsDragging] = useState2(false);
-  const captionerPromiseRef = useRef2(null);
-  useEffect2(() => {
-    let mounted = true;
-    if (captionerPromiseRef.current) return;
-    const initCaptioner = async () => {
-      try {
-        console.log("[ImagePipeline] Initializing Transformers.js background captioner...");
-        const { pipeline: pipeline2, env } = await import("@huggingface/transformers");
-        env.allowLocalModels = false;
-        env.useBrowserCache = true;
-        const captioner = await pipeline2("image-to-text", "Xenova/vit-gpt2-image-captioning", {
-          device: "wasm"
-        });
-        if (mounted) console.log("[ImagePipeline] Captioner loaded successfully!");
-        return captioner;
-      } catch (err) {
-        console.warn("Failed to initialize background captioner:", err);
-        return null;
-      }
-    };
-    captionerPromiseRef.current = initCaptioner();
-    return () => {
-      mounted = false;
-    };
-  }, []);
   useEffect2(() => {
     const textarea = textareaRef.current;
     if (!textarea) return;
@@ -1057,34 +1032,7 @@ ${newText}` : newText);
       if (e.target?.result && typeof e.target.result === "string") {
         const id = Math.random().toString(36).substring(7);
         const dataUrl = e.target.result;
-        let extractedText;
-        if (captionerPromiseRef.current) {
-          console.log(`[ImagePipeline] Generating caption for ${file.name} (Waiting for captioner)...`);
-          try {
-            const captioner = await captionerPromiseRef.current;
-            if (captioner) {
-              const out = await captioner(dataUrl, {
-                max_new_tokens: 100,
-                num_beams: 4,
-                repetition_penalty: 1.5
-              });
-              console.log("[ImagePipeline] Raw captioner output:", out);
-              if (Array.isArray(out) && out[0] && out[0].generated_text) {
-                extractedText = out[0].generated_text;
-              } else if (!Array.isArray(out) && out.generated_text) {
-                extractedText = out.generated_text;
-              }
-              console.log("[ImagePipeline] Extracted caption text:", extractedText);
-            } else {
-              console.log("[ImagePipeline] Captioner initialized to null, skipping caption generation.");
-            }
-          } catch (err) {
-            console.warn("[ImagePipeline] Background captioning failed for image:", err);
-          }
-        } else {
-          console.log("[ImagePipeline] Captioner promise ref is null, skipping caption generation.");
-        }
-        onImageAdd?.({ id, dataUrl, file, name: file.name, extractedText });
+        onImageAdd?.({ id, dataUrl, file, name: file.name });
       }
     };
     reader.readAsDataURL(file);
@@ -1497,10 +1445,53 @@ ${systemPrompt}` : systemPrompt;
       isProcessingRef.current = false;
     }
   };
-  const handleSend = () => {
+  const handleSend = async () => {
     const text = input.trim();
     if (!text && images.length === 0) return;
     const currentImages = [...images];
+    setInput("");
+    setImages([]);
+    abortRef.current = false;
+    if (!isVisionModel2(modelId || "")) {
+      const needsCaptioning = currentImages.filter((img) => !img.extractedText && !img.name.toLowerCase().endsWith(".svg") && !img.name.toLowerCase().endsWith(".pdf"));
+      if (needsCaptioning.length > 0) {
+        setIsGenerating(true);
+        setStreamingText("[System: Initializing detailed image-to-text captioning pipeline (Florence-2)...]\n");
+        try {
+          const { pipeline: pipeline2, env } = await import("@huggingface/transformers");
+          env.allowLocalModels = false;
+          env.useBrowserCache = true;
+          const captioner = await pipeline2("image-to-text", "Xenova/vit-gpt2-image-captioning", { device: "wasm", dtype: "q8" });
+          for (let i = 0; i < needsCaptioning.length; i++) {
+            if (abortRef.current) break;
+            const img = needsCaptioning[i];
+            if (!img) continue;
+            setStreamingText(`[System: Extracting detailed visual description for ${img.name}... ${i + 1}/${needsCaptioning.length}]
+`);
+            const out = await captioner(img.dataUrl, {
+              max_new_tokens: 64,
+              num_beams: 4,
+              repetition_penalty: 1.5
+            });
+            let val = "";
+            if (Array.isArray(out) && out[0] && out[0].generated_text) val = out[0].generated_text;
+            else if (!Array.isArray(out) && out.generated_text) val = out.generated_text;
+            img.extractedText = val;
+          }
+        } catch (err) {
+          console.error("[ImagePipeline] Captioning error:", err);
+          setStreamingText(`[System: Fallback captioning failed: ${err}]
+`);
+        }
+        if (abortRef.current) {
+          setStreamingText("");
+          setIsGenerating(false);
+          return;
+        }
+        setStreamingText("");
+        setIsGenerating(false);
+      }
+    }
     let finalText = text;
     for (const img of currentImages) {
       if (img.extractedText) {
@@ -1517,8 +1508,6 @@ ${systemPrompt}` : systemPrompt;
         }
       }
     }
-    setInput("");
-    setImages([]);
     onSendProp?.(finalText);
     if (llm && isReady) {
       generate(finalText, messages, currentImages);

package/dist/index.js CHANGED Viewed

@@ -26,7 +26,7 @@ import {
   useCompletion,
   useLLM,
   useStream
-} from "./chunk-SB7Q5JMU.js";
+} from "./chunk-T4VK6BPC.js";
 export {
   Chat,
   ChatInput,

package/dist/react/index.js CHANGED Viewed

@@ -9,7 +9,7 @@ import {
   useCompletion,
   useLLM,
   useStream
-} from "../chunk-SB7Q5JMU.js";
+} from "../chunk-T4VK6BPC.js";
 export {
   Chat,
   ChatApp,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blank-utils/llm",
-  "version": "0.4.18",
+  "version": "0.4.20",
   "description": "Run LLMs directly in your browser with WebGPU acceleration. Supports React hooks and eager background loading.",
   "type": "module",
   "main": "./dist/index.js",