@blank-utils/llm 0.4.17 → 0.4.19

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -915,31 +915,6 @@ function ChatInput({
915
915
  const textareaRef = useRef2(null);
916
916
  const fileInputRef = useRef2(null);
917
917
  const [isDragging, setIsDragging] = useState2(false);
918
- const captionerPromiseRef = useRef2(null);
919
- useEffect2(() => {
920
- let mounted = true;
921
- if (captionerPromiseRef.current) return;
922
- const initCaptioner = async () => {
923
- try {
924
- console.log("[ImagePipeline] Initializing Transformers.js background captioner...");
925
- const { pipeline: pipeline2, env } = await import("@huggingface/transformers");
926
- env.allowLocalModels = false;
927
- env.useBrowserCache = true;
928
- const captioner = await pipeline2("image-to-text", "Xenova/vit-gpt2-image-captioning", {
929
- device: "wasm"
930
- });
931
- if (mounted) console.log("[ImagePipeline] Captioner loaded successfully!");
932
- return captioner;
933
- } catch (err) {
934
- console.warn("Failed to initialize background captioner:", err);
935
- return null;
936
- }
937
- };
938
- captionerPromiseRef.current = initCaptioner();
939
- return () => {
940
- mounted = false;
941
- };
942
- }, []);
943
918
  useEffect2(() => {
944
919
  const textarea = textareaRef.current;
945
920
  if (!textarea) return;
@@ -1057,30 +1032,7 @@ ${newText}` : newText);
1057
1032
  if (e.target?.result && typeof e.target.result === "string") {
1058
1033
  const id = Math.random().toString(36).substring(7);
1059
1034
  const dataUrl = e.target.result;
1060
- let extractedText;
1061
- if (captionerPromiseRef.current) {
1062
- console.log(`[ImagePipeline] Generating caption for ${file.name} (Waiting for captioner)...`);
1063
- try {
1064
- const captioner = await captionerPromiseRef.current;
1065
- if (captioner) {
1066
- const out = await captioner(dataUrl);
1067
- console.log("[ImagePipeline] Raw captioner output:", out);
1068
- if (Array.isArray(out) && out[0] && out[0].generated_text) {
1069
- extractedText = out[0].generated_text;
1070
- } else if (!Array.isArray(out) && out.generated_text) {
1071
- extractedText = out.generated_text;
1072
- }
1073
- console.log("[ImagePipeline] Extracted caption text:", extractedText);
1074
- } else {
1075
- console.log("[ImagePipeline] Captioner initialized to null, skipping caption generation.");
1076
- }
1077
- } catch (err) {
1078
- console.warn("[ImagePipeline] Background captioning failed for image:", err);
1079
- }
1080
- } else {
1081
- console.log("[ImagePipeline] Captioner promise ref is null, skipping caption generation.");
1082
- }
1083
- onImageAdd?.({ id, dataUrl, file, name: file.name, extractedText });
1035
+ onImageAdd?.({ id, dataUrl, file, name: file.name });
1084
1036
  }
1085
1037
  };
1086
1038
  reader.readAsDataURL(file);
@@ -1493,10 +1445,53 @@ ${systemPrompt}` : systemPrompt;
1493
1445
  isProcessingRef.current = false;
1494
1446
  }
1495
1447
  };
1496
- const handleSend = () => {
1448
+ const handleSend = async () => {
1497
1449
  const text = input.trim();
1498
1450
  if (!text && images.length === 0) return;
1499
1451
  const currentImages = [...images];
1452
+ setInput("");
1453
+ setImages([]);
1454
+ abortRef.current = false;
1455
+ if (!isVisionModel2(modelId || "")) {
1456
+ const needsCaptioning = currentImages.filter((img) => !img.extractedText && !img.name.toLowerCase().endsWith(".svg") && !img.name.toLowerCase().endsWith(".pdf"));
1457
+ if (needsCaptioning.length > 0) {
1458
+ setIsGenerating(true);
1459
+ setStreamingText("[System: Initializing detailed image-to-text captioning pipeline (Florence-2)...]\n");
1460
+ try {
1461
+ const { pipeline: pipeline2, env } = await import("@huggingface/transformers");
1462
+ env.allowLocalModels = false;
1463
+ env.useBrowserCache = true;
1464
+ const captioner = await pipeline2("image-to-text", "onnx-community/Florence-2-base-ft", { device: "wasm", dtype: "q8" });
1465
+ for (let i = 0; i < needsCaptioning.length; i++) {
1466
+ if (abortRef.current) break;
1467
+ const img = needsCaptioning[i];
1468
+ if (!img) continue;
1469
+ setStreamingText(`[System: Extracting detailed visual description for ${img.name}... ${i + 1}/${needsCaptioning.length}]
1470
+ `);
1471
+ const out = await captioner(img.dataUrl, {
1472
+ text: "<MORE_DETAILED_CAPTION>",
1473
+ max_new_tokens: 512,
1474
+ num_beams: 3
1475
+ });
1476
+ let val = "";
1477
+ if (Array.isArray(out) && out[0] && out[0].generated_text) val = out[0].generated_text;
1478
+ else if (!Array.isArray(out) && out.generated_text) val = out.generated_text;
1479
+ img.extractedText = val;
1480
+ }
1481
+ } catch (err) {
1482
+ console.error("[ImagePipeline] Captioning error:", err);
1483
+ setStreamingText(`[System: Fallback captioning failed: ${err}]
1484
+ `);
1485
+ }
1486
+ if (abortRef.current) {
1487
+ setStreamingText("");
1488
+ setIsGenerating(false);
1489
+ return;
1490
+ }
1491
+ setStreamingText("");
1492
+ setIsGenerating(false);
1493
+ }
1494
+ }
1500
1495
  let finalText = text;
1501
1496
  for (const img of currentImages) {
1502
1497
  if (img.extractedText) {
@@ -1513,8 +1508,6 @@ ${systemPrompt}` : systemPrompt;
1513
1508
  }
1514
1509
  }
1515
1510
  }
1516
- setInput("");
1517
- setImages([]);
1518
1511
  onSendProp?.(finalText);
1519
1512
  if (llm && isReady) {
1520
1513
  generate(finalText, messages, currentImages);
package/dist/index.js CHANGED
@@ -26,7 +26,7 @@ import {
26
26
  useCompletion,
27
27
  useLLM,
28
28
  useStream
29
- } from "./chunk-MV5JXFWL.js";
29
+ } from "./chunk-JOBJ4PY7.js";
30
30
  export {
31
31
  Chat,
32
32
  ChatInput,
@@ -9,7 +9,7 @@ import {
9
9
  useCompletion,
10
10
  useLLM,
11
11
  useStream
12
- } from "../chunk-MV5JXFWL.js";
12
+ } from "../chunk-JOBJ4PY7.js";
13
13
  export {
14
14
  Chat,
15
15
  ChatApp,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blank-utils/llm",
3
- "version": "0.4.17",
3
+ "version": "0.4.19",
4
4
  "description": "Run LLMs directly in your browser with WebGPU acceleration. Supports React hooks and eager background loading.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",