@blank-utils/llm 0.4.17 → 0.4.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -915,31 +915,6 @@ function ChatInput({
|
|
|
915
915
|
const textareaRef = useRef2(null);
|
|
916
916
|
const fileInputRef = useRef2(null);
|
|
917
917
|
const [isDragging, setIsDragging] = useState2(false);
|
|
918
|
-
const captionerPromiseRef = useRef2(null);
|
|
919
|
-
useEffect2(() => {
|
|
920
|
-
let mounted = true;
|
|
921
|
-
if (captionerPromiseRef.current) return;
|
|
922
|
-
const initCaptioner = async () => {
|
|
923
|
-
try {
|
|
924
|
-
console.log("[ImagePipeline] Initializing Transformers.js background captioner...");
|
|
925
|
-
const { pipeline: pipeline2, env } = await import("@huggingface/transformers");
|
|
926
|
-
env.allowLocalModels = false;
|
|
927
|
-
env.useBrowserCache = true;
|
|
928
|
-
const captioner = await pipeline2("image-to-text", "Xenova/vit-gpt2-image-captioning", {
|
|
929
|
-
device: "wasm"
|
|
930
|
-
});
|
|
931
|
-
if (mounted) console.log("[ImagePipeline] Captioner loaded successfully!");
|
|
932
|
-
return captioner;
|
|
933
|
-
} catch (err) {
|
|
934
|
-
console.warn("Failed to initialize background captioner:", err);
|
|
935
|
-
return null;
|
|
936
|
-
}
|
|
937
|
-
};
|
|
938
|
-
captionerPromiseRef.current = initCaptioner();
|
|
939
|
-
return () => {
|
|
940
|
-
mounted = false;
|
|
941
|
-
};
|
|
942
|
-
}, []);
|
|
943
918
|
useEffect2(() => {
|
|
944
919
|
const textarea = textareaRef.current;
|
|
945
920
|
if (!textarea) return;
|
|
@@ -1057,30 +1032,7 @@ ${newText}` : newText);
|
|
|
1057
1032
|
if (e.target?.result && typeof e.target.result === "string") {
|
|
1058
1033
|
const id = Math.random().toString(36).substring(7);
|
|
1059
1034
|
const dataUrl = e.target.result;
|
|
1060
|
-
|
|
1061
|
-
if (captionerPromiseRef.current) {
|
|
1062
|
-
console.log(`[ImagePipeline] Generating caption for ${file.name} (Waiting for captioner)...`);
|
|
1063
|
-
try {
|
|
1064
|
-
const captioner = await captionerPromiseRef.current;
|
|
1065
|
-
if (captioner) {
|
|
1066
|
-
const out = await captioner(dataUrl);
|
|
1067
|
-
console.log("[ImagePipeline] Raw captioner output:", out);
|
|
1068
|
-
if (Array.isArray(out) && out[0] && out[0].generated_text) {
|
|
1069
|
-
extractedText = out[0].generated_text;
|
|
1070
|
-
} else if (!Array.isArray(out) && out.generated_text) {
|
|
1071
|
-
extractedText = out.generated_text;
|
|
1072
|
-
}
|
|
1073
|
-
console.log("[ImagePipeline] Extracted caption text:", extractedText);
|
|
1074
|
-
} else {
|
|
1075
|
-
console.log("[ImagePipeline] Captioner initialized to null, skipping caption generation.");
|
|
1076
|
-
}
|
|
1077
|
-
} catch (err) {
|
|
1078
|
-
console.warn("[ImagePipeline] Background captioning failed for image:", err);
|
|
1079
|
-
}
|
|
1080
|
-
} else {
|
|
1081
|
-
console.log("[ImagePipeline] Captioner promise ref is null, skipping caption generation.");
|
|
1082
|
-
}
|
|
1083
|
-
onImageAdd?.({ id, dataUrl, file, name: file.name, extractedText });
|
|
1035
|
+
onImageAdd?.({ id, dataUrl, file, name: file.name });
|
|
1084
1036
|
}
|
|
1085
1037
|
};
|
|
1086
1038
|
reader.readAsDataURL(file);
|
|
@@ -1493,10 +1445,53 @@ ${systemPrompt}` : systemPrompt;
|
|
|
1493
1445
|
isProcessingRef.current = false;
|
|
1494
1446
|
}
|
|
1495
1447
|
};
|
|
1496
|
-
const handleSend = () => {
|
|
1448
|
+
const handleSend = async () => {
|
|
1497
1449
|
const text = input.trim();
|
|
1498
1450
|
if (!text && images.length === 0) return;
|
|
1499
1451
|
const currentImages = [...images];
|
|
1452
|
+
setInput("");
|
|
1453
|
+
setImages([]);
|
|
1454
|
+
abortRef.current = false;
|
|
1455
|
+
if (!isVisionModel2(modelId || "")) {
|
|
1456
|
+
const needsCaptioning = currentImages.filter((img) => !img.extractedText && !img.name.toLowerCase().endsWith(".svg") && !img.name.toLowerCase().endsWith(".pdf"));
|
|
1457
|
+
if (needsCaptioning.length > 0) {
|
|
1458
|
+
setIsGenerating(true);
|
|
1459
|
+
setStreamingText("[System: Initializing detailed image-to-text captioning pipeline (Florence-2)...]\n");
|
|
1460
|
+
try {
|
|
1461
|
+
const { pipeline: pipeline2, env } = await import("@huggingface/transformers");
|
|
1462
|
+
env.allowLocalModels = false;
|
|
1463
|
+
env.useBrowserCache = true;
|
|
1464
|
+
const captioner = await pipeline2("image-to-text", "onnx-community/Florence-2-base-ft", { device: "wasm", dtype: "q8" });
|
|
1465
|
+
for (let i = 0; i < needsCaptioning.length; i++) {
|
|
1466
|
+
if (abortRef.current) break;
|
|
1467
|
+
const img = needsCaptioning[i];
|
|
1468
|
+
if (!img) continue;
|
|
1469
|
+
setStreamingText(`[System: Extracting detailed visual description for ${img.name}... ${i + 1}/${needsCaptioning.length}]
|
|
1470
|
+
`);
|
|
1471
|
+
const out = await captioner(img.dataUrl, {
|
|
1472
|
+
text: "<MORE_DETAILED_CAPTION>",
|
|
1473
|
+
max_new_tokens: 512,
|
|
1474
|
+
num_beams: 3
|
|
1475
|
+
});
|
|
1476
|
+
let val = "";
|
|
1477
|
+
if (Array.isArray(out) && out[0] && out[0].generated_text) val = out[0].generated_text;
|
|
1478
|
+
else if (!Array.isArray(out) && out.generated_text) val = out.generated_text;
|
|
1479
|
+
img.extractedText = val;
|
|
1480
|
+
}
|
|
1481
|
+
} catch (err) {
|
|
1482
|
+
console.error("[ImagePipeline] Captioning error:", err);
|
|
1483
|
+
setStreamingText(`[System: Fallback captioning failed: ${err}]
|
|
1484
|
+
`);
|
|
1485
|
+
}
|
|
1486
|
+
if (abortRef.current) {
|
|
1487
|
+
setStreamingText("");
|
|
1488
|
+
setIsGenerating(false);
|
|
1489
|
+
return;
|
|
1490
|
+
}
|
|
1491
|
+
setStreamingText("");
|
|
1492
|
+
setIsGenerating(false);
|
|
1493
|
+
}
|
|
1494
|
+
}
|
|
1500
1495
|
let finalText = text;
|
|
1501
1496
|
for (const img of currentImages) {
|
|
1502
1497
|
if (img.extractedText) {
|
|
@@ -1513,8 +1508,6 @@ ${systemPrompt}` : systemPrompt;
|
|
|
1513
1508
|
}
|
|
1514
1509
|
}
|
|
1515
1510
|
}
|
|
1516
|
-
setInput("");
|
|
1517
|
-
setImages([]);
|
|
1518
1511
|
onSendProp?.(finalText);
|
|
1519
1512
|
if (llm && isReady) {
|
|
1520
1513
|
generate(finalText, messages, currentImages);
|
package/dist/index.js
CHANGED
package/dist/react/index.js
CHANGED
package/package.json
CHANGED