@blank-utils/llm 0.4.18 → 0.4.20
This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
|
@@ -915,31 +915,6 @@ function ChatInput({
|
|
|
915
915
|
const textareaRef = useRef2(null);
|
|
916
916
|
const fileInputRef = useRef2(null);
|
|
917
917
|
const [isDragging, setIsDragging] = useState2(false);
|
|
918
|
-
const captionerPromiseRef = useRef2(null);
|
|
919
|
-
useEffect2(() => {
|
|
920
|
-
let mounted = true;
|
|
921
|
-
if (captionerPromiseRef.current) return;
|
|
922
|
-
const initCaptioner = async () => {
|
|
923
|
-
try {
|
|
924
|
-
console.log("[ImagePipeline] Initializing Transformers.js background captioner...");
|
|
925
|
-
const { pipeline: pipeline2, env } = await import("@huggingface/transformers");
|
|
926
|
-
env.allowLocalModels = false;
|
|
927
|
-
env.useBrowserCache = true;
|
|
928
|
-
const captioner = await pipeline2("image-to-text", "Xenova/vit-gpt2-image-captioning", {
|
|
929
|
-
device: "wasm"
|
|
930
|
-
});
|
|
931
|
-
if (mounted) console.log("[ImagePipeline] Captioner loaded successfully!");
|
|
932
|
-
return captioner;
|
|
933
|
-
} catch (err) {
|
|
934
|
-
console.warn("Failed to initialize background captioner:", err);
|
|
935
|
-
return null;
|
|
936
|
-
}
|
|
937
|
-
};
|
|
938
|
-
captionerPromiseRef.current = initCaptioner();
|
|
939
|
-
return () => {
|
|
940
|
-
mounted = false;
|
|
941
|
-
};
|
|
942
|
-
}, []);
|
|
943
918
|
useEffect2(() => {
|
|
944
919
|
const textarea = textareaRef.current;
|
|
945
920
|
if (!textarea) return;
|
|
@@ -1057,34 +1032,7 @@ ${newText}` : newText);
|
|
|
1057
1032
|
if (e.target?.result && typeof e.target.result === "string") {
|
|
1058
1033
|
const id = Math.random().toString(36).substring(7);
|
|
1059
1034
|
const dataUrl = e.target.result;
|
|
1060
|
-
|
|
1061
|
-
if (captionerPromiseRef.current) {
|
|
1062
|
-
console.log(`[ImagePipeline] Generating caption for ${file.name} (Waiting for captioner)...`);
|
|
1063
|
-
try {
|
|
1064
|
-
const captioner = await captionerPromiseRef.current;
|
|
1065
|
-
if (captioner) {
|
|
1066
|
-
const out = await captioner(dataUrl, {
|
|
1067
|
-
max_new_tokens: 100,
|
|
1068
|
-
num_beams: 4,
|
|
1069
|
-
repetition_penalty: 1.5
|
|
1070
|
-
});
|
|
1071
|
-
console.log("[ImagePipeline] Raw captioner output:", out);
|
|
1072
|
-
if (Array.isArray(out) && out[0] && out[0].generated_text) {
|
|
1073
|
-
extractedText = out[0].generated_text;
|
|
1074
|
-
} else if (!Array.isArray(out) && out.generated_text) {
|
|
1075
|
-
extractedText = out.generated_text;
|
|
1076
|
-
}
|
|
1077
|
-
console.log("[ImagePipeline] Extracted caption text:", extractedText);
|
|
1078
|
-
} else {
|
|
1079
|
-
console.log("[ImagePipeline] Captioner initialized to null, skipping caption generation.");
|
|
1080
|
-
}
|
|
1081
|
-
} catch (err) {
|
|
1082
|
-
console.warn("[ImagePipeline] Background captioning failed for image:", err);
|
|
1083
|
-
}
|
|
1084
|
-
} else {
|
|
1085
|
-
console.log("[ImagePipeline] Captioner promise ref is null, skipping caption generation.");
|
|
1086
|
-
}
|
|
1087
|
-
onImageAdd?.({ id, dataUrl, file, name: file.name, extractedText });
|
|
1035
|
+
onImageAdd?.({ id, dataUrl, file, name: file.name });
|
|
1088
1036
|
}
|
|
1089
1037
|
};
|
|
1090
1038
|
reader.readAsDataURL(file);
|
|
@@ -1497,10 +1445,53 @@ ${systemPrompt}` : systemPrompt;
|
|
|
1497
1445
|
isProcessingRef.current = false;
|
|
1498
1446
|
}
|
|
1499
1447
|
};
|
|
1500
|
-
const handleSend = () => {
|
|
1448
|
+
const handleSend = async () => {
|
|
1501
1449
|
const text = input.trim();
|
|
1502
1450
|
if (!text && images.length === 0) return;
|
|
1503
1451
|
const currentImages = [...images];
|
|
1452
|
+
setInput("");
|
|
1453
|
+
setImages([]);
|
|
1454
|
+
abortRef.current = false;
|
|
1455
|
+
if (!isVisionModel2(modelId || "")) {
|
|
1456
|
+
const needsCaptioning = currentImages.filter((img) => !img.extractedText && !img.name.toLowerCase().endsWith(".svg") && !img.name.toLowerCase().endsWith(".pdf"));
|
|
1457
|
+
if (needsCaptioning.length > 0) {
|
|
1458
|
+
setIsGenerating(true);
|
|
1459
|
+
setStreamingText("[System: Initializing detailed image-to-text captioning pipeline (Florence-2)...]\n");
|
|
1460
|
+
try {
|
|
1461
|
+
const { pipeline: pipeline2, env } = await import("@huggingface/transformers");
|
|
1462
|
+
env.allowLocalModels = false;
|
|
1463
|
+
env.useBrowserCache = true;
|
|
1464
|
+
const captioner = await pipeline2("image-to-text", "Xenova/vit-gpt2-image-captioning", { device: "wasm", dtype: "q8" });
|
|
1465
|
+
for (let i = 0; i < needsCaptioning.length; i++) {
|
|
1466
|
+
if (abortRef.current) break;
|
|
1467
|
+
const img = needsCaptioning[i];
|
|
1468
|
+
if (!img) continue;
|
|
1469
|
+
setStreamingText(`[System: Extracting detailed visual description for ${img.name}... ${i + 1}/${needsCaptioning.length}]
|
|
1470
|
+
`);
|
|
1471
|
+
const out = await captioner(img.dataUrl, {
|
|
1472
|
+
max_new_tokens: 64,
|
|
1473
|
+
num_beams: 4,
|
|
1474
|
+
repetition_penalty: 1.5
|
|
1475
|
+
});
|
|
1476
|
+
let val = "";
|
|
1477
|
+
if (Array.isArray(out) && out[0] && out[0].generated_text) val = out[0].generated_text;
|
|
1478
|
+
else if (!Array.isArray(out) && out.generated_text) val = out.generated_text;
|
|
1479
|
+
img.extractedText = val;
|
|
1480
|
+
}
|
|
1481
|
+
} catch (err) {
|
|
1482
|
+
console.error("[ImagePipeline] Captioning error:", err);
|
|
1483
|
+
setStreamingText(`[System: Fallback captioning failed: ${err}]
|
|
1484
|
+
`);
|
|
1485
|
+
}
|
|
1486
|
+
if (abortRef.current) {
|
|
1487
|
+
setStreamingText("");
|
|
1488
|
+
setIsGenerating(false);
|
|
1489
|
+
return;
|
|
1490
|
+
}
|
|
1491
|
+
setStreamingText("");
|
|
1492
|
+
setIsGenerating(false);
|
|
1493
|
+
}
|
|
1494
|
+
}
|
|
1504
1495
|
let finalText = text;
|
|
1505
1496
|
for (const img of currentImages) {
|
|
1506
1497
|
if (img.extractedText) {
|
|
@@ -1517,8 +1508,6 @@ ${systemPrompt}` : systemPrompt;
|
|
|
1517
1508
|
}
|
|
1518
1509
|
}
|
|
1519
1510
|
}
|
|
1520
|
-
setInput("");
|
|
1521
|
-
setImages([]);
|
|
1522
1511
|
onSendProp?.(finalText);
|
|
1523
1512
|
if (llm && isReady) {
|
|
1524
1513
|
generate(finalText, messages, currentImages);
|
package/dist/index.js
CHANGED
package/dist/react/index.js
CHANGED
package/package.json
CHANGED