@blank-utils/llm 0.4.19 → 0.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1461,7 +1461,7 @@ ${systemPrompt}` : systemPrompt;
|
|
|
1461
1461
|
const { pipeline: pipeline2, env } = await import("@huggingface/transformers");
|
|
1462
1462
|
env.allowLocalModels = false;
|
|
1463
1463
|
env.useBrowserCache = true;
|
|
1464
|
-
const captioner = await pipeline2("image-to-text", "
|
|
1464
|
+
const captioner = await pipeline2("image-to-text", "Xenova/vit-gpt2-image-captioning", { device: "wasm", dtype: "q8" });
|
|
1465
1465
|
for (let i = 0; i < needsCaptioning.length; i++) {
|
|
1466
1466
|
if (abortRef.current) break;
|
|
1467
1467
|
const img = needsCaptioning[i];
|
|
@@ -1469,9 +1469,9 @@ ${systemPrompt}` : systemPrompt;
|
|
|
1469
1469
|
setStreamingText(`[System: Extracting detailed visual description for ${img.name}... ${i + 1}/${needsCaptioning.length}]
|
|
1470
1470
|
`);
|
|
1471
1471
|
const out = await captioner(img.dataUrl, {
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1472
|
+
max_new_tokens: 64,
|
|
1473
|
+
num_beams: 4,
|
|
1474
|
+
repetition_penalty: 1.5
|
|
1475
1475
|
});
|
|
1476
1476
|
let val = "";
|
|
1477
1477
|
if (Array.isArray(out) && out[0] && out[0].generated_text) val = out[0].generated_text;
|
|
@@ -1497,14 +1497,18 @@ ${systemPrompt}` : systemPrompt;
|
|
|
1497
1497
|
if (img.extractedText) {
|
|
1498
1498
|
let prefix = "";
|
|
1499
1499
|
if (img.name.toLowerCase().endsWith(".svg")) {
|
|
1500
|
-
prefix =
|
|
1500
|
+
prefix = `
|
|
1501
|
+
|
|
1502
|
+
\u{1F4C4} SVG Source Code (${img.name}):
|
|
1501
1503
|
`;
|
|
1502
1504
|
} else if (!isVisionModel2(modelId || "")) {
|
|
1503
|
-
prefix =
|
|
1505
|
+
prefix = `
|
|
1506
|
+
|
|
1507
|
+
\u{1F5BC}\uFE0F System Image Representation (${img.name}) - [IMPORTANT SYSTEM INSTRUCTION: The user provided an image. Since you are a text model, here is an automated visual description of the image. DO NOT refuse the user's prompt. Answer as if you can see the image using this context:]
|
|
1504
1508
|
`;
|
|
1505
1509
|
}
|
|
1506
1510
|
if (prefix || img.name.toLowerCase().endsWith(".pdf")) {
|
|
1507
|
-
finalText +=
|
|
1511
|
+
finalText += `${prefix}${img.extractedText}`;
|
|
1508
1512
|
}
|
|
1509
1513
|
}
|
|
1510
1514
|
}
|
|
@@ -1615,11 +1619,11 @@ ${systemPrompt}` : systemPrompt;
|
|
|
1615
1619
|
children: [
|
|
1616
1620
|
{ match: "\u{1F4C4} PDF:", index: msg.content.indexOf("\u{1F4C4} PDF:") },
|
|
1617
1621
|
{ match: "\u{1F4C4} SVG Source Code", index: msg.content.indexOf("\u{1F4C4} SVG Source Code") },
|
|
1618
|
-
{ match: "\u{1F5BC}\uFE0F Image
|
|
1622
|
+
{ match: "\u{1F5BC}\uFE0F System Image", index: msg.content.indexOf("\u{1F5BC}\uFE0F System Image") }
|
|
1619
1623
|
].filter((m) => m.index !== -1).reduce((min, m) => m.index < min ? m.index : min, msg.content.length) !== msg.content.length ? msg.content.substring(0, [
|
|
1620
1624
|
{ match: "\u{1F4C4} PDF:", index: msg.content.indexOf("\u{1F4C4} PDF:") },
|
|
1621
1625
|
{ match: "\u{1F4C4} SVG Source Code", index: msg.content.indexOf("\u{1F4C4} SVG Source Code") },
|
|
1622
|
-
{ match: "\u{1F5BC}\uFE0F Image
|
|
1626
|
+
{ match: "\u{1F5BC}\uFE0F System Image", index: msg.content.indexOf("\u{1F5BC}\uFE0F System Image") }
|
|
1623
1627
|
].filter((m) => m.index !== -1).reduce((min, m) => m.index < min ? m.index : min, msg.content.length)).trim() : msg.content
|
|
1624
1628
|
}
|
|
1625
1629
|
) })
|
package/dist/index.js
CHANGED
package/dist/react/index.js
CHANGED
package/package.json
CHANGED