langchain 0.0.176 → 0.0.177
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chat_models/bedrock.cjs +25 -4
- package/dist/chat_models/bedrock.d.ts +2 -1
- package/dist/chat_models/bedrock.js +25 -4
- package/dist/chat_models/llama_cpp.cjs +31 -79
- package/dist/chat_models/llama_cpp.d.ts +15 -58
- package/dist/chat_models/llama_cpp.js +32 -80
- package/dist/chat_models/openai.cjs +91 -6
- package/dist/chat_models/openai.d.ts +10 -0
- package/dist/chat_models/openai.js +91 -6
- package/dist/embeddings/hf.cjs +10 -1
- package/dist/embeddings/hf.d.ts +4 -2
- package/dist/embeddings/hf.js +10 -1
- package/dist/embeddings/llama_cpp.cjs +67 -0
- package/dist/embeddings/llama_cpp.d.ts +26 -0
- package/dist/embeddings/llama_cpp.js +63 -0
- package/dist/embeddings/ollama.cjs +7 -1
- package/dist/embeddings/ollama.js +7 -1
- package/dist/llms/bedrock.cjs +25 -3
- package/dist/llms/bedrock.d.ts +2 -1
- package/dist/llms/bedrock.js +25 -3
- package/dist/llms/hf.cjs +10 -1
- package/dist/llms/hf.d.ts +3 -0
- package/dist/llms/hf.js +10 -1
- package/dist/llms/llama_cpp.cjs +25 -65
- package/dist/llms/llama_cpp.d.ts +7 -43
- package/dist/llms/llama_cpp.js +25 -65
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/prompts/few_shot.cjs +162 -1
- package/dist/prompts/few_shot.d.ts +90 -2
- package/dist/prompts/few_shot.js +160 -0
- package/dist/prompts/index.cjs +2 -1
- package/dist/prompts/index.d.ts +1 -1
- package/dist/prompts/index.js +1 -1
- package/dist/retrievers/zep.cjs +26 -3
- package/dist/retrievers/zep.d.ts +11 -2
- package/dist/retrievers/zep.js +26 -3
- package/dist/util/bedrock.d.ts +2 -0
- package/dist/util/llama_cpp.cjs +34 -0
- package/dist/util/llama_cpp.d.ts +46 -0
- package/dist/util/llama_cpp.js +28 -0
- package/dist/util/openai-format-fndef.cjs +81 -0
- package/dist/util/openai-format-fndef.d.ts +44 -0
- package/dist/util/openai-format-fndef.js +77 -0
- package/dist/util/openapi.d.ts +2 -2
- package/dist/vectorstores/pinecone.cjs +5 -5
- package/dist/vectorstores/pinecone.d.ts +2 -2
- package/dist/vectorstores/pinecone.js +5 -5
- package/embeddings/llama_cpp.cjs +1 -0
- package/embeddings/llama_cpp.d.ts +1 -0
- package/embeddings/llama_cpp.js +1 -0
- package/package.json +13 -5
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.PromptLayerChatOpenAI = exports.ChatOpenAI = void 0;
|
|
4
4
|
const openai_1 = require("openai");
|
|
5
|
-
const count_tokens_js_1 = require("../base_language/count_tokens.cjs");
|
|
6
5
|
const index_js_1 = require("../schema/index.cjs");
|
|
7
6
|
const convert_to_openai_js_1 = require("../tools/convert_to_openai.cjs");
|
|
8
7
|
const azure_js_1 = require("../util/azure.cjs");
|
|
@@ -10,6 +9,7 @@ const env_js_1 = require("../util/env.cjs");
|
|
|
10
9
|
const prompt_layer_js_1 = require("../util/prompt-layer.cjs");
|
|
11
10
|
const base_js_1 = require("./base.cjs");
|
|
12
11
|
const openai_js_1 = require("../util/openai.cjs");
|
|
12
|
+
const openai_format_fndef_js_1 = require("../util/openai-format-fndef.cjs");
|
|
13
13
|
function extractGenericMessageCustomRole(message) {
|
|
14
14
|
if (message.role !== "system" &&
|
|
15
15
|
message.role !== "assistant" &&
|
|
@@ -39,6 +39,19 @@ function messageToOpenAIRole(message) {
|
|
|
39
39
|
throw new Error(`Unknown message type: ${type}`);
|
|
40
40
|
}
|
|
41
41
|
}
|
|
42
|
+
/**
 * Build an OpenAI-style chat message object from a message.
 *
 * The result carries `content` (falsy content becomes `null`), `name`,
 * the role computed by `messageToOpenAIRole`, and a copy of
 * `additional_kwargs.function_call`. When the function call has an
 * `arguments` string that is valid JSON, it is re-serialized without
 * insignificant whitespace.
 *
 * @param message - Message exposing `content`, `name` and `additional_kwargs`.
 * @returns A new object with `content`, `name`, `role` and `function_call` keys.
 */
function messageToOpenAIMessage(message) {
    const functionCall = message.additional_kwargs.function_call;
    const msg = {
        content: message.content || null,
        name: message.name,
        role: messageToOpenAIRole(message),
        // Shallow-copy so that normalizing `arguments` below never rewrites
        // the function call stored on the caller's message.
        function_call: functionCall ? { ...functionCall } : functionCall,
    };
    if (msg.function_call?.arguments) {
        try {
            // Remove spaces, new line characters etc.
            msg.function_call.arguments = JSON.stringify(JSON.parse(msg.function_call.arguments));
        }
        catch {
            // The arguments are not valid JSON (for example a truncated
            // completion). Keep the raw string instead of throwing.
        }
    }
    return msg;
}
|
|
42
55
|
function openAIResponseToChatMessage(message) {
|
|
43
56
|
switch (message.role) {
|
|
44
57
|
case "user":
|
|
@@ -414,6 +427,7 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
|
|
|
414
427
|
}
|
|
415
428
|
/**
|
|
416
429
|
* Get the identifying parameters for the model
|
|
430
|
+
*
|
|
417
431
|
*/
|
|
418
432
|
identifyingParams() {
|
|
419
433
|
return this._identifyingParams();
|
|
@@ -430,7 +444,7 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
|
|
|
430
444
|
.function_call,
|
|
431
445
|
}));
|
|
432
446
|
if (params.stream) {
|
|
433
|
-
const stream =
|
|
447
|
+
const stream = this._streamResponseChunks(messages, options, runManager);
|
|
434
448
|
const finalChunks = {};
|
|
435
449
|
for await (const chunk of stream) {
|
|
436
450
|
const index = chunk.generationInfo?.completion ?? 0;
|
|
@@ -444,7 +458,15 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
|
|
|
444
458
|
const generations = Object.entries(finalChunks)
|
|
445
459
|
.sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10))
|
|
446
460
|
.map(([_, value]) => value);
|
|
447
|
-
|
|
461
|
+
const { functions, function_call } = this.invocationParams(options);
|
|
462
|
+
// OpenAI does not support token usage report under stream mode,
|
|
463
|
+
// fallback to estimation.
|
|
464
|
+
const promptTokenUsage = await this.getNumTokensFromPrompt(messages, functions, function_call);
|
|
465
|
+
const completionTokenUsage = await this.getNumTokensFromGenerations(generations);
|
|
466
|
+
tokenUsage.promptTokens = promptTokenUsage;
|
|
467
|
+
tokenUsage.completionTokens = completionTokenUsage;
|
|
468
|
+
tokenUsage.totalTokens = promptTokenUsage + completionTokenUsage;
|
|
469
|
+
return { generations, llmOutput: { estimatedTokenUsage: tokenUsage } };
|
|
448
470
|
}
|
|
449
471
|
else {
|
|
450
472
|
const data = await this.completionWithRetry({
|
|
@@ -484,16 +506,65 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
|
|
|
484
506
|
};
|
|
485
507
|
}
|
|
486
508
|
}
|
|
509
|
+
/**
|
|
510
|
+
* Estimate the number of tokens a prompt will use.
|
|
511
|
+
* Modified from: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts
|
|
512
|
+
*/
|
|
513
|
+
async getNumTokensFromPrompt(messages, functions, function_call) {
|
|
514
|
+
// It appears that if functions are present, the first system message is padded with a trailing newline. This
|
|
515
|
+
// was inferred by trying lots of combinations of messages and functions and seeing what the token counts were.
|
|
516
|
+
// let paddedSystem = false;
|
|
517
|
+
const openaiMessages = messages.map((m) => messageToOpenAIMessage(m));
|
|
518
|
+
let tokens = (await this.getNumTokensFromMessages(messages)).totalCount;
|
|
519
|
+
// If there are functions, add the function definitions as they count towards token usage
|
|
520
|
+
if (functions && function_call !== "auto") {
|
|
521
|
+
const promptDefinitions = (0, openai_format_fndef_js_1.formatFunctionDefinitions)(functions);
|
|
522
|
+
tokens += await this.getNumTokens(promptDefinitions);
|
|
523
|
+
tokens += 9; // Add nine per completion
|
|
524
|
+
}
|
|
525
|
+
// If there's a system message _and_ functions are present, subtract four tokens. I assume this is because
|
|
526
|
+
// functions typically add a system message, but reuse the first one if it's already there. This offsets
|
|
527
|
+
// the extra 9 tokens added by the function definitions.
|
|
528
|
+
if (functions && openaiMessages.find((m) => m.role === "system")) {
|
|
529
|
+
tokens -= 4;
|
|
530
|
+
}
|
|
531
|
+
// If function_call is 'none', add one token.
|
|
532
|
+
// If it's a FunctionCall object, add 4 + the number of tokens in the function name.
|
|
533
|
+
// If it's undefined or 'auto', don't add anything.
|
|
534
|
+
if (function_call === "none") {
|
|
535
|
+
tokens += 1;
|
|
536
|
+
}
|
|
537
|
+
else if (typeof function_call === "object") {
|
|
538
|
+
tokens += (await this.getNumTokens(function_call.name)) + 4;
|
|
539
|
+
}
|
|
540
|
+
return tokens;
|
|
541
|
+
}
|
|
542
|
+
/**
|
|
543
|
+
* Estimate the number of tokens an array of generations have used.
|
|
544
|
+
*/
|
|
545
|
+
async getNumTokensFromGenerations(generations) {
|
|
546
|
+
const generationUsages = await Promise.all(generations.map(async (generation) => {
|
|
547
|
+
const openAIMessage = messageToOpenAIMessage(generation.message);
|
|
548
|
+
if (openAIMessage.function_call) {
|
|
549
|
+
return (await this.getNumTokensFromMessages([generation.message]))
|
|
550
|
+
.countPerMessage[0];
|
|
551
|
+
}
|
|
552
|
+
else {
|
|
553
|
+
return await this.getNumTokens(generation.message.content);
|
|
554
|
+
}
|
|
555
|
+
}));
|
|
556
|
+
return generationUsages.reduce((a, b) => a + b, 0);
|
|
557
|
+
}
|
|
487
558
|
async getNumTokensFromMessages(messages) {
|
|
488
559
|
let totalCount = 0;
|
|
489
560
|
let tokensPerMessage = 0;
|
|
490
561
|
let tokensPerName = 0;
|
|
491
562
|
// From: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
|
|
492
|
-
if (
|
|
563
|
+
if (this.modelName === "gpt-3.5-turbo-0301") {
|
|
493
564
|
tokensPerMessage = 4;
|
|
494
565
|
tokensPerName = -1;
|
|
495
566
|
}
|
|
496
|
-
else
|
|
567
|
+
else {
|
|
497
568
|
tokensPerMessage = 3;
|
|
498
569
|
tokensPerName = 1;
|
|
499
570
|
}
|
|
@@ -503,7 +574,21 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
|
|
|
503
574
|
const nameCount = message.name !== undefined
|
|
504
575
|
? tokensPerName + (await this.getNumTokens(message.name))
|
|
505
576
|
: 0;
|
|
506
|
-
|
|
577
|
+
let count = textCount + tokensPerMessage + roleCount + nameCount;
|
|
578
|
+
// From: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts messageTokenEstimate
|
|
579
|
+
const openAIMessage = messageToOpenAIMessage(message);
|
|
580
|
+
if (openAIMessage.role === "function") {
|
|
581
|
+
count -= 2;
|
|
582
|
+
}
|
|
583
|
+
if (openAIMessage.function_call) {
|
|
584
|
+
count += 3;
|
|
585
|
+
}
|
|
586
|
+
if (openAIMessage.function_call?.name) {
|
|
587
|
+
count += await this.getNumTokens(openAIMessage.function_call?.name);
|
|
588
|
+
}
|
|
589
|
+
if (openAIMessage.function_call?.arguments) {
|
|
590
|
+
count += await this.getNumTokens(openAIMessage.function_call?.arguments);
|
|
591
|
+
}
|
|
507
592
|
totalCount += count;
|
|
508
593
|
return count;
|
|
509
594
|
}));
|
|
@@ -83,12 +83,22 @@ export declare class ChatOpenAI<CallOptions extends ChatOpenAICallOptions = Chat
|
|
|
83
83
|
_streamResponseChunks(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): AsyncGenerator<ChatGenerationChunk>;
|
|
84
84
|
/**
|
|
85
85
|
* Get the identifying parameters for the model
|
|
86
|
+
*
|
|
86
87
|
*/
|
|
87
88
|
identifyingParams(): Omit<OpenAIClient.Chat.Completions.ChatCompletionCreateParams, "messages"> & {
|
|
88
89
|
model_name: string;
|
|
89
90
|
} & ClientOptions;
|
|
90
91
|
/** @ignore */
|
|
91
92
|
_generate(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): Promise<ChatResult>;
|
|
93
|
+
/**
|
|
94
|
+
* Estimate the number of tokens a prompt will use.
|
|
95
|
+
* Modified from: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts
|
|
96
|
+
*/
|
|
97
|
+
private getNumTokensFromPrompt;
|
|
98
|
+
/**
|
|
99
|
+
* Estimate the number of tokens an array of generations have used.
|
|
100
|
+
*/
|
|
101
|
+
private getNumTokensFromGenerations;
|
|
92
102
|
getNumTokensFromMessages(messages: BaseMessage[]): Promise<{
|
|
93
103
|
totalCount: number;
|
|
94
104
|
countPerMessage: number[];
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { OpenAI as OpenAIClient } from "openai";
|
|
2
|
-
import { getModelNameForTiktoken } from "../base_language/count_tokens.js";
|
|
3
2
|
import { AIMessage, AIMessageChunk, ChatGenerationChunk, ChatMessage, ChatMessageChunk, FunctionMessageChunk, HumanMessage, HumanMessageChunk, SystemMessage, SystemMessageChunk, } from "../schema/index.js";
|
|
4
3
|
import { formatToOpenAIFunction } from "../tools/convert_to_openai.js";
|
|
5
4
|
import { getEndpoint } from "../util/azure.js";
|
|
@@ -7,6 +6,7 @@ import { getEnvironmentVariable } from "../util/env.js";
|
|
|
7
6
|
import { promptLayerTrackRequest } from "../util/prompt-layer.js";
|
|
8
7
|
import { BaseChatModel } from "./base.js";
|
|
9
8
|
import { wrapOpenAIClientError } from "../util/openai.js";
|
|
9
|
+
import { formatFunctionDefinitions, } from "../util/openai-format-fndef.js";
|
|
10
10
|
function extractGenericMessageCustomRole(message) {
|
|
11
11
|
if (message.role !== "system" &&
|
|
12
12
|
message.role !== "assistant" &&
|
|
@@ -36,6 +36,19 @@ function messageToOpenAIRole(message) {
|
|
|
36
36
|
throw new Error(`Unknown message type: ${type}`);
|
|
37
37
|
}
|
|
38
38
|
}
|
|
39
|
+
/**
 * Build an OpenAI-style chat message object from a message.
 *
 * The result carries `content` (falsy content becomes `null`), `name`,
 * the role computed by `messageToOpenAIRole`, and a copy of
 * `additional_kwargs.function_call`. When the function call has an
 * `arguments` string that is valid JSON, it is re-serialized without
 * insignificant whitespace.
 *
 * @param message - Message exposing `content`, `name` and `additional_kwargs`.
 * @returns A new object with `content`, `name`, `role` and `function_call` keys.
 */
function messageToOpenAIMessage(message) {
    const functionCall = message.additional_kwargs.function_call;
    const msg = {
        content: message.content || null,
        name: message.name,
        role: messageToOpenAIRole(message),
        // Shallow-copy so that normalizing `arguments` below never rewrites
        // the function call stored on the caller's message.
        function_call: functionCall ? { ...functionCall } : functionCall,
    };
    if (msg.function_call?.arguments) {
        try {
            // Remove spaces, new line characters etc.
            msg.function_call.arguments = JSON.stringify(JSON.parse(msg.function_call.arguments));
        }
        catch {
            // The arguments are not valid JSON (for example a truncated
            // completion). Keep the raw string instead of throwing.
        }
    }
    return msg;
}
|
|
39
52
|
function openAIResponseToChatMessage(message) {
|
|
40
53
|
switch (message.role) {
|
|
41
54
|
case "user":
|
|
@@ -411,6 +424,7 @@ export class ChatOpenAI extends BaseChatModel {
|
|
|
411
424
|
}
|
|
412
425
|
/**
|
|
413
426
|
* Get the identifying parameters for the model
|
|
427
|
+
*
|
|
414
428
|
*/
|
|
415
429
|
identifyingParams() {
|
|
416
430
|
return this._identifyingParams();
|
|
@@ -427,7 +441,7 @@ export class ChatOpenAI extends BaseChatModel {
|
|
|
427
441
|
.function_call,
|
|
428
442
|
}));
|
|
429
443
|
if (params.stream) {
|
|
430
|
-
const stream =
|
|
444
|
+
const stream = this._streamResponseChunks(messages, options, runManager);
|
|
431
445
|
const finalChunks = {};
|
|
432
446
|
for await (const chunk of stream) {
|
|
433
447
|
const index = chunk.generationInfo?.completion ?? 0;
|
|
@@ -441,7 +455,15 @@ export class ChatOpenAI extends BaseChatModel {
|
|
|
441
455
|
const generations = Object.entries(finalChunks)
|
|
442
456
|
.sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10))
|
|
443
457
|
.map(([_, value]) => value);
|
|
444
|
-
|
|
458
|
+
const { functions, function_call } = this.invocationParams(options);
|
|
459
|
+
// OpenAI does not support token usage report under stream mode,
|
|
460
|
+
// fallback to estimation.
|
|
461
|
+
const promptTokenUsage = await this.getNumTokensFromPrompt(messages, functions, function_call);
|
|
462
|
+
const completionTokenUsage = await this.getNumTokensFromGenerations(generations);
|
|
463
|
+
tokenUsage.promptTokens = promptTokenUsage;
|
|
464
|
+
tokenUsage.completionTokens = completionTokenUsage;
|
|
465
|
+
tokenUsage.totalTokens = promptTokenUsage + completionTokenUsage;
|
|
466
|
+
return { generations, llmOutput: { estimatedTokenUsage: tokenUsage } };
|
|
445
467
|
}
|
|
446
468
|
else {
|
|
447
469
|
const data = await this.completionWithRetry({
|
|
@@ -481,16 +503,65 @@ export class ChatOpenAI extends BaseChatModel {
|
|
|
481
503
|
};
|
|
482
504
|
}
|
|
483
505
|
}
|
|
506
|
+
/**
|
|
507
|
+
* Estimate the number of tokens a prompt will use.
|
|
508
|
+
* Modified from: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts
|
|
509
|
+
*/
|
|
510
|
+
async getNumTokensFromPrompt(messages, functions, function_call) {
|
|
511
|
+
// It appears that if functions are present, the first system message is padded with a trailing newline. This
|
|
512
|
+
// was inferred by trying lots of combinations of messages and functions and seeing what the token counts were.
|
|
513
|
+
// let paddedSystem = false;
|
|
514
|
+
const openaiMessages = messages.map((m) => messageToOpenAIMessage(m));
|
|
515
|
+
let tokens = (await this.getNumTokensFromMessages(messages)).totalCount;
|
|
516
|
+
// If there are functions, add the function definitions as they count towards token usage
|
|
517
|
+
if (functions && function_call !== "auto") {
|
|
518
|
+
const promptDefinitions = formatFunctionDefinitions(functions);
|
|
519
|
+
tokens += await this.getNumTokens(promptDefinitions);
|
|
520
|
+
tokens += 9; // Add nine per completion
|
|
521
|
+
}
|
|
522
|
+
// If there's a system message _and_ functions are present, subtract four tokens. I assume this is because
|
|
523
|
+
// functions typically add a system message, but reuse the first one if it's already there. This offsets
|
|
524
|
+
// the extra 9 tokens added by the function definitions.
|
|
525
|
+
if (functions && openaiMessages.find((m) => m.role === "system")) {
|
|
526
|
+
tokens -= 4;
|
|
527
|
+
}
|
|
528
|
+
// If function_call is 'none', add one token.
|
|
529
|
+
// If it's a FunctionCall object, add 4 + the number of tokens in the function name.
|
|
530
|
+
// If it's undefined or 'auto', don't add anything.
|
|
531
|
+
if (function_call === "none") {
|
|
532
|
+
tokens += 1;
|
|
533
|
+
}
|
|
534
|
+
else if (typeof function_call === "object") {
|
|
535
|
+
tokens += (await this.getNumTokens(function_call.name)) + 4;
|
|
536
|
+
}
|
|
537
|
+
return tokens;
|
|
538
|
+
}
|
|
539
|
+
/**
|
|
540
|
+
* Estimate the number of tokens an array of generations have used.
|
|
541
|
+
*/
|
|
542
|
+
async getNumTokensFromGenerations(generations) {
|
|
543
|
+
const generationUsages = await Promise.all(generations.map(async (generation) => {
|
|
544
|
+
const openAIMessage = messageToOpenAIMessage(generation.message);
|
|
545
|
+
if (openAIMessage.function_call) {
|
|
546
|
+
return (await this.getNumTokensFromMessages([generation.message]))
|
|
547
|
+
.countPerMessage[0];
|
|
548
|
+
}
|
|
549
|
+
else {
|
|
550
|
+
return await this.getNumTokens(generation.message.content);
|
|
551
|
+
}
|
|
552
|
+
}));
|
|
553
|
+
return generationUsages.reduce((a, b) => a + b, 0);
|
|
554
|
+
}
|
|
484
555
|
async getNumTokensFromMessages(messages) {
|
|
485
556
|
let totalCount = 0;
|
|
486
557
|
let tokensPerMessage = 0;
|
|
487
558
|
let tokensPerName = 0;
|
|
488
559
|
// From: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
|
|
489
|
-
if (
|
|
560
|
+
if (this.modelName === "gpt-3.5-turbo-0301") {
|
|
490
561
|
tokensPerMessage = 4;
|
|
491
562
|
tokensPerName = -1;
|
|
492
563
|
}
|
|
493
|
-
else
|
|
564
|
+
else {
|
|
494
565
|
tokensPerMessage = 3;
|
|
495
566
|
tokensPerName = 1;
|
|
496
567
|
}
|
|
@@ -500,7 +571,21 @@ export class ChatOpenAI extends BaseChatModel {
|
|
|
500
571
|
const nameCount = message.name !== undefined
|
|
501
572
|
? tokensPerName + (await this.getNumTokens(message.name))
|
|
502
573
|
: 0;
|
|
503
|
-
|
|
574
|
+
let count = textCount + tokensPerMessage + roleCount + nameCount;
|
|
575
|
+
// From: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts messageTokenEstimate
|
|
576
|
+
const openAIMessage = messageToOpenAIMessage(message);
|
|
577
|
+
if (openAIMessage.role === "function") {
|
|
578
|
+
count -= 2;
|
|
579
|
+
}
|
|
580
|
+
if (openAIMessage.function_call) {
|
|
581
|
+
count += 3;
|
|
582
|
+
}
|
|
583
|
+
if (openAIMessage.function_call?.name) {
|
|
584
|
+
count += await this.getNumTokens(openAIMessage.function_call?.name);
|
|
585
|
+
}
|
|
586
|
+
if (openAIMessage.function_call?.arguments) {
|
|
587
|
+
count += await this.getNumTokens(openAIMessage.function_call?.arguments);
|
|
588
|
+
}
|
|
504
589
|
totalCount += count;
|
|
505
590
|
return count;
|
|
506
591
|
}));
|
package/dist/embeddings/hf.cjs
CHANGED
|
@@ -24,6 +24,12 @@ class HuggingFaceInferenceEmbeddings extends base_js_1.Embeddings {
|
|
|
24
24
|
writable: true,
|
|
25
25
|
value: void 0
|
|
26
26
|
});
|
|
27
|
+
Object.defineProperty(this, "endpointUrl", {
|
|
28
|
+
enumerable: true,
|
|
29
|
+
configurable: true,
|
|
30
|
+
writable: true,
|
|
31
|
+
value: void 0
|
|
32
|
+
});
|
|
27
33
|
Object.defineProperty(this, "client", {
|
|
28
34
|
enumerable: true,
|
|
29
35
|
configurable: true,
|
|
@@ -34,7 +40,10 @@ class HuggingFaceInferenceEmbeddings extends base_js_1.Embeddings {
|
|
|
34
40
|
fields?.model ?? "sentence-transformers/distilbert-base-nli-mean-tokens";
|
|
35
41
|
this.apiKey =
|
|
36
42
|
fields?.apiKey ?? (0, env_js_1.getEnvironmentVariable)("HUGGINGFACEHUB_API_KEY");
|
|
37
|
-
this.
|
|
43
|
+
this.endpointUrl = fields?.endpointUrl;
|
|
44
|
+
this.client = this.endpointUrl
|
|
45
|
+
? new inference_1.HfInference(this.apiKey).endpoint(this.endpointUrl)
|
|
46
|
+
: new inference_1.HfInference(this.apiKey);
|
|
38
47
|
}
|
|
39
48
|
async _embed(texts) {
|
|
40
49
|
// replace newlines, which can negatively affect performance.
|
package/dist/embeddings/hf.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { HfInference } from "@huggingface/inference";
|
|
1
|
+
import { HfInference, HfInferenceEndpoint } from "@huggingface/inference";
|
|
2
2
|
import { Embeddings, EmbeddingsParams } from "./base.js";
|
|
3
3
|
/**
|
|
4
4
|
* Interface that extends EmbeddingsParams and defines additional
|
|
@@ -7,6 +7,7 @@ import { Embeddings, EmbeddingsParams } from "./base.js";
|
|
|
7
7
|
export interface HuggingFaceInferenceEmbeddingsParams extends EmbeddingsParams {
|
|
8
8
|
apiKey?: string;
|
|
9
9
|
model?: string;
|
|
10
|
+
endpointUrl?: string;
|
|
10
11
|
}
|
|
11
12
|
/**
|
|
12
13
|
* Class that extends the Embeddings class and provides methods for
|
|
@@ -16,7 +17,8 @@ export interface HuggingFaceInferenceEmbeddingsParams extends EmbeddingsParams {
|
|
|
16
17
|
export declare class HuggingFaceInferenceEmbeddings extends Embeddings implements HuggingFaceInferenceEmbeddingsParams {
|
|
17
18
|
apiKey?: string;
|
|
18
19
|
model: string;
|
|
19
|
-
|
|
20
|
+
endpointUrl?: string;
|
|
21
|
+
client: HfInference | HfInferenceEndpoint;
|
|
20
22
|
constructor(fields?: HuggingFaceInferenceEmbeddingsParams);
|
|
21
23
|
_embed(texts: string[]): Promise<number[][]>;
|
|
22
24
|
/**
|
package/dist/embeddings/hf.js
CHANGED
|
@@ -21,6 +21,12 @@ export class HuggingFaceInferenceEmbeddings extends Embeddings {
|
|
|
21
21
|
writable: true,
|
|
22
22
|
value: void 0
|
|
23
23
|
});
|
|
24
|
+
Object.defineProperty(this, "endpointUrl", {
|
|
25
|
+
enumerable: true,
|
|
26
|
+
configurable: true,
|
|
27
|
+
writable: true,
|
|
28
|
+
value: void 0
|
|
29
|
+
});
|
|
24
30
|
Object.defineProperty(this, "client", {
|
|
25
31
|
enumerable: true,
|
|
26
32
|
configurable: true,
|
|
@@ -31,7 +37,10 @@ export class HuggingFaceInferenceEmbeddings extends Embeddings {
|
|
|
31
37
|
fields?.model ?? "sentence-transformers/distilbert-base-nli-mean-tokens";
|
|
32
38
|
this.apiKey =
|
|
33
39
|
fields?.apiKey ?? getEnvironmentVariable("HUGGINGFACEHUB_API_KEY");
|
|
34
|
-
this.
|
|
40
|
+
this.endpointUrl = fields?.endpointUrl;
|
|
41
|
+
this.client = this.endpointUrl
|
|
42
|
+
? new HfInference(this.apiKey).endpoint(this.endpointUrl)
|
|
43
|
+
: new HfInference(this.apiKey);
|
|
35
44
|
}
|
|
36
45
|
async _embed(texts) {
|
|
37
46
|
// replace newlines, which can negatively affect performance.
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.LlamaCppEmbeddings = void 0;
|
|
4
|
+
const llama_cpp_js_1 = require("../util/llama_cpp.cjs");
|
|
5
|
+
const base_js_1 = require("./base.cjs");
|
|
6
|
+
/**
 * Embeddings implementation backed by a llama.cpp model and context
 * created through the helpers in `../util/llama_cpp.cjs`.
 */
class LlamaCppEmbeddings extends base_js_1.Embeddings {
    /**
     * @param inputs - Options forwarded to the base class and to
     * `createLlamaModel` / `createLlamaContext`.
     */
    constructor(inputs) {
        super(inputs);
        Object.defineProperty(this, "_model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_context", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Force embedding mode on a copy, so the caller's options object
        // is left exactly as it was passed in.
        const _inputs = { ...inputs, embedding: true };
        this._model = (0, llama_cpp_js_1.createLlamaModel)(_inputs);
        this._context = (0, llama_cpp_js_1.createLlamaContext)(this._model, _inputs);
    }
    /**
     * Generates embeddings for an array of texts.
     * Texts are processed one after another, each through `this.caller`.
     * @param texts - An array of strings to generate embeddings for.
     * @returns A Promise that resolves to an array of embeddings.
     */
    async embedDocuments(texts) {
        const embeddings = [];
        for (const text of texts) {
            const encodings = await this.caller.call(async () => this._context.encode(text));
            embeddings.push(Array.from(encodings, Number));
        }
        return embeddings;
    }
    /**
     * Generates an embedding for a single text.
     * The result is the output of `this._context.encode(text)` with each
     * element coerced to a number.
     * NOTE(review): `encode` reads like a tokenizer call, so these values may
     * be token ids rather than a dense embedding vector - confirm against
     * node-llama-cpp.
     * @param text - A string to generate an embedding for.
     * @returns A Promise that resolves to an array of numbers representing the embedding.
     */
    async embedQuery(text) {
        const encodings = await this.caller.call(async () => this._context.encode(text));
        return Array.from(encodings, Number);
    }
}
|
|
67
|
+
exports.LlamaCppEmbeddings = LlamaCppEmbeddings;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { LlamaModel, LlamaContext } from "node-llama-cpp";
|
|
2
|
+
import { LlamaBaseCppInputs } from "../util/llama_cpp.js";
|
|
3
|
+
import { Embeddings, EmbeddingsParams } from "./base.js";
|
|
4
|
+
/**
|
|
5
|
+
* Note that the modelPath is the only required parameter. For testing you
|
|
6
|
+
* can set this in the environment variable `LLAMA_PATH`.
|
|
7
|
+
*/
|
|
8
|
+
export interface LlamaCppEmbeddingsParams extends LlamaBaseCppInputs, EmbeddingsParams {
|
|
9
|
+
}
|
|
10
|
+
export declare class LlamaCppEmbeddings extends Embeddings {
|
|
11
|
+
_model: LlamaModel;
|
|
12
|
+
_context: LlamaContext;
|
|
13
|
+
constructor(inputs: LlamaCppEmbeddingsParams);
|
|
14
|
+
/**
|
|
15
|
+
* Generates embeddings for an array of texts.
|
|
16
|
+
* @param texts - An array of strings to generate embeddings for.
|
|
17
|
+
* @returns A Promise that resolves to an array of embeddings.
|
|
18
|
+
*/
|
|
19
|
+
embedDocuments(texts: string[]): Promise<number[][]>;
|
|
20
|
+
/**
|
|
21
|
+
* Generates an embedding for a single text.
|
|
22
|
+
* @param text - A string to generate an embedding for.
|
|
23
|
+
* @returns A Promise that resolves to an array of numbers representing the embedding.
|
|
24
|
+
*/
|
|
25
|
+
embedQuery(text: string): Promise<number[]>;
|
|
26
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { createLlamaModel, createLlamaContext, } from "../util/llama_cpp.js";
|
|
2
|
+
import { Embeddings } from "./base.js";
|
|
3
|
+
/**
 * Embeddings implementation backed by a llama.cpp model and context
 * created through the helpers in `../util/llama_cpp.js`.
 */
export class LlamaCppEmbeddings extends Embeddings {
    /**
     * @param inputs - Options forwarded to the base class and to
     * `createLlamaModel` / `createLlamaContext`.
     */
    constructor(inputs) {
        super(inputs);
        Object.defineProperty(this, "_model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_context", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Force embedding mode on a copy, so the caller's options object
        // is left exactly as it was passed in.
        const _inputs = { ...inputs, embedding: true };
        this._model = createLlamaModel(_inputs);
        this._context = createLlamaContext(this._model, _inputs);
    }
    /**
     * Generates embeddings for an array of texts.
     * Texts are processed one after another, each through `this.caller`.
     * @param texts - An array of strings to generate embeddings for.
     * @returns A Promise that resolves to an array of embeddings.
     */
    async embedDocuments(texts) {
        const embeddings = [];
        for (const text of texts) {
            const encodings = await this.caller.call(async () => this._context.encode(text));
            embeddings.push(Array.from(encodings, Number));
        }
        return embeddings;
    }
    /**
     * Generates an embedding for a single text.
     * The result is the output of `this._context.encode(text)` with each
     * element coerced to a number.
     * NOTE(review): `encode` reads like a tokenizer call, so these values may
     * be token ids rather than a dense embedding vector - confirm against
     * node-llama-cpp.
     * @param text - A string to generate an embedding for.
     * @returns A Promise that resolves to an array of numbers representing the embedding.
     */
    async embedQuery(text) {
        const encodings = await this.caller.call(async () => this._context.encode(text));
        return Array.from(encodings, Number);
    }
}
|
|
@@ -81,7 +81,13 @@ class OllamaEmbeddings extends base_js_1.Embeddings {
|
|
|
81
81
|
}
|
|
82
82
|
async _request(prompt) {
|
|
83
83
|
const { model, baseUrl, requestOptions } = this;
|
|
84
|
-
|
|
84
|
+
let formattedBaseUrl = baseUrl;
|
|
85
|
+
if (formattedBaseUrl.startsWith("http://localhost:")) {
|
|
86
|
+
// Node 18 has issues with resolving "localhost"
|
|
87
|
+
// See https://github.com/node-fetch/node-fetch/issues/1624
|
|
88
|
+
formattedBaseUrl = formattedBaseUrl.replace("http://localhost:", "http://127.0.0.1:");
|
|
89
|
+
}
|
|
90
|
+
const response = await fetch(`${formattedBaseUrl}/api/embeddings`, {
|
|
85
91
|
method: "POST",
|
|
86
92
|
headers: { "Content-Type": "application/json" },
|
|
87
93
|
body: JSON.stringify({
|
|
@@ -78,7 +78,13 @@ export class OllamaEmbeddings extends Embeddings {
|
|
|
78
78
|
}
|
|
79
79
|
async _request(prompt) {
|
|
80
80
|
const { model, baseUrl, requestOptions } = this;
|
|
81
|
-
|
|
81
|
+
let formattedBaseUrl = baseUrl;
|
|
82
|
+
if (formattedBaseUrl.startsWith("http://localhost:")) {
|
|
83
|
+
// Node 18 has issues with resolving "localhost"
|
|
84
|
+
// See https://github.com/node-fetch/node-fetch/issues/1624
|
|
85
|
+
formattedBaseUrl = formattedBaseUrl.replace("http://localhost:", "http://127.0.0.1:");
|
|
86
|
+
}
|
|
87
|
+
const response = await fetch(`${formattedBaseUrl}/api/embeddings`, {
|
|
82
88
|
method: "POST",
|
|
83
89
|
headers: { "Content-Type": "application/json" },
|
|
84
90
|
body: JSON.stringify({
|
package/dist/llms/bedrock.cjs
CHANGED
|
@@ -92,6 +92,12 @@ class Bedrock extends base_js_1.LLM {
|
|
|
92
92
|
writable: true,
|
|
93
93
|
value: new eventstream_codec_1.EventStreamCodec(util_utf8_1.toUtf8, util_utf8_1.fromUtf8)
|
|
94
94
|
});
|
|
95
|
+
Object.defineProperty(this, "streaming", {
|
|
96
|
+
enumerable: true,
|
|
97
|
+
configurable: true,
|
|
98
|
+
writable: true,
|
|
99
|
+
value: false
|
|
100
|
+
});
|
|
95
101
|
this.model = fields?.model ?? this.model;
|
|
96
102
|
const allowedModels = ["ai21", "anthropic", "amazon"];
|
|
97
103
|
if (!allowedModels.includes(this.model.split(".")[0])) {
|
|
@@ -109,6 +115,7 @@ class Bedrock extends base_js_1.LLM {
|
|
|
109
115
|
this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
|
|
110
116
|
this.stopSequences = fields?.stopSequences;
|
|
111
117
|
this.modelKwargs = fields?.modelKwargs;
|
|
118
|
+
this.streaming = fields?.streaming ?? this.streaming;
|
|
112
119
|
}
|
|
113
120
|
/** Call out to Bedrock service model.
|
|
114
121
|
Arguments:
|
|
@@ -120,10 +127,23 @@ class Bedrock extends base_js_1.LLM {
|
|
|
120
127
|
Example:
|
|
121
128
|
response = model.call("Tell me a joke.")
|
|
122
129
|
*/
|
|
123
|
-
async _call(prompt, options) {
|
|
130
|
+
async _call(prompt, options, runManager) {
|
|
124
131
|
const service = "bedrock-runtime";
|
|
125
132
|
const endpointHost = this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
|
|
126
133
|
const provider = this.model.split(".")[0];
|
|
134
|
+
if (this.streaming) {
|
|
135
|
+
const stream = this._streamResponseChunks(prompt, options, runManager);
|
|
136
|
+
let finalResult;
|
|
137
|
+
for await (const chunk of stream) {
|
|
138
|
+
if (finalResult === undefined) {
|
|
139
|
+
finalResult = chunk;
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
finalResult = finalResult.concat(chunk);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
return finalResult?.text ?? "";
|
|
146
|
+
}
|
|
127
147
|
const response = await this._signedFetch(prompt, options, {
|
|
128
148
|
bedrockMethod: "invoke",
|
|
129
149
|
endpointHost,
|
|
@@ -204,7 +224,8 @@ class Bedrock extends base_js_1.LLM {
|
|
|
204
224
|
text,
|
|
205
225
|
generationInfo: {},
|
|
206
226
|
});
|
|
207
|
-
|
|
227
|
+
// eslint-disable-next-line no-void
|
|
228
|
+
void runManager?.handleLLMNewToken(text);
|
|
208
229
|
}
|
|
209
230
|
}
|
|
210
231
|
}
|
|
@@ -215,7 +236,8 @@ class Bedrock extends base_js_1.LLM {
|
|
|
215
236
|
text,
|
|
216
237
|
generationInfo: {},
|
|
217
238
|
});
|
|
218
|
-
|
|
239
|
+
// eslint-disable-next-line no-void
|
|
240
|
+
void runManager?.handleLLMNewToken(text);
|
|
219
241
|
}
|
|
220
242
|
}
|
|
221
243
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|