langchain 0.0.176 → 0.0.178

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84):
  1. package/chat_models/iflytek_xinghuo/web.cjs +1 -0
  2. package/chat_models/iflytek_xinghuo/web.d.ts +1 -0
  3. package/chat_models/iflytek_xinghuo/web.js +1 -0
  4. package/chat_models/iflytek_xinghuo.cjs +1 -0
  5. package/chat_models/iflytek_xinghuo.d.ts +1 -0
  6. package/chat_models/iflytek_xinghuo.js +1 -0
  7. package/dist/chat_models/bedrock.cjs +25 -4
  8. package/dist/chat_models/bedrock.d.ts +2 -1
  9. package/dist/chat_models/bedrock.js +25 -4
  10. package/dist/chat_models/cloudflare_workersai.cjs +70 -24
  11. package/dist/chat_models/cloudflare_workersai.d.ts +6 -2
  12. package/dist/chat_models/cloudflare_workersai.js +71 -25
  13. package/dist/chat_models/iflytek_xinghuo/common.cjs +335 -0
  14. package/dist/chat_models/iflytek_xinghuo/common.d.ts +165 -0
  15. package/dist/chat_models/iflytek_xinghuo/common.js +331 -0
  16. package/dist/chat_models/iflytek_xinghuo/index.cjs +35 -0
  17. package/dist/chat_models/iflytek_xinghuo/index.d.ts +5 -0
  18. package/dist/chat_models/iflytek_xinghuo/index.js +28 -0
  19. package/dist/chat_models/iflytek_xinghuo/web.cjs +30 -0
  20. package/dist/chat_models/iflytek_xinghuo/web.d.ts +5 -0
  21. package/dist/chat_models/iflytek_xinghuo/web.js +26 -0
  22. package/dist/chat_models/llama_cpp.cjs +31 -79
  23. package/dist/chat_models/llama_cpp.d.ts +15 -58
  24. package/dist/chat_models/llama_cpp.js +32 -80
  25. package/dist/chat_models/openai.cjs +91 -6
  26. package/dist/chat_models/openai.d.ts +10 -0
  27. package/dist/chat_models/openai.js +91 -6
  28. package/dist/embeddings/hf.cjs +10 -1
  29. package/dist/embeddings/hf.d.ts +4 -2
  30. package/dist/embeddings/hf.js +10 -1
  31. package/dist/embeddings/llama_cpp.cjs +67 -0
  32. package/dist/embeddings/llama_cpp.d.ts +26 -0
  33. package/dist/embeddings/llama_cpp.js +63 -0
  34. package/dist/embeddings/ollama.cjs +7 -1
  35. package/dist/embeddings/ollama.js +7 -1
  36. package/dist/graphs/neo4j_graph.cjs +36 -5
  37. package/dist/graphs/neo4j_graph.js +14 -3
  38. package/dist/llms/bedrock.cjs +25 -3
  39. package/dist/llms/bedrock.d.ts +2 -1
  40. package/dist/llms/bedrock.js +25 -3
  41. package/dist/llms/cloudflare_workersai.cjs +59 -13
  42. package/dist/llms/cloudflare_workersai.d.ts +9 -3
  43. package/dist/llms/cloudflare_workersai.js +59 -13
  44. package/dist/llms/hf.cjs +10 -1
  45. package/dist/llms/hf.d.ts +3 -0
  46. package/dist/llms/hf.js +10 -1
  47. package/dist/llms/llama_cpp.cjs +25 -65
  48. package/dist/llms/llama_cpp.d.ts +7 -43
  49. package/dist/llms/llama_cpp.js +25 -65
  50. package/dist/load/import_constants.cjs +3 -0
  51. package/dist/load/import_constants.js +3 -0
  52. package/dist/prompts/chat.cjs +8 -0
  53. package/dist/prompts/chat.d.ts +5 -0
  54. package/dist/prompts/chat.js +8 -0
  55. package/dist/prompts/few_shot.cjs +162 -1
  56. package/dist/prompts/few_shot.d.ts +90 -2
  57. package/dist/prompts/few_shot.js +160 -0
  58. package/dist/prompts/index.cjs +2 -1
  59. package/dist/prompts/index.d.ts +1 -1
  60. package/dist/prompts/index.js +1 -1
  61. package/dist/retrievers/zep.cjs +26 -3
  62. package/dist/retrievers/zep.d.ts +11 -2
  63. package/dist/retrievers/zep.js +26 -3
  64. package/dist/util/bedrock.d.ts +2 -0
  65. package/dist/util/event-source-parse.cjs +20 -1
  66. package/dist/util/event-source-parse.d.ts +2 -0
  67. package/dist/util/event-source-parse.js +18 -0
  68. package/dist/util/iflytek_websocket_stream.cjs +81 -0
  69. package/dist/util/iflytek_websocket_stream.d.ts +27 -0
  70. package/dist/util/iflytek_websocket_stream.js +77 -0
  71. package/dist/util/llama_cpp.cjs +34 -0
  72. package/dist/util/llama_cpp.d.ts +46 -0
  73. package/dist/util/llama_cpp.js +28 -0
  74. package/dist/util/openai-format-fndef.cjs +81 -0
  75. package/dist/util/openai-format-fndef.d.ts +44 -0
  76. package/dist/util/openai-format-fndef.js +77 -0
  77. package/dist/util/openapi.d.ts +2 -2
  78. package/dist/vectorstores/pinecone.cjs +5 -5
  79. package/dist/vectorstores/pinecone.d.ts +2 -2
  80. package/dist/vectorstores/pinecone.js +5 -5
  81. package/embeddings/llama_cpp.cjs +1 -0
  82. package/embeddings/llama_cpp.d.ts +1 -0
  83. package/embeddings/llama_cpp.js +1 -0
  84. package/package.json +34 -5
@@ -1,42 +1,13 @@
1
1
  import { LlamaModel, LlamaContext, LlamaChatSession, type ConversationInteraction } from "node-llama-cpp";
2
2
  import { SimpleChatModel, BaseChatModelParams } from "./base.js";
3
+ import { LlamaBaseCppInputs } from "../util/llama_cpp.js";
3
4
  import { BaseLanguageModelCallOptions } from "../base_language/index.js";
4
5
  import type { BaseMessage } from "../schema/index.js";
5
6
  /**
6
7
  * Note that the modelPath is the only required parameter. For testing you
7
8
  * can set this in the environment variable `LLAMA_PATH`.
8
9
  */
9
- export interface LlamaCppInputs extends BaseChatModelParams {
10
- /** Prompt processing batch size. */
11
- batchSize?: number;
12
- /** Text context size. */
13
- contextSize?: number;
14
- /** Embedding mode only. */
15
- embedding?: boolean;
16
- /** Use fp16 for KV cache. */
17
- f16Kv?: boolean;
18
- /** Number of layers to store in VRAM. */
19
- gpuLayers?: number;
20
- /** The llama_eval() call computes all logits, not just the last one. */
21
- logitsAll?: boolean;
22
- /** If true, reduce VRAM usage at the cost of performance. */
23
- lowVram?: boolean;
24
- /** Path to the model on the filesystem. */
25
- modelPath: string;
26
- /** If null, a random seed will be used. */
27
- seed?: null | number;
28
- /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
29
- temperature?: number;
30
- /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
31
- topK?: number;
32
- /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
33
- topP?: number;
34
- /** Force system to keep model in RAM. */
35
- useMlock?: boolean;
36
- /** Use mmap if possible. */
37
- useMmap?: boolean;
38
- /** Only load the vocabulary, no weights. */
39
- vocabOnly?: boolean;
10
+ export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseChatModelParams {
40
11
  }
41
12
  export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
42
13
  /** The maximum number of tokens the response should contain. */
@@ -53,42 +24,28 @@ export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
53
24
  export declare class ChatLlamaCpp extends SimpleChatModel<LlamaCppCallOptions> {
54
25
  CallOptions: LlamaCppCallOptions;
55
26
  static inputs: LlamaCppInputs;
56
- batchSize?: number;
57
- contextSize?: number;
58
- embedding?: boolean;
59
- f16Kv?: boolean;
60
- gpuLayers?: number;
61
- logitsAll?: boolean;
62
- lowVram?: boolean;
63
- seed?: null | number;
64
- useMlock?: boolean;
65
- useMmap?: boolean;
66
- vocabOnly?: boolean;
67
- modelPath: string;
27
+ maxTokens?: number;
28
+ temperature?: number;
29
+ topK?: number;
30
+ topP?: number;
31
+ trimWhitespaceSuffix?: boolean;
68
32
  _model: LlamaModel;
69
33
  _context: LlamaContext;
70
34
  _session: LlamaChatSession | null;
71
35
  static lc_name(): string;
72
36
  constructor(inputs: LlamaCppInputs);
73
37
  _llmType(): string;
74
- invocationParams(): {
75
- batchSize: number | undefined;
76
- contextSize: number | undefined;
77
- embedding: boolean | undefined;
78
- f16Kv: boolean | undefined;
79
- gpuLayers: number | undefined;
80
- logitsAll: boolean | undefined;
81
- lowVram: boolean | undefined;
82
- modelPath: string;
83
- seed: number | null | undefined;
84
- useMlock: boolean | undefined;
85
- useMmap: boolean | undefined;
86
- vocabOnly: boolean | undefined;
87
- };
88
38
  /** @ignore */
89
39
  _combineLLMOutput(): {};
40
+ invocationParams(): {
41
+ maxTokens: number | undefined;
42
+ temperature: number | undefined;
43
+ topK: number | undefined;
44
+ topP: number | undefined;
45
+ trimWhitespaceSuffix: boolean | undefined;
46
+ };
90
47
  /** @ignore */
91
- _call(messages: BaseMessage[], options: this["ParsedCallOptions"]): Promise<string>;
48
+ _call(messages: BaseMessage[], _options: this["ParsedCallOptions"]): Promise<string>;
92
49
  protected _buildSession(messages: BaseMessage[]): string;
93
50
  protected _convertMessagesToInteractions(messages: BaseMessage[]): ConversationInteraction[];
94
51
  }
@@ -1,5 +1,6 @@
1
- import { LlamaModel, LlamaContext, LlamaChatSession, } from "node-llama-cpp";
1
+ import { LlamaChatSession, } from "node-llama-cpp";
2
2
  import { SimpleChatModel } from "./base.js";
3
+ import { createLlamaModel, createLlamaContext, } from "../util/llama_cpp.js";
3
4
  /**
4
5
  * To use this model you need to have the `node-llama-cpp` module installed.
5
6
  * This can be installed using `npm install -S node-llama-cpp` and the minimum
@@ -12,73 +13,31 @@ export class ChatLlamaCpp extends SimpleChatModel {
12
13
  }
13
14
  constructor(inputs) {
14
15
  super(inputs);
15
- Object.defineProperty(this, "batchSize", {
16
+ Object.defineProperty(this, "maxTokens", {
16
17
  enumerable: true,
17
18
  configurable: true,
18
19
  writable: true,
19
20
  value: void 0
20
21
  });
21
- Object.defineProperty(this, "contextSize", {
22
+ Object.defineProperty(this, "temperature", {
22
23
  enumerable: true,
23
24
  configurable: true,
24
25
  writable: true,
25
26
  value: void 0
26
27
  });
27
- Object.defineProperty(this, "embedding", {
28
+ Object.defineProperty(this, "topK", {
28
29
  enumerable: true,
29
30
  configurable: true,
30
31
  writable: true,
31
32
  value: void 0
32
33
  });
33
- Object.defineProperty(this, "f16Kv", {
34
+ Object.defineProperty(this, "topP", {
34
35
  enumerable: true,
35
36
  configurable: true,
36
37
  writable: true,
37
38
  value: void 0
38
39
  });
39
- Object.defineProperty(this, "gpuLayers", {
40
- enumerable: true,
41
- configurable: true,
42
- writable: true,
43
- value: void 0
44
- });
45
- Object.defineProperty(this, "logitsAll", {
46
- enumerable: true,
47
- configurable: true,
48
- writable: true,
49
- value: void 0
50
- });
51
- Object.defineProperty(this, "lowVram", {
52
- enumerable: true,
53
- configurable: true,
54
- writable: true,
55
- value: void 0
56
- });
57
- Object.defineProperty(this, "seed", {
58
- enumerable: true,
59
- configurable: true,
60
- writable: true,
61
- value: void 0
62
- });
63
- Object.defineProperty(this, "useMlock", {
64
- enumerable: true,
65
- configurable: true,
66
- writable: true,
67
- value: void 0
68
- });
69
- Object.defineProperty(this, "useMmap", {
70
- enumerable: true,
71
- configurable: true,
72
- writable: true,
73
- value: void 0
74
- });
75
- Object.defineProperty(this, "vocabOnly", {
76
- enumerable: true,
77
- configurable: true,
78
- writable: true,
79
- value: void 0
80
- });
81
- Object.defineProperty(this, "modelPath", {
40
+ Object.defineProperty(this, "trimWhitespaceSuffix", {
82
41
  enumerable: true,
83
42
  configurable: true,
84
43
  writable: true,
@@ -102,47 +61,33 @@ export class ChatLlamaCpp extends SimpleChatModel {
102
61
  writable: true,
103
62
  value: void 0
104
63
  });
105
- this.batchSize = inputs?.batchSize;
106
- this.contextSize = inputs?.contextSize;
107
- this.embedding = inputs?.embedding;
108
- this.f16Kv = inputs?.f16Kv;
109
- this.gpuLayers = inputs?.gpuLayers;
110
- this.logitsAll = inputs?.logitsAll;
111
- this.lowVram = inputs?.lowVram;
112
- this.modelPath = inputs.modelPath;
113
- this.seed = inputs?.seed;
114
- this.useMlock = inputs?.useMlock;
115
- this.useMmap = inputs?.useMmap;
116
- this.vocabOnly = inputs?.vocabOnly;
117
- this._model = new LlamaModel(inputs);
118
- this._context = new LlamaContext({ model: this._model });
64
+ this.maxTokens = inputs?.maxTokens;
65
+ this.temperature = inputs?.temperature;
66
+ this.topK = inputs?.topK;
67
+ this.topP = inputs?.topP;
68
+ this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
69
+ this._model = createLlamaModel(inputs);
70
+ this._context = createLlamaContext(this._model, inputs);
119
71
  this._session = null;
120
72
  }
121
73
  _llmType() {
122
74
  return "llama2_cpp";
123
75
  }
124
- invocationParams() {
125
- return {
126
- batchSize: this.batchSize,
127
- contextSize: this.contextSize,
128
- embedding: this.embedding,
129
- f16Kv: this.f16Kv,
130
- gpuLayers: this.gpuLayers,
131
- logitsAll: this.logitsAll,
132
- lowVram: this.lowVram,
133
- modelPath: this.modelPath,
134
- seed: this.seed,
135
- useMlock: this.useMlock,
136
- useMmap: this.useMmap,
137
- vocabOnly: this.vocabOnly,
138
- };
139
- }
140
76
  /** @ignore */
141
77
  _combineLLMOutput() {
142
78
  return {};
143
79
  }
80
+ invocationParams() {
81
+ return {
82
+ maxTokens: this.maxTokens,
83
+ temperature: this.temperature,
84
+ topK: this.topK,
85
+ topP: this.topP,
86
+ trimWhitespaceSuffix: this.trimWhitespaceSuffix,
87
+ };
88
+ }
144
89
  /** @ignore */
145
- async _call(messages, options) {
90
+ async _call(messages, _options) {
146
91
  let prompt = "";
147
92
  if (messages.length > 1) {
148
93
  // We need to build a new _session
@@ -156,8 +101,15 @@ export class ChatLlamaCpp extends SimpleChatModel {
156
101
  prompt = messages[0].content;
157
102
  }
158
103
  try {
104
+ const promptOptions = {
105
+ maxTokens: this?.maxTokens,
106
+ temperature: this?.temperature,
107
+ topK: this?.topK,
108
+ topP: this?.topP,
109
+ trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
110
+ };
159
111
  // @ts-expect-error - TS2531: Object is possibly 'null'.
160
- const completion = await this._session.prompt(prompt, options);
112
+ const completion = await this._session.prompt(prompt, promptOptions);
161
113
  return completion;
162
114
  }
163
115
  catch (e) {
@@ -2,7 +2,6 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.PromptLayerChatOpenAI = exports.ChatOpenAI = void 0;
4
4
  const openai_1 = require("openai");
5
- const count_tokens_js_1 = require("../base_language/count_tokens.cjs");
6
5
  const index_js_1 = require("../schema/index.cjs");
7
6
  const convert_to_openai_js_1 = require("../tools/convert_to_openai.cjs");
8
7
  const azure_js_1 = require("../util/azure.cjs");
@@ -10,6 +9,7 @@ const env_js_1 = require("../util/env.cjs");
10
9
  const prompt_layer_js_1 = require("../util/prompt-layer.cjs");
11
10
  const base_js_1 = require("./base.cjs");
12
11
  const openai_js_1 = require("../util/openai.cjs");
12
+ const openai_format_fndef_js_1 = require("../util/openai-format-fndef.cjs");
13
13
  function extractGenericMessageCustomRole(message) {
14
14
  if (message.role !== "system" &&
15
15
  message.role !== "assistant" &&
@@ -39,6 +39,19 @@ function messageToOpenAIRole(message) {
39
39
  throw new Error(`Unknown message type: ${type}`);
40
40
  }
41
41
  }
42
+ function messageToOpenAIMessage(message) {
43
+ const msg = {
44
+ content: message.content || null,
45
+ name: message.name,
46
+ role: messageToOpenAIRole(message),
47
+ function_call: message.additional_kwargs.function_call,
48
+ };
49
+ if (msg.function_call?.arguments) {
50
+ // Remove spaces, new line characters etc.
51
+ msg.function_call.arguments = JSON.stringify(JSON.parse(msg.function_call.arguments));
52
+ }
53
+ return msg;
54
+ }
42
55
  function openAIResponseToChatMessage(message) {
43
56
  switch (message.role) {
44
57
  case "user":
@@ -414,6 +427,7 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
414
427
  }
415
428
  /**
416
429
  * Get the identifying parameters for the model
430
+ *
417
431
  */
418
432
  identifyingParams() {
419
433
  return this._identifyingParams();
@@ -430,7 +444,7 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
430
444
  .function_call,
431
445
  }));
432
446
  if (params.stream) {
433
- const stream = await this._streamResponseChunks(messages, options, runManager);
447
+ const stream = this._streamResponseChunks(messages, options, runManager);
434
448
  const finalChunks = {};
435
449
  for await (const chunk of stream) {
436
450
  const index = chunk.generationInfo?.completion ?? 0;
@@ -444,7 +458,15 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
444
458
  const generations = Object.entries(finalChunks)
445
459
  .sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10))
446
460
  .map(([_, value]) => value);
447
- return { generations };
461
+ const { functions, function_call } = this.invocationParams(options);
462
+ // OpenAI does not support token usage report under stream mode,
463
+ // fallback to estimation.
464
+ const promptTokenUsage = await this.getNumTokensFromPrompt(messages, functions, function_call);
465
+ const completionTokenUsage = await this.getNumTokensFromGenerations(generations);
466
+ tokenUsage.promptTokens = promptTokenUsage;
467
+ tokenUsage.completionTokens = completionTokenUsage;
468
+ tokenUsage.totalTokens = promptTokenUsage + completionTokenUsage;
469
+ return { generations, llmOutput: { estimatedTokenUsage: tokenUsage } };
448
470
  }
449
471
  else {
450
472
  const data = await this.completionWithRetry({
@@ -484,16 +506,65 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
484
506
  };
485
507
  }
486
508
  }
509
+ /**
510
+ * Estimate the number of tokens a prompt will use.
511
+ * Modified from: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts
512
+ */
513
+ async getNumTokensFromPrompt(messages, functions, function_call) {
514
+ // It appears that if functions are present, the first system message is padded with a trailing newline. This
515
+ // was inferred by trying lots of combinations of messages and functions and seeing what the token counts were.
516
+ // let paddedSystem = false;
517
+ const openaiMessages = messages.map((m) => messageToOpenAIMessage(m));
518
+ let tokens = (await this.getNumTokensFromMessages(messages)).totalCount;
519
+ // If there are functions, add the function definitions as they count towards token usage
520
+ if (functions && function_call !== "auto") {
521
+ const promptDefinitions = (0, openai_format_fndef_js_1.formatFunctionDefinitions)(functions);
522
+ tokens += await this.getNumTokens(promptDefinitions);
523
+ tokens += 9; // Add nine per completion
524
+ }
525
+ // If there's a system message _and_ functions are present, subtract four tokens. I assume this is because
526
+ // functions typically add a system message, but reuse the first one if it's already there. This offsets
527
+ // the extra 9 tokens added by the function definitions.
528
+ if (functions && openaiMessages.find((m) => m.role === "system")) {
529
+ tokens -= 4;
530
+ }
531
+ // If function_call is 'none', add one token.
532
+ // If it's a FunctionCall object, add 4 + the number of tokens in the function name.
533
+ // If it's undefined or 'auto', don't add anything.
534
+ if (function_call === "none") {
535
+ tokens += 1;
536
+ }
537
+ else if (typeof function_call === "object") {
538
+ tokens += (await this.getNumTokens(function_call.name)) + 4;
539
+ }
540
+ return tokens;
541
+ }
542
+ /**
543
+ * Estimate the number of tokens an array of generations have used.
544
+ */
545
+ async getNumTokensFromGenerations(generations) {
546
+ const generationUsages = await Promise.all(generations.map(async (generation) => {
547
+ const openAIMessage = messageToOpenAIMessage(generation.message);
548
+ if (openAIMessage.function_call) {
549
+ return (await this.getNumTokensFromMessages([generation.message]))
550
+ .countPerMessage[0];
551
+ }
552
+ else {
553
+ return await this.getNumTokens(generation.message.content);
554
+ }
555
+ }));
556
+ return generationUsages.reduce((a, b) => a + b, 0);
557
+ }
487
558
  async getNumTokensFromMessages(messages) {
488
559
  let totalCount = 0;
489
560
  let tokensPerMessage = 0;
490
561
  let tokensPerName = 0;
491
562
  // From: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
492
- if ((0, count_tokens_js_1.getModelNameForTiktoken)(this.modelName) === "gpt-3.5-turbo") {
563
+ if (this.modelName === "gpt-3.5-turbo-0301") {
493
564
  tokensPerMessage = 4;
494
565
  tokensPerName = -1;
495
566
  }
496
- else if ((0, count_tokens_js_1.getModelNameForTiktoken)(this.modelName).startsWith("gpt-4")) {
567
+ else {
497
568
  tokensPerMessage = 3;
498
569
  tokensPerName = 1;
499
570
  }
@@ -503,7 +574,21 @@ class ChatOpenAI extends base_js_1.BaseChatModel {
503
574
  const nameCount = message.name !== undefined
504
575
  ? tokensPerName + (await this.getNumTokens(message.name))
505
576
  : 0;
506
- const count = textCount + tokensPerMessage + roleCount + nameCount;
577
+ let count = textCount + tokensPerMessage + roleCount + nameCount;
578
+ // From: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts messageTokenEstimate
579
+ const openAIMessage = messageToOpenAIMessage(message);
580
+ if (openAIMessage.role === "function") {
581
+ count -= 2;
582
+ }
583
+ if (openAIMessage.function_call) {
584
+ count += 3;
585
+ }
586
+ if (openAIMessage.function_call?.name) {
587
+ count += await this.getNumTokens(openAIMessage.function_call?.name);
588
+ }
589
+ if (openAIMessage.function_call?.arguments) {
590
+ count += await this.getNumTokens(openAIMessage.function_call?.arguments);
591
+ }
507
592
  totalCount += count;
508
593
  return count;
509
594
  }));
@@ -83,12 +83,22 @@ export declare class ChatOpenAI<CallOptions extends ChatOpenAICallOptions = Chat
83
83
  _streamResponseChunks(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): AsyncGenerator<ChatGenerationChunk>;
84
84
  /**
85
85
  * Get the identifying parameters for the model
86
+ *
86
87
  */
87
88
  identifyingParams(): Omit<OpenAIClient.Chat.Completions.ChatCompletionCreateParams, "messages"> & {
88
89
  model_name: string;
89
90
  } & ClientOptions;
90
91
  /** @ignore */
91
92
  _generate(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): Promise<ChatResult>;
93
+ /**
94
+ * Estimate the number of tokens a prompt will use.
95
+ * Modified from: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts
96
+ */
97
+ private getNumTokensFromPrompt;
98
+ /**
99
+ * Estimate the number of tokens an array of generations have used.
100
+ */
101
+ private getNumTokensFromGenerations;
92
102
  getNumTokensFromMessages(messages: BaseMessage[]): Promise<{
93
103
  totalCount: number;
94
104
  countPerMessage: number[];
@@ -1,5 +1,4 @@
1
1
  import { OpenAI as OpenAIClient } from "openai";
2
- import { getModelNameForTiktoken } from "../base_language/count_tokens.js";
3
2
  import { AIMessage, AIMessageChunk, ChatGenerationChunk, ChatMessage, ChatMessageChunk, FunctionMessageChunk, HumanMessage, HumanMessageChunk, SystemMessage, SystemMessageChunk, } from "../schema/index.js";
4
3
  import { formatToOpenAIFunction } from "../tools/convert_to_openai.js";
5
4
  import { getEndpoint } from "../util/azure.js";
@@ -7,6 +6,7 @@ import { getEnvironmentVariable } from "../util/env.js";
7
6
  import { promptLayerTrackRequest } from "../util/prompt-layer.js";
8
7
  import { BaseChatModel } from "./base.js";
9
8
  import { wrapOpenAIClientError } from "../util/openai.js";
9
+ import { formatFunctionDefinitions, } from "../util/openai-format-fndef.js";
10
10
  function extractGenericMessageCustomRole(message) {
11
11
  if (message.role !== "system" &&
12
12
  message.role !== "assistant" &&
@@ -36,6 +36,19 @@ function messageToOpenAIRole(message) {
36
36
  throw new Error(`Unknown message type: ${type}`);
37
37
  }
38
38
  }
39
+ function messageToOpenAIMessage(message) {
40
+ const msg = {
41
+ content: message.content || null,
42
+ name: message.name,
43
+ role: messageToOpenAIRole(message),
44
+ function_call: message.additional_kwargs.function_call,
45
+ };
46
+ if (msg.function_call?.arguments) {
47
+ // Remove spaces, new line characters etc.
48
+ msg.function_call.arguments = JSON.stringify(JSON.parse(msg.function_call.arguments));
49
+ }
50
+ return msg;
51
+ }
39
52
  function openAIResponseToChatMessage(message) {
40
53
  switch (message.role) {
41
54
  case "user":
@@ -411,6 +424,7 @@ export class ChatOpenAI extends BaseChatModel {
411
424
  }
412
425
  /**
413
426
  * Get the identifying parameters for the model
427
+ *
414
428
  */
415
429
  identifyingParams() {
416
430
  return this._identifyingParams();
@@ -427,7 +441,7 @@ export class ChatOpenAI extends BaseChatModel {
427
441
  .function_call,
428
442
  }));
429
443
  if (params.stream) {
430
- const stream = await this._streamResponseChunks(messages, options, runManager);
444
+ const stream = this._streamResponseChunks(messages, options, runManager);
431
445
  const finalChunks = {};
432
446
  for await (const chunk of stream) {
433
447
  const index = chunk.generationInfo?.completion ?? 0;
@@ -441,7 +455,15 @@ export class ChatOpenAI extends BaseChatModel {
441
455
  const generations = Object.entries(finalChunks)
442
456
  .sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10))
443
457
  .map(([_, value]) => value);
444
- return { generations };
458
+ const { functions, function_call } = this.invocationParams(options);
459
+ // OpenAI does not support token usage report under stream mode,
460
+ // fallback to estimation.
461
+ const promptTokenUsage = await this.getNumTokensFromPrompt(messages, functions, function_call);
462
+ const completionTokenUsage = await this.getNumTokensFromGenerations(generations);
463
+ tokenUsage.promptTokens = promptTokenUsage;
464
+ tokenUsage.completionTokens = completionTokenUsage;
465
+ tokenUsage.totalTokens = promptTokenUsage + completionTokenUsage;
466
+ return { generations, llmOutput: { estimatedTokenUsage: tokenUsage } };
445
467
  }
446
468
  else {
447
469
  const data = await this.completionWithRetry({
@@ -481,16 +503,65 @@ export class ChatOpenAI extends BaseChatModel {
481
503
  };
482
504
  }
483
505
  }
506
+ /**
507
+ * Estimate the number of tokens a prompt will use.
508
+ * Modified from: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts
509
+ */
510
+ async getNumTokensFromPrompt(messages, functions, function_call) {
511
+ // It appears that if functions are present, the first system message is padded with a trailing newline. This
512
+ // was inferred by trying lots of combinations of messages and functions and seeing what the token counts were.
513
+ // let paddedSystem = false;
514
+ const openaiMessages = messages.map((m) => messageToOpenAIMessage(m));
515
+ let tokens = (await this.getNumTokensFromMessages(messages)).totalCount;
516
+ // If there are functions, add the function definitions as they count towards token usage
517
+ if (functions && function_call !== "auto") {
518
+ const promptDefinitions = formatFunctionDefinitions(functions);
519
+ tokens += await this.getNumTokens(promptDefinitions);
520
+ tokens += 9; // Add nine per completion
521
+ }
522
+ // If there's a system message _and_ functions are present, subtract four tokens. I assume this is because
523
+ // functions typically add a system message, but reuse the first one if it's already there. This offsets
524
+ // the extra 9 tokens added by the function definitions.
525
+ if (functions && openaiMessages.find((m) => m.role === "system")) {
526
+ tokens -= 4;
527
+ }
528
+ // If function_call is 'none', add one token.
529
+ // If it's a FunctionCall object, add 4 + the number of tokens in the function name.
530
+ // If it's undefined or 'auto', don't add anything.
531
+ if (function_call === "none") {
532
+ tokens += 1;
533
+ }
534
+ else if (typeof function_call === "object") {
535
+ tokens += (await this.getNumTokens(function_call.name)) + 4;
536
+ }
537
+ return tokens;
538
+ }
539
+ /**
540
+ * Estimate the number of tokens an array of generations have used.
541
+ */
542
+ async getNumTokensFromGenerations(generations) {
543
+ const generationUsages = await Promise.all(generations.map(async (generation) => {
544
+ const openAIMessage = messageToOpenAIMessage(generation.message);
545
+ if (openAIMessage.function_call) {
546
+ return (await this.getNumTokensFromMessages([generation.message]))
547
+ .countPerMessage[0];
548
+ }
549
+ else {
550
+ return await this.getNumTokens(generation.message.content);
551
+ }
552
+ }));
553
+ return generationUsages.reduce((a, b) => a + b, 0);
554
+ }
484
555
  async getNumTokensFromMessages(messages) {
485
556
  let totalCount = 0;
486
557
  let tokensPerMessage = 0;
487
558
  let tokensPerName = 0;
488
559
  // From: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
489
- if (getModelNameForTiktoken(this.modelName) === "gpt-3.5-turbo") {
560
+ if (this.modelName === "gpt-3.5-turbo-0301") {
490
561
  tokensPerMessage = 4;
491
562
  tokensPerName = -1;
492
563
  }
493
- else if (getModelNameForTiktoken(this.modelName).startsWith("gpt-4")) {
564
+ else {
494
565
  tokensPerMessage = 3;
495
566
  tokensPerName = 1;
496
567
  }
@@ -500,7 +571,21 @@ export class ChatOpenAI extends BaseChatModel {
500
571
  const nameCount = message.name !== undefined
501
572
  ? tokensPerName + (await this.getNumTokens(message.name))
502
573
  : 0;
503
- const count = textCount + tokensPerMessage + roleCount + nameCount;
574
+ let count = textCount + tokensPerMessage + roleCount + nameCount;
575
+ // From: https://github.com/hmarr/openai-chat-tokens/blob/main/src/index.ts messageTokenEstimate
576
+ const openAIMessage = messageToOpenAIMessage(message);
577
+ if (openAIMessage.role === "function") {
578
+ count -= 2;
579
+ }
580
+ if (openAIMessage.function_call) {
581
+ count += 3;
582
+ }
583
+ if (openAIMessage.function_call?.name) {
584
+ count += await this.getNumTokens(openAIMessage.function_call?.name);
585
+ }
586
+ if (openAIMessage.function_call?.arguments) {
587
+ count += await this.getNumTokens(openAIMessage.function_call?.arguments);
588
+ }
504
589
  totalCount += count;
505
590
  return count;
506
591
  }));
@@ -24,6 +24,12 @@ class HuggingFaceInferenceEmbeddings extends base_js_1.Embeddings {
24
24
  writable: true,
25
25
  value: void 0
26
26
  });
27
+ Object.defineProperty(this, "endpointUrl", {
28
+ enumerable: true,
29
+ configurable: true,
30
+ writable: true,
31
+ value: void 0
32
+ });
27
33
  Object.defineProperty(this, "client", {
28
34
  enumerable: true,
29
35
  configurable: true,
@@ -34,7 +40,10 @@ class HuggingFaceInferenceEmbeddings extends base_js_1.Embeddings {
34
40
  fields?.model ?? "sentence-transformers/distilbert-base-nli-mean-tokens";
35
41
  this.apiKey =
36
42
  fields?.apiKey ?? (0, env_js_1.getEnvironmentVariable)("HUGGINGFACEHUB_API_KEY");
37
- this.client = new inference_1.HfInference(this.apiKey);
43
+ this.endpointUrl = fields?.endpointUrl;
44
+ this.client = this.endpointUrl
45
+ ? new inference_1.HfInference(this.apiKey).endpoint(this.endpointUrl)
46
+ : new inference_1.HfInference(this.apiKey);
38
47
  }
39
48
  async _embed(texts) {
40
49
  // replace newlines, which can negatively affect performance.
@@ -1,4 +1,4 @@
1
- import { HfInference } from "@huggingface/inference";
1
+ import { HfInference, HfInferenceEndpoint } from "@huggingface/inference";
2
2
  import { Embeddings, EmbeddingsParams } from "./base.js";
3
3
  /**
4
4
  * Interface that extends EmbeddingsParams and defines additional
@@ -7,6 +7,7 @@ import { Embeddings, EmbeddingsParams } from "./base.js";
7
7
  export interface HuggingFaceInferenceEmbeddingsParams extends EmbeddingsParams {
8
8
  apiKey?: string;
9
9
  model?: string;
10
+ endpointUrl?: string;
10
11
  }
11
12
  /**
12
13
  * Class that extends the Embeddings class and provides methods for
@@ -16,7 +17,8 @@ export interface HuggingFaceInferenceEmbeddingsParams extends EmbeddingsParams {
16
17
  export declare class HuggingFaceInferenceEmbeddings extends Embeddings implements HuggingFaceInferenceEmbeddingsParams {
17
18
  apiKey?: string;
18
19
  model: string;
19
- client: HfInference;
20
+ endpointUrl?: string;
21
+ client: HfInference | HfInferenceEndpoint;
20
22
  constructor(fields?: HuggingFaceInferenceEmbeddingsParams);
21
23
  _embed(texts: string[]): Promise<number[][]>;
22
24
  /**