langchainrb 0.13.4 → 0.14.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +3 -18
- data/lib/langchain/assistants/assistant.rb +204 -79
- data/lib/langchain/assistants/messages/base.rb +35 -1
- data/lib/langchain/assistants/messages/ollama_message.rb +86 -0
- data/lib/langchain/assistants/thread.rb +8 -1
- data/lib/langchain/llm/ai21.rb +0 -4
- data/lib/langchain/llm/anthropic.rb +15 -6
- data/lib/langchain/llm/azure.rb +3 -3
- data/lib/langchain/llm/base.rb +1 -0
- data/lib/langchain/llm/cohere.rb +0 -2
- data/lib/langchain/llm/google_gemini.rb +17 -3
- data/lib/langchain/llm/google_palm.rb +1 -4
- data/lib/langchain/llm/ollama.rb +1 -1
- data/lib/langchain/llm/replicate.rb +1 -1
- data/lib/langchain/llm/response/google_gemini_response.rb +1 -1
- data/lib/langchain/llm/response/ollama_response.rb +19 -1
- data/lib/langchain/loader.rb +3 -1
- data/lib/langchain/utils/hash_transformer.rb +25 -0
- data/lib/langchain/vectorsearch/chroma.rb +3 -1
- data/lib/langchain/vectorsearch/milvus.rb +18 -3
- data/lib/langchain/version.rb +1 -1
- metadata +9 -27
- data/lib/langchain/utils/token_length/ai21_validator.rb +0 -41
- data/lib/langchain/utils/token_length/base_validator.rb +0 -42
- data/lib/langchain/utils/token_length/cohere_validator.rb +0 -49
- data/lib/langchain/utils/token_length/google_palm_validator.rb +0 -57
- data/lib/langchain/utils/token_length/openai_validator.rb +0 -138
- data/lib/langchain/utils/token_length/token_limit_exceeded.rb +0 -17
@@ -1,138 +0,0 @@
-# frozen_string_literal: true
-
-require "tiktoken_ruby"
-
-module Langchain
-  module Utils
-    module TokenLength
-      #
-      # This class is meant to validate the length of the text passed in to OpenAI's API.
-      # It is used to validate the token length before the API call is made
-      #
-      class OpenAIValidator < BaseValidator
-        COMPLETION_TOKEN_LIMITS = {
-          # GPT-4 Turbo has a separate token limit for completion
-          # Source:
-          # https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
-          "gpt-4-1106-preview" => 4096,
-          "gpt-4-vision-preview" => 4096,
-          "gpt-3.5-turbo-1106" => 4096
-        }
-
-        # NOTE: The gpt-4-turbo-preview is an alias that will always point to the latest GPT 4 Turbo preview
-        # the future previews may have a different token limit!
-        TOKEN_LIMITS = {
-          # Source:
-          # https://platform.openai.com/docs/api-reference/embeddings
-          # https://platform.openai.com/docs/models/gpt-4
-          "text-embedding-3-large" => 8191,
-          "text-embedding-3-small" => 8191,
-          "text-embedding-ada-002" => 8191,
-          "gpt-3.5-turbo" => 16385,
-          "gpt-3.5-turbo-0301" => 4096,
-          "gpt-3.5-turbo-0613" => 4096,
-          "gpt-3.5-turbo-1106" => 16385,
-          "gpt-3.5-turbo-0125" => 16385,
-          "gpt-3.5-turbo-16k" => 16384,
-          "gpt-3.5-turbo-16k-0613" => 16384,
-          "text-davinci-003" => 4097,
-          "text-davinci-002" => 4097,
-          "code-davinci-002" => 8001,
-          "gpt-4" => 8192,
-          "gpt-4-0314" => 8192,
-          "gpt-4-0613" => 8192,
-          "gpt-4-32k" => 32768,
-          "gpt-4-32k-0314" => 32768,
-          "gpt-4-32k-0613" => 32768,
-          "gpt-4-1106-preview" => 128000,
-          "gpt-4-turbo" => 128000,
-          "gpt-4-turbo-2024-04-09" => 128000,
-          "gpt-4-turbo-preview" => 128000,
-          "gpt-4-0125-preview" => 128000,
-          "gpt-4-vision-preview" => 128000,
-          "gpt-4o" => 128000,
-          "gpt-4o-2024-05-13" => 128000,
-          "text-curie-001" => 2049,
-          "text-babbage-001" => 2049,
-          "text-ada-001" => 2049,
-          "davinci" => 2049,
-          "curie" => 2049,
-          "babbage" => 2049,
-          "ada" => 2049
-        }.freeze
-
-        #
-        # Calculate token length for a given text and model name
-        #
-        # @param text [String] The text to calculate the token length for
-        # @param model_name [String] The model name to validate against
-        # @return [Integer] The token length of the text
-        #
-        def self.token_length(text, model_name, options = {})
-          # tiktoken-ruby doesn't support text-embedding-3-large or text-embedding-3-small yet
-          if ["text-embedding-3-large", "text-embedding-3-small"].include?(model_name)
-            model_name = "text-embedding-ada-002"
-          end
-
-          encoder = Tiktoken.encoding_for_model(model_name)
-          encoder.encode(text).length
-        end
-
-        def self.token_limit(model_name)
-          TOKEN_LIMITS[model_name]
-        end
-
-        def self.completion_token_limit(model_name)
-          COMPLETION_TOKEN_LIMITS[model_name] || token_limit(model_name)
-        end
-
-        # If :max_tokens is passed in, take the lower of it and the calculated max_tokens
-        def self.validate_max_tokens!(content, model_name, options = {})
-          max_tokens = super(content, model_name, options)
-          [options[:max_tokens], max_tokens].reject(&:nil?).min
-        end
-
-        # Copied from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
-        # Return the number of tokens used by a list of messages
-        #
-        # @param messages [Array<Hash>] The messages to calculate the token length for
-        # @param model [String] The model name to validate against
-        # @return [Integer] The token length of the messages
-        #
-        def self.token_length_from_messages(messages, model_name, options = {})
-          encoding = Tiktoken.encoding_for_model(model_name)
-
-          if ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-4-0314", "gpt-4-32k-0314", "gpt-4-0613", "gpt-4-32k-0613"].include?(model_name)
-            tokens_per_message = 3
-            tokens_per_name = 1
-          elsif model_name == "gpt-3.5-turbo-0301"
-            tokens_per_message = 4 # every message follows {role/name}\n{content}\n
-            tokens_per_name = -1 # if there's a name, the role is omitted
-          elsif model_name.include?("gpt-3.5-turbo")
-            # puts "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613."
-            return token_length_from_messages(messages, "gpt-3.5-turbo-0613", options)
-          elsif model_name.include?("gpt-4")
-            # puts "Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613."
-            return token_length_from_messages(messages, "gpt-4-0613", options)
-          else
-            raise NotImplementedError.new(
-              "token_length_from_messages() is not implemented for model #{model_name}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."
-            )
-          end
-
-          num_tokens = 0
-          messages.each do |message|
-            num_tokens += tokens_per_message
-            message.each do |key, value|
-              num_tokens += encoding.encode(value).length
-              num_tokens += tokens_per_name if ["name", :name].include?(key)
-            end
-          end
-
-          num_tokens += 3 # every reply is primed with assistant
-          num_tokens
-        end
-      end
-    end
-  end
-end
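For callers upgrading to 0.14.0 who relied on this class, the message-counting approach it used (adapted from OpenAI's cookbook) can be reproduced standalone with the tiktoken_ruby gem, since the whole Langchain::Utils::TokenLength subsystem is gone in this release. A minimal sketch, assuming tiktoken_ruby is installed; the model name and messages below are illustrative:

require "tiktoken_ruby"

encoder = Tiktoken.encoding_for_model("gpt-4-0613")

messages = [
  {role: "system", content: "You are a helpful assistant."},
  {role: "user", content: "How many tokens is this?"}
]

num_tokens = 0
messages.each do |message|
  num_tokens += 3 # fixed per-message overhead for gpt-4-0613
  message.each do |_key, value|
    # role and content strings are both encoded, as in the removed validator
    num_tokens += encoder.encode(value).length
  end
end
num_tokens += 3 # every reply is primed with an assistant turn

puts num_tokens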
@@ -1,17 +0,0 @@
-# frozen_string_literal: true
-
-module Langchain
-  module Utils
-    module TokenLength
-      class TokenLimitExceeded < StandardError
-        attr_reader :token_overflow
-
-        def initialize(message = "", token_overflow = 0)
-          super(message)
-
-          @token_overflow = token_overflow
-        end
-      end
-    end
-  end
-end
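The exception carried the overflow so callers could see how far over the limit a prompt was. A hedged usage sketch against the 0.13.4 API (the code that actually raised this error lived in the also-removed BaseValidator, which is not shown in this diff; the values and message here are made up for illustration):

# Hypothetical numbers; leftover mirrors how the removed validators
# compared a computed token length against the model's limit.
max_tokens = 4096
token_length = 5000
leftover = max_tokens - token_length

begin
  if leftover.negative?
    raise Langchain::Utils::TokenLength::TokenLimitExceeded.new(
      "Text is #{leftover.abs} tokens over the #{max_tokens}-token limit",
      leftover.abs
    )
  end
rescue Langchain::Utils::TokenLength::TokenLimitExceeded => e
  puts e.token_overflow # => 904
end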