langchainrb 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +5 -3
- data/README.md +15 -7
- data/Rakefile +0 -1
- data/lib/langchain/agent/base.rb +8 -0
- data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb +15 -1
- data/lib/langchain/chat.rb +50 -0
- data/lib/langchain/llm/ai21.rb +9 -9
- data/lib/langchain/llm/base.rb +31 -4
- data/lib/langchain/llm/cohere.rb +10 -9
- data/lib/langchain/llm/google_palm.rb +63 -10
- data/lib/langchain/llm/hugging_face.rb +9 -9
- data/lib/langchain/llm/openai.rb +60 -15
- data/lib/langchain/llm/replicate.rb +16 -15
- data/lib/langchain/processors/base.rb +1 -0
- data/lib/langchain/prompt/base.rb +5 -0
- data/lib/langchain/prompt/few_shot_prompt_template.rb +45 -0
- data/lib/langchain/prompt/prompt_template.rb +31 -0
- data/lib/langchain/tool/base.rb +54 -8
- data/lib/langchain/utils/token_length/google_palm_validator.rb +69 -0
- data/lib/langchain/utils/token_length/openai_validator.rb +75 -0
- data/lib/langchain/vectorsearch/base.rb +82 -0
- data/lib/langchain/vectorsearch/hnswlib.rb +122 -0
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +51 -1
- metadata +22 -5
- data/lib/langchain/utils/token_length_validator.rb +0 -84
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: tiktoken_ruby
|
@@ -170,14 +170,14 @@ dependencies:
|
|
170
170
|
requirements:
|
171
171
|
- - "~>"
|
172
172
|
- !ruby/object:Gem::Version
|
173
|
-
version: 0.1.
|
173
|
+
version: 0.1.1
|
174
174
|
type: :development
|
175
175
|
prerelease: false
|
176
176
|
version_requirements: !ruby/object:Gem::Requirement
|
177
177
|
requirements:
|
178
178
|
- - "~>"
|
179
179
|
- !ruby/object:Gem::Version
|
180
|
-
version: 0.1.
|
180
|
+
version: 0.1.1
|
181
181
|
- !ruby/object:Gem::Dependency
|
182
182
|
name: google_search_results
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
@@ -192,6 +192,20 @@ dependencies:
|
|
192
192
|
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
194
|
version: 2.0.0
|
195
|
+
- !ruby/object:Gem::Dependency
|
196
|
+
name: hnswlib
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
198
|
+
requirements:
|
199
|
+
- - "~>"
|
200
|
+
- !ruby/object:Gem::Version
|
201
|
+
version: 0.8.1
|
202
|
+
type: :development
|
203
|
+
prerelease: false
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
205
|
+
requirements:
|
206
|
+
- - "~>"
|
207
|
+
- !ruby/object:Gem::Version
|
208
|
+
version: 0.8.1
|
195
209
|
- !ruby/object:Gem::Dependency
|
196
210
|
name: hugging-face
|
197
211
|
requirement: !ruby/object:Gem::Requirement
|
@@ -432,6 +446,7 @@ files:
|
|
432
446
|
- lib/langchain/agent/sql_query_agent/sql_query_agent.rb
|
433
447
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json
|
434
448
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json
|
449
|
+
- lib/langchain/chat.rb
|
435
450
|
- lib/langchain/data.rb
|
436
451
|
- lib/langchain/dependency_helper.rb
|
437
452
|
- lib/langchain/llm/ai21.rb
|
@@ -462,9 +477,11 @@ files:
|
|
462
477
|
- lib/langchain/tool/ruby_code_interpreter.rb
|
463
478
|
- lib/langchain/tool/serp_api.rb
|
464
479
|
- lib/langchain/tool/wikipedia.rb
|
465
|
-
- lib/langchain/utils/token_length_validator.rb
|
480
|
+
- lib/langchain/utils/token_length/google_palm_validator.rb
|
481
|
+
- lib/langchain/utils/token_length/openai_validator.rb
|
466
482
|
- lib/langchain/vectorsearch/base.rb
|
467
483
|
- lib/langchain/vectorsearch/chroma.rb
|
484
|
+
- lib/langchain/vectorsearch/hnswlib.rb
|
468
485
|
- lib/langchain/vectorsearch/milvus.rb
|
469
486
|
- lib/langchain/vectorsearch/pgvector.rb
|
470
487
|
- lib/langchain/vectorsearch/pinecone.rb
|
@@ -1,84 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "tiktoken_ruby"
|
4
|
-
|
5
|
-
module Langchain
|
6
|
-
module Utils
|
7
|
-
class TokenLimitExceeded < StandardError; end
|
8
|
-
|
9
|
-
class TokenLengthValidator
|
10
|
-
#
|
11
|
-
# This class is meant to validate the length of the text passed in to OpenAI's API.
|
12
|
-
# It is used to validate the token length before the API call is made
|
13
|
-
#
|
14
|
-
TOKEN_LIMITS = {
|
15
|
-
# Source:
|
16
|
-
# https://platform.openai.com/docs/api-reference/embeddings
|
17
|
-
# https://platform.openai.com/docs/models/gpt-4
|
18
|
-
"text-embedding-ada-002" => 8191,
|
19
|
-
"gpt-3.5-turbo" => 4096,
|
20
|
-
"gpt-3.5-turbo-0301" => 4096,
|
21
|
-
"text-davinci-003" => 4097,
|
22
|
-
"text-davinci-002" => 4097,
|
23
|
-
"code-davinci-002" => 8001,
|
24
|
-
"gpt-4" => 8192,
|
25
|
-
"gpt-4-0314" => 8192,
|
26
|
-
"gpt-4-32k" => 32768,
|
27
|
-
"gpt-4-32k-0314" => 32768,
|
28
|
-
"text-curie-001" => 2049,
|
29
|
-
"text-babbage-001" => 2049,
|
30
|
-
"text-ada-001" => 2049,
|
31
|
-
"davinci" => 2049,
|
32
|
-
"curie" => 2049,
|
33
|
-
"babbage" => 2049,
|
34
|
-
"ada" => 2049
|
35
|
-
}.freeze
|
36
|
-
|
37
|
-
# GOOGLE_PALM_TOKEN_LIMITS = {
|
38
|
-
# "chat-bison-001" => {
|
39
|
-
# "inputTokenLimit"=>4096,
|
40
|
-
# "outputTokenLimit"=>1024
|
41
|
-
# },
|
42
|
-
# "text-bison-001" => {
|
43
|
-
# "inputTokenLimit"=>8196,
|
44
|
-
# "outputTokenLimit"=>1024
|
45
|
-
# },
|
46
|
-
# "embedding-gecko-001" => {
|
47
|
-
# "inputTokenLimit"=>1024
|
48
|
-
# }
|
49
|
-
# }.freeze
|
50
|
-
|
51
|
-
#
|
52
|
-
# Calculate the `max_tokens:` parameter to be set by calculating the context length of the text minus the prompt length
|
53
|
-
#
|
54
|
-
# @param text [String] The text to validate
|
55
|
-
# @param model_name [String] The model name to validate against
|
56
|
-
# @return [Integer] Whether the text is valid or not
|
57
|
-
# @raise [TokenLimitExceeded] If the text is too long
|
58
|
-
#
|
59
|
-
def self.validate_max_tokens!(text, model_name)
|
60
|
-
text_token_length = token_length(text, model_name)
|
61
|
-
max_tokens = TOKEN_LIMITS[model_name] - text_token_length
|
62
|
-
|
63
|
-
# Raise an error even if whole prompt is equal to the model's token limit (max_tokens == 0) since not response will be returned
|
64
|
-
if max_tokens <= 0
|
65
|
-
raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{text_token_length} tokens long."
|
66
|
-
end
|
67
|
-
|
68
|
-
max_tokens
|
69
|
-
end
|
70
|
-
|
71
|
-
#
|
72
|
-
# Calculate token length for a given text and model name
|
73
|
-
#
|
74
|
-
# @param text [String] The text to validate
|
75
|
-
# @param model_name [String] The model name to validate against
|
76
|
-
# @return [Integer] The token length of the text
|
77
|
-
#
|
78
|
-
def self.token_length(text, model_name)
|
79
|
-
encoder = Tiktoken.encoding_for_model(model_name)
|
80
|
-
encoder.encode(text).length
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
end
|