langchainrb 0.5.2 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-08 00:00:00.000000000 Z
11
+ date: 2023-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: tiktoken_ruby
@@ -170,14 +170,14 @@ dependencies:
170
170
  requirements:
171
171
  - - "~>"
172
172
  - !ruby/object:Gem::Version
173
- version: 0.1.0
173
+ version: 0.1.1
174
174
  type: :development
175
175
  prerelease: false
176
176
  version_requirements: !ruby/object:Gem::Requirement
177
177
  requirements:
178
178
  - - "~>"
179
179
  - !ruby/object:Gem::Version
180
- version: 0.1.0
180
+ version: 0.1.1
181
181
  - !ruby/object:Gem::Dependency
182
182
  name: google_search_results
183
183
  requirement: !ruby/object:Gem::Requirement
@@ -192,6 +192,20 @@ dependencies:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
194
  version: 2.0.0
195
+ - !ruby/object:Gem::Dependency
196
+ name: hnswlib
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: 0.8.1
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: 0.8.1
195
209
  - !ruby/object:Gem::Dependency
196
210
  name: hugging-face
197
211
  requirement: !ruby/object:Gem::Requirement
@@ -432,6 +446,7 @@ files:
432
446
  - lib/langchain/agent/sql_query_agent/sql_query_agent.rb
433
447
  - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json
434
448
  - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json
449
+ - lib/langchain/chat.rb
435
450
  - lib/langchain/data.rb
436
451
  - lib/langchain/dependency_helper.rb
437
452
  - lib/langchain/llm/ai21.rb
@@ -462,9 +477,11 @@ files:
462
477
  - lib/langchain/tool/ruby_code_interpreter.rb
463
478
  - lib/langchain/tool/serp_api.rb
464
479
  - lib/langchain/tool/wikipedia.rb
465
- - lib/langchain/utils/token_length_validator.rb
480
+ - lib/langchain/utils/token_length/google_palm_validator.rb
481
+ - lib/langchain/utils/token_length/openai_validator.rb
466
482
  - lib/langchain/vectorsearch/base.rb
467
483
  - lib/langchain/vectorsearch/chroma.rb
484
+ - lib/langchain/vectorsearch/hnswlib.rb
468
485
  - lib/langchain/vectorsearch/milvus.rb
469
486
  - lib/langchain/vectorsearch/pgvector.rb
470
487
  - lib/langchain/vectorsearch/pinecone.rb
@@ -1,84 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "tiktoken_ruby"
4
-
5
- module Langchain
6
- module Utils
7
- class TokenLimitExceeded < StandardError; end
8
-
9
- class TokenLengthValidator
10
- #
11
- # This class is meant to validate the length of the text passed in to OpenAI's API.
12
- # It is used to validate the token length before the API call is made
13
- #
14
- TOKEN_LIMITS = {
15
- # Source:
16
- # https://platform.openai.com/docs/api-reference/embeddings
17
- # https://platform.openai.com/docs/models/gpt-4
18
- "text-embedding-ada-002" => 8191,
19
- "gpt-3.5-turbo" => 4096,
20
- "gpt-3.5-turbo-0301" => 4096,
21
- "text-davinci-003" => 4097,
22
- "text-davinci-002" => 4097,
23
- "code-davinci-002" => 8001,
24
- "gpt-4" => 8192,
25
- "gpt-4-0314" => 8192,
26
- "gpt-4-32k" => 32768,
27
- "gpt-4-32k-0314" => 32768,
28
- "text-curie-001" => 2049,
29
- "text-babbage-001" => 2049,
30
- "text-ada-001" => 2049,
31
- "davinci" => 2049,
32
- "curie" => 2049,
33
- "babbage" => 2049,
34
- "ada" => 2049
35
- }.freeze
36
-
37
- # GOOGLE_PALM_TOKEN_LIMITS = {
38
- # "chat-bison-001" => {
39
- # "inputTokenLimit"=>4096,
40
- # "outputTokenLimit"=>1024
41
- # },
42
- # "text-bison-001" => {
43
- # "inputTokenLimit"=>8196,
44
- # "outputTokenLimit"=>1024
45
- # },
46
- # "embedding-gecko-001" => {
47
- # "inputTokenLimit"=>1024
48
- # }
49
- # }.freeze
50
-
51
- #
52
- # Calculate the `max_tokens:` parameter to be set: the model's context length (token limit) minus the token length of the given text
53
- #
54
- # @param text [String] The text to validate
55
- # @param model_name [String] The model name to validate against
56
- # @return [Integer] The number of tokens left for the response (the model's token limit minus the text's token length)
57
- # @raise [TokenLimitExceeded] If the text is too long
58
- #
59
- def self.validate_max_tokens!(text, model_name)
60
- text_token_length = token_length(text, model_name)
61
- max_tokens = TOKEN_LIMITS[model_name] - text_token_length
62
-
63
- # Raise an error even if the whole prompt is equal to the model's token limit (max_tokens == 0), since no response will be returned
64
- if max_tokens <= 0
65
- raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{text_token_length} tokens long."
66
- end
67
-
68
- max_tokens
69
- end
70
-
71
- #
72
- # Calculate token length for a given text and model name
73
- #
74
- # @param text [String] The text to validate
75
- # @param model_name [String] The model name to validate against
76
- # @return [Integer] The token length of the text
77
- #
78
- def self.token_length(text, model_name)
79
- encoder = Tiktoken.encoding_for_model(model_name)
80
- encoder.encode(text).length
81
- end
82
- end
83
- end
84
- end