langchainrb 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-08 00:00:00.000000000 Z
11
+ date: 2023-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: tiktoken_ruby
@@ -170,14 +170,14 @@ dependencies:
170
170
  requirements:
171
171
  - - "~>"
172
172
  - !ruby/object:Gem::Version
173
- version: 0.1.0
173
+ version: 0.1.1
174
174
  type: :development
175
175
  prerelease: false
176
176
  version_requirements: !ruby/object:Gem::Requirement
177
177
  requirements:
178
178
  - - "~>"
179
179
  - !ruby/object:Gem::Version
180
- version: 0.1.0
180
+ version: 0.1.1
181
181
  - !ruby/object:Gem::Dependency
182
182
  name: google_search_results
183
183
  requirement: !ruby/object:Gem::Requirement
@@ -192,6 +192,20 @@ dependencies:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
194
  version: 2.0.0
195
+ - !ruby/object:Gem::Dependency
196
+ name: hnswlib
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - "~>"
200
+ - !ruby/object:Gem::Version
201
+ version: 0.8.1
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - "~>"
207
+ - !ruby/object:Gem::Version
208
+ version: 0.8.1
195
209
  - !ruby/object:Gem::Dependency
196
210
  name: hugging-face
197
211
  requirement: !ruby/object:Gem::Requirement
@@ -432,6 +446,7 @@ files:
432
446
  - lib/langchain/agent/sql_query_agent/sql_query_agent.rb
433
447
  - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json
434
448
  - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json
449
+ - lib/langchain/chat.rb
435
450
  - lib/langchain/data.rb
436
451
  - lib/langchain/dependency_helper.rb
437
452
  - lib/langchain/llm/ai21.rb
@@ -462,9 +477,11 @@ files:
462
477
  - lib/langchain/tool/ruby_code_interpreter.rb
463
478
  - lib/langchain/tool/serp_api.rb
464
479
  - lib/langchain/tool/wikipedia.rb
465
- - lib/langchain/utils/token_length_validator.rb
480
+ - lib/langchain/utils/token_length/google_palm_validator.rb
481
+ - lib/langchain/utils/token_length/openai_validator.rb
466
482
  - lib/langchain/vectorsearch/base.rb
467
483
  - lib/langchain/vectorsearch/chroma.rb
484
+ - lib/langchain/vectorsearch/hnswlib.rb
468
485
  - lib/langchain/vectorsearch/milvus.rb
469
486
  - lib/langchain/vectorsearch/pgvector.rb
470
487
  - lib/langchain/vectorsearch/pinecone.rb
@@ -1,84 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "tiktoken_ruby"
4
-
5
- module Langchain
6
- module Utils
7
- class TokenLimitExceeded < StandardError; end
8
-
9
- class TokenLengthValidator
10
- #
11
- # This class is meant to validate the length of the text passed in to OpenAI's API.
12
- # It is used to validate the token length before the API call is made
13
- #
14
- TOKEN_LIMITS = {
15
- # Source:
16
- # https://platform.openai.com/docs/api-reference/embeddings
17
- # https://platform.openai.com/docs/models/gpt-4
18
- "text-embedding-ada-002" => 8191,
19
- "gpt-3.5-turbo" => 4096,
20
- "gpt-3.5-turbo-0301" => 4096,
21
- "text-davinci-003" => 4097,
22
- "text-davinci-002" => 4097,
23
- "code-davinci-002" => 8001,
24
- "gpt-4" => 8192,
25
- "gpt-4-0314" => 8192,
26
- "gpt-4-32k" => 32768,
27
- "gpt-4-32k-0314" => 32768,
28
- "text-curie-001" => 2049,
29
- "text-babbage-001" => 2049,
30
- "text-ada-001" => 2049,
31
- "davinci" => 2049,
32
- "curie" => 2049,
33
- "babbage" => 2049,
34
- "ada" => 2049
35
- }.freeze
36
-
37
- # GOOGLE_PALM_TOKEN_LIMITS = {
38
- # "chat-bison-001" => {
39
- # "inputTokenLimit"=>4096,
40
- # "outputTokenLimit"=>1024
41
- # },
42
- # "text-bison-001" => {
43
- # "inputTokenLimit"=>8196,
44
- # "outputTokenLimit"=>1024
45
- # },
46
- # "embedding-gecko-001" => {
47
- # "inputTokenLimit"=>1024
48
- # }
49
- # }.freeze
50
-
51
- #
52
- # Calculate the `max_tokens:` parameter to be set: the model's token limit minus the token length of the given text
53
- #
54
- # @param text [String] The text to validate
55
- # @param model_name [String] The model name to validate against
56
- # @return [Integer] The number of tokens remaining for the completion (the model's token limit minus the text's token length)
57
- # @raise [TokenLimitExceeded] If the text is too long
58
- #
59
- def self.validate_max_tokens!(text, model_name)
60
- text_token_length = token_length(text, model_name)
61
- max_tokens = TOKEN_LIMITS[model_name] - text_token_length
62
-
63
- # Raise an error even if the whole prompt is equal to the model's token limit (max_tokens == 0), since no response will be returned
64
- if max_tokens <= 0
65
- raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{text_token_length} tokens long."
66
- end
67
-
68
- max_tokens
69
- end
70
-
71
- #
72
- # Calculate token length for a given text and model name
73
- #
74
- # @param text [String] The text to validate
75
- # @param model_name [String] The model name to validate against
76
- # @return [Integer] The token length of the text
77
- #
78
- def self.token_length(text, model_name)
79
- encoder = Tiktoken.encoding_for_model(model_name)
80
- encoder.encode(text).length
81
- end
82
- end
83
- end
84
- end