langchainrb 0.3.15 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/Gemfile.lock +8 -1
  4. data/README.md +20 -20
  5. data/lib/{agent → langchain/agent}/base.rb +1 -1
  6. data/lib/{agent → langchain/agent}/chain_of_thought_agent/chain_of_thought_agent.rb +10 -10
  7. data/lib/{llm → langchain/llm}/base.rb +3 -3
  8. data/lib/{llm → langchain/llm}/cohere.rb +1 -1
  9. data/lib/{llm → langchain/llm}/google_palm.rb +3 -3
  10. data/lib/{llm → langchain/llm}/hugging_face.rb +1 -1
  11. data/lib/{llm → langchain/llm}/openai.rb +18 -6
  12. data/lib/{llm → langchain/llm}/replicate.rb +3 -3
  13. data/lib/{prompt → langchain/prompt}/base.rb +2 -2
  14. data/lib/{prompt → langchain/prompt}/few_shot_prompt_template.rb +1 -1
  15. data/lib/{prompt → langchain/prompt}/loading.rb +3 -3
  16. data/lib/{prompt → langchain/prompt}/prompt_template.rb +1 -1
  17. data/lib/{tool → langchain/tool}/base.rb +5 -5
  18. data/lib/{tool → langchain/tool}/calculator.rb +2 -2
  19. data/lib/{tool → langchain/tool}/serp_api.rb +1 -1
  20. data/lib/{tool → langchain/tool}/wikipedia.rb +1 -1
  21. data/lib/langchain/utils/token_length_validator.rb +57 -0
  22. data/lib/{vectorsearch → langchain/vectorsearch}/base.rb +5 -5
  23. data/lib/{vectorsearch → langchain/vectorsearch}/chroma.rb +1 -1
  24. data/lib/{vectorsearch → langchain/vectorsearch}/milvus.rb +1 -1
  25. data/lib/{vectorsearch → langchain/vectorsearch}/pgvector.rb +15 -4
  26. data/lib/{vectorsearch → langchain/vectorsearch}/pinecone.rb +1 -1
  27. data/lib/{vectorsearch → langchain/vectorsearch}/qdrant.rb +1 -1
  28. data/lib/{vectorsearch → langchain/vectorsearch}/weaviate.rb +1 -1
  29. data/lib/langchain.rb +39 -34
  30. data/lib/version.rb +1 -1
  31. metadata +43 -28
  32. /data/lib/{agent → langchain/agent}/chain_of_thought_agent/chain_of_thought_agent_prompt.json +0 -0
  33. /data/lib/{llm → langchain/llm}/prompts/summarize_template.json +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f855e3c0e1f0d7b59e0255004a1a806c7048da6d3fe0a8ddf10be68e36ed9ba
4
- data.tar.gz: 5758c90205c3e2bea420cf7fa0dec07638917beced60bd0482e1d803ced96c07
3
+ metadata.gz: ce8728ec2208577809174e154642db161121cb9dd49e0ec5d190d080e68b1d78
4
+ data.tar.gz: bb0e0ccc4558ca849549f495a4adfacc5f7851c786869974afdaef29f0cde3ca
5
5
  SHA512:
6
- metadata.gz: ec26f8c4257a6949d829d7f68d2175943b80c2837739bdbead8e6b61891a6738cf84ec1934caff777b63dee39f3d0111e8f29a81c37c125be1cd3ae8137b6968
7
- data.tar.gz: 31bb1aa0296dbbc8e1a1c6c2bc7236c92bf3e627935c752bd527c7f7d769da69f11c6bb63de6e0b4e0d1d8d0c739744045669220c35e253ae83eaa0e23482e6a
6
+ metadata.gz: b2b4c27e31d730563aeca70a0aa3c4cf129e69773e34f397ba057faa8298a4368c1b9f66f925188f867f1feb47b4e07f77df702fa7c6cb76ad1e1a8464b895f6
7
+ data.tar.gz: 55dd3fbc21e2cdf9bd84afcd6bb4de0f72c960dec0c6b1d2efff1f9492b3d5c7399f2d14c323597045e64eafb6f2f20992348d640317c64721fb0556f8a64126
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.4.0] - 2023-06-01
4
+ - [BREAKING] Everything is namespaced under `Langchain::` now
5
+ - Pgvector similarity search uses the cosine distance by default now
6
+ - OpenAI token length validation using tiktoken_ruby
7
+
3
8
  ## [0.3.15] - 2023-05-30
4
9
  - Drop Ruby 2.7 support. It had reached EOL.
5
10
  - Bump pgvector-ruby to 0.2
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.15)
4
+ langchainrb (0.4.0)
5
+ tiktoken_ruby (~> 0.0.5)
5
6
 
6
7
  GEM
7
8
  remote: https://rubygems.org/
@@ -205,6 +206,7 @@ GEM
205
206
  zeitwerk (~> 2.5)
206
207
  rainbow (3.1.1)
207
208
  rake (13.0.6)
209
+ rb_sys (0.9.78)
208
210
  regexp_parser (2.8.0)
209
211
  replicate-ruby (0.2.2)
210
212
  addressable
@@ -262,6 +264,11 @@ GEM
262
264
  standardrb (1.0.1)
263
265
  standard
264
266
  thor (1.2.1)
267
+ tiktoken_ruby (0.0.5)
268
+ rb_sys (~> 0.9.68)
269
+ tiktoken_ruby (0.0.5-arm64-darwin)
270
+ tiktoken_ruby (0.0.5-x86_64-darwin)
271
+ tiktoken_ruby (0.0.5-x86_64-linux)
265
272
  treetop (1.6.12)
266
273
  polyglot (~> 0.3)
267
274
  ttfunk (1.7.0)
data/README.md CHANGED
@@ -44,7 +44,7 @@ Add `gem "weaviate-ruby", "~> 0.8.0"` to your Gemfile.
44
44
 
45
45
  Pick the vector search database you'll be using and instantiate the client:
46
46
  ```ruby
47
- client = Vectorsearch::Weaviate.new(
47
+ client = Langchain::Vectorsearch::Weaviate.new(
48
48
  url: ENV["WEAVIATE_URL"],
49
49
  api_key: ENV["WEAVIATE_API_KEY"],
50
50
  llm: :openai, # or :cohere
@@ -52,10 +52,10 @@ client = Vectorsearch::Weaviate.new(
52
52
  )
53
53
 
54
54
  # You can instantiate any other supported vector search database:
55
- client = Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
56
- client = Vectorsearch::Qdrant.new(...) # `gem"qdrant-ruby", "~> 0.9.0"`
57
- client = Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
58
- client = Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
55
+ client = Langchain::Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
56
+ client = Langchain::Vectorsearch::Qdrant.new(...) # `gem"qdrant-ruby", "~> 0.9.0"`
57
+ client = Langchain::Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
58
+ client = Langchain::Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
59
59
  ```
60
60
 
61
61
  ```ruby
@@ -107,7 +107,7 @@ Add `gem "ruby-openai", "~> 4.0.0"` to your Gemfile.
107
107
 
108
108
  #### OpenAI
109
109
  ```ruby
110
- openai = LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
110
+ openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
111
111
  ```
112
112
  ```ruby
113
113
  openai.embed(text: "foo bar")
@@ -120,7 +120,7 @@ openai.complete(prompt: "What is the meaning of life?")
120
120
  Add `gem "cohere-ruby", "~> 0.9.3"` to your Gemfile.
121
121
 
122
122
  ```ruby
123
- cohere = LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
123
+ cohere = Langchain::LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
124
124
  ```
125
125
  ```ruby
126
126
  cohere.embed(text: "foo bar")
@@ -132,19 +132,19 @@ cohere.complete(prompt: "What is the meaning of life?")
132
132
  #### HuggingFace
133
133
  Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
134
134
  ```ruby
135
- cohere = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
135
+ cohere = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
136
136
  ```
137
137
 
138
138
  #### Replicate
139
139
  Add `gem "replicate-ruby", "~> 0.2.2"` to your Gemfile.
140
140
  ```ruby
141
- cohere = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
141
+ cohere = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
142
142
  ```
143
143
 
144
144
  #### Google PaLM (Pathways Language Model)
145
145
  Add `gem "google_palm_api", "~> 0.1.0"` to your Gemfile.
146
146
  ```ruby
147
- google_palm = LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
147
+ google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
148
148
  ```
149
149
 
150
150
  ### Using Prompts 📋
@@ -154,21 +154,21 @@ google_palm = LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
154
154
  Create a prompt with one input variable:
155
155
 
156
156
  ```ruby
157
- prompt = Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
157
+ prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
158
158
  prompt.format(adjective: "funny") # "Tell me a funny joke."
159
159
  ```
160
160
 
161
161
  Create a prompt with multiple input variables:
162
162
 
163
163
  ```ruby
164
- prompt = Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
164
+ prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
165
165
  prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
166
166
  ```
167
167
 
168
168
  Creating a PromptTemplate using just a prompt and no input_variables:
169
169
 
170
170
  ```ruby
171
- prompt = Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
171
+ prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
172
172
  prompt.input_variables # ["adjective", "content"]
173
173
  prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
174
174
  ```
@@ -182,7 +182,7 @@ prompt.save(file_path: "spec/fixtures/prompt/prompt_template.json")
182
182
  Loading a new prompt template using a JSON file:
183
183
 
184
184
  ```ruby
185
- prompt = Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
185
+ prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
186
186
  prompt.input_variables # ["adjective", "content"]
187
187
  ```
188
188
 
@@ -191,10 +191,10 @@ prompt.input_variables # ["adjective", "content"]
191
191
  Create a prompt with a few shot examples:
192
192
 
193
193
  ```ruby
194
- prompt = Prompt::FewShotPromptTemplate.new(
194
+ prompt = Langchain::Prompt::FewShotPromptTemplate.new(
195
195
  prefix: "Write antonyms for the following words.",
196
196
  suffix: "Input: {adjective}\nOutput:",
197
- example_prompt: Prompt::PromptTemplate.new(
197
+ example_prompt: Langchain::Prompt::PromptTemplate.new(
198
198
  input_variables: ["input", "output"],
199
199
  template: "Input: {input}\nOutput: {output}"
200
200
  ),
@@ -228,7 +228,7 @@ prompt.save(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
228
228
  Loading a new prompt template using a JSON file:
229
229
 
230
230
  ```ruby
231
- prompt = Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
231
+ prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
232
232
  prompt.prefix # "Write antonyms for the following words."
233
233
  ```
234
234
 
@@ -237,10 +237,10 @@ Agents are semi-autonomous bots that can respond to user questions and use avail
237
237
 
238
238
  #### Chain-of-Thought Agent
239
239
 
240
- Add `gem "openai-ruby"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
240
+ Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
241
241
 
242
242
  ```ruby
243
- agent = Agent::ChainOfThoughtAgent.new(llm: :openai, llm_api_key: ENV["OPENAI_API_KEY"], tools: ['search', 'calculator'])
243
+ agent = Langchain::Agent::ChainOfThoughtAgent.new(llm: :openai, llm_api_key: ENV["OPENAI_API_KEY"], tools: ['search', 'calculator'])
244
244
 
245
245
  agent.tools
246
246
  # => ["search", "calculator"]
@@ -273,7 +273,7 @@ Need to read data from various sources? Load it up.
273
273
  Just call `Langchain::Loader.load` with the path to the file or a URL you want to load.
274
274
 
275
275
  ```ruby
276
- Langchaing::Loader.load('/path/to/file.pdf')
276
+ Langchain::Loader.load('/path/to/file.pdf')
277
277
  ```
278
278
 
279
279
  or
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Agent
3
+ module Langchain::Agent
4
4
  class Base
5
5
  end
6
6
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Agent
3
+ module Langchain::Agent
4
4
  class ChainOfThoughtAgent < Base
5
5
  attr_reader :llm, :llm_api_key, :llm_client, :tools
6
6
 
@@ -11,14 +11,14 @@ module Agent
11
11
  # @param tools [Array] The tools to use
12
12
  # @return [ChainOfThoughtAgent] The Langchain::Agent::ChainOfThoughtAgent instance
13
13
  def initialize(llm:, llm_api_key:, tools: [])
14
- LLM::Base.validate_llm!(llm: llm)
15
- Tool::Base.validate_tools!(tools: tools)
14
+ Langchain::LLM::Base.validate_llm!(llm: llm)
15
+ Langchain::Tool::Base.validate_tools!(tools: tools)
16
16
 
17
17
  @llm = llm
18
18
  @llm_api_key = llm_api_key
19
19
  @tools = tools
20
20
 
21
- @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
21
+ @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
22
22
  end
23
23
 
24
24
  # Validate tools when they're re-assigned
@@ -26,7 +26,7 @@ module Agent
26
26
  # @param value [Array] The tools to use
27
27
  # @return [Array] The tools that will be used
28
28
  def tools=(value)
29
- Tool::Base.validate_tools!(tools: value)
29
+ Langchain::Tool::Base.validate_tools!(tools: value)
30
30
  @tools = value
31
31
  end
32
32
 
@@ -62,8 +62,8 @@ module Agent
62
62
  Langchain.logger.info("Agent: Using the \"#{action}\" Tool with \"#{action_input}\"")
63
63
 
64
64
  # Retrieve the Langchain::Tool::[ToolName] class and call `execute` with action_input as the input
65
- result = Tool
66
- .const_get(Tool::Base::TOOLS[action.strip])
65
+ result = Langchain::Tool
66
+ .const_get(Langchain::Tool::Base::TOOLS[action.strip])
67
67
  .execute(input: action_input)
68
68
 
69
69
  # Append the Observation to the prompt
@@ -91,7 +91,7 @@ module Agent
91
91
  question: question,
92
92
  tool_names: "[#{tools.join(", ")}]",
93
93
  tools: tools.map do |tool|
94
- "#{tool}: #{Tool.const_get(Tool::Base::TOOLS[tool]).const_get(:DESCRIPTION)}"
94
+ "#{tool}: #{Langchain::Tool.const_get(Langchain::Tool::Base::TOOLS[tool]).const_get(:DESCRIPTION)}"
95
95
  end.join("\n")
96
96
  )
97
97
  end
@@ -99,8 +99,8 @@ module Agent
99
99
  # Load the PromptTemplate from the JSON file
100
100
  # @return [PromptTemplate] PromptTemplate instance
101
101
  def prompt_template
102
- @template ||= Prompt.load_from_path(
103
- file_path: Langchain.root.join("agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
102
+ @template ||= Langchain::Prompt.load_from_path(
103
+ file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
104
104
  )
105
105
  end
106
106
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class Base
5
5
  attr_reader :client
6
6
 
@@ -42,8 +42,8 @@ module LLM
42
42
  # @param llm [Symbol] The LLM to use
43
43
  def self.validate_llm!(llm:)
44
44
  # TODO: Fix so this works when `llm` value is a string instead of a symbol
45
- unless LLM::Base::LLMS.key?(llm)
46
- raise ArgumentError, "LLM must be one of #{LLM::Base::LLMS.keys}"
45
+ unless Langchain::LLM::Base::LLMS.key?(llm)
46
+ raise ArgumentError, "LLM must be one of #{Langchain::LLM::Base::LLMS.keys}"
47
47
  end
48
48
  end
49
49
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class Cohere < Base
5
5
  DEFAULTS = {
6
6
  temperature: 0.0,
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class GooglePalm < Base
5
5
  # Wrapper around the Google PaLM (Pathways Language Model) APIs.
6
6
 
@@ -89,8 +89,8 @@ module LLM
89
89
  # @return [String] The summarization
90
90
  #
91
91
  def summarize(text:)
92
- prompt_template = Prompt.load_from_path(
93
- file_path: Langchain.root.join("llm/prompts/summarize_template.json")
92
+ prompt_template = Langchain::Prompt.load_from_path(
93
+ file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
94
94
  )
95
95
  prompt = prompt_template.format(text: text)
96
96
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class HuggingFace < Base
5
5
  # The gem does not currently accept other models:
6
6
  # https://github.com/alchaplinsky/hugging-face/blob/main/lib/hugging_face/inference_api.rb#L32-L34
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class OpenAI < Base
5
5
  DEFAULTS = {
6
6
  temperature: 0.0,
@@ -25,9 +25,13 @@ module LLM
25
25
  # @return [Array] The embedding
26
26
  #
27
27
  def embed(text:)
28
+ model = DEFAULTS[:embeddings_model_name]
29
+
30
+ Langchain::Utils::TokenLengthValidator.validate!(text, model)
31
+
28
32
  response = client.embeddings(
29
33
  parameters: {
30
- model: DEFAULTS[:embeddings_model_name],
34
+ model: model,
31
35
  input: text
32
36
  }
33
37
  )
@@ -41,8 +45,12 @@ module LLM
41
45
  # @return [String] The completion
42
46
  #
43
47
  def complete(prompt:, **params)
48
+ model = DEFAULTS[:completion_model_name]
49
+
50
+ Langchain::Utils::TokenLengthValidator.validate!(prompt, model)
51
+
44
52
  default_params = {
45
- model: DEFAULTS[:completion_model_name],
53
+ model: model,
46
54
  temperature: DEFAULTS[:temperature],
47
55
  prompt: prompt
48
56
  }
@@ -64,8 +72,12 @@ module LLM
64
72
  # @return [String] The chat completion
65
73
  #
66
74
  def chat(prompt:, **params)
75
+ model = DEFAULTS[:chat_completion_model_name]
76
+
77
+ Langchain::Utils::TokenLengthValidator.validate!(prompt, model)
78
+
67
79
  default_params = {
68
- model: DEFAULTS[:chat_completion_model_name],
80
+ model: model,
69
81
  temperature: DEFAULTS[:temperature],
70
82
  # TODO: Figure out how to introduce persisted conversations
71
83
  messages: [{role: "user", content: prompt}]
@@ -88,8 +100,8 @@ module LLM
88
100
  # @return [String] The summary
89
101
  #
90
102
  def summarize(text:)
91
- prompt_template = Prompt.load_from_path(
92
- file_path: Langchain.root.join("llm/prompts/summarize_template.json")
103
+ prompt_template = Langchain::Prompt.load_from_path(
104
+ file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
93
105
  )
94
106
  prompt = prompt_template.format(text: text)
95
107
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class Replicate < Base
5
5
  # Wrapper around Replicate.com LLM provider
6
6
  # Use it directly:
@@ -89,8 +89,8 @@ module LLM
89
89
  # @return [String] The summary
90
90
  #
91
91
  def summarize(text:)
92
- prompt_template = Prompt.load_from_path(
93
- file_path: Langchain.root.join("llm/prompts/summarize_template.json")
92
+ prompt_template = Langchain::Prompt.load_from_path(
93
+ file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
94
94
  )
95
95
  prompt = prompt_template.format(text: text)
96
96
 
@@ -3,7 +3,7 @@
3
3
  require "strscan"
4
4
  require "json"
5
5
 
6
- module Prompt
6
+ module Langchain::Prompt
7
7
  class Base
8
8
  def format(**kwargs)
9
9
  raise NotImplementedError
@@ -29,7 +29,7 @@ module Prompt
29
29
  #
30
30
  def validate(template:, input_variables:)
31
31
  input_variables_set = @input_variables.uniq
32
- variables_from_template = Prompt::Base.extract_variables_from_template(template)
32
+ variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
33
33
 
34
34
  missing_variables = variables_from_template - input_variables_set
35
35
  extra_variables = input_variables_set - variables_from_template
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Prompt
3
+ module Langchain::Prompt
4
4
  class FewShotPromptTemplate < Base
5
5
  attr_reader :examples, :example_prompt, :input_variables, :prefix, :suffix, :example_separator
6
6
 
@@ -3,10 +3,10 @@
3
3
  require "strscan"
4
4
  require "pathname"
5
5
 
6
- module Prompt
6
+ module Langchain::Prompt
7
7
  TYPE_TO_LOADER = {
8
- "prompt" => ->(config) { Prompt.load_prompt(config) },
9
- "few_shot" => ->(config) { Prompt.load_few_shot_prompt(config) }
8
+ "prompt" => ->(config) { load_prompt(config) },
9
+ "few_shot" => ->(config) { load_few_shot_prompt(config) }
10
10
  }
11
11
 
12
12
  class << self
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Prompt
3
+ module Langchain::Prompt
4
4
  class PromptTemplate < Base
5
5
  attr_reader :template, :input_variables, :validate_template
6
6
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Tool
3
+ module Langchain::Tool
4
4
  class Base
5
5
  # How to add additional Tools?
6
6
  # 1. Create a new file in lib/langchain/tool/your_tool_name.rb
@@ -10,9 +10,9 @@ module Tool
10
10
  # 4. Add your tool to the README.md
11
11
 
12
12
  TOOLS = {
13
- "calculator" => "Tool::Calculator",
14
- "search" => "Tool::SerpApi",
15
- "wikipedia" => "Tool::Wikipedia"
13
+ "calculator" => "Langchain::Tool::Calculator",
14
+ "search" => "Langchain::Tool::SerpApi",
15
+ "wikipedia" => "Langchain::Tool::Wikipedia"
16
16
  }
17
17
 
18
18
  def self.description(value)
@@ -40,7 +40,7 @@ module Tool
40
40
  # @raise [ArgumentError] If any of the tools are not supported
41
41
  #
42
42
  def self.validate_tools!(tools:)
43
- unrecognized_tools = tools - Tool::Base::TOOLS.keys
43
+ unrecognized_tools = tools - Langchain::Tool::Base::TOOLS.keys
44
44
 
45
45
  if unrecognized_tools.any?
46
46
  raise ArgumentError, "Unrecognized Tools: #{unrecognized_tools}"
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Tool
3
+ module Langchain::Tool
4
4
  class Calculator < Base
5
5
  description <<~DESC
6
6
  Useful for getting the result of a math expression.
@@ -22,7 +22,7 @@ module Tool
22
22
  rescue Eqn::ParseError, Eqn::NoVariableValueError
23
23
  # Sometimes the input is not a pure math expression, e.g: "12F in Celsius"
24
24
  # We can use the google answer box to evaluate this expression
25
- hash_results = Tool::SerpApi.execute_search(input: input)
25
+ hash_results = Langchain::Tool::SerpApi.execute_search(input: input)
26
26
  hash_results.dig(:answer_box, :to)
27
27
  end
28
28
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Tool
3
+ module Langchain::Tool
4
4
  class SerpApi < Base
5
5
  # Wrapper around SerpAPI
6
6
  # Set ENV["SERPAPI_API_KEY"] to use it
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Tool
3
+ module Langchain::Tool
4
4
  class Wikipedia < Base
5
5
  # Tool that adds the capability to search using the Wikipedia API
6
6
 
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tiktoken_ruby"
4
+
5
+ module Langchain
6
+ module Utils
7
+ class TokenLimitExceeded < StandardError; end
8
+
9
+ class TokenLengthValidator
10
+ #
11
+ # This class is meant to validate the length of the text passed in to OpenAI's API.
12
+ # It is used to validate the token length before the API call is made
13
+ #
14
+ TOKEN_LIMITS = {
15
+ # Source:
16
+ # https://platform.openai.com/docs/api-reference/embeddings
17
+ # https://platform.openai.com/docs/models/gpt-4
18
+ "text-embedding-ada-002" => 8191,
19
+ "gpt-3.5-turbo" => 4096,
20
+ "gpt-3.5-turbo-0301" => 4096,
21
+ "text-davinci-003" => 4097,
22
+ "text-davinci-002" => 4097,
23
+ "code-davinci-002" => 8001,
24
+ "gpt-4" => 8192,
25
+ "gpt-4-0314" => 8192,
26
+ "gpt-4-32k" => 32768,
27
+ "gpt-4-32k-0314" => 32768,
28
+ "text-curie-001" => 2049,
29
+ "text-babbage-001" => 2049,
30
+ "text-ada-001" => 2049,
31
+ "davinci" => 2049,
32
+ "curie" => 2049,
33
+ "babbage" => 2049,
34
+ "ada" => 2049
35
+ }.freeze
36
+
37
+ #
38
+ # Validate the length of the text passed in to OpenAI's API
39
+ #
40
+ # @param text [String] The text to validate
41
+ # @param model_name [String] The model name to validate against
42
+ # @return [Boolean] Whether the text is valid or not
43
+ # @raise [TokenLimitExceeded] If the text is too long
44
+ #
45
+ def self.validate!(text, model_name)
46
+ encoder = Tiktoken.encoding_for_model(model_name)
47
+ token_length = encoder.encode(text).length
48
+
49
+ if token_length > TOKEN_LIMITS[model_name]
50
+ raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{token_length} tokens long."
51
+ end
52
+
53
+ true
54
+ end
55
+ end
56
+ end
57
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  require "forwardable"
4
4
 
5
- module Vectorsearch
5
+ module Langchain::Vectorsearch
6
6
  class Base
7
7
  extend Forwardable
8
8
 
@@ -13,12 +13,12 @@ module Vectorsearch
13
13
  # @param llm [Symbol] The LLM to use
14
14
  # @param llm_api_key [String] The API key for the LLM
15
15
  def initialize(llm:, llm_api_key:)
16
- LLM::Base.validate_llm!(llm: llm)
16
+ Langchain::LLM::Base.validate_llm!(llm: llm)
17
17
 
18
18
  @llm = llm
19
19
  @llm_api_key = llm_api_key
20
20
 
21
- @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
21
+ @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
22
22
  end
23
23
 
24
24
  # Method supported by Vectorsearch DB to create a default schema
@@ -51,10 +51,10 @@ module Vectorsearch
51
51
  :default_dimension
52
52
 
53
53
  def generate_prompt(question:, context:)
54
- prompt_template = Prompt::FewShotPromptTemplate.new(
54
+ prompt_template = Langchain::Prompt::FewShotPromptTemplate.new(
55
55
  prefix: "Context:",
56
56
  suffix: "---\nQuestion: {question}\n---\nAnswer:",
57
- example_prompt: Prompt::PromptTemplate.new(
57
+ example_prompt: Langchain::Prompt::PromptTemplate.new(
58
58
  template: "{context}",
59
59
  input_variables: ["context"]
60
60
  ),
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Chroma < Base
5
5
  # Initialize the Chroma client
6
6
  # @param url [String] The URL of the Chroma server
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Milvus < Base
5
5
  def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
6
6
  depends_on "milvus"
@@ -1,8 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  # The PostgreSQL vector search adapter
5
5
  class Pgvector < Base
6
+ # The operators supported by the PostgreSQL vector search adapter
7
+ OPERATORS = {
8
+ "cosine_distance" => "<=>",
9
+ "euclidean_distance" => "<->"
10
+ }
11
+ DEFAULT_OPERATOR = "cosine_distance"
12
+
13
+ attr_reader :operator, :quoted_table_name
14
+
6
15
  # @param url [String] The URL of the PostgreSQL database
7
16
  # @param index_name [String] The name of the table to use for the index
8
17
  # @param llm [String] The URL of the Language Layer API
@@ -18,6 +27,8 @@ module Vectorsearch
18
27
  @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
19
28
 
20
29
  @index_name = index_name
30
+ @quoted_table_name = @client.quote_ident(index_name)
31
+ @operator = OPERATORS[DEFAULT_OPERATOR]
21
32
 
22
33
  super(llm: llm, llm_api_key: llm_api_key)
23
34
  end
@@ -31,7 +42,7 @@ module Vectorsearch
31
42
  end
32
43
  values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
33
44
  client.exec_params(
34
- "INSERT INTO #{@index_name} (content, vectors) VALUES #{values};",
45
+ "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values};",
35
46
  data
36
47
  )
37
48
  end
@@ -42,7 +53,7 @@ module Vectorsearch
42
53
  client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
43
54
  client.exec(
44
55
  <<~SQL
45
- CREATE TABLE IF NOT EXISTS #{@index_name} (
56
+ CREATE TABLE IF NOT EXISTS #{quoted_table_name} (
46
57
  id serial PRIMARY KEY,
47
58
  content TEXT,
48
59
  vectors VECTOR(#{default_dimension})
@@ -73,7 +84,7 @@ module Vectorsearch
73
84
  result = client.transaction do |conn|
74
85
  conn.exec("SET LOCAL ivfflat.probes = 10;")
75
86
  query = <<~SQL
76
- SELECT id, content FROM #{@index_name} ORDER BY vectors <-> $1 ASC LIMIT $2;
87
+ SELECT id, content FROM #{quoted_table_name} ORDER BY vectors #{operator} $1 ASC LIMIT $2;
77
88
  SQL
78
89
  conn.exec_params(query, [embedding, k])
79
90
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Pinecone < Base
5
5
  # Initialize the Pinecone client
6
6
  # @param environment [String] The environment to use
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Qdrant < Base
5
5
  # Initialize the Qdrant client
6
6
  # @param url [String] The URL of the Qdrant server
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Weaviate < Base
5
5
  # Initialize the Weaviate adapter
6
6
  # @param url [String] The URL of the Weaviate instance
data/lib/langchain.rb CHANGED
@@ -5,6 +5,7 @@ require "pathname"
5
5
 
6
6
  require_relative "./version"
7
7
  require_relative "./dependency_helper"
8
+
8
9
  module Langchain
9
10
  class << self
10
11
  attr_accessor :logger
@@ -19,6 +20,18 @@ module Langchain
19
20
  autoload :Loader, "langchain/loader"
20
21
  autoload :Data, "langchain/data"
21
22
 
23
+ module Agent
24
+ autoload :Base, "langchain/agent/base"
25
+ autoload :ChainOfThoughtAgent, "langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb"
26
+ end
27
+
28
+ module Tool
29
+ autoload :Base, "langchain/tool/base"
30
+ autoload :Calculator, "langchain/tool/calculator"
31
+ autoload :SerpApi, "langchain/tool/serp_api"
32
+ autoload :Wikipedia, "langchain/tool/wikipedia"
33
+ end
34
+
22
35
  module Processors
23
36
  autoload :Base, "langchain/processors/base"
24
37
  autoload :CSV, "langchain/processors/csv"
@@ -29,43 +42,35 @@ module Langchain
29
42
  autoload :PDF, "langchain/processors/pdf"
30
43
  autoload :Text, "langchain/processors/text"
31
44
  end
32
- end
33
-
34
- module Agent
35
- autoload :Base, "agent/base"
36
- autoload :ChainOfThoughtAgent, "agent/chain_of_thought_agent/chain_of_thought_agent.rb"
37
- end
38
45
 
39
- module Vectorsearch
40
- autoload :Base, "vectorsearch/base"
41
- autoload :Chroma, "vectorsearch/chroma"
42
- autoload :Milvus, "vectorsearch/milvus"
43
- autoload :Pinecone, "vectorsearch/pinecone"
44
- autoload :Pgvector, "vectorsearch/pgvector"
45
- autoload :Qdrant, "vectorsearch/qdrant"
46
- autoload :Weaviate, "vectorsearch/weaviate"
47
- end
46
+ module Utils
47
+ autoload :TokenLengthValidator, "langchain/utils/token_length_validator"
48
+ end
48
49
 
49
- module LLM
50
- autoload :Base, "llm/base"
51
- autoload :Cohere, "llm/cohere"
52
- autoload :GooglePalm, "llm/google_palm"
53
- autoload :HuggingFace, "llm/hugging_face"
54
- autoload :OpenAI, "llm/openai"
55
- autoload :Replicate, "llm/replicate"
56
- end
50
+ module Vectorsearch
51
+ autoload :Base, "langchain/vectorsearch/base"
52
+ autoload :Chroma, "langchain/vectorsearch/chroma"
53
+ autoload :Milvus, "langchain/vectorsearch/milvus"
54
+ autoload :Pinecone, "langchain/vectorsearch/pinecone"
55
+ autoload :Pgvector, "langchain/vectorsearch/pgvector"
56
+ autoload :Qdrant, "langchain/vectorsearch/qdrant"
57
+ autoload :Weaviate, "langchain/vectorsearch/weaviate"
58
+ end
57
59
 
58
- module Prompt
59
- require_relative "prompt/loading"
60
+ module LLM
61
+ autoload :Base, "langchain/llm/base"
62
+ autoload :Cohere, "langchain/llm/cohere"
63
+ autoload :GooglePalm, "langchain/llm/google_palm"
64
+ autoload :HuggingFace, "langchain/llm/hugging_face"
65
+ autoload :OpenAI, "langchain/llm/openai"
66
+ autoload :Replicate, "langchain/llm/replicate"
67
+ end
60
68
 
61
- autoload :Base, "prompt/base"
62
- autoload :PromptTemplate, "prompt/prompt_template"
63
- autoload :FewShotPromptTemplate, "prompt/few_shot_prompt_template"
64
- end
69
+ module Prompt
70
+ require_relative "langchain/prompt/loading"
65
71
 
66
- module Tool
67
- autoload :Base, "tool/base"
68
- autoload :Calculator, "tool/calculator"
69
- autoload :SerpApi, "tool/serp_api"
70
- autoload :Wikipedia, "tool/wikipedia"
72
+ autoload :Base, "langchain/prompt/base"
73
+ autoload :PromptTemplate, "langchain/prompt/prompt_template"
74
+ autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
75
+ end
71
76
  end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.15"
4
+ VERSION = "0.4.0"
5
5
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.15
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-30 00:00:00.000000000 Z
11
+ date: 2023-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: tiktoken_ruby
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.0.5
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.5
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: dotenv-rails
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -311,12 +325,19 @@ files:
311
325
  - examples/store_and_query_with_pinecone.rb
312
326
  - examples/store_and_query_with_qdrant.rb
313
327
  - examples/store_and_query_with_weaviate.rb
314
- - lib/agent/base.rb
315
- - lib/agent/chain_of_thought_agent/chain_of_thought_agent.rb
316
- - lib/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
317
328
  - lib/dependency_helper.rb
318
329
  - lib/langchain.rb
330
+ - lib/langchain/agent/base.rb
331
+ - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb
332
+ - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
319
333
  - lib/langchain/data.rb
334
+ - lib/langchain/llm/base.rb
335
+ - lib/langchain/llm/cohere.rb
336
+ - lib/langchain/llm/google_palm.rb
337
+ - lib/langchain/llm/hugging_face.rb
338
+ - lib/langchain/llm/openai.rb
339
+ - lib/langchain/llm/prompts/summarize_template.json
340
+ - lib/langchain/llm/replicate.rb
320
341
  - lib/langchain/loader.rb
321
342
  - lib/langchain/processors/base.rb
322
343
  - lib/langchain/processors/csv.rb
@@ -326,29 +347,23 @@ files:
326
347
  - lib/langchain/processors/jsonl.rb
327
348
  - lib/langchain/processors/pdf.rb
328
349
  - lib/langchain/processors/text.rb
350
+ - lib/langchain/prompt/base.rb
351
+ - lib/langchain/prompt/few_shot_prompt_template.rb
352
+ - lib/langchain/prompt/loading.rb
353
+ - lib/langchain/prompt/prompt_template.rb
354
+ - lib/langchain/tool/base.rb
355
+ - lib/langchain/tool/calculator.rb
356
+ - lib/langchain/tool/serp_api.rb
357
+ - lib/langchain/tool/wikipedia.rb
358
+ - lib/langchain/utils/token_length_validator.rb
359
+ - lib/langchain/vectorsearch/base.rb
360
+ - lib/langchain/vectorsearch/chroma.rb
361
+ - lib/langchain/vectorsearch/milvus.rb
362
+ - lib/langchain/vectorsearch/pgvector.rb
363
+ - lib/langchain/vectorsearch/pinecone.rb
364
+ - lib/langchain/vectorsearch/qdrant.rb
365
+ - lib/langchain/vectorsearch/weaviate.rb
329
366
  - lib/langchainrb.rb
330
- - lib/llm/base.rb
331
- - lib/llm/cohere.rb
332
- - lib/llm/google_palm.rb
333
- - lib/llm/hugging_face.rb
334
- - lib/llm/openai.rb
335
- - lib/llm/prompts/summarize_template.json
336
- - lib/llm/replicate.rb
337
- - lib/prompt/base.rb
338
- - lib/prompt/few_shot_prompt_template.rb
339
- - lib/prompt/loading.rb
340
- - lib/prompt/prompt_template.rb
341
- - lib/tool/base.rb
342
- - lib/tool/calculator.rb
343
- - lib/tool/serp_api.rb
344
- - lib/tool/wikipedia.rb
345
- - lib/vectorsearch/base.rb
346
- - lib/vectorsearch/chroma.rb
347
- - lib/vectorsearch/milvus.rb
348
- - lib/vectorsearch/pgvector.rb
349
- - lib/vectorsearch/pinecone.rb
350
- - lib/vectorsearch/qdrant.rb
351
- - lib/vectorsearch/weaviate.rb
352
367
  - lib/version.rb
353
368
  - sig/langchain.rbs
354
369
  homepage: https://rubygems.org/gems/langchainrb
@@ -373,7 +388,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
373
388
  - !ruby/object:Gem::Version
374
389
  version: '0'
375
390
  requirements: []
376
- rubygems_version: 3.3.7
391
+ rubygems_version: 3.2.3
377
392
  signing_key:
378
393
  specification_version: 4
379
394
  summary: Build LLM-backed Ruby applications with Ruby's LangChain