langchainrb 0.3.15 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/Gemfile.lock +8 -1
  4. data/README.md +20 -20
  5. data/lib/{agent → langchain/agent}/base.rb +1 -1
  6. data/lib/{agent → langchain/agent}/chain_of_thought_agent/chain_of_thought_agent.rb +10 -10
  7. data/lib/{llm → langchain/llm}/base.rb +3 -3
  8. data/lib/{llm → langchain/llm}/cohere.rb +1 -1
  9. data/lib/{llm → langchain/llm}/google_palm.rb +3 -3
  10. data/lib/{llm → langchain/llm}/hugging_face.rb +1 -1
  11. data/lib/{llm → langchain/llm}/openai.rb +18 -6
  12. data/lib/{llm → langchain/llm}/replicate.rb +3 -3
  13. data/lib/{prompt → langchain/prompt}/base.rb +2 -2
  14. data/lib/{prompt → langchain/prompt}/few_shot_prompt_template.rb +1 -1
  15. data/lib/{prompt → langchain/prompt}/loading.rb +3 -3
  16. data/lib/{prompt → langchain/prompt}/prompt_template.rb +1 -1
  17. data/lib/{tool → langchain/tool}/base.rb +5 -5
  18. data/lib/{tool → langchain/tool}/calculator.rb +2 -2
  19. data/lib/{tool → langchain/tool}/serp_api.rb +1 -1
  20. data/lib/{tool → langchain/tool}/wikipedia.rb +1 -1
  21. data/lib/langchain/utils/token_length_validator.rb +57 -0
  22. data/lib/{vectorsearch → langchain/vectorsearch}/base.rb +5 -5
  23. data/lib/{vectorsearch → langchain/vectorsearch}/chroma.rb +1 -1
  24. data/lib/{vectorsearch → langchain/vectorsearch}/milvus.rb +1 -1
  25. data/lib/{vectorsearch → langchain/vectorsearch}/pgvector.rb +15 -4
  26. data/lib/{vectorsearch → langchain/vectorsearch}/pinecone.rb +1 -1
  27. data/lib/{vectorsearch → langchain/vectorsearch}/qdrant.rb +1 -1
  28. data/lib/{vectorsearch → langchain/vectorsearch}/weaviate.rb +1 -1
  29. data/lib/langchain.rb +39 -34
  30. data/lib/version.rb +1 -1
  31. metadata +43 -28
  32. /data/lib/{agent → langchain/agent}/chain_of_thought_agent/chain_of_thought_agent_prompt.json +0 -0
  33. /data/lib/{llm → langchain/llm}/prompts/summarize_template.json +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f855e3c0e1f0d7b59e0255004a1a806c7048da6d3fe0a8ddf10be68e36ed9ba
4
- data.tar.gz: 5758c90205c3e2bea420cf7fa0dec07638917beced60bd0482e1d803ced96c07
3
+ metadata.gz: ce8728ec2208577809174e154642db161121cb9dd49e0ec5d190d080e68b1d78
4
+ data.tar.gz: bb0e0ccc4558ca849549f495a4adfacc5f7851c786869974afdaef29f0cde3ca
5
5
  SHA512:
6
- metadata.gz: ec26f8c4257a6949d829d7f68d2175943b80c2837739bdbead8e6b61891a6738cf84ec1934caff777b63dee39f3d0111e8f29a81c37c125be1cd3ae8137b6968
7
- data.tar.gz: 31bb1aa0296dbbc8e1a1c6c2bc7236c92bf3e627935c752bd527c7f7d769da69f11c6bb63de6e0b4e0d1d8d0c739744045669220c35e253ae83eaa0e23482e6a
6
+ metadata.gz: b2b4c27e31d730563aeca70a0aa3c4cf129e69773e34f397ba057faa8298a4368c1b9f66f925188f867f1feb47b4e07f77df702fa7c6cb76ad1e1a8464b895f6
7
+ data.tar.gz: 55dd3fbc21e2cdf9bd84afcd6bb4de0f72c960dec0c6b1d2efff1f9492b3d5c7399f2d14c323597045e64eafb6f2f20992348d640317c64721fb0556f8a64126
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.4.0] - 2023-06-01
4
+ - [BREAKING] Everything is namespaced under `Langchain::` now
5
+ - Pgvector similarity search uses the cosine distance by default now
6
+ - OpenAI token length validation using tiktoken_ruby
7
+
3
8
  ## [0.3.15] - 2023-05-30
4
9
  - Drop Ruby 2.7 support. It had reached EOL.
5
10
  - Bump pgvector-ruby to 0.2
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.15)
4
+ langchainrb (0.4.0)
5
+ tiktoken_ruby (~> 0.0.5)
5
6
 
6
7
  GEM
7
8
  remote: https://rubygems.org/
@@ -205,6 +206,7 @@ GEM
205
206
  zeitwerk (~> 2.5)
206
207
  rainbow (3.1.1)
207
208
  rake (13.0.6)
209
+ rb_sys (0.9.78)
208
210
  regexp_parser (2.8.0)
209
211
  replicate-ruby (0.2.2)
210
212
  addressable
@@ -262,6 +264,11 @@ GEM
262
264
  standardrb (1.0.1)
263
265
  standard
264
266
  thor (1.2.1)
267
+ tiktoken_ruby (0.0.5)
268
+ rb_sys (~> 0.9.68)
269
+ tiktoken_ruby (0.0.5-arm64-darwin)
270
+ tiktoken_ruby (0.0.5-x86_64-darwin)
271
+ tiktoken_ruby (0.0.5-x86_64-linux)
265
272
  treetop (1.6.12)
266
273
  polyglot (~> 0.3)
267
274
  ttfunk (1.7.0)
data/README.md CHANGED
@@ -44,7 +44,7 @@ Add `gem "weaviate-ruby", "~> 0.8.0"` to your Gemfile.
44
44
 
45
45
  Pick the vector search database you'll be using and instantiate the client:
46
46
  ```ruby
47
- client = Vectorsearch::Weaviate.new(
47
+ client = Langchain::Vectorsearch::Weaviate.new(
48
48
  url: ENV["WEAVIATE_URL"],
49
49
  api_key: ENV["WEAVIATE_API_KEY"],
50
50
  llm: :openai, # or :cohere
@@ -52,10 +52,10 @@ client = Vectorsearch::Weaviate.new(
52
52
  )
53
53
 
54
54
  # You can instantiate any other supported vector search database:
55
- client = Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
56
- client = Vectorsearch::Qdrant.new(...) # `gem"qdrant-ruby", "~> 0.9.0"`
57
- client = Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
58
- client = Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
55
+ client = Langchain::Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
56
+ client = Langchain::Vectorsearch::Qdrant.new(...) # `gem"qdrant-ruby", "~> 0.9.0"`
57
+ client = Langchain::Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
58
+ client = Langchain::Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
59
59
  ```
60
60
 
61
61
  ```ruby
@@ -107,7 +107,7 @@ Add `gem "ruby-openai", "~> 4.0.0"` to your Gemfile.
107
107
 
108
108
  #### OpenAI
109
109
  ```ruby
110
- openai = LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
110
+ openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
111
111
  ```
112
112
  ```ruby
113
113
  openai.embed(text: "foo bar")
@@ -120,7 +120,7 @@ openai.complete(prompt: "What is the meaning of life?")
120
120
  Add `gem "cohere-ruby", "~> 0.9.3"` to your Gemfile.
121
121
 
122
122
  ```ruby
123
- cohere = LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
123
+ cohere = Langchain::LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
124
124
  ```
125
125
  ```ruby
126
126
  cohere.embed(text: "foo bar")
@@ -132,19 +132,19 @@ cohere.complete(prompt: "What is the meaning of life?")
132
132
  #### HuggingFace
133
133
  Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
134
134
  ```ruby
135
- cohere = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
135
+ cohere = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
136
136
  ```
137
137
 
138
138
  #### Replicate
139
139
  Add `gem "replicate-ruby", "~> 0.2.2"` to your Gemfile.
140
140
  ```ruby
141
- cohere = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
141
+ cohere = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
142
142
  ```
143
143
 
144
144
  #### Google PaLM (Pathways Language Model)
145
145
  Add `gem "google_palm_api", "~> 0.1.0"` to your Gemfile.
146
146
  ```ruby
147
- google_palm = LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
147
+ google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
148
148
  ```
149
149
 
150
150
  ### Using Prompts 📋
@@ -154,21 +154,21 @@ google_palm = LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
154
154
  Create a prompt with one input variable:
155
155
 
156
156
  ```ruby
157
- prompt = Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
157
+ prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
158
158
  prompt.format(adjective: "funny") # "Tell me a funny joke."
159
159
  ```
160
160
 
161
161
  Create a prompt with multiple input variables:
162
162
 
163
163
  ```ruby
164
- prompt = Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
164
+ prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
165
165
  prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
166
166
  ```
167
167
 
168
168
  Creating a PromptTemplate using just a prompt and no input_variables:
169
169
 
170
170
  ```ruby
171
- prompt = Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
171
+ prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
172
172
  prompt.input_variables # ["adjective", "content"]
173
173
  prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
174
174
  ```
@@ -182,7 +182,7 @@ prompt.save(file_path: "spec/fixtures/prompt/prompt_template.json")
182
182
  Loading a new prompt template using a JSON file:
183
183
 
184
184
  ```ruby
185
- prompt = Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
185
+ prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
186
186
  prompt.input_variables # ["adjective", "content"]
187
187
  ```
188
188
 
@@ -191,10 +191,10 @@ prompt.input_variables # ["adjective", "content"]
191
191
  Create a prompt with a few shot examples:
192
192
 
193
193
  ```ruby
194
- prompt = Prompt::FewShotPromptTemplate.new(
194
+ prompt = Langchain::Prompt::FewShotPromptTemplate.new(
195
195
  prefix: "Write antonyms for the following words.",
196
196
  suffix: "Input: {adjective}\nOutput:",
197
- example_prompt: Prompt::PromptTemplate.new(
197
+ example_prompt: Langchain::Prompt::PromptTemplate.new(
198
198
  input_variables: ["input", "output"],
199
199
  template: "Input: {input}\nOutput: {output}"
200
200
  ),
@@ -228,7 +228,7 @@ prompt.save(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
228
228
  Loading a new prompt template using a JSON file:
229
229
 
230
230
  ```ruby
231
- prompt = Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
231
+ prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
232
232
  prompt.prefix # "Write antonyms for the following words."
233
233
  ```
234
234
 
@@ -237,10 +237,10 @@ Agents are semi-autonomous bots that can respond to user questions and use avail
237
237
 
238
238
  #### Chain-of-Thought Agent
239
239
 
240
- Add `gem "openai-ruby"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
240
+ Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
241
241
 
242
242
  ```ruby
243
- agent = Agent::ChainOfThoughtAgent.new(llm: :openai, llm_api_key: ENV["OPENAI_API_KEY"], tools: ['search', 'calculator'])
243
+ agent = Langchain::Agent::ChainOfThoughtAgent.new(llm: :openai, llm_api_key: ENV["OPENAI_API_KEY"], tools: ['search', 'calculator'])
244
244
 
245
245
  agent.tools
246
246
  # => ["search", "calculator"]
@@ -273,7 +273,7 @@ Need to read data from various sources? Load it up.
273
273
  Just call `Langchain::Loader.load` with the path to the file or a URL you want to load.
274
274
 
275
275
  ```ruby
276
- Langchaing::Loader.load('/path/to/file.pdf')
276
+ Langchain::Loader.load('/path/to/file.pdf')
277
277
  ```
278
278
 
279
279
  or
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Agent
3
+ module Langchain::Agent
4
4
  class Base
5
5
  end
6
6
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Agent
3
+ module Langchain::Agent
4
4
  class ChainOfThoughtAgent < Base
5
5
  attr_reader :llm, :llm_api_key, :llm_client, :tools
6
6
 
@@ -11,14 +11,14 @@ module Agent
11
11
  # @param tools [Array] The tools to use
12
12
  # @return [ChainOfThoughtAgent] The Langchain::Agent::ChainOfThoughtAgent instance
13
13
  def initialize(llm:, llm_api_key:, tools: [])
14
- LLM::Base.validate_llm!(llm: llm)
15
- Tool::Base.validate_tools!(tools: tools)
14
+ Langchain::LLM::Base.validate_llm!(llm: llm)
15
+ Langchain::Tool::Base.validate_tools!(tools: tools)
16
16
 
17
17
  @llm = llm
18
18
  @llm_api_key = llm_api_key
19
19
  @tools = tools
20
20
 
21
- @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
21
+ @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
22
22
  end
23
23
 
24
24
  # Validate tools when they're re-assigned
@@ -26,7 +26,7 @@ module Agent
26
26
  # @param value [Array] The tools to use
27
27
  # @return [Array] The tools that will be used
28
28
  def tools=(value)
29
- Tool::Base.validate_tools!(tools: value)
29
+ Langchain::Tool::Base.validate_tools!(tools: value)
30
30
  @tools = value
31
31
  end
32
32
 
@@ -62,8 +62,8 @@ module Agent
62
62
  Langchain.logger.info("Agent: Using the \"#{action}\" Tool with \"#{action_input}\"")
63
63
 
64
64
  # Retrieve the Langchain::Tool::[ToolName] class and call `execute` with action_input as the input
65
- result = Tool
66
- .const_get(Tool::Base::TOOLS[action.strip])
65
+ result = Langchain::Tool
66
+ .const_get(Langchain::Tool::Base::TOOLS[action.strip])
67
67
  .execute(input: action_input)
68
68
 
69
69
  # Append the Observation to the prompt
@@ -91,7 +91,7 @@ module Agent
91
91
  question: question,
92
92
  tool_names: "[#{tools.join(", ")}]",
93
93
  tools: tools.map do |tool|
94
- "#{tool}: #{Tool.const_get(Tool::Base::TOOLS[tool]).const_get(:DESCRIPTION)}"
94
+ "#{tool}: #{Langchain::Tool.const_get(Langchain::Tool::Base::TOOLS[tool]).const_get(:DESCRIPTION)}"
95
95
  end.join("\n")
96
96
  )
97
97
  end
@@ -99,8 +99,8 @@ module Agent
99
99
  # Load the PromptTemplate from the JSON file
100
100
  # @return [PromptTemplate] PromptTemplate instance
101
101
  def prompt_template
102
- @template ||= Prompt.load_from_path(
103
- file_path: Langchain.root.join("agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
102
+ @template ||= Langchain::Prompt.load_from_path(
103
+ file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
104
104
  )
105
105
  end
106
106
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class Base
5
5
  attr_reader :client
6
6
 
@@ -42,8 +42,8 @@ module LLM
42
42
  # @param llm [Symbol] The LLM to use
43
43
  def self.validate_llm!(llm:)
44
44
  # TODO: Fix so this works when `llm` value is a string instead of a symbol
45
- unless LLM::Base::LLMS.key?(llm)
46
- raise ArgumentError, "LLM must be one of #{LLM::Base::LLMS.keys}"
45
+ unless Langchain::LLM::Base::LLMS.key?(llm)
46
+ raise ArgumentError, "LLM must be one of #{Langchain::LLM::Base::LLMS.keys}"
47
47
  end
48
48
  end
49
49
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class Cohere < Base
5
5
  DEFAULTS = {
6
6
  temperature: 0.0,
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class GooglePalm < Base
5
5
  # Wrapper around the Google PaLM (Pathways Language Model) APIs.
6
6
 
@@ -89,8 +89,8 @@ module LLM
89
89
  # @return [String] The summarization
90
90
  #
91
91
  def summarize(text:)
92
- prompt_template = Prompt.load_from_path(
93
- file_path: Langchain.root.join("llm/prompts/summarize_template.json")
92
+ prompt_template = Langchain::Prompt.load_from_path(
93
+ file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
94
94
  )
95
95
  prompt = prompt_template.format(text: text)
96
96
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class HuggingFace < Base
5
5
  # The gem does not currently accept other models:
6
6
  # https://github.com/alchaplinsky/hugging-face/blob/main/lib/hugging_face/inference_api.rb#L32-L34
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class OpenAI < Base
5
5
  DEFAULTS = {
6
6
  temperature: 0.0,
@@ -25,9 +25,13 @@ module LLM
25
25
  # @return [Array] The embedding
26
26
  #
27
27
  def embed(text:)
28
+ model = DEFAULTS[:embeddings_model_name]
29
+
30
+ Langchain::Utils::TokenLengthValidator.validate!(text, model)
31
+
28
32
  response = client.embeddings(
29
33
  parameters: {
30
- model: DEFAULTS[:embeddings_model_name],
34
+ model: model,
31
35
  input: text
32
36
  }
33
37
  )
@@ -41,8 +45,12 @@ module LLM
41
45
  # @return [String] The completion
42
46
  #
43
47
  def complete(prompt:, **params)
48
+ model = DEFAULTS[:completion_model_name]
49
+
50
+ Langchain::Utils::TokenLengthValidator.validate!(prompt, model)
51
+
44
52
  default_params = {
45
- model: DEFAULTS[:completion_model_name],
53
+ model: model,
46
54
  temperature: DEFAULTS[:temperature],
47
55
  prompt: prompt
48
56
  }
@@ -64,8 +72,12 @@ module LLM
64
72
  # @return [String] The chat completion
65
73
  #
66
74
  def chat(prompt:, **params)
75
+ model = DEFAULTS[:chat_completion_model_name]
76
+
77
+ Langchain::Utils::TokenLengthValidator.validate!(prompt, model)
78
+
67
79
  default_params = {
68
- model: DEFAULTS[:chat_completion_model_name],
80
+ model: model,
69
81
  temperature: DEFAULTS[:temperature],
70
82
  # TODO: Figure out how to introduce persisted conversations
71
83
  messages: [{role: "user", content: prompt}]
@@ -88,8 +100,8 @@ module LLM
88
100
  # @return [String] The summary
89
101
  #
90
102
  def summarize(text:)
91
- prompt_template = Prompt.load_from_path(
92
- file_path: Langchain.root.join("llm/prompts/summarize_template.json")
103
+ prompt_template = Langchain::Prompt.load_from_path(
104
+ file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
93
105
  )
94
106
  prompt = prompt_template.format(text: text)
95
107
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module LLM
3
+ module Langchain::LLM
4
4
  class Replicate < Base
5
5
  # Wrapper around Replicate.com LLM provider
6
6
  # Use it directly:
@@ -89,8 +89,8 @@ module LLM
89
89
  # @return [String] The summary
90
90
  #
91
91
  def summarize(text:)
92
- prompt_template = Prompt.load_from_path(
93
- file_path: Langchain.root.join("llm/prompts/summarize_template.json")
92
+ prompt_template = Langchain::Prompt.load_from_path(
93
+ file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
94
94
  )
95
95
  prompt = prompt_template.format(text: text)
96
96
 
@@ -3,7 +3,7 @@
3
3
  require "strscan"
4
4
  require "json"
5
5
 
6
- module Prompt
6
+ module Langchain::Prompt
7
7
  class Base
8
8
  def format(**kwargs)
9
9
  raise NotImplementedError
@@ -29,7 +29,7 @@ module Prompt
29
29
  #
30
30
  def validate(template:, input_variables:)
31
31
  input_variables_set = @input_variables.uniq
32
- variables_from_template = Prompt::Base.extract_variables_from_template(template)
32
+ variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
33
33
 
34
34
  missing_variables = variables_from_template - input_variables_set
35
35
  extra_variables = input_variables_set - variables_from_template
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Prompt
3
+ module Langchain::Prompt
4
4
  class FewShotPromptTemplate < Base
5
5
  attr_reader :examples, :example_prompt, :input_variables, :prefix, :suffix, :example_separator
6
6
 
@@ -3,10 +3,10 @@
3
3
  require "strscan"
4
4
  require "pathname"
5
5
 
6
- module Prompt
6
+ module Langchain::Prompt
7
7
  TYPE_TO_LOADER = {
8
- "prompt" => ->(config) { Prompt.load_prompt(config) },
9
- "few_shot" => ->(config) { Prompt.load_few_shot_prompt(config) }
8
+ "prompt" => ->(config) { load_prompt(config) },
9
+ "few_shot" => ->(config) { load_few_shot_prompt(config) }
10
10
  }
11
11
 
12
12
  class << self
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Prompt
3
+ module Langchain::Prompt
4
4
  class PromptTemplate < Base
5
5
  attr_reader :template, :input_variables, :validate_template
6
6
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Tool
3
+ module Langchain::Tool
4
4
  class Base
5
5
  # How to add additional Tools?
6
6
  # 1. Create a new file in lib/langchain/tool/your_tool_name.rb
@@ -10,9 +10,9 @@ module Tool
10
10
  # 4. Add your tool to the README.md
11
11
 
12
12
  TOOLS = {
13
- "calculator" => "Tool::Calculator",
14
- "search" => "Tool::SerpApi",
15
- "wikipedia" => "Tool::Wikipedia"
13
+ "calculator" => "Langchain::Tool::Calculator",
14
+ "search" => "Langchain::Tool::SerpApi",
15
+ "wikipedia" => "Langchain::Tool::Wikipedia"
16
16
  }
17
17
 
18
18
  def self.description(value)
@@ -40,7 +40,7 @@ module Tool
40
40
  # @raise [ArgumentError] If any of the tools are not supported
41
41
  #
42
42
  def self.validate_tools!(tools:)
43
- unrecognized_tools = tools - Tool::Base::TOOLS.keys
43
+ unrecognized_tools = tools - Langchain::Tool::Base::TOOLS.keys
44
44
 
45
45
  if unrecognized_tools.any?
46
46
  raise ArgumentError, "Unrecognized Tools: #{unrecognized_tools}"
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Tool
3
+ module Langchain::Tool
4
4
  class Calculator < Base
5
5
  description <<~DESC
6
6
  Useful for getting the result of a math expression.
@@ -22,7 +22,7 @@ module Tool
22
22
  rescue Eqn::ParseError, Eqn::NoVariableValueError
23
23
  # Sometimes the input is not a pure math expression, e.g: "12F in Celsius"
24
24
  # We can use the google answer box to evaluate this expression
25
- hash_results = Tool::SerpApi.execute_search(input: input)
25
+ hash_results = Langchain::Tool::SerpApi.execute_search(input: input)
26
26
  hash_results.dig(:answer_box, :to)
27
27
  end
28
28
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Tool
3
+ module Langchain::Tool
4
4
  class SerpApi < Base
5
5
  # Wrapper around SerpAPI
6
6
  # Set ENV["SERPAPI_API_KEY"] to use it
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Tool
3
+ module Langchain::Tool
4
4
  class Wikipedia < Base
5
5
  # Tool that adds the capability to search using the Wikipedia API
6
6
 
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tiktoken_ruby"
4
+
5
+ module Langchain
6
+ module Utils
7
+ class TokenLimitExceeded < StandardError; end
8
+
9
+ class TokenLengthValidator
10
+ #
11
+ # This class is meant to validate the length of the text passed in to OpenAI's API.
12
+ # It is used to validate the token length before the API call is made
13
+ #
14
+ TOKEN_LIMITS = {
15
+ # Source:
16
+ # https://platform.openai.com/docs/api-reference/embeddings
17
+ # https://platform.openai.com/docs/models/gpt-4
18
+ "text-embedding-ada-002" => 8191,
19
+ "gpt-3.5-turbo" => 4096,
20
+ "gpt-3.5-turbo-0301" => 4096,
21
+ "text-davinci-003" => 4097,
22
+ "text-davinci-002" => 4097,
23
+ "code-davinci-002" => 8001,
24
+ "gpt-4" => 8192,
25
+ "gpt-4-0314" => 8192,
26
+ "gpt-4-32k" => 32768,
27
+ "gpt-4-32k-0314" => 32768,
28
+ "text-curie-001" => 2049,
29
+ "text-babbage-001" => 2049,
30
+ "text-ada-001" => 2049,
31
+ "davinci" => 2049,
32
+ "curie" => 2049,
33
+ "babbage" => 2049,
34
+ "ada" => 2049
35
+ }.freeze
36
+
37
+ #
38
+ # Validate the length of the text passed in to OpenAI's API
39
+ #
40
+ # @param text [String] The text to validate
41
+ # @param model_name [String] The model name to validate against
42
+ # @return [Boolean] Whether the text is valid or not
43
+ # @raise [TokenLimitExceeded] If the text is too long
44
+ #
45
+ def self.validate!(text, model_name)
46
+ encoder = Tiktoken.encoding_for_model(model_name)
47
+ token_length = encoder.encode(text).length
48
+
49
+ if token_length > TOKEN_LIMITS[model_name]
50
+ raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{token_length} tokens long."
51
+ end
52
+
53
+ true
54
+ end
55
+ end
56
+ end
57
+ end
@@ -2,7 +2,7 @@
2
2
 
3
3
  require "forwardable"
4
4
 
5
- module Vectorsearch
5
+ module Langchain::Vectorsearch
6
6
  class Base
7
7
  extend Forwardable
8
8
 
@@ -13,12 +13,12 @@ module Vectorsearch
13
13
  # @param llm [Symbol] The LLM to use
14
14
  # @param llm_api_key [String] The API key for the LLM
15
15
  def initialize(llm:, llm_api_key:)
16
- LLM::Base.validate_llm!(llm: llm)
16
+ Langchain::LLM::Base.validate_llm!(llm: llm)
17
17
 
18
18
  @llm = llm
19
19
  @llm_api_key = llm_api_key
20
20
 
21
- @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
21
+ @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
22
22
  end
23
23
 
24
24
  # Method supported by Vectorsearch DB to create a default schema
@@ -51,10 +51,10 @@ module Vectorsearch
51
51
  :default_dimension
52
52
 
53
53
  def generate_prompt(question:, context:)
54
- prompt_template = Prompt::FewShotPromptTemplate.new(
54
+ prompt_template = Langchain::Prompt::FewShotPromptTemplate.new(
55
55
  prefix: "Context:",
56
56
  suffix: "---\nQuestion: {question}\n---\nAnswer:",
57
- example_prompt: Prompt::PromptTemplate.new(
57
+ example_prompt: Langchain::Prompt::PromptTemplate.new(
58
58
  template: "{context}",
59
59
  input_variables: ["context"]
60
60
  ),
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Chroma < Base
5
5
  # Initialize the Chroma client
6
6
  # @param url [String] The URL of the Chroma server
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Milvus < Base
5
5
  def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
6
6
  depends_on "milvus"
@@ -1,8 +1,17 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  # The PostgreSQL vector search adapter
5
5
  class Pgvector < Base
6
+ # The operators supported by the PostgreSQL vector search adapter
7
+ OPERATORS = {
8
+ "cosine_distance" => "<=>",
9
+ "euclidean_distance" => "<->"
10
+ }
11
+ DEFAULT_OPERATOR = "cosine_distance"
12
+
13
+ attr_reader :operator, :quoted_table_name
14
+
6
15
  # @param url [String] The URL of the PostgreSQL database
7
16
  # @param index_name [String] The name of the table to use for the index
8
17
  # @param llm [Symbol] The LLM to use
@@ -18,6 +27,8 @@ module Vectorsearch
18
27
  @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
19
28
 
20
29
  @index_name = index_name
30
+ @quoted_table_name = @client.quote_ident(index_name)
31
+ @operator = OPERATORS[DEFAULT_OPERATOR]
21
32
 
22
33
  super(llm: llm, llm_api_key: llm_api_key)
23
34
  end
@@ -31,7 +42,7 @@ module Vectorsearch
31
42
  end
32
43
  values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
33
44
  client.exec_params(
34
- "INSERT INTO #{@index_name} (content, vectors) VALUES #{values};",
45
+ "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values};",
35
46
  data
36
47
  )
37
48
  end
@@ -42,7 +53,7 @@ module Vectorsearch
42
53
  client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
43
54
  client.exec(
44
55
  <<~SQL
45
- CREATE TABLE IF NOT EXISTS #{@index_name} (
56
+ CREATE TABLE IF NOT EXISTS #{quoted_table_name} (
46
57
  id serial PRIMARY KEY,
47
58
  content TEXT,
48
59
  vectors VECTOR(#{default_dimension})
@@ -73,7 +84,7 @@ module Vectorsearch
73
84
  result = client.transaction do |conn|
74
85
  conn.exec("SET LOCAL ivfflat.probes = 10;")
75
86
  query = <<~SQL
76
- SELECT id, content FROM #{@index_name} ORDER BY vectors <-> $1 ASC LIMIT $2;
87
+ SELECT id, content FROM #{quoted_table_name} ORDER BY vectors #{operator} $1 ASC LIMIT $2;
77
88
  SQL
78
89
  conn.exec_params(query, [embedding, k])
79
90
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Pinecone < Base
5
5
  # Initialize the Pinecone client
6
6
  # @param environment [String] The environment to use
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Qdrant < Base
5
5
  # Initialize the Qdrant client
6
6
  # @param url [String] The URL of the Qdrant server
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Vectorsearch
3
+ module Langchain::Vectorsearch
4
4
  class Weaviate < Base
5
5
  # Initialize the Weaviate adapter
6
6
  # @param url [String] The URL of the Weaviate instance
data/lib/langchain.rb CHANGED
@@ -5,6 +5,7 @@ require "pathname"
5
5
 
6
6
  require_relative "./version"
7
7
  require_relative "./dependency_helper"
8
+
8
9
  module Langchain
9
10
  class << self
10
11
  attr_accessor :logger
@@ -19,6 +20,18 @@ module Langchain
19
20
  autoload :Loader, "langchain/loader"
20
21
  autoload :Data, "langchain/data"
21
22
 
23
+ module Agent
24
+ autoload :Base, "langchain/agent/base"
25
+ autoload :ChainOfThoughtAgent, "langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb"
26
+ end
27
+
28
+ module Tool
29
+ autoload :Base, "langchain/tool/base"
30
+ autoload :Calculator, "langchain/tool/calculator"
31
+ autoload :SerpApi, "langchain/tool/serp_api"
32
+ autoload :Wikipedia, "langchain/tool/wikipedia"
33
+ end
34
+
22
35
  module Processors
23
36
  autoload :Base, "langchain/processors/base"
24
37
  autoload :CSV, "langchain/processors/csv"
@@ -29,43 +42,35 @@ module Langchain
29
42
  autoload :PDF, "langchain/processors/pdf"
30
43
  autoload :Text, "langchain/processors/text"
31
44
  end
32
- end
33
-
34
- module Agent
35
- autoload :Base, "agent/base"
36
- autoload :ChainOfThoughtAgent, "agent/chain_of_thought_agent/chain_of_thought_agent.rb"
37
- end
38
45
 
39
- module Vectorsearch
40
- autoload :Base, "vectorsearch/base"
41
- autoload :Chroma, "vectorsearch/chroma"
42
- autoload :Milvus, "vectorsearch/milvus"
43
- autoload :Pinecone, "vectorsearch/pinecone"
44
- autoload :Pgvector, "vectorsearch/pgvector"
45
- autoload :Qdrant, "vectorsearch/qdrant"
46
- autoload :Weaviate, "vectorsearch/weaviate"
47
- end
46
+ module Utils
47
+ autoload :TokenLengthValidator, "langchain/utils/token_length_validator"
48
+ end
48
49
 
49
- module LLM
50
- autoload :Base, "llm/base"
51
- autoload :Cohere, "llm/cohere"
52
- autoload :GooglePalm, "llm/google_palm"
53
- autoload :HuggingFace, "llm/hugging_face"
54
- autoload :OpenAI, "llm/openai"
55
- autoload :Replicate, "llm/replicate"
56
- end
50
+ module Vectorsearch
51
+ autoload :Base, "langchain/vectorsearch/base"
52
+ autoload :Chroma, "langchain/vectorsearch/chroma"
53
+ autoload :Milvus, "langchain/vectorsearch/milvus"
54
+ autoload :Pinecone, "langchain/vectorsearch/pinecone"
55
+ autoload :Pgvector, "langchain/vectorsearch/pgvector"
56
+ autoload :Qdrant, "langchain/vectorsearch/qdrant"
57
+ autoload :Weaviate, "langchain/vectorsearch/weaviate"
58
+ end
57
59
 
58
- module Prompt
59
- require_relative "prompt/loading"
60
+ module LLM
61
+ autoload :Base, "langchain/llm/base"
62
+ autoload :Cohere, "langchain/llm/cohere"
63
+ autoload :GooglePalm, "langchain/llm/google_palm"
64
+ autoload :HuggingFace, "langchain/llm/hugging_face"
65
+ autoload :OpenAI, "langchain/llm/openai"
66
+ autoload :Replicate, "langchain/llm/replicate"
67
+ end
60
68
 
61
- autoload :Base, "prompt/base"
62
- autoload :PromptTemplate, "prompt/prompt_template"
63
- autoload :FewShotPromptTemplate, "prompt/few_shot_prompt_template"
64
- end
69
+ module Prompt
70
+ require_relative "langchain/prompt/loading"
65
71
 
66
- module Tool
67
- autoload :Base, "tool/base"
68
- autoload :Calculator, "tool/calculator"
69
- autoload :SerpApi, "tool/serp_api"
70
- autoload :Wikipedia, "tool/wikipedia"
72
+ autoload :Base, "langchain/prompt/base"
73
+ autoload :PromptTemplate, "langchain/prompt/prompt_template"
74
+ autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
75
+ end
71
76
  end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.15"
4
+ VERSION = "0.4.0"
5
5
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.15
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-30 00:00:00.000000000 Z
11
+ date: 2023-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: tiktoken_ruby
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.0.5
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.5
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: dotenv-rails
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -311,12 +325,19 @@ files:
311
325
  - examples/store_and_query_with_pinecone.rb
312
326
  - examples/store_and_query_with_qdrant.rb
313
327
  - examples/store_and_query_with_weaviate.rb
314
- - lib/agent/base.rb
315
- - lib/agent/chain_of_thought_agent/chain_of_thought_agent.rb
316
- - lib/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
317
328
  - lib/dependency_helper.rb
318
329
  - lib/langchain.rb
330
+ - lib/langchain/agent/base.rb
331
+ - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb
332
+ - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
319
333
  - lib/langchain/data.rb
334
+ - lib/langchain/llm/base.rb
335
+ - lib/langchain/llm/cohere.rb
336
+ - lib/langchain/llm/google_palm.rb
337
+ - lib/langchain/llm/hugging_face.rb
338
+ - lib/langchain/llm/openai.rb
339
+ - lib/langchain/llm/prompts/summarize_template.json
340
+ - lib/langchain/llm/replicate.rb
320
341
  - lib/langchain/loader.rb
321
342
  - lib/langchain/processors/base.rb
322
343
  - lib/langchain/processors/csv.rb
@@ -326,29 +347,23 @@ files:
326
347
  - lib/langchain/processors/jsonl.rb
327
348
  - lib/langchain/processors/pdf.rb
328
349
  - lib/langchain/processors/text.rb
350
+ - lib/langchain/prompt/base.rb
351
+ - lib/langchain/prompt/few_shot_prompt_template.rb
352
+ - lib/langchain/prompt/loading.rb
353
+ - lib/langchain/prompt/prompt_template.rb
354
+ - lib/langchain/tool/base.rb
355
+ - lib/langchain/tool/calculator.rb
356
+ - lib/langchain/tool/serp_api.rb
357
+ - lib/langchain/tool/wikipedia.rb
358
+ - lib/langchain/utils/token_length_validator.rb
359
+ - lib/langchain/vectorsearch/base.rb
360
+ - lib/langchain/vectorsearch/chroma.rb
361
+ - lib/langchain/vectorsearch/milvus.rb
362
+ - lib/langchain/vectorsearch/pgvector.rb
363
+ - lib/langchain/vectorsearch/pinecone.rb
364
+ - lib/langchain/vectorsearch/qdrant.rb
365
+ - lib/langchain/vectorsearch/weaviate.rb
329
366
  - lib/langchainrb.rb
330
- - lib/llm/base.rb
331
- - lib/llm/cohere.rb
332
- - lib/llm/google_palm.rb
333
- - lib/llm/hugging_face.rb
334
- - lib/llm/openai.rb
335
- - lib/llm/prompts/summarize_template.json
336
- - lib/llm/replicate.rb
337
- - lib/prompt/base.rb
338
- - lib/prompt/few_shot_prompt_template.rb
339
- - lib/prompt/loading.rb
340
- - lib/prompt/prompt_template.rb
341
- - lib/tool/base.rb
342
- - lib/tool/calculator.rb
343
- - lib/tool/serp_api.rb
344
- - lib/tool/wikipedia.rb
345
- - lib/vectorsearch/base.rb
346
- - lib/vectorsearch/chroma.rb
347
- - lib/vectorsearch/milvus.rb
348
- - lib/vectorsearch/pgvector.rb
349
- - lib/vectorsearch/pinecone.rb
350
- - lib/vectorsearch/qdrant.rb
351
- - lib/vectorsearch/weaviate.rb
352
367
  - lib/version.rb
353
368
  - sig/langchain.rbs
354
369
  homepage: https://rubygems.org/gems/langchainrb
@@ -373,7 +388,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
373
388
  - !ruby/object:Gem::Version
374
389
  version: '0'
375
390
  requirements: []
376
- rubygems_version: 3.3.7
391
+ rubygems_version: 3.2.3
377
392
  signing_key:
378
393
  specification_version: 4
379
394
  summary: Build LLM-backed Ruby applications with Ruby's LangChain