langchainrb 0.4.2 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6e84f50b6e12bd94f5fa8de956549537f5d34b8a901bc6af3fbc5d392fc2e0a
4
- data.tar.gz: '08c01f481d64b0c35f7e86491d1115d975497c8561f50408516fad388f084c3e'
3
+ metadata.gz: b0a2fe8026e861c9d97465bce7da08a0b077492d6f7cf8fb42c45dbfdfe6749f
4
+ data.tar.gz: c04099c44a847bd9c05e8594859f92ca1f54d338c463ce59a375c2cb9731b1ad
5
5
  SHA512:
6
- metadata.gz: 8a1d29180f3c0cf89307413bc99e22accc6875d458b3ae12ea72d30146cf5ff172fba7047fe00b385d324057638115254c8dcc6f01459a75f96dccb9a99a301b
7
- data.tar.gz: 070a0b6836cdb7dd356c99186964f61926498d5f585910bbb30449d4b2a12d50797797dc34c6fe3eb6a4e64c156230d23606c2ac0cf131eeccc942c6231ab3c9
6
+ metadata.gz: dec375b2b7cae377cf31f3f8ed0a6ac9d79215c945e7c0da78ed1fbad3c502ecfcc5ce5318c55a9a634db88fea1f5fbbeed7a0f7dc6ab8096c909e0a3ff02154
7
+ data.tar.gz: 558a0f6ddf90ad044f9e2cc7c6ca678958472748d2e430a3cbb4308290898b9b22a204a94a5b5943f06137f7da5238d038a65b8c79d96f8a3705499d95cfb597
data/.env.example CHANGED
@@ -1,3 +1,4 @@
1
+ AI21_API_KEY=
1
2
  CHROMA_URL=
2
3
  COHERE_API_KEY=
3
4
  HUGGING_FACE_API_KEY=
@@ -6,10 +7,10 @@ OPENAI_API_KEY=
6
7
  GOOGLE_PALM_API_KEY=
7
8
  PINECONE_API_KEY=
8
9
  PINECONE_ENVIRONMENT=
10
+ POSTGRES_URL=
9
11
  REPLICATE_API_KEY=
10
12
  QDRANT_API_KEY=
11
13
  QDRANT_URL=
12
14
  SERPAPI_API_KEY=
13
15
  WEAVIATE_API_KEY=
14
16
  WEAVIATE_URL=
15
- POSTGRES_URL=
data/.rubocop.yml ADDED
@@ -0,0 +1,11 @@
1
+ AllCops:
2
+ SuggestExtensions: false
3
+
4
+ require:
5
+ - standard
6
+ - rubocop-performance
7
+
8
+ inherit_gem:
9
+ standard: config/base.yml
10
+ standard-performance: config/base.yml
11
+ standard-custom: config/base.yml
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.5.0] - 2023-06-05
4
+ - [BREAKING] LLMs are now passed as objects to Vectorsearch classes instead of `llm: :name, llm_api_key:` previously
5
+ - 📋 Prompts
6
+ - YAML prompt templates are now supported
7
+ - 🚚 Loaders
8
+ - Introduce `Langchain::Processors::Xlsx` to parse .xlsx files
9
+
3
10
  ## [0.4.2] - 2023-06-03
4
11
  - 🗣️ LLMs
5
12
  - Introducing support for AI21
data/Gemfile CHANGED
@@ -10,3 +10,5 @@ gem "rake", "~> 13.0"
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
12
  gem "standardrb"
13
+ # Let's add rubocop explicitly here; we are using only standardrb rules in .rubocop.yml
14
+ gem "rubocop"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.4.2)
4
+ langchainrb (0.5.0)
5
5
  colorize (~> 0.8.1)
6
6
  tiktoken_ruby (~> 0.0.5)
7
7
 
@@ -219,6 +219,9 @@ GEM
219
219
  faraday-multipart
220
220
  faraday-retry
221
221
  rexml (3.2.5)
222
+ roo (2.10.0)
223
+ nokogiri (~> 1)
224
+ rubyzip (>= 1.3.0, < 3.0.0)
222
225
  rspec (3.12.0)
223
226
  rspec-core (~> 3.12.0)
224
227
  rspec-expectations (~> 3.12.0)
@@ -257,6 +260,7 @@ GEM
257
260
  rubyzip (2.3.2)
258
261
  safe_ruby (1.0.4)
259
262
  childprocess (>= 0.3.9)
263
+ sequel (5.68.0)
260
264
  standard (1.28.2)
261
265
  language_server-protocol (~> 3.17.0.2)
262
266
  lint_roller (~> 1.0)
@@ -321,9 +325,12 @@ DEPENDENCIES
321
325
  rake (~> 13.0)
322
326
  rdiscount
323
327
  replicate-ruby (~> 0.2.2)
328
+ roo (~> 2.10.0)
324
329
  rspec (~> 3.0)
330
+ rubocop
325
331
  ruby-openai (~> 4.0.0)
326
332
  safe_ruby (~> 1.0.4)
333
+ sequel (~> 5.68.0)
327
334
  standardrb
328
335
  weaviate-ruby (~> 0.8.0)
329
336
  wikipedia-client (~> 1.17.0)
data/README.md CHANGED
@@ -47,8 +47,7 @@ Pick the vector search database you'll be using and instantiate the client:
47
47
  client = Langchain::Vectorsearch::Weaviate.new(
48
48
  url: ENV["WEAVIATE_URL"],
49
49
  api_key: ENV["WEAVIATE_API_KEY"],
50
- llm: :openai, # or :cohere
51
- llm_api_key: ENV["OPENAI_API_KEY"]
50
+ llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
52
51
  )
53
52
 
54
53
  # You can instantiate any other supported vector search database:
@@ -151,6 +150,12 @@ Add `"google_palm_api", "~> 0.1.0"` to your Gemfile.
151
150
  google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
152
151
  ```
153
152
 
153
+ #### AI21
154
+ Add `gem "ai21", "~> 0.2.0"` to your Gemfile.
155
+ ```ruby
156
+ ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
157
+ ```
158
+
154
159
  ### Using Prompts 📋
155
160
 
156
161
  #### Prompt Templates
@@ -172,9 +177,9 @@ prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke a
172
177
  Creating a PromptTemplate using just a prompt and no input_variables:
173
178
 
174
179
  ```ruby
175
- prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
176
- prompt.input_variables # ["adjective", "content"]
177
- prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
180
+ prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a funny joke about chickens.")
181
+ prompt.input_variables # []
182
+ prompt.format # "Tell me a funny joke about chickens."
178
183
  ```
179
184
 
180
185
  Save prompt template to JSON file:
@@ -236,6 +241,13 @@ prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_s
236
241
  prompt.prefix # "Write antonyms for the following words."
237
242
  ```
238
243
 
244
+ Loading a new prompt template using a YAML file:
245
+
246
+ ```ruby
247
+ prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
248
+ prompt.input_variables #=> ["adjective", "content"]
249
+ ```
250
+
239
251
  ### Using Agents 🤖
240
252
  Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
241
253
 
@@ -244,7 +256,7 @@ Agents are semi-autonomous bots that can respond to user questions and use avail
244
256
  Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
245
257
 
246
258
  ```ruby
247
- agent = Langchain::Agent::ChainOfThoughtAgent.new(llm: :openai, llm_api_key: ENV["OPENAI_API_KEY"], tools: ['search', 'calculator'])
259
+ agent = Langchain::Agent::ChainOfThoughtAgent.new(llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]), tools: ['search', 'calculator'])
248
260
 
249
261
  agent.tools
250
262
  # => ["search", "calculator"]
@@ -254,6 +266,19 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
254
266
  #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
255
267
  ```
256
268
 
269
+ #### SQL-Query Agent
270
+
271
+ Add `gem "sequel"` to your Gemfile
272
+
273
+ ```ruby
274
+ agent = Langchain::Agent::SQLQueryAgent.new(llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]), db_connection_string: "postgres://user:password@localhost:5432/db_name")
275
+
276
+ ```
277
+ ```ruby
278
+ agent.ask(question: "How many users have a name with length greater than 5 in the users table?")
279
+ #=> "14 users have a name with length greater than 5 in the users table."
280
+ ```
281
+
257
282
  #### Demo
258
283
  ![May-12-2023 13-09-13](https://github.com/andreibondarev/langchainrb/assets/541665/6bad4cd9-976c-420f-9cf9-b85bf84f7eaf)
259
284
 
@@ -264,6 +289,7 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
264
289
  | Name | Description | ENV Requirements | Gem Requirements |
265
290
  | ------------ | :------------------------------------------------: | :-----------------------------------------------------------: | :---------------------------------------: |
266
291
  | "calculator" | Useful for getting the result of a math expression | | `gem "eqn", "~> 1.6.5"` |
292
+ | "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
267
293
  | "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
268
294
  | "search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
269
295
  | "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |
@@ -298,6 +324,7 @@ Langchain::Loader.load('https://www.example.com/file.pdf')
298
324
  | JSON | Langchain::Processors::JSON | |
299
325
  | JSONL | Langchain::Processors::JSONL | |
300
326
  | csv | Langchain::Processors::CSV | |
327
+ | xlsx | Langchain::Processors::Xlsx | `gem "roo", "~> 2.10.0"` |
301
328
 
302
329
  ## Examples
303
330
  Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
@@ -317,6 +344,7 @@ Langchain.logger.level = :info
317
344
  2. `cp .env.example .env`, then fill out the environment variables in `.env`
318
345
  3. `bundle exec rake` to ensure that the tests pass and to run standardrb
319
346
  4. `bin/console` to load the gem in a REPL session. Feel free to add your own instances of LLMs, Tools, Agents, etc. and experiment with them.
347
+ 5. Optionally, install lefthook git hooks for pre-commit to auto lint: `gem install lefthook && lefthook install -f`
320
348
 
321
349
  ## Community
322
350
  Join us in the [Ruby AI Builders](https://discord.gg/SBmjAnKT) Discord community in #langchainrb
@@ -7,8 +7,7 @@ require "langchain"
7
7
  chroma = Vectorsearch::Chroma.new(
8
8
  url: ENV["CHROMA_URL"],
9
9
  index_name: "documents",
10
- llm: :openai,
11
- llm_api_key: ENV["OPENAI_API_KEY"]
10
+ llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
12
11
  )
13
12
 
14
13
  # Create the default schema.
@@ -8,8 +8,7 @@ pinecone = Vectorsearch::Pinecone.new(
8
8
  environment: ENV["PINECONE_ENVIRONMENT"],
9
9
  api_key: ENV["PINECONE_API_KEY"],
10
10
  index_name: "recipes",
11
- llm: :openai,
12
- llm_api_key: ENV["OPENAI_API_KEY"]
11
+ llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
13
12
  )
14
13
 
15
14
  # Create the default schema.
@@ -8,8 +8,7 @@ qdrant = Vectorsearch::Qdrant.new(
8
8
  url: ENV["QDRANT_URL"],
9
9
  api_key: ENV["QDRANT_API_KEY"],
10
10
  index_name: "recipes",
11
- llm: :cohere,
12
- llm_api_key: ENV["COHERE_API_KEY"]
11
+ llm: Langchain::LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
13
12
  )
14
13
 
15
14
  # Create the default schema.
@@ -8,8 +8,7 @@ weaviate = Vectorsearch::Weaviate.new(
8
8
  url: ENV["WEAVIATE_URL"],
9
9
  api_key: ENV["WEAVIATE_API_KEY"],
10
10
  index_name: "Recipes",
11
- llm: :openai,
12
- llm_api_key: ENV["OPENAI_API_KEY"]
11
+ llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
13
12
  )
14
13
 
15
14
  # Create the default schema. A text field `content` will be used.
data/lefthook.yml ADDED
@@ -0,0 +1,5 @@
1
+ pre-commit:
2
+ commands:
3
+ lint:
4
+ run: standardrb --fix
5
+ stage_fixed: true
@@ -2,23 +2,19 @@
2
2
 
3
3
  module Langchain::Agent
4
4
  class ChainOfThoughtAgent < Base
5
- attr_reader :llm, :llm_api_key, :llm_client, :tools
5
+ attr_reader :llm, :tools
6
6
 
7
7
  # Initializes the Agent
8
8
  #
9
- # @param llm [Symbol] The LLM to use
10
- # @param llm_api_key [String] The API key for the LLM
9
+ # @param llm [Object] The LLM client to use
11
10
  # @param tools [Array] The tools to use
12
11
  # @return [ChainOfThoughtAgent] The Agent::ChainOfThoughtAgent instance
13
- def initialize(llm:, llm_api_key:, tools: [])
14
- Langchain::LLM::Base.validate_llm!(llm: llm)
12
+ def initialize(llm:, tools: [])
15
13
  Langchain::Tool::Base.validate_tools!(tools: tools)
16
14
 
17
- @llm = llm
18
- @llm_api_key = llm_api_key
19
15
  @tools = tools
20
16
 
21
- @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
17
+ @llm = llm
22
18
  end
23
19
 
24
20
  # Validate tools when they're re-assigned
@@ -42,8 +38,8 @@ module Langchain::Agent
42
38
  )
43
39
 
44
40
  loop do
45
- Langchain.logger.info("[#{self.class.name}]".red + ": Sending the prompt to the #{llm} LLM")
46
- response = llm_client.complete(
41
+ Langchain.logger.info("[#{self.class.name}]".red + ": Sending the prompt to the #{llm.class} LLM")
42
+ response = llm.complete(
47
43
  prompt: prompt,
48
44
  stop_sequences: ["Observation:"],
49
45
  max_tokens: 500
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain::Agent
4
+ class SQLQueryAgent < Base
5
+ attr_reader :llm, :db, :schema
6
+
7
+ # Initializes the Agent
8
+ #
9
+ # @param llm [Object] The LLM client to use
10
+ # @param db_connection_string [String] Database connection info
11
+ def initialize(llm:, db_connection_string:)
12
+ @llm = llm
13
+ @db = Langchain::Tool::Database.new(db_connection_string)
14
+ @schema = @db.schema
15
+ end
16
+
17
+ # Ask a question and get an answer
18
+ #
19
+ # @param question [String] Question to ask the LLM/Database
20
+ # @return [String] Answer to the question
21
+ def ask(question:)
22
+ prompt = create_prompt_for_sql(question: question)
23
+
24
+ # Get the SQL string to execute
25
+ Langchain.logger.info("[#{self.class.name}]".red + ": Passing the inital prompt to the #{llm.class} LLM")
26
+ sql_string = llm.complete(prompt: prompt, max_tokens: 500)
27
+
28
+ # Execute the SQL string and collect the results
29
+ Langchain.logger.info("[#{self.class.name}]".red + ": Passing the SQL to the Database: #{sql_string}")
30
+ results = db.execute(input: sql_string)
31
+
32
+ # Pass the results and get the LLM to synthesize the answer to the question
33
+ Langchain.logger.info("[#{self.class.name}]".red + ": Passing the synthesize prompt to the #{llm.class} LLM with results: #{results}")
34
+ prompt2 = create_prompt_for_answer(question: question, sql_query: sql_string, results: results)
35
+ llm.complete(prompt: prompt2, max_tokens: 500)
36
+ end
37
+
38
+ private
39
+
40
+ # Create the initial prompt to pass to the LLM
41
+ # @param question [String] Question to ask
42
+ # @return [String] Prompt
43
+ def create_prompt_for_sql(question:)
44
+ prompt_template_sql.format(
45
+ dialect: "standard SQL",
46
+ schema: schema,
47
+ question: question
48
+ )
49
+ end
50
+
51
+ # Load the PromptTemplate from the JSON file
52
+ # @return [PromptTemplate] PromptTemplate instance
53
+ def prompt_template_sql
54
+ Langchain::Prompt.load_from_path(
55
+ file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json")
56
+ )
57
+ end
58
+
59
+ # Create the second prompt to pass to the LLM
60
+ # @param question [String] Question to ask
61
+ # @return [String] Prompt
62
+ def create_prompt_for_answer(question:, sql_query:, results:)
63
+ prompt_template_answer.format(
64
+ question: question,
65
+ sql_query: sql_query,
66
+ results: results
67
+ )
68
+ end
69
+
70
+ # Load the PromptTemplate from the JSON file
71
+ # @return [PromptTemplate] PromptTemplate instance
72
+ def prompt_template_answer
73
+ Langchain::Prompt.load_from_path(
74
+ file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json")
75
+ )
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,10 @@
1
+ {
2
+ "_type": "prompt",
3
+ "template":
4
+ "Given an input question and results of a SQL query, look at the results and return the answer. Use the following format:\nQuestion: {question}\nThe SQL query: {sql_query}\nResult of the SQLQuery: {results}\nFinal answer: Final answer here",
5
+ "input_variables": [
6
+ "question",
7
+ "sql_query",
8
+ "results"
9
+ ]
10
+ }
@@ -0,0 +1,10 @@
1
+ {
2
+ "_type": "prompt",
3
+ "template":
4
+ "Given an input question, create a syntactically correct {dialect} query to run, then return the query in valid SQL.\nNever query for all the columns from a specific table, only ask for a the few relevant columns given the question.\nPay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist. Pay attention to which column is in which table. Also, qualify column names with the table name when needed.\nOnly use the tables listed below.\n{schema}\nUse the following format:\nQuestion: {question}\nSQLQuery:",
5
+ "input_variables": [
6
+ "dialect",
7
+ "schema",
8
+ "question"
9
+ ]
10
+ }
@@ -6,15 +6,6 @@ module Langchain::LLM
6
6
 
7
7
  attr_reader :client
8
8
 
9
- # Currently supported LLMs
10
- LLMS = {
11
- cohere: "Cohere",
12
- google_palm: "GooglePalm",
13
- huggingface: "HuggingFace",
14
- openai: "OpenAI",
15
- replicate: "Replicate"
16
- }.freeze
17
-
18
9
  def default_dimension
19
10
  self.class.const_get(:DEFAULTS).dig(:dimension)
20
11
  end
@@ -38,14 +29,5 @@ module Langchain::LLM
38
29
  def summarize(...)
39
30
  raise NotImplementedError, "#{self.class.name} does not support summarization"
40
31
  end
41
-
42
- # Ensure that the LLM value passed in is supported
43
- # @param llm [Symbol] The LLM to use
44
- def self.validate_llm!(llm:)
45
- # TODO: Fix so this works when `llm` value is a string instead of a symbol
46
- unless Langchain::LLM::Base::LLMS.key?(llm)
47
- raise ArgumentError, "LLM must be one of #{Langchain::LLM::Base::LLMS.keys}"
48
- end
49
- end
50
32
  end
51
33
  end
@@ -14,8 +14,7 @@ module Langchain::LLM
14
14
  # chroma = Vectorsearch::Chroma.new(
15
15
  # url: ENV["CHROMA_URL"],
16
16
  # index_name: "...",
17
- # llm: :replicate,
18
- # llm_api_key: ENV["REPLICATE_API_KEY"],
17
+ # llm: Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
19
18
  # )
20
19
 
21
20
  DEFAULTS = {
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Xlsx < Base
6
+ EXTENSIONS = [".xlsx", ".xlsm"].freeze
7
+ CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"].freeze
8
+
9
+ def initialize(*)
10
+ depends_on "roo"
11
+ require "roo"
12
+ end
13
+
14
+ # Parse the document and return the text
15
+ # @param [File] data
16
+ # @return [Array<Array<String>>] Array of rows, each row is an array of cells
17
+ def parse(data)
18
+ xlsx_file = Roo::Spreadsheet.open(data)
19
+ xlsx_file.each_with_pagename.flat_map do |_, sheet|
20
+ sheet.map do |row|
21
+ row.map { |i| i.to_s.strip }
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "strscan"
4
4
  require "json"
5
+ require "yaml"
5
6
 
6
7
  module Langchain::Prompt
7
8
  class Base
@@ -52,10 +53,13 @@ module Langchain::Prompt
52
53
  directory_path = save_path.dirname
53
54
  FileUtils.mkdir_p(directory_path) unless directory_path.directory?
54
55
 
55
- if save_path.extname == ".json"
56
+ case save_path.extname
57
+ when ".json"
56
58
  File.write(file_path, to_h.to_json)
59
+ when ".yaml", ".yml"
60
+ File.write(file_path, to_h.to_yaml)
57
61
  else
58
- raise ArgumentError, "#{file_path} must be json"
62
+ raise ArgumentError, "#{file_path} must be json or yaml file"
59
63
  end
60
64
  end
61
65
 
@@ -3,6 +3,7 @@
3
3
  require "strscan"
4
4
  require "pathname"
5
5
  require "json"
6
+ require "yaml"
6
7
 
7
8
  module Langchain::Prompt
8
9
  TYPE_TO_LOADER = {
@@ -23,8 +24,11 @@ module Langchain::Prompt
23
24
  def load_from_path(file_path:)
24
25
  file_path = file_path.is_a?(String) ? Pathname.new(file_path) : file_path
25
26
 
26
- if file_path.extname == ".json"
27
+ case file_path.extname
28
+ when ".json"
27
29
  config = JSON.parse(File.read(file_path))
30
+ when ".yaml", ".yml"
31
+ config = YAML.safe_load(File.read(file_path))
28
32
  else
29
33
  raise ArgumentError, "Got unsupported file type #{file_path.extname}"
30
34
  end
@@ -14,7 +14,8 @@ module Langchain::Tool
14
14
  TOOLS = {
15
15
  "calculator" => "Langchain::Tool::Calculator",
16
16
  "search" => "Langchain::Tool::SerpApi",
17
- "wikipedia" => "Langchain::Tool::Wikipedia"
17
+ "wikipedia" => "Langchain::Tool::Wikipedia",
18
+ "database" => "Langchain::Tool::Database"
18
19
  }
19
20
 
20
21
  def self.description(value)
@@ -0,0 +1,45 @@
1
+ module Langchain::Tool
2
+ class Database < Base
3
+ #
4
+ # Connects to a database, executes SQL queries, and outputs DB schema for Agents to use
5
+ #
6
+ # Gem requirements: gem "sequel", "~> 5.68.0"
7
+ #
8
+
9
+ description <<~DESC
10
+ Useful for getting the result of a database query.
11
+
12
+ The input to this tool should be valid SQL.
13
+ DESC
14
+
15
+ # Establish a database connection
16
+ # @param db_connection_string [String] Database connection info, e.g. 'postgres://user:password@localhost:5432/db_name'
17
+ def initialize(db_connection_string)
18
+ depends_on "sequel"
19
+ require "sequel"
20
+ require "sequel/extensions/schema_dumper"
21
+
22
+ raise StandardError, "db_connection_string parameter cannot be blank" if db_connection_string.empty?
23
+
24
+ @db = Sequel.connect(db_connection_string)
25
+ @db.extension :schema_dumper
26
+ end
27
+
28
+ def schema
29
+ Langchain.logger.info("[#{self.class.name}]".light_blue + ": Dumping schema")
30
+ @db.dump_schema_migration(same_db: true, indexes: false) unless @db.adapter_scheme == :mock
31
+ end
32
+
33
+ # Evaluates a sql expression
34
+ # @param input [String] sql expression
35
+ # @return [Array] results
36
+ def execute(input:)
37
+ Langchain.logger.info("[#{self.class.name}]".light_blue + ": Executing \"#{input}\"")
38
+ begin
39
+ @db[input].to_a
40
+ rescue Sequel::DatabaseError => e
41
+ Langchain.logger.error("[#{self.class.name}]".light_red + ": #{e.message}")
42
+ end
43
+ end
44
+ end
45
+ end
@@ -7,19 +7,13 @@ module Langchain::Vectorsearch
7
7
  include Langchain::DependencyHelper
8
8
  extend Forwardable
9
9
 
10
- attr_reader :client, :index_name, :llm, :llm_api_key, :llm_client
10
+ attr_reader :client, :index_name, :llm
11
11
 
12
12
  DEFAULT_METRIC = "cosine"
13
13
 
14
- # @param llm [Symbol] The LLM to use
15
- # @param llm_api_key [String] The API key for the LLM
16
- def initialize(llm:, llm_api_key:)
17
- Langchain::LLM::Base.validate_llm!(llm: llm)
18
-
14
+ # @param llm [Object] The LLM client to use
15
+ def initialize(llm:)
19
16
  @llm = llm
20
- @llm_api_key = llm_api_key
21
-
22
- @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
23
17
  end
24
18
 
25
19
  # Method supported by Vectorsearch DB to create a default schema
@@ -48,7 +42,7 @@ module Langchain::Vectorsearch
48
42
  raise NotImplementedError, "#{self.class.name} does not support asking questions"
49
43
  end
50
44
 
51
- def_delegators :llm_client,
45
+ def_delegators :llm,
52
46
  :default_dimension
53
47
 
54
48
  def generate_prompt(question:, context:)
@@ -69,11 +63,10 @@ module Langchain::Vectorsearch
69
63
  prompt_template.format(question: question)
70
64
  end
71
65
 
72
- def add_data(path: nil, paths: nil)
73
- raise ArgumentError, "Either path or paths must be provided" if path.nil? && paths.nil?
74
- raise ArgumentError, "Either path or paths must be provided, not both" if !path.nil? && !paths.nil?
66
+ def add_data(paths:)
67
+ raise ArgumentError, "Paths must be provided" if paths.to_a.empty?
75
68
 
76
- texts = Array(path || paths)
69
+ texts = Array(paths)
77
70
  .flatten
78
71
  .map { |path| Langchain::Loader.new(path)&.load&.value }
79
72
  .compact
@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
15
15
  # @param url [String] The URL of the Chroma server
16
16
  # @param api_key [String] The API key to use
17
17
  # @param index_name [String] The name of the index to use
18
- # @param llm [Symbol] The LLM to use
19
- # @param llm_api_key [String] The API key for the LLM
20
- def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
18
+ # @param llm [Object] The LLM client to use
19
+ def initialize(url:, index_name:, llm:, api_key: nil)
21
20
  depends_on "chroma-db"
22
21
  require "chroma-db"
23
22
 
@@ -27,7 +26,7 @@ module Langchain::Vectorsearch
27
26
 
28
27
  @index_name = index_name
29
28
 
30
- super(llm: llm, llm_api_key: llm_api_key)
29
+ super(llm: llm)
31
30
  end
32
31
 
33
32
  # Add a list of texts to the index
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
38
37
  ::Chroma::Resources::Embedding.new(
39
38
  # TODO: Add support for passing your own IDs
40
39
  id: SecureRandom.uuid,
41
- embedding: llm_client.embed(text: text),
40
+ embedding: llm.embed(text: text),
42
41
  # TODO: Add support for passing metadata
43
42
  metadata: [], # metadatas[index],
44
43
  document: text # Do we actually need to store the whole original document?
@@ -63,7 +62,7 @@ module Langchain::Vectorsearch
63
62
  query:,
64
63
  k: 4
65
64
  )
66
- embedding = llm_client.embed(text: query)
65
+ embedding = llm.embed(text: query)
67
66
 
68
67
  similarity_search_by_vector(
69
68
  embedding: embedding,
@@ -101,7 +100,7 @@ module Langchain::Vectorsearch
101
100
 
102
101
  prompt = generate_prompt(question: question, context: context)
103
102
 
104
- llm_client.chat(prompt: prompt)
103
+ llm.chat(prompt: prompt)
105
104
  end
106
105
 
107
106
  private
@@ -11,14 +11,14 @@ module Langchain::Vectorsearch
11
11
  # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:)
12
12
  #
13
13
 
14
- def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
14
+ def initialize(url:, index_name:, llm:, api_key: nil)
15
15
  depends_on "milvus"
16
16
  require "milvus"
17
17
 
18
18
  @client = ::Milvus::Client.new(url: url)
19
19
  @index_name = index_name
20
20
 
21
- super(llm: llm, llm_api_key: llm_api_key)
21
+ super(llm: llm)
22
22
  end
23
23
 
24
24
  def add_texts(texts:)
@@ -33,7 +33,7 @@ module Langchain::Vectorsearch
33
33
  }, {
34
34
  field_name: "vectors",
35
35
  type: ::Milvus::DATA_TYPES["binary_vector"],
36
- field: Array(texts).map { |text| llm_client.embed(text: text) }
36
+ field: Array(texts).map { |text| llm.embed(text: text) }
37
37
  }
38
38
  ]
39
39
  )
@@ -78,7 +78,7 @@ module Langchain::Vectorsearch
78
78
  end
79
79
 
80
80
  def similarity_search(query:, k: 4)
81
- embedding = llm_client.embed(text: query)
81
+ embedding = llm.embed(text: query)
82
82
 
83
83
  similarity_search_by_vector(
84
84
  embedding: embedding,
@@ -22,10 +22,9 @@ module Langchain::Vectorsearch
22
22
 
23
23
  # @param url [String] The URL of the PostgreSQL database
24
24
  # @param index_name [String] The name of the table to use for the index
25
- # @param llm [String] The URL of the Language Layer API
26
- # @param llm_api_key [String] The API key for the Language Layer API
25
+ # @param llm [Object] The LLM client to use
27
26
  # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
28
- def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
27
+ def initialize(url:, index_name:, llm:, api_key: nil)
29
28
  require "pg"
30
29
  require "pgvector"
31
30
 
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
38
37
  @quoted_table_name = @client.quote_ident(index_name)
39
38
  @operator = OPERATORS[DEFAULT_OPERATOR]
40
39
 
41
- super(llm: llm, llm_api_key: llm_api_key)
40
+ super(llm: llm)
42
41
  end
43
42
 
44
43
  # Add a list of texts to the index
@@ -46,7 +45,7 @@ module Langchain::Vectorsearch
46
45
  # @return [PG::Result] The response from the database
47
46
  def add_texts(texts:)
48
47
  data = texts.flat_map do |text|
49
- [text, llm_client.embed(text: text)]
48
+ [text, llm.embed(text: text)]
50
49
  end
51
50
  values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
52
51
  client.exec_params(
@@ -75,7 +74,7 @@ module Langchain::Vectorsearch
75
74
  # @param k [Integer] The number of top results to return
76
75
  # @return [Array<Hash>] The results of the search
77
76
  def similarity_search(query:, k: 4)
78
- embedding = llm_client.embed(text: query)
77
+ embedding = llm.embed(text: query)
79
78
 
80
79
  similarity_search_by_vector(
81
80
  embedding: embedding,
@@ -113,7 +112,7 @@ module Langchain::Vectorsearch
113
112
 
114
113
  prompt = generate_prompt(question: question, context: context)
115
114
 
116
- llm_client.chat(prompt: prompt)
115
+ llm.chat(prompt: prompt)
117
116
  end
118
117
  end
119
118
  end
@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
15
15
  # @param environment [String] The environment to use
16
16
  # @param api_key [String] The API key to use
17
17
  # @param index_name [String] The name of the index to use
18
- # @param llm [Symbol] The LLM to use
19
- # @param llm_api_key [String] The API key for the LLM
20
- def initialize(environment:, api_key:, index_name:, llm:, llm_api_key:)
18
+ # @param llm [Object] The LLM client to use
19
+ def initialize(environment:, api_key:, index_name:, llm:)
21
20
  depends_on "pinecone"
22
21
  require "pinecone"
23
22
 
@@ -29,7 +28,7 @@ module Langchain::Vectorsearch
29
28
  @client = ::Pinecone::Client.new
30
29
  @index_name = index_name
31
30
 
32
- super(llm: llm, llm_api_key: llm_api_key)
31
+ super(llm: llm)
33
32
  end
34
33
 
35
34
  # Add a list of texts to the index
@@ -43,7 +42,7 @@ module Langchain::Vectorsearch
43
42
  # TODO: Allows passing in your own IDs
44
43
  id: SecureRandom.uuid,
45
44
  metadata: metadata || {content: text},
46
- values: llm_client.embed(text: text)
45
+ values: llm.embed(text: text)
47
46
  }
48
47
  end
49
48
 
@@ -74,7 +73,7 @@ module Langchain::Vectorsearch
74
73
  namespace: "",
75
74
  filter: nil
76
75
  )
77
- embedding = llm_client.embed(text: query)
76
+ embedding = llm.embed(text: query)
78
77
 
79
78
  similarity_search_by_vector(
80
79
  embedding: embedding,
@@ -121,7 +120,7 @@ module Langchain::Vectorsearch
121
120
 
122
121
  prompt = generate_prompt(question: question, context: context)
123
122
 
124
- llm_client.chat(prompt: prompt)
123
+ llm.chat(prompt: prompt)
125
124
  end
126
125
  end
127
126
  end
@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
15
15
  # @param url [String] The URL of the Qdrant server
16
16
  # @param api_key [String] The API key to use
17
17
  # @param index_name [String] The name of the index to use
18
- # @param llm [Symbol] The LLM to use
19
- # @param llm_api_key [String] The API key for the LLM
20
- def initialize(url:, api_key:, index_name:, llm:, llm_api_key:)
18
+ # @param llm [Object] The LLM client to use
19
+ def initialize(url:, api_key:, index_name:, llm:)
21
20
  depends_on "qdrant-ruby"
22
21
  require "qdrant"
23
22
 
@@ -27,7 +26,7 @@ module Langchain::Vectorsearch
27
26
  )
28
27
  @index_name = index_name
29
28
 
30
- super(llm: llm, llm_api_key: llm_api_key)
29
+ super(llm: llm)
31
30
  end
32
31
 
33
32
  # Add a list of texts to the index
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
38
37
 
39
38
  Array(texts).each do |text|
40
39
  batch[:ids].push(SecureRandom.uuid)
41
- batch[:vectors].push(llm_client.embed(text: text))
40
+ batch[:vectors].push(llm.embed(text: text))
42
41
  batch[:payloads].push({content: text})
43
42
  end
44
43
 
@@ -68,7 +67,7 @@ module Langchain::Vectorsearch
68
67
  query:,
69
68
  k: 4
70
69
  )
71
- embedding = llm_client.embed(text: query)
70
+ embedding = llm.embed(text: query)
72
71
 
73
72
  similarity_search_by_vector(
74
73
  embedding: embedding,
@@ -105,7 +104,7 @@ module Langchain::Vectorsearch
105
104
 
106
105
  prompt = generate_prompt(question: question, context: context)
107
106
 
108
- llm_client.chat(prompt: prompt)
107
+ llm.chat(prompt: prompt)
109
108
  end
110
109
  end
111
110
  end
@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
15
15
  # @param url [String] The URL of the Weaviate instance
16
16
  # @param api_key [String] The API key to use
17
17
  # @param index_name [String] The name of the index to use
18
- # @param llm [Symbol] The LLM to use
19
- # @param llm_api_key [String] The API key for the LLM
20
- def initialize(url:, api_key:, index_name:, llm:, llm_api_key:)
18
+ # @param llm [Object] The LLM client to use
19
+ def initialize(url:, api_key:, index_name:, llm:)
21
20
  depends_on "weaviate-ruby"
22
21
  require "weaviate"
23
22
 
@@ -27,7 +26,7 @@ module Langchain::Vectorsearch
27
26
  )
28
27
  @index_name = index_name
29
28
 
30
- super(llm: llm, llm_api_key: llm_api_key)
29
+ super(llm: llm)
31
30
  end
32
31
 
33
32
  # Add a list of texts to the index
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
38
37
  {
39
38
  class: index_name,
40
39
  properties: {content: text},
41
- vector: llm_client.embed(text: text)
40
+ vector: llm.embed(text: text)
42
41
  }
43
42
  end
44
43
 
@@ -67,7 +66,7 @@ module Langchain::Vectorsearch
67
66
  # @param k [Integer|String] The number of results to return
68
67
  # @return [Hash] The search results
69
68
  def similarity_search(query:, k: 4)
70
- embedding = llm_client.embed(text: query)
69
+ embedding = llm.embed(text: query)
71
70
 
72
71
  similarity_search_by_vector(embedding: embedding, k: k)
73
72
  end
@@ -100,7 +99,7 @@ module Langchain::Vectorsearch
100
99
 
101
100
  prompt = generate_prompt(question: question, context: context)
102
101
 
103
- llm_client.chat(prompt: prompt)
102
+ llm.chat(prompt: prompt)
104
103
  end
105
104
  end
106
105
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.4.2"
4
+ VERSION = "0.5.0"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -24,6 +24,7 @@ module Langchain
24
24
  module Agent
25
25
  autoload :Base, "langchain/agent/base"
26
26
  autoload :ChainOfThoughtAgent, "langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb"
27
+ autoload :SQLQueryAgent, "langchain/agent/sql_query_agent/sql_query_agent.rb"
27
28
  end
28
29
 
29
30
  module Tool
@@ -32,6 +33,7 @@ module Langchain
32
33
  autoload :RubyCodeInterpreter, "langchain/tool/ruby_code_interpreter"
33
34
  autoload :SerpApi, "langchain/tool/serp_api"
34
35
  autoload :Wikipedia, "langchain/tool/wikipedia"
36
+ autoload :Database, "langchain/tool/database"
35
37
  end
36
38
 
37
39
  module Processors
@@ -43,6 +45,7 @@ module Langchain
43
45
  autoload :JSONL, "langchain/processors/jsonl"
44
46
  autoload :PDF, "langchain/processors/pdf"
45
47
  autoload :Text, "langchain/processors/text"
48
+ autoload :Xlsx, "langchain/processors/xlsx"
46
49
  end
47
50
 
48
51
  module Utils
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-03 00:00:00.000000000 Z
11
+ date: 2023-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: tiktoken_ruby
@@ -318,6 +318,20 @@ dependencies:
318
318
  - - "~>"
319
319
  - !ruby/object:Gem::Version
320
320
  version: 0.9.0
321
+ - !ruby/object:Gem::Dependency
322
+ name: roo
323
+ requirement: !ruby/object:Gem::Requirement
324
+ requirements:
325
+ - - "~>"
326
+ - !ruby/object:Gem::Version
327
+ version: 2.10.0
328
+ type: :development
329
+ prerelease: false
330
+ version_requirements: !ruby/object:Gem::Requirement
331
+ requirements:
332
+ - - "~>"
333
+ - !ruby/object:Gem::Version
334
+ version: 2.10.0
321
335
  - !ruby/object:Gem::Dependency
322
336
  name: ruby-openai
323
337
  requirement: !ruby/object:Gem::Requirement
@@ -346,6 +360,20 @@ dependencies:
346
360
  - - "~>"
347
361
  - !ruby/object:Gem::Version
348
362
  version: 1.0.4
363
+ - !ruby/object:Gem::Dependency
364
+ name: sequel
365
+ requirement: !ruby/object:Gem::Requirement
366
+ requirements:
367
+ - - "~>"
368
+ - !ruby/object:Gem::Version
369
+ version: 5.68.0
370
+ type: :development
371
+ prerelease: false
372
+ version_requirements: !ruby/object:Gem::Requirement
373
+ requirements:
374
+ - - "~>"
375
+ - !ruby/object:Gem::Version
376
+ version: 5.68.0
349
377
  - !ruby/object:Gem::Dependency
350
378
  name: weaviate-ruby
351
379
  requirement: !ruby/object:Gem::Requirement
@@ -383,6 +411,7 @@ extra_rdoc_files: []
383
411
  files:
384
412
  - ".env.example"
385
413
  - ".rspec"
414
+ - ".rubocop.yml"
386
415
  - CHANGELOG.md
387
416
  - Gemfile
388
417
  - Gemfile.lock
@@ -395,10 +424,14 @@ files:
395
424
  - examples/store_and_query_with_pinecone.rb
396
425
  - examples/store_and_query_with_qdrant.rb
397
426
  - examples/store_and_query_with_weaviate.rb
427
+ - lefthook.yml
398
428
  - lib/langchain.rb
399
429
  - lib/langchain/agent/base.rb
400
430
  - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb
401
431
  - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
432
+ - lib/langchain/agent/sql_query_agent/sql_query_agent.rb
433
+ - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json
434
+ - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json
402
435
  - lib/langchain/data.rb
403
436
  - lib/langchain/dependency_helper.rb
404
437
  - lib/langchain/llm/ai21.rb
@@ -418,12 +451,14 @@ files:
418
451
  - lib/langchain/processors/jsonl.rb
419
452
  - lib/langchain/processors/pdf.rb
420
453
  - lib/langchain/processors/text.rb
454
+ - lib/langchain/processors/xlsx.rb
421
455
  - lib/langchain/prompt/base.rb
422
456
  - lib/langchain/prompt/few_shot_prompt_template.rb
423
457
  - lib/langchain/prompt/loading.rb
424
458
  - lib/langchain/prompt/prompt_template.rb
425
459
  - lib/langchain/tool/base.rb
426
460
  - lib/langchain/tool/calculator.rb
461
+ - lib/langchain/tool/database.rb
427
462
  - lib/langchain/tool/ruby_code_interpreter.rb
428
463
  - lib/langchain/tool/serp_api.rb
429
464
  - lib/langchain/tool/wikipedia.rb