langchainrb 0.4.2 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.env.example +2 -1
- data/.rubocop.yml +11 -0
- data/CHANGELOG.md +11 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +8 -1
- data/README.md +44 -6
- data/examples/pdf_store_and_query_with_chroma.rb +1 -2
- data/examples/store_and_query_with_pinecone.rb +1 -2
- data/examples/store_and_query_with_qdrant.rb +1 -2
- data/examples/store_and_query_with_weaviate.rb +1 -2
- data/lefthook.yml +5 -0
- data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb +16 -15
- data/lib/langchain/agent/sql_query_agent/sql_query_agent.rb +82 -0
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json +10 -0
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json +10 -0
- data/lib/langchain/llm/base.rb +0 -18
- data/lib/langchain/llm/replicate.rb +1 -2
- data/lib/langchain/processors/xlsx.rb +27 -0
- data/lib/langchain/prompt/base.rb +6 -2
- data/lib/langchain/prompt/loading.rb +5 -1
- data/lib/langchain/tool/base.rb +28 -15
- data/lib/langchain/tool/calculator.rb +6 -2
- data/lib/langchain/tool/database.rb +60 -0
- data/lib/langchain/tool/ruby_code_interpreter.rb +1 -1
- data/lib/langchain/tool/serp_api.rb +33 -10
- data/lib/langchain/tool/wikipedia.rb +1 -1
- data/lib/langchain/vectorsearch/base.rb +7 -14
- data/lib/langchain/vectorsearch/chroma.rb +6 -7
- data/lib/langchain/vectorsearch/milvus.rb +4 -4
- data/lib/langchain/vectorsearch/pgvector.rb +6 -7
- data/lib/langchain/vectorsearch/pinecone.rb +6 -7
- data/lib/langchain/vectorsearch/qdrant.rb +6 -7
- data/lib/langchain/vectorsearch/weaviate.rb +6 -7
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +3 -0
- metadata +37 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2673339c5bbe874a8bdf1722a2556f26d9fe13394875af914b5203632714f2f0
|
4
|
+
data.tar.gz: 216ab880c2c6094b267cbf3efcaf19ce74bea7cc665442fbf2b23108a9cb087b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 408cf6194d85a4af076adbfd8be4a360d094200d127672f218d8914fbcd67d1a8a803645219532f66d4e79214571d61b629570df071de871b013e2d9d6c0d3a5
|
7
|
+
data.tar.gz: 123016bd42d1d2539c13f7d68074ddc19dc8a5880ae0b02b103e20bf7f058adfe2659beb90263a852bbf38b4b169622a1b7ac8a245c3791ab9b9ae8f8fc4e3cb
|
data/.env.example
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
AI21_API_KEY=
|
1
2
|
CHROMA_URL=
|
2
3
|
COHERE_API_KEY=
|
3
4
|
HUGGING_FACE_API_KEY=
|
@@ -6,10 +7,10 @@ OPENAI_API_KEY=
|
|
6
7
|
GOOGLE_PALM_API_KEY=
|
7
8
|
PINECONE_API_KEY=
|
8
9
|
PINECONE_ENVIRONMENT=
|
10
|
+
POSTGRES_URL=
|
9
11
|
REPLICATE_API_KEY=
|
10
12
|
QDRANT_API_KEY=
|
11
13
|
QDRANT_URL=
|
12
14
|
SERPAPI_API_KEY=
|
13
15
|
WEAVIATE_API_KEY=
|
14
16
|
WEAVIATE_URL=
|
15
|
-
POSTGRES_URL=
|
data/.rubocop.yml
ADDED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.5.1] - 2023-06-06
|
4
|
+
- 🛠️ Tools
|
5
|
+
- Modified Tool usage. Agents now accept Tool instances instead of Tool name strings.
|
6
|
+
|
7
|
+
## [0.5.0] - 2023-06-05
|
8
|
+
- [BREAKING] LLMs are now passed as objects to Vectorsearch classes instead of `llm: :name, llm_api_key:` previously
|
9
|
+
- 📋 Prompts
|
10
|
+
- YAML prompt templates are now supported
|
11
|
+
- 🚚 Loaders
|
12
|
+
- Introduce `Langchain::Processors::Xlsx` to parse .xlsx files
|
13
|
+
|
3
14
|
## [0.4.2] - 2023-06-03
|
4
15
|
- 🗣️ LLMs
|
5
16
|
- Introducing support for AI21
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.
|
4
|
+
langchainrb (0.5.1)
|
5
5
|
colorize (~> 0.8.1)
|
6
6
|
tiktoken_ruby (~> 0.0.5)
|
7
7
|
|
@@ -219,6 +219,9 @@ GEM
|
|
219
219
|
faraday-multipart
|
220
220
|
faraday-retry
|
221
221
|
rexml (3.2.5)
|
222
|
+
roo (2.10.0)
|
223
|
+
nokogiri (~> 1)
|
224
|
+
rubyzip (>= 1.3.0, < 3.0.0)
|
222
225
|
rspec (3.12.0)
|
223
226
|
rspec-core (~> 3.12.0)
|
224
227
|
rspec-expectations (~> 3.12.0)
|
@@ -257,6 +260,7 @@ GEM
|
|
257
260
|
rubyzip (2.3.2)
|
258
261
|
safe_ruby (1.0.4)
|
259
262
|
childprocess (>= 0.3.9)
|
263
|
+
sequel (5.68.0)
|
260
264
|
standard (1.28.2)
|
261
265
|
language_server-protocol (~> 3.17.0.2)
|
262
266
|
lint_roller (~> 1.0)
|
@@ -321,9 +325,12 @@ DEPENDENCIES
|
|
321
325
|
rake (~> 13.0)
|
322
326
|
rdiscount
|
323
327
|
replicate-ruby (~> 0.2.2)
|
328
|
+
roo (~> 2.10.0)
|
324
329
|
rspec (~> 3.0)
|
330
|
+
rubocop
|
325
331
|
ruby-openai (~> 4.0.0)
|
326
332
|
safe_ruby (~> 1.0.4)
|
333
|
+
sequel (~> 5.68.0)
|
327
334
|
standardrb
|
328
335
|
weaviate-ruby (~> 0.8.0)
|
329
336
|
wikipedia-client (~> 1.17.0)
|
data/README.md
CHANGED
@@ -47,8 +47,7 @@ Pick the vector search database you'll be using and instantiate the client:
|
|
47
47
|
client = Langchain::Vectorsearch::Weaviate.new(
|
48
48
|
url: ENV["WEAVIATE_URL"],
|
49
49
|
api_key: ENV["WEAVIATE_API_KEY"],
|
50
|
-
llm: :
|
51
|
-
llm_api_key: ENV["OPENAI_API_KEY"]
|
50
|
+
llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
52
51
|
)
|
53
52
|
|
54
53
|
# You can instantiate any other supported vector search database:
|
@@ -151,6 +150,12 @@ Add `"google_palm_api", "~> 0.1.0"` to your Gemfile.
|
|
151
150
|
google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
|
152
151
|
```
|
153
152
|
|
153
|
+
#### AI21
|
154
|
+
Add `gem "ai21", "~> 0.2.0"` to your Gemfile.
|
155
|
+
```ruby
|
156
|
+
ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
|
157
|
+
```
|
158
|
+
|
154
159
|
### Using Prompts 📋
|
155
160
|
|
156
161
|
#### Prompt Templates
|
@@ -172,9 +177,9 @@ prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke a
|
|
172
177
|
Creating a PromptTemplate using just a prompt and no input_variables:
|
173
178
|
|
174
179
|
```ruby
|
175
|
-
prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a
|
176
|
-
prompt.input_variables # [
|
177
|
-
prompt.format
|
180
|
+
prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a funny joke about chickens.")
|
181
|
+
prompt.input_variables # []
|
182
|
+
prompt.format # "Tell me a funny joke about chickens."
|
178
183
|
```
|
179
184
|
|
180
185
|
Save prompt template to JSON file:
|
@@ -236,6 +241,13 @@ prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_s
|
|
236
241
|
prompt.prefix # "Write antonyms for the following words."
|
237
242
|
```
|
238
243
|
|
244
|
+
Loading a new prompt template using a YAML file:
|
245
|
+
|
246
|
+
```ruby
|
247
|
+
prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
|
248
|
+
prompt.input_variables #=> ["adjective", "content"]
|
249
|
+
```
|
250
|
+
|
239
251
|
### Using Agents 🤖
|
240
252
|
Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
|
241
253
|
|
@@ -244,7 +256,15 @@ Agents are semi-autonomous bots that can respond to user questions and use avail
|
|
244
256
|
Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
|
245
257
|
|
246
258
|
```ruby
|
247
|
-
|
259
|
+
search_tool = Langchain::Tool::SerpApi.new(api_key: ENV["SERPAPI_API_KEY"])
|
260
|
+
calculator = Langchain::Tool::Calculator.new
|
261
|
+
|
262
|
+
openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
263
|
+
|
264
|
+
agent = Langchain::Agent::ChainOfThoughtAgent.new(
|
265
|
+
llm: openai,
|
266
|
+
tools: [search_tool, calculator]
|
267
|
+
)
|
248
268
|
|
249
269
|
agent.tools
|
250
270
|
# => ["search", "calculator"]
|
@@ -254,6 +274,21 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
|
|
254
274
|
#=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
|
255
275
|
```
|
256
276
|
|
277
|
+
#### SQL-Query Agent
|
278
|
+
|
279
|
+
Add `gem "sequel"` to your Gemfile
|
280
|
+
|
281
|
+
```ruby
|
282
|
+
database = Langchain::Tool::Database.new(connection_string: "postgres://user:password@localhost:5432/db_name")
|
283
|
+
|
284
|
+
agent = Langchain::Agent::SQLQueryAgent.new(llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]), db: database)
|
285
|
+
|
286
|
+
```
|
287
|
+
```ruby
|
288
|
+
agent.ask(question: "How many users have a name with length greater than 5 in the users table?")
|
289
|
+
#=> "14 users have a name with length greater than 5 in the users table."
|
290
|
+
```
|
291
|
+
|
257
292
|
#### Demo
|
258
293
|
![May-12-2023 13-09-13](https://github.com/andreibondarev/langchainrb/assets/541665/6bad4cd9-976c-420f-9cf9-b85bf84f7eaf)
|
259
294
|
|
@@ -264,6 +299,7 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
|
|
264
299
|
| Name | Description | ENV Requirements | Gem Requirements |
|
265
300
|
| ------------ | :------------------------------------------------: | :-----------------------------------------------------------: | :---------------------------------------: |
|
266
301
|
| "calculator" | Useful for getting the result of a math expression | | `gem "eqn", "~> 1.6.5"` |
|
302
|
+
| "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
|
267
303
|
| "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
|
268
304
|
| "search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
|
269
305
|
| "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |
|
@@ -298,6 +334,7 @@ Langchain::Loader.load('https://www.example.com/file.pdf')
|
|
298
334
|
| JSON | Langchain::Processors::JSON | |
|
299
335
|
| JSONL | Langchain::Processors::JSONL | |
|
300
336
|
| csv | Langchain::Processors::CSV | |
|
337
|
+
| xlsx | Langchain::Processors::Xlsx | `gem "roo", "~> 2.10.0"` |
|
301
338
|
|
302
339
|
## Examples
|
303
340
|
Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
|
@@ -317,6 +354,7 @@ Langchain.logger.level = :info
|
|
317
354
|
2. `cp .env.example .env`, then fill out the environment variables in `.env`
|
318
355
|
3. `bundle exec rake` to ensure that the tests pass and to run standardrb
|
319
356
|
4. `bin/console` to load the gem in a REPL session. Feel free to add your own instances of LLMs, Tools, Agents, etc. and experiment with them.
|
357
|
+
5. Optionally, install lefthook git hooks for pre-commit to auto lint: `gem install lefthook && lefthook install -f`
|
320
358
|
|
321
359
|
## Community
|
322
360
|
Join us in the [Ruby AI Builders](https://discord.gg/SBmjAnKT) Discord community in #langchainrb
|
@@ -7,8 +7,7 @@ require "langchain"
|
|
7
7
|
chroma = Vectorsearch::Chroma.new(
|
8
8
|
url: ENV["CHROMA_URL"],
|
9
9
|
index_name: "documents",
|
10
|
-
llm: :
|
11
|
-
llm_api_key: ENV["OPENAI_API_KEY"]
|
10
|
+
llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
12
11
|
)
|
13
12
|
|
14
13
|
# Create the default schema.
|
@@ -8,8 +8,7 @@ pinecone = Vectorsearch::Pinecone.new(
|
|
8
8
|
environment: ENV["PINECONE_ENVIRONMENT"],
|
9
9
|
api_key: ENV["PINECONE_API_KEY"],
|
10
10
|
index_name: "recipes",
|
11
|
-
llm: :
|
12
|
-
llm_api_key: ENV["OPENAI_API_KEY"]
|
11
|
+
llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
13
12
|
)
|
14
13
|
|
15
14
|
# Create the default schema.
|
@@ -8,8 +8,7 @@ qdrant = Vectorsearch::Qdrant.new(
|
|
8
8
|
url: ENV["QDRANT_URL"],
|
9
9
|
api_key: ENV["QDRANT_API_KEY"],
|
10
10
|
index_name: "recipes",
|
11
|
-
llm: :
|
12
|
-
llm_api_key: ENV["COHERE_API_KEY"]
|
11
|
+
llm: Langchain::LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
|
13
12
|
)
|
14
13
|
|
15
14
|
# Create the default schema.
|
@@ -8,8 +8,7 @@ weaviate = Vectorsearch::Weaviate.new(
|
|
8
8
|
url: ENV["WEAVIATE_URL"],
|
9
9
|
api_key: ENV["WEAVIATE_API_KEY"],
|
10
10
|
index_name: "Recipes",
|
11
|
-
llm: :
|
12
|
-
llm_api_key: ENV["OPENAI_API_KEY"]
|
11
|
+
llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
13
12
|
)
|
14
13
|
|
15
14
|
# Create the default schema. A text field `content` will be used.
|
data/lefthook.yml
ADDED
@@ -2,23 +2,19 @@
|
|
2
2
|
|
3
3
|
module Langchain::Agent
|
4
4
|
class ChainOfThoughtAgent < Base
|
5
|
-
attr_reader :llm, :
|
5
|
+
attr_reader :llm, :tools
|
6
6
|
|
7
7
|
# Initializes the Agent
|
8
8
|
#
|
9
|
-
# @param llm [
|
10
|
-
# @param llm_api_key [String] The API key for the LLM
|
9
|
+
# @param llm [Object] The LLM client to use
|
11
10
|
# @param tools [Array] The tools to use
|
12
11
|
# @return [ChainOfThoughtAgent] The Agent::ChainOfThoughtAgent instance
|
13
|
-
def initialize(llm:,
|
14
|
-
Langchain::LLM::Base.validate_llm!(llm: llm)
|
12
|
+
def initialize(llm:, tools: [])
|
15
13
|
Langchain::Tool::Base.validate_tools!(tools: tools)
|
16
14
|
|
17
|
-
@llm = llm
|
18
|
-
@llm_api_key = llm_api_key
|
19
15
|
@tools = tools
|
20
16
|
|
21
|
-
@
|
17
|
+
@llm = llm
|
22
18
|
end
|
23
19
|
|
24
20
|
# Validate tools when they're re-assigned
|
@@ -42,8 +38,8 @@ module Langchain::Agent
|
|
42
38
|
)
|
43
39
|
|
44
40
|
loop do
|
45
|
-
Langchain.logger.info("[#{self.class.name}]".red + ": Sending the prompt to the #{llm} LLM")
|
46
|
-
response =
|
41
|
+
Langchain.logger.info("[#{self.class.name}]".red + ": Sending the prompt to the #{llm.class} LLM")
|
42
|
+
response = llm.complete(
|
47
43
|
prompt: prompt,
|
48
44
|
stop_sequences: ["Observation:"],
|
49
45
|
max_tokens: 500
|
@@ -59,10 +55,11 @@ module Langchain::Agent
|
|
59
55
|
# Find the input to the action in the "Action Input: [action_input]" format
|
60
56
|
action_input = response.match(/Action Input: "?(.*)"?/)&.send(:[], -1)
|
61
57
|
|
62
|
-
#
|
63
|
-
tool =
|
64
|
-
Langchain.logger.info("[#{self.class.name}]".red + ": Invoking \"#{tool}\" Tool with \"#{action_input}\"")
|
58
|
+
# Find the Tool whose name matches the requested action
|
59
|
+
tool = tools.find { |tool| tool.tool_name == action.strip }
|
60
|
+
Langchain.logger.info("[#{self.class.name}]".red + ": Invoking \"#{tool.class}\" Tool with \"#{action_input}\"")
|
65
61
|
|
62
|
+
# Call `execute` with action_input as the input
|
66
63
|
result = tool.execute(input: action_input)
|
67
64
|
|
68
65
|
# Append the Observation to the prompt
|
@@ -85,12 +82,16 @@ module Langchain::Agent
|
|
85
82
|
# @param tools [Array] Tools to use
|
86
83
|
# @return [String] Prompt
|
87
84
|
def create_prompt(question:, tools:)
|
85
|
+
tool_list = tools.map(&:tool_name)
|
86
|
+
|
88
87
|
prompt_template.format(
|
89
88
|
date: Date.today.strftime("%B %d, %Y"),
|
90
89
|
question: question,
|
91
|
-
tool_names: "[#{
|
90
|
+
tool_names: "[#{tool_list.join(", ")}]",
|
92
91
|
tools: tools.map do |tool|
|
93
|
-
|
92
|
+
tool_name = tool.tool_name
|
93
|
+
tool_description = tool.class.const_get(:DESCRIPTION)
|
94
|
+
"#{tool_name}: #{tool_description}"
|
94
95
|
end.join("\n")
|
95
96
|
)
|
96
97
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::Agent
|
4
|
+
class SQLQueryAgent < Base
|
5
|
+
attr_reader :llm, :db, :schema
|
6
|
+
|
7
|
+
#
|
8
|
+
# Initializes the Agent
|
9
|
+
#
|
10
|
+
# @param llm [Object] The LLM client to use
|
11
|
+
# @param db [Object] Database connection info
|
12
|
+
#
|
13
|
+
def initialize(llm:, db:)
|
14
|
+
@llm = llm
|
15
|
+
@db = db
|
16
|
+
@schema = @db.schema
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
# Ask a question and get an answer
|
21
|
+
#
|
22
|
+
# @param question [String] Question to ask the LLM/Database
|
23
|
+
# @return [String] Answer to the question
|
24
|
+
#
|
25
|
+
def ask(question:)
|
26
|
+
prompt = create_prompt_for_sql(question: question)
|
27
|
+
|
28
|
+
# Get the SQL string to execute
|
29
|
+
Langchain.logger.info("[#{self.class.name}]".red + ": Passing the inital prompt to the #{llm.class} LLM")
|
30
|
+
sql_string = llm.complete(prompt: prompt, max_tokens: 500)
|
31
|
+
|
32
|
+
# Execute the SQL string and collect the results
|
33
|
+
Langchain.logger.info("[#{self.class.name}]".red + ": Passing the SQL to the Database: #{sql_string}")
|
34
|
+
results = db.execute(input: sql_string)
|
35
|
+
|
36
|
+
# Pass the results and get the LLM to synthesize the answer to the question
|
37
|
+
Langchain.logger.info("[#{self.class.name}]".red + ": Passing the synthesize prompt to the #{llm.class} LLM with results: #{results}")
|
38
|
+
prompt2 = create_prompt_for_answer(question: question, sql_query: sql_string, results: results)
|
39
|
+
llm.complete(prompt: prompt2, max_tokens: 500)
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
# Create the initial prompt to pass to the LLM
|
45
|
+
# @param question [String] Question to ask
|
46
|
+
# @return [String] Prompt
|
47
|
+
def create_prompt_for_sql(question:)
|
48
|
+
prompt_template_sql.format(
|
49
|
+
dialect: "standard SQL",
|
50
|
+
schema: schema,
|
51
|
+
question: question
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Load the PromptTemplate from the JSON file
|
56
|
+
# @return [PromptTemplate] PromptTemplate instance
|
57
|
+
def prompt_template_sql
|
58
|
+
Langchain::Prompt.load_from_path(
|
59
|
+
file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json")
|
60
|
+
)
|
61
|
+
end
|
62
|
+
|
63
|
+
# Create the second prompt to pass to the LLM
|
64
|
+
# @param question [String] Question to ask
|
65
|
+
# @return [String] Prompt
|
66
|
+
def create_prompt_for_answer(question:, sql_query:, results:)
|
67
|
+
prompt_template_answer.format(
|
68
|
+
question: question,
|
69
|
+
sql_query: sql_query,
|
70
|
+
results: results
|
71
|
+
)
|
72
|
+
end
|
73
|
+
|
74
|
+
# Load the PromptTemplate from the JSON file
|
75
|
+
# @return [PromptTemplate] PromptTemplate instance
|
76
|
+
def prompt_template_answer
|
77
|
+
Langchain::Prompt.load_from_path(
|
78
|
+
file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json")
|
79
|
+
)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
{
|
2
|
+
"_type": "prompt",
|
3
|
+
"template":
|
4
|
+
"Given an input question and results of a SQL query, look at the results and return the answer. Use the following format:\nQuestion: {question}\nThe SQL query: {sql_query}\nResult of the SQLQuery: {results}\nFinal answer: Final answer here",
|
5
|
+
"input_variables": [
|
6
|
+
"question",
|
7
|
+
"sql_query",
|
8
|
+
"results"
|
9
|
+
]
|
10
|
+
}
|
@@ -0,0 +1,10 @@
|
|
1
|
+
{
|
2
|
+
"_type": "prompt",
|
3
|
+
"template":
|
4
|
+
"Given an input question, create a syntactically correct {dialect} query to run, then return the query in valid SQL.\nNever query for all the columns from a specific table, only ask for a the few relevant columns given the question.\nPay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist. Pay attention to which column is in which table. Also, qualify column names with the table name when needed.\nOnly use the tables listed below.\n{schema}\nUse the following format:\nQuestion: {question}\nSQLQuery:",
|
5
|
+
"input_variables": [
|
6
|
+
"dialect",
|
7
|
+
"schema",
|
8
|
+
"question"
|
9
|
+
]
|
10
|
+
}
|
data/lib/langchain/llm/base.rb
CHANGED
@@ -6,15 +6,6 @@ module Langchain::LLM
|
|
6
6
|
|
7
7
|
attr_reader :client
|
8
8
|
|
9
|
-
# Currently supported LLMs
|
10
|
-
LLMS = {
|
11
|
-
cohere: "Cohere",
|
12
|
-
google_palm: "GooglePalm",
|
13
|
-
huggingface: "HuggingFace",
|
14
|
-
openai: "OpenAI",
|
15
|
-
replicate: "Replicate"
|
16
|
-
}.freeze
|
17
|
-
|
18
9
|
def default_dimension
|
19
10
|
self.class.const_get(:DEFAULTS).dig(:dimension)
|
20
11
|
end
|
@@ -38,14 +29,5 @@ module Langchain::LLM
|
|
38
29
|
def summarize(...)
|
39
30
|
raise NotImplementedError, "#{self.class.name} does not support summarization"
|
40
31
|
end
|
41
|
-
|
42
|
-
# Ensure that the LLM value passed in is supported
|
43
|
-
# @param llm [Symbol] The LLM to use
|
44
|
-
def self.validate_llm!(llm:)
|
45
|
-
# TODO: Fix so this works when `llm` value is a string instead of a symbol
|
46
|
-
unless Langchain::LLM::Base::LLMS.key?(llm)
|
47
|
-
raise ArgumentError, "LLM must be one of #{Langchain::LLM::Base::LLMS.keys}"
|
48
|
-
end
|
49
|
-
end
|
50
32
|
end
|
51
33
|
end
|
@@ -14,8 +14,7 @@ module Langchain::LLM
|
|
14
14
|
# chroma = Vectorsearch::Chroma.new(
|
15
15
|
# url: ENV["CHROMA_URL"],
|
16
16
|
# index_name: "...",
|
17
|
-
# llm: :
|
18
|
-
# llm_api_key: ENV["REPLICATE_API_KEY"],
|
17
|
+
# llm: Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
|
19
18
|
# )
|
20
19
|
|
21
20
|
DEFAULTS = {
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
module Processors
|
5
|
+
class Xlsx < Base
|
6
|
+
EXTENSIONS = [".xlsx", ".xlsm"].freeze
|
7
|
+
CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"].freeze
|
8
|
+
|
9
|
+
def initialize(*)
|
10
|
+
depends_on "roo"
|
11
|
+
require "roo"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Parse the document and return the text
|
15
|
+
# @param [File] data
|
16
|
+
# @return [Array<Array<String>>] Array of rows, each row is an array of cells
|
17
|
+
def parse(data)
|
18
|
+
xlsx_file = Roo::Spreadsheet.open(data)
|
19
|
+
xlsx_file.each_with_pagename.flat_map do |_, sheet|
|
20
|
+
sheet.map do |row|
|
21
|
+
row.map { |i| i.to_s.strip }
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require "strscan"
|
4
4
|
require "json"
|
5
|
+
require "yaml"
|
5
6
|
|
6
7
|
module Langchain::Prompt
|
7
8
|
class Base
|
@@ -52,10 +53,13 @@ module Langchain::Prompt
|
|
52
53
|
directory_path = save_path.dirname
|
53
54
|
FileUtils.mkdir_p(directory_path) unless directory_path.directory?
|
54
55
|
|
55
|
-
|
56
|
+
case save_path.extname
|
57
|
+
when ".json"
|
56
58
|
File.write(file_path, to_h.to_json)
|
59
|
+
when ".yaml", ".yml"
|
60
|
+
File.write(file_path, to_h.to_yaml)
|
57
61
|
else
|
58
|
-
raise ArgumentError, "#{file_path} must be json"
|
62
|
+
raise ArgumentError, "#{file_path} must be json or yaml file"
|
59
63
|
end
|
60
64
|
end
|
61
65
|
|
@@ -3,6 +3,7 @@
|
|
3
3
|
require "strscan"
|
4
4
|
require "pathname"
|
5
5
|
require "json"
|
6
|
+
require "yaml"
|
6
7
|
|
7
8
|
module Langchain::Prompt
|
8
9
|
TYPE_TO_LOADER = {
|
@@ -23,8 +24,11 @@ module Langchain::Prompt
|
|
23
24
|
def load_from_path(file_path:)
|
24
25
|
file_path = file_path.is_a?(String) ? Pathname.new(file_path) : file_path
|
25
26
|
|
26
|
-
|
27
|
+
case file_path.extname
|
28
|
+
when ".json"
|
27
29
|
config = JSON.parse(File.read(file_path))
|
30
|
+
when ".yaml", ".yml"
|
31
|
+
config = YAML.safe_load(File.read(file_path))
|
28
32
|
else
|
29
33
|
raise ArgumentError, "Got unsupported file type #{file_path.extname}"
|
30
34
|
end
|
data/lib/langchain/tool/base.rb
CHANGED
@@ -6,46 +6,59 @@ module Langchain::Tool
|
|
6
6
|
|
7
7
|
# How to add additional Tools?
|
8
8
|
# 1. Create a new file in lib/tool/your_tool_name.rb
|
9
|
-
# 2.
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
9
|
+
# 2. Create a class in the file that inherits from Langchain::Tool::Base
|
10
|
+
# 3. Add `NAME=` and `DESCRIPTION=` constants in your Tool class
|
11
|
+
# 4. Implement `execute(input:)` method in your tool class
|
12
|
+
# 5. Add your tool to the README.md
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
14
|
+
#
|
15
|
+
# Returns the NAME constant of the tool
|
16
|
+
#
|
17
|
+
# @return [String] tool name
|
18
|
+
#
|
19
|
+
def tool_name
|
20
|
+
self.class.const_get(:NAME)
|
21
|
+
end
|
19
22
|
|
23
|
+
#
|
24
|
+
# Sets the DESCRIPTION constant of the tool
|
25
|
+
#
|
26
|
+
# @param value [String] tool description
|
27
|
+
#
|
20
28
|
def self.description(value)
|
21
29
|
const_set(:DESCRIPTION, value.tr("\n", " ").strip)
|
22
30
|
end
|
23
31
|
|
32
|
+
#
|
24
33
|
# Instantiates and executes the tool and returns the answer
|
34
|
+
#
|
25
35
|
# @param input [String] input to the tool
|
26
36
|
# @return [String] answer
|
37
|
+
#
|
27
38
|
def self.execute(input:)
|
28
39
|
new.execute(input: input)
|
29
40
|
end
|
30
41
|
|
42
|
+
#
|
31
43
|
# Executes the tool and returns the answer
|
44
|
+
#
|
32
45
|
# @param input [String] input to the tool
|
33
46
|
# @return [String] answer
|
47
|
+
#
|
34
48
|
def execute(input:)
|
35
49
|
raise NotImplementedError, "Your tool must implement the `#execute(input:)` method that returns a string"
|
36
50
|
end
|
37
51
|
|
38
52
|
#
|
39
|
-
# Validates the list of
|
40
|
-
# @param tools [Array<
|
53
|
+
# Validates the list of tools or raises an error
|
54
|
+
# @param tools [Array<Langchain::Tool>] list of tools to be used
|
41
55
|
#
|
42
56
|
# @raise [ArgumentError] If any of the tools are not supported
|
43
57
|
#
|
44
58
|
def self.validate_tools!(tools:)
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
raise ArgumentError, "Unrecognized Tools: #{unrecognized_tools}"
|
59
|
+
# Check if the tool count is equal to unique tool count
|
60
|
+
if tools.count != tools.map(&:tool_name).uniq.count
|
61
|
+
raise ArgumentError, "Either tools are not unique or are conflicting with each other"
|
49
62
|
end
|
50
63
|
end
|
51
64
|
end
|
@@ -8,9 +8,10 @@ module Langchain::Tool
|
|
8
8
|
# Gem requirements:
|
9
9
|
# gem "eqn", "~> 1.6.5"
|
10
10
|
# gem "google_search_results", "~> 2.0.0"
|
11
|
-
# ENV requirements: ENV["SERPAPI_API_KEY"]
|
12
11
|
#
|
13
12
|
|
13
|
+
NAME = "calculator"
|
14
|
+
|
14
15
|
description <<~DESC
|
15
16
|
Useful for getting the result of a math expression.
|
16
17
|
|
@@ -33,7 +34,10 @@ module Langchain::Tool
|
|
33
34
|
rescue Eqn::ParseError, Eqn::NoVariableValueError
|
34
35
|
# Sometimes the input is not a pure math expression, e.g: "12F in Celsius"
|
35
36
|
# We can use the google answer box to evaluate this expression
|
36
|
-
|
37
|
+
# TODO: Find a better way to evaluate these natural-language expressions.
|
38
|
+
hash_results = Langchain::Tool::SerpApi
|
39
|
+
.new(api_key: ENV["SERPAPI_API_KEY"])
|
40
|
+
.execute_search(input: input)
|
37
41
|
hash_results.dig(:answer_box, :to)
|
38
42
|
end
|
39
43
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module Langchain::Tool
|
2
|
+
class Database < Base
|
3
|
+
#
|
4
|
+
# Connects to a database, executes SQL queries, and outputs DB schema for Agents to use
|
5
|
+
#
|
6
|
+
# Gem requirements: gem "sequel", "~> 5.68.0"
|
7
|
+
#
|
8
|
+
|
9
|
+
NAME = "database"
|
10
|
+
|
11
|
+
description <<~DESC
|
12
|
+
Useful for getting the result of a database query.
|
13
|
+
|
14
|
+
The input to this tool should be valid SQL.
|
15
|
+
DESC
|
16
|
+
|
17
|
+
attr_reader :db
|
18
|
+
|
19
|
+
#
|
20
|
+
# Establish a database connection
|
21
|
+
#
|
22
|
+
# @param connection_string [String] Database connection info, e.g. 'postgres://user:password@localhost:5432/db_name'
|
23
|
+
# @return [Database] Database object
|
24
|
+
#
|
25
|
+
def initialize(connection_string:)
|
26
|
+
depends_on "sequel"
|
27
|
+
require "sequel"
|
28
|
+
require "sequel/extensions/schema_dumper"
|
29
|
+
|
30
|
+
raise StandardError, "connection_string parameter cannot be blank" if connection_string.empty?
|
31
|
+
|
32
|
+
@db = Sequel.connect(connection_string)
|
33
|
+
@db.extension :schema_dumper
|
34
|
+
end
|
35
|
+
|
36
|
+
#
|
37
|
+
# Returns the database schema
|
38
|
+
#
|
39
|
+
# @return [String] schema
|
40
|
+
#
|
41
|
+
def schema
|
42
|
+
Langchain.logger.info("[#{self.class.name}]".light_blue + ": Dumping schema")
|
43
|
+
db.dump_schema_migration(same_db: true, indexes: false) unless db.adapter_scheme == :mock
|
44
|
+
end
|
45
|
+
|
46
|
+
#
|
47
|
+
# Evaluates a sql expression
|
48
|
+
#
|
49
|
+
# @param input [String] sql expression
|
50
|
+
# @return [Array] results
|
51
|
+
#
|
52
|
+
def execute(input:)
|
53
|
+
Langchain.logger.info("[#{self.class.name}]".light_blue + ": Executing \"#{input}\"")
|
54
|
+
|
55
|
+
db[input].to_a
|
56
|
+
rescue Sequel::DatabaseError => e
|
57
|
+
Langchain.logger.error("[#{self.class.name}]".light_red + ": #{e.message}")
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -7,7 +7,7 @@ module Langchain::Tool
|
|
7
7
|
#
|
8
8
|
# Gem requirements: gem "safe_ruby", "~> 1.0.4"
|
9
9
|
#
|
10
|
-
|
10
|
+
NAME = "ruby_code_interpreter"
|
11
11
|
description <<~DESC
|
12
12
|
A Ruby code interpreter. Use this to execute ruby expressions. Input should be a valid ruby expression. If you want to see the output of the tool, make sure to return a value.
|
13
13
|
DESC
|
@@ -6,8 +6,13 @@ module Langchain::Tool
|
|
6
6
|
# Wrapper around SerpAPI
|
7
7
|
#
|
8
8
|
# Gem requirements: gem "google_search_results", "~> 2.0.0"
|
9
|
-
# ENV requirements: ENV["SERPAPI_API_KEY"] # https://serpapi.com/manage-api-key)
|
10
9
|
#
|
10
|
+
# Usage:
|
11
|
+
# search = Langchain::Tool::SerpApi.new(api_key: "YOUR_API_KEY")
|
12
|
+
# search.execute(input: "What is the capital of France?")
|
13
|
+
#
|
14
|
+
|
15
|
+
NAME = "search"
|
11
16
|
|
12
17
|
description <<~DESC
|
13
18
|
A wrapper around Google Search.
|
@@ -18,39 +23,57 @@ module Langchain::Tool
|
|
18
23
|
Input should be a search query.
|
19
24
|
DESC
|
20
25
|
|
21
|
-
|
26
|
+
attr_reader :api_key
|
27
|
+
|
28
|
+
#
|
29
|
+
# Initializes the SerpAPI tool
|
30
|
+
#
|
31
|
+
# @param api_key [String] SerpAPI API key
|
32
|
+
# @return [Langchain::Tool::SerpApi] SerpAPI tool
|
33
|
+
#
|
34
|
+
def initialize(api_key:)
|
22
35
|
depends_on "google_search_results"
|
23
36
|
require "google_search_results"
|
37
|
+
@api_key = api_key
|
24
38
|
end
|
25
39
|
|
40
|
+
#
|
26
41
|
# Executes Google Search and returns hash_results JSON
|
42
|
+
#
|
27
43
|
# @param input [String] search query
|
28
44
|
# @return [Hash] hash_results JSON
|
29
|
-
|
45
|
+
#
|
30
46
|
def self.execute_search(input:)
|
31
47
|
new.execute_search(input: input)
|
32
48
|
end
|
33
49
|
|
34
|
-
#
|
50
|
+
#
|
51
|
+
# Executes Google Search and returns the result
|
52
|
+
#
|
35
53
|
# @param input [String] search query
|
36
54
|
# @return [String] Answer
|
37
|
-
#
|
38
|
-
# We may need to do the same thing here.
|
55
|
+
#
|
39
56
|
def execute(input:)
|
40
57
|
Langchain.logger.info("[#{self.class.name}]".light_blue + ": Executing \"#{input}\"")
|
41
58
|
|
42
59
|
hash_results = execute_search(input: input)
|
43
60
|
|
61
|
+
# TODO: Glance at all of the fields that langchain Python looks through: https://github.com/hwchase17/langchain/blob/v0.0.166/langchain/utilities/serpapi.py#L128-L156
|
62
|
+
# We may need to do the same thing here.
|
44
63
|
hash_results.dig(:answer_box, :answer) ||
|
45
64
|
hash_results.dig(:answer_box, :snippet) ||
|
46
65
|
hash_results.dig(:organic_results, 0, :snippet)
|
47
66
|
end
|
48
67
|
|
68
|
+
#
|
69
|
+
# Executes Google Search and returns hash_results JSON
|
70
|
+
#
|
71
|
+
# @param input [String] search query
|
72
|
+
# @return [Hash] hash_results JSON
|
73
|
+
#
|
49
74
|
def execute_search(input:)
|
50
|
-
GoogleSearch
|
51
|
-
q: input,
|
52
|
-
serp_api_key: ENV["SERPAPI_API_KEY"]
|
53
|
-
)
|
75
|
+
GoogleSearch
|
76
|
+
.new(q: input, serp_api_key: api_key)
|
54
77
|
.get_hash
|
55
78
|
end
|
56
79
|
end
|
@@ -7,19 +7,13 @@ module Langchain::Vectorsearch
|
|
7
7
|
include Langchain::DependencyHelper
|
8
8
|
extend Forwardable
|
9
9
|
|
10
|
-
attr_reader :client, :index_name, :llm
|
10
|
+
attr_reader :client, :index_name, :llm
|
11
11
|
|
12
12
|
DEFAULT_METRIC = "cosine"
|
13
13
|
|
14
|
-
# @param llm [
|
15
|
-
|
16
|
-
def initialize(llm:, llm_api_key:)
|
17
|
-
Langchain::LLM::Base.validate_llm!(llm: llm)
|
18
|
-
|
14
|
+
# @param llm [Object] The LLM client to use
|
15
|
+
def initialize(llm:)
|
19
16
|
@llm = llm
|
20
|
-
@llm_api_key = llm_api_key
|
21
|
-
|
22
|
-
@llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
|
23
17
|
end
|
24
18
|
|
25
19
|
# Method supported by Vectorsearch DB to create a default schema
|
@@ -48,7 +42,7 @@ module Langchain::Vectorsearch
|
|
48
42
|
raise NotImplementedError, "#{self.class.name} does not support asking questions"
|
49
43
|
end
|
50
44
|
|
51
|
-
def_delegators :
|
45
|
+
def_delegators :llm,
|
52
46
|
:default_dimension
|
53
47
|
|
54
48
|
def generate_prompt(question:, context:)
|
@@ -69,11 +63,10 @@ module Langchain::Vectorsearch
|
|
69
63
|
prompt_template.format(question: question)
|
70
64
|
end
|
71
65
|
|
72
|
-
def add_data(
|
73
|
-
raise ArgumentError, "
|
74
|
-
raise ArgumentError, "Either path or paths must be provided, not both" if !path.nil? && !paths.nil?
|
66
|
+
def add_data(paths:)
|
67
|
+
raise ArgumentError, "Paths must be provided" if paths.to_a.empty?
|
75
68
|
|
76
|
-
texts = Array(
|
69
|
+
texts = Array(paths)
|
77
70
|
.flatten
|
78
71
|
.map { |path| Langchain::Loader.new(path)&.load&.value }
|
79
72
|
.compact
|
@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
|
|
15
15
|
# @param url [String] The URL of the Qdrant server
|
16
16
|
# @param api_key [String] The API key to use
|
17
17
|
# @param index_name [String] The name of the index to use
|
18
|
-
# @param llm [
|
19
|
-
|
20
|
-
def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
|
18
|
+
# @param llm [Object] The LLM client to use
|
19
|
+
def initialize(url:, index_name:, llm:, api_key: nil)
|
21
20
|
depends_on "chroma-db"
|
22
21
|
require "chroma-db"
|
23
22
|
|
@@ -27,7 +26,7 @@ module Langchain::Vectorsearch
|
|
27
26
|
|
28
27
|
@index_name = index_name
|
29
28
|
|
30
|
-
super(llm: llm
|
29
|
+
super(llm: llm)
|
31
30
|
end
|
32
31
|
|
33
32
|
# Add a list of texts to the index
|
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
|
|
38
37
|
::Chroma::Resources::Embedding.new(
|
39
38
|
# TODO: Add support for passing your own IDs
|
40
39
|
id: SecureRandom.uuid,
|
41
|
-
embedding:
|
40
|
+
embedding: llm.embed(text: text),
|
42
41
|
# TODO: Add support for passing metadata
|
43
42
|
metadata: [], # metadatas[index],
|
44
43
|
document: text # Do we actually need to store the whole original document?
|
@@ -63,7 +62,7 @@ module Langchain::Vectorsearch
|
|
63
62
|
query:,
|
64
63
|
k: 4
|
65
64
|
)
|
66
|
-
embedding =
|
65
|
+
embedding = llm.embed(text: query)
|
67
66
|
|
68
67
|
similarity_search_by_vector(
|
69
68
|
embedding: embedding,
|
@@ -101,7 +100,7 @@ module Langchain::Vectorsearch
|
|
101
100
|
|
102
101
|
prompt = generate_prompt(question: question, context: context)
|
103
102
|
|
104
|
-
|
103
|
+
llm.chat(prompt: prompt)
|
105
104
|
end
|
106
105
|
|
107
106
|
private
|
@@ -11,14 +11,14 @@ module Langchain::Vectorsearch
|
|
11
11
|
# milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, llm_api_key:)
|
12
12
|
#
|
13
13
|
|
14
|
-
def initialize(url:, index_name:, llm:,
|
14
|
+
def initialize(url:, index_name:, llm:, api_key: nil)
|
15
15
|
depends_on "milvus"
|
16
16
|
require "milvus"
|
17
17
|
|
18
18
|
@client = ::Milvus::Client.new(url: url)
|
19
19
|
@index_name = index_name
|
20
20
|
|
21
|
-
super(llm: llm
|
21
|
+
super(llm: llm)
|
22
22
|
end
|
23
23
|
|
24
24
|
def add_texts(texts:)
|
@@ -33,7 +33,7 @@ module Langchain::Vectorsearch
|
|
33
33
|
}, {
|
34
34
|
field_name: "vectors",
|
35
35
|
type: ::Milvus::DATA_TYPES["binary_vector"],
|
36
|
-
field: Array(texts).map { |text|
|
36
|
+
field: Array(texts).map { |text| llm.embed(text: text) }
|
37
37
|
}
|
38
38
|
]
|
39
39
|
)
|
@@ -78,7 +78,7 @@ module Langchain::Vectorsearch
|
|
78
78
|
end
|
79
79
|
|
80
80
|
def similarity_search(query:, k: 4)
|
81
|
-
embedding =
|
81
|
+
embedding = llm.embed(text: query)
|
82
82
|
|
83
83
|
similarity_search_by_vector(
|
84
84
|
embedding: embedding,
|
@@ -22,10 +22,9 @@ module Langchain::Vectorsearch
|
|
22
22
|
|
23
23
|
# @param url [String] The URL of the PostgreSQL database
|
24
24
|
# @param index_name [String] The name of the table to use for the index
|
25
|
-
# @param llm [
|
26
|
-
# @param llm_api_key [String] The API key for the Language Layer API
|
25
|
+
# @param llm [Object] The LLM client to use
|
27
26
|
# @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
|
28
|
-
def initialize(url:, index_name:, llm:,
|
27
|
+
def initialize(url:, index_name:, llm:, api_key: nil)
|
29
28
|
require "pg"
|
30
29
|
require "pgvector"
|
31
30
|
|
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
|
|
38
37
|
@quoted_table_name = @client.quote_ident(index_name)
|
39
38
|
@operator = OPERATORS[DEFAULT_OPERATOR]
|
40
39
|
|
41
|
-
super(llm: llm
|
40
|
+
super(llm: llm)
|
42
41
|
end
|
43
42
|
|
44
43
|
# Add a list of texts to the index
|
@@ -46,7 +45,7 @@ module Langchain::Vectorsearch
|
|
46
45
|
# @return [PG::Result] The response from the database
|
47
46
|
def add_texts(texts:)
|
48
47
|
data = texts.flat_map do |text|
|
49
|
-
[text,
|
48
|
+
[text, llm.embed(text: text)]
|
50
49
|
end
|
51
50
|
values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
|
52
51
|
client.exec_params(
|
@@ -75,7 +74,7 @@ module Langchain::Vectorsearch
|
|
75
74
|
# @param k [Integer] The number of top results to return
|
76
75
|
# @return [Array<Hash>] The results of the search
|
77
76
|
def similarity_search(query:, k: 4)
|
78
|
-
embedding =
|
77
|
+
embedding = llm.embed(text: query)
|
79
78
|
|
80
79
|
similarity_search_by_vector(
|
81
80
|
embedding: embedding,
|
@@ -113,7 +112,7 @@ module Langchain::Vectorsearch
|
|
113
112
|
|
114
113
|
prompt = generate_prompt(question: question, context: context)
|
115
114
|
|
116
|
-
|
115
|
+
llm.chat(prompt: prompt)
|
117
116
|
end
|
118
117
|
end
|
119
118
|
end
|
@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
|
|
15
15
|
# @param environment [String] The environment to use
|
16
16
|
# @param api_key [String] The API key to use
|
17
17
|
# @param index_name [String] The name of the index to use
|
18
|
-
# @param llm [
|
19
|
-
|
20
|
-
def initialize(environment:, api_key:, index_name:, llm:, llm_api_key:)
|
18
|
+
# @param llm [Object] The LLM client to use
|
19
|
+
def initialize(environment:, api_key:, index_name:, llm:)
|
21
20
|
depends_on "pinecone"
|
22
21
|
require "pinecone"
|
23
22
|
|
@@ -29,7 +28,7 @@ module Langchain::Vectorsearch
|
|
29
28
|
@client = ::Pinecone::Client.new
|
30
29
|
@index_name = index_name
|
31
30
|
|
32
|
-
super(llm: llm
|
31
|
+
super(llm: llm)
|
33
32
|
end
|
34
33
|
|
35
34
|
# Add a list of texts to the index
|
@@ -43,7 +42,7 @@ module Langchain::Vectorsearch
|
|
43
42
|
# TODO: Allows passing in your own IDs
|
44
43
|
id: SecureRandom.uuid,
|
45
44
|
metadata: metadata || {content: text},
|
46
|
-
values:
|
45
|
+
values: llm.embed(text: text)
|
47
46
|
}
|
48
47
|
end
|
49
48
|
|
@@ -74,7 +73,7 @@ module Langchain::Vectorsearch
|
|
74
73
|
namespace: "",
|
75
74
|
filter: nil
|
76
75
|
)
|
77
|
-
embedding =
|
76
|
+
embedding = llm.embed(text: query)
|
78
77
|
|
79
78
|
similarity_search_by_vector(
|
80
79
|
embedding: embedding,
|
@@ -121,7 +120,7 @@ module Langchain::Vectorsearch
|
|
121
120
|
|
122
121
|
prompt = generate_prompt(question: question, context: context)
|
123
122
|
|
124
|
-
|
123
|
+
llm.chat(prompt: prompt)
|
125
124
|
end
|
126
125
|
end
|
127
126
|
end
|
@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
|
|
15
15
|
# @param url [String] The URL of the Qdrant server
|
16
16
|
# @param api_key [String] The API key to use
|
17
17
|
# @param index_name [String] The name of the index to use
|
18
|
-
# @param llm [
|
19
|
-
|
20
|
-
def initialize(url:, api_key:, index_name:, llm:, llm_api_key:)
|
18
|
+
# @param llm [Object] The LLM client to use
|
19
|
+
def initialize(url:, api_key:, index_name:, llm:)
|
21
20
|
depends_on "qdrant-ruby"
|
22
21
|
require "qdrant"
|
23
22
|
|
@@ -27,7 +26,7 @@ module Langchain::Vectorsearch
|
|
27
26
|
)
|
28
27
|
@index_name = index_name
|
29
28
|
|
30
|
-
super(llm: llm
|
29
|
+
super(llm: llm)
|
31
30
|
end
|
32
31
|
|
33
32
|
# Add a list of texts to the index
|
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
|
|
38
37
|
|
39
38
|
Array(texts).each do |text|
|
40
39
|
batch[:ids].push(SecureRandom.uuid)
|
41
|
-
batch[:vectors].push(
|
40
|
+
batch[:vectors].push(llm.embed(text: text))
|
42
41
|
batch[:payloads].push({content: text})
|
43
42
|
end
|
44
43
|
|
@@ -68,7 +67,7 @@ module Langchain::Vectorsearch
|
|
68
67
|
query:,
|
69
68
|
k: 4
|
70
69
|
)
|
71
|
-
embedding =
|
70
|
+
embedding = llm.embed(text: query)
|
72
71
|
|
73
72
|
similarity_search_by_vector(
|
74
73
|
embedding: embedding,
|
@@ -105,7 +104,7 @@ module Langchain::Vectorsearch
|
|
105
104
|
|
106
105
|
prompt = generate_prompt(question: question, context: context)
|
107
106
|
|
108
|
-
|
107
|
+
llm.chat(prompt: prompt)
|
109
108
|
end
|
110
109
|
end
|
111
110
|
end
|
@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
|
|
15
15
|
# @param url [String] The URL of the Weaviate instance
|
16
16
|
# @param api_key [String] The API key to use
|
17
17
|
# @param index_name [String] The name of the index to use
|
18
|
-
# @param llm [
|
19
|
-
|
20
|
-
def initialize(url:, api_key:, index_name:, llm:, llm_api_key:)
|
18
|
+
# @param llm [Object] The LLM client to use
|
19
|
+
def initialize(url:, api_key:, index_name:, llm:)
|
21
20
|
depends_on "weaviate-ruby"
|
22
21
|
require "weaviate"
|
23
22
|
|
@@ -27,7 +26,7 @@ module Langchain::Vectorsearch
|
|
27
26
|
)
|
28
27
|
@index_name = index_name
|
29
28
|
|
30
|
-
super(llm: llm
|
29
|
+
super(llm: llm)
|
31
30
|
end
|
32
31
|
|
33
32
|
# Add a list of texts to the index
|
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
|
|
38
37
|
{
|
39
38
|
class: index_name,
|
40
39
|
properties: {content: text},
|
41
|
-
vector:
|
40
|
+
vector: llm.embed(text: text)
|
42
41
|
}
|
43
42
|
end
|
44
43
|
|
@@ -67,7 +66,7 @@ module Langchain::Vectorsearch
|
|
67
66
|
# @param k [Integer|String] The number of results to return
|
68
67
|
# @return [Hash] The search results
|
69
68
|
def similarity_search(query:, k: 4)
|
70
|
-
embedding =
|
69
|
+
embedding = llm.embed(text: query)
|
71
70
|
|
72
71
|
similarity_search_by_vector(embedding: embedding, k: k)
|
73
72
|
end
|
@@ -100,7 +99,7 @@ module Langchain::Vectorsearch
|
|
100
99
|
|
101
100
|
prompt = generate_prompt(question: question, context: context)
|
102
101
|
|
103
|
-
|
102
|
+
llm.chat(prompt: prompt)
|
104
103
|
end
|
105
104
|
end
|
106
105
|
end
|
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
@@ -24,6 +24,7 @@ module Langchain
|
|
24
24
|
module Agent
|
25
25
|
autoload :Base, "langchain/agent/base"
|
26
26
|
autoload :ChainOfThoughtAgent, "langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb"
|
27
|
+
autoload :SQLQueryAgent, "langchain/agent/sql_query_agent/sql_query_agent.rb"
|
27
28
|
end
|
28
29
|
|
29
30
|
module Tool
|
@@ -32,6 +33,7 @@ module Langchain
|
|
32
33
|
autoload :RubyCodeInterpreter, "langchain/tool/ruby_code_interpreter"
|
33
34
|
autoload :SerpApi, "langchain/tool/serp_api"
|
34
35
|
autoload :Wikipedia, "langchain/tool/wikipedia"
|
36
|
+
autoload :Database, "langchain/tool/database"
|
35
37
|
end
|
36
38
|
|
37
39
|
module Processors
|
@@ -43,6 +45,7 @@ module Langchain
|
|
43
45
|
autoload :JSONL, "langchain/processors/jsonl"
|
44
46
|
autoload :PDF, "langchain/processors/pdf"
|
45
47
|
autoload :Text, "langchain/processors/text"
|
48
|
+
autoload :Xlsx, "langchain/processors/xlsx"
|
46
49
|
end
|
47
50
|
|
48
51
|
module Utils
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: tiktoken_ruby
|
@@ -318,6 +318,20 @@ dependencies:
|
|
318
318
|
- - "~>"
|
319
319
|
- !ruby/object:Gem::Version
|
320
320
|
version: 0.9.0
|
321
|
+
- !ruby/object:Gem::Dependency
|
322
|
+
name: roo
|
323
|
+
requirement: !ruby/object:Gem::Requirement
|
324
|
+
requirements:
|
325
|
+
- - "~>"
|
326
|
+
- !ruby/object:Gem::Version
|
327
|
+
version: 2.10.0
|
328
|
+
type: :development
|
329
|
+
prerelease: false
|
330
|
+
version_requirements: !ruby/object:Gem::Requirement
|
331
|
+
requirements:
|
332
|
+
- - "~>"
|
333
|
+
- !ruby/object:Gem::Version
|
334
|
+
version: 2.10.0
|
321
335
|
- !ruby/object:Gem::Dependency
|
322
336
|
name: ruby-openai
|
323
337
|
requirement: !ruby/object:Gem::Requirement
|
@@ -346,6 +360,20 @@ dependencies:
|
|
346
360
|
- - "~>"
|
347
361
|
- !ruby/object:Gem::Version
|
348
362
|
version: 1.0.4
|
363
|
+
- !ruby/object:Gem::Dependency
|
364
|
+
name: sequel
|
365
|
+
requirement: !ruby/object:Gem::Requirement
|
366
|
+
requirements:
|
367
|
+
- - "~>"
|
368
|
+
- !ruby/object:Gem::Version
|
369
|
+
version: 5.68.0
|
370
|
+
type: :development
|
371
|
+
prerelease: false
|
372
|
+
version_requirements: !ruby/object:Gem::Requirement
|
373
|
+
requirements:
|
374
|
+
- - "~>"
|
375
|
+
- !ruby/object:Gem::Version
|
376
|
+
version: 5.68.0
|
349
377
|
- !ruby/object:Gem::Dependency
|
350
378
|
name: weaviate-ruby
|
351
379
|
requirement: !ruby/object:Gem::Requirement
|
@@ -383,6 +411,7 @@ extra_rdoc_files: []
|
|
383
411
|
files:
|
384
412
|
- ".env.example"
|
385
413
|
- ".rspec"
|
414
|
+
- ".rubocop.yml"
|
386
415
|
- CHANGELOG.md
|
387
416
|
- Gemfile
|
388
417
|
- Gemfile.lock
|
@@ -395,10 +424,14 @@ files:
|
|
395
424
|
- examples/store_and_query_with_pinecone.rb
|
396
425
|
- examples/store_and_query_with_qdrant.rb
|
397
426
|
- examples/store_and_query_with_weaviate.rb
|
427
|
+
- lefthook.yml
|
398
428
|
- lib/langchain.rb
|
399
429
|
- lib/langchain/agent/base.rb
|
400
430
|
- lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb
|
401
431
|
- lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
|
432
|
+
- lib/langchain/agent/sql_query_agent/sql_query_agent.rb
|
433
|
+
- lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json
|
434
|
+
- lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json
|
402
435
|
- lib/langchain/data.rb
|
403
436
|
- lib/langchain/dependency_helper.rb
|
404
437
|
- lib/langchain/llm/ai21.rb
|
@@ -418,12 +451,14 @@ files:
|
|
418
451
|
- lib/langchain/processors/jsonl.rb
|
419
452
|
- lib/langchain/processors/pdf.rb
|
420
453
|
- lib/langchain/processors/text.rb
|
454
|
+
- lib/langchain/processors/xlsx.rb
|
421
455
|
- lib/langchain/prompt/base.rb
|
422
456
|
- lib/langchain/prompt/few_shot_prompt_template.rb
|
423
457
|
- lib/langchain/prompt/loading.rb
|
424
458
|
- lib/langchain/prompt/prompt_template.rb
|
425
459
|
- lib/langchain/tool/base.rb
|
426
460
|
- lib/langchain/tool/calculator.rb
|
461
|
+
- lib/langchain/tool/database.rb
|
427
462
|
- lib/langchain/tool/ruby_code_interpreter.rb
|
428
463
|
- lib/langchain/tool/serp_api.rb
|
429
464
|
- lib/langchain/tool/wikipedia.rb
|