langchainrb 0.4.1 → 0.5.0
- checksums.yaml +4 -4
- data/.env.example +2 -1
- data/.rubocop.yml +11 -0
- data/CHANGELOG.md +13 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +14 -1
- data/README.md +42 -7
- data/Rakefile +5 -0
- data/examples/pdf_store_and_query_with_chroma.rb +1 -2
- data/examples/store_and_query_with_pinecone.rb +1 -2
- data/examples/store_and_query_with_qdrant.rb +1 -2
- data/examples/store_and_query_with_weaviate.rb +1 -2
- data/lefthook.yml +5 -0
- data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb +6 -10
- data/lib/langchain/agent/sql_query_agent/sql_query_agent.rb +78 -0
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json +10 -0
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json +10 -0
- data/lib/langchain/dependency_helper.rb +34 -0
- data/lib/langchain/llm/ai21.rb +45 -0
- data/lib/langchain/llm/base.rb +2 -19
- data/lib/langchain/llm/cohere.rb +9 -0
- data/lib/langchain/llm/google_palm.rb +7 -0
- data/lib/langchain/llm/hugging_face.rb +9 -0
- data/lib/langchain/llm/openai.rb +33 -41
- data/lib/langchain/llm/replicate.rb +5 -2
- data/lib/langchain/processors/base.rb +2 -0
- data/lib/langchain/processors/xlsx.rb +27 -0
- data/lib/langchain/prompt/base.rb +8 -4
- data/lib/langchain/prompt/loading.rb +6 -1
- data/lib/langchain/prompt/prompt_template.rb +1 -1
- data/lib/langchain/tool/base.rb +4 -1
- data/lib/langchain/tool/calculator.rb +9 -0
- data/lib/langchain/tool/database.rb +45 -0
- data/lib/langchain/tool/ruby_code_interpreter.rb +6 -0
- data/lib/langchain/tool/serp_api.rb +5 -1
- data/lib/langchain/tool/wikipedia.rb +4 -0
- data/lib/langchain/vectorsearch/base.rb +8 -14
- data/lib/langchain/vectorsearch/chroma.rb +15 -7
- data/lib/langchain/vectorsearch/milvus.rb +13 -4
- data/lib/langchain/vectorsearch/pgvector.rb +15 -8
- data/lib/langchain/vectorsearch/pinecone.rb +15 -7
- data/lib/langchain/vectorsearch/qdrant.rb +15 -7
- data/lib/langchain/vectorsearch/weaviate.rb +15 -7
- data/lib/{version.rb → langchain/version.rb} +1 -1
- data/lib/langchain.rb +6 -2
- metadata +82 -4
- data/lib/dependency_helper.rb +0 -30
data/lib/langchain/llm/openai.rb
CHANGED
@@ -2,6 +2,15 @@
 
 module Langchain::LLM
   class OpenAI < Base
+    #
+    # Wrapper around OpenAI APIs.
+    #
+    # Gem requirements: gem "ruby-openai", "~> 4.0.0"
+    #
+    # Usage:
+    #     openai = Langchain::LLM::OpenAI.new(api_key:, llm_options: {})
+    #
+
     DEFAULTS = {
       temperature: 0.0,
       completion_model_name: "text-davinci-003",
@@ -10,12 +19,11 @@ module Langchain::LLM
       dimension: 1536
     }.freeze
 
-    def initialize(api_key:)
+    def initialize(api_key:, llm_options: {})
       depends_on "ruby-openai"
       require "openai"
 
-
-      @client = ::OpenAI::Client.new(access_token: api_key)
+      @client = ::OpenAI::Client.new(access_token: api_key, **llm_options)
     end
 
     #
@@ -24,17 +32,12 @@ module Langchain::LLM
     # @param text [String] The text to generate an embedding for
     # @return [Array] The embedding
     #
-    def embed(text
-
+    def embed(text:, **params)
+      parameters = {model: DEFAULTS[:embeddings_model_name], input: text}
 
-      Langchain::Utils::TokenLengthValidator.validate!(text, model)
+      Langchain::Utils::TokenLengthValidator.validate!(text, parameters[:model])
 
-      response = client.embeddings(
-        parameters: {
-          model: model,
-          input: text
-        }
-      )
+      response = client.embeddings(parameters: parameters.merge(params))
       response.dig("data").first.dig("embedding")
     end
 
@@ -45,23 +48,13 @@ module Langchain::LLM
     # @return [String] The completion
     #
     def complete(prompt:, **params)
-
-
-      Langchain::Utils::TokenLengthValidator.validate!(prompt, model)
-
-      default_params = {
-        model: model,
-        temperature: DEFAULTS[:temperature],
-        prompt: prompt
-      }
+      parameters = compose_parameters DEFAULTS[:completion_model_name], params
 
-
-        default_params[:stop] = params.delete(:stop_sequences)
-      end
+      Langchain::Utils::TokenLengthValidator.validate!(prompt, parameters[:model])
 
-
+      parameters[:prompt] = prompt
 
-      response = client.completions(parameters:
+      response = client.completions(parameters: parameters)
       response.dig("choices", 0, "text")
     end
 
@@ -72,24 +65,13 @@ module Langchain::LLM
     # @return [String] The chat completion
     #
     def chat(prompt:, **params)
-
+      parameters = compose_parameters DEFAULTS[:chat_completion_model_name], params
 
-      Langchain::Utils::TokenLengthValidator.validate!(prompt, model)
+      Langchain::Utils::TokenLengthValidator.validate!(prompt, parameters[:model])
 
-
-        model: model,
-        temperature: DEFAULTS[:temperature],
-        # TODO: Figure out how to introduce persisted conversations
-        messages: [{role: "user", content: prompt}]
-      }
-
-      if params[:stop_sequences]
-        default_params[:stop] = params.delete(:stop_sequences)
-      end
+      parameters[:messages] = [{role: "user", content: prompt}]
 
-
-
-      response = client.chat(parameters: default_params)
+      response = client.chat(parameters: parameters)
       response.dig("choices", 0, "message", "content")
     end
 
@@ -112,5 +94,15 @@ module Langchain::LLM
         max_tokens: 2048
       )
     end
+
+    private
+
+    def compose_parameters(model, params)
+      default_params = {model: model, temperature: DEFAULTS[:temperature]}
+
+      default_params[:stop] = params.delete(:stop_sequences) if params[:stop_sequences]
+
+      default_params.merge(params)
+    end
   end
 end
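The user-facing changes here: the constructor gains llm_options:, which is splatted into ::OpenAI::Client.new, and complete/chat now accept extra keyword arguments that the private compose_parameters helper merges into the request (with stop_sequences translated into OpenAI's stop parameter). A minimal sketch of how this could be called; the request_timeout option is an assumed ruby-openai client option, not something this diff introduces:

    require "langchain"

    # llm_options is forwarded into ::OpenAI::Client.new (ruby-openai ~> 4.0.0)
    openai = Langchain::LLM::OpenAI.new(
      api_key: ENV["OPENAI_API_KEY"],
      llm_options: {request_timeout: 120} # assumed client option, for illustration
    )

    # Extra keyword args are merged into the request by compose_parameters;
    # stop_sequences becomes the "stop" parameter.
    openai.complete(prompt: "List three Ruby web frameworks.", stop_sequences: ["\n\n"])
    openai.chat(prompt: "Say hello in French.", temperature: 0.7)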
data/lib/langchain/llm/replicate.rb
CHANGED
@@ -2,7 +2,11 @@
 
 module Langchain::LLM
   class Replicate < Base
+    #
     # Wrapper around Replicate.com LLM provider
+    #
+    # Gem requirements: gem "replicate-ruby", "~> 0.2.2"
+    #
     # Use it directly:
     # replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
     #
@@ -10,8 +14,7 @@ module Langchain::LLM
     # chroma = Vectorsearch::Chroma.new(
     #   url: ENV["CHROMA_URL"],
     #   index_name: "...",
-    #   llm: :
-    #   llm_api_key: ENV["REPLICATE_API_KEY"],
+    #   llm: Langchain::LLM::Replicate(api_key: ENV["REPLICATE_API_KEY"])
     # )
 
     DEFAULTS = {
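The rewritten usage comment reflects 0.5.0's pattern of passing an instantiated LLM into a vector store via llm: (the comment in the file appears to omit .new). A sketch of that wiring against the Chroma constructor further down in this changeset; the index name is illustrative:

    require "langchain"

    replicate = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])

    chroma = Langchain::Vectorsearch::Chroma.new(
      url: ENV["CHROMA_URL"],
      index_name: "documents", # illustrative
      llm: replicate
    )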
data/lib/langchain/processors/xlsx.rb
ADDED
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Processors
+    class Xlsx < Base
+      EXTENSIONS = [".xlsx", ".xlsm"].freeze
+      CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"].freeze
+
+      def initialize(*)
+        depends_on "roo"
+        require "roo"
+      end
+
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [Array<Array<String>>] Array of rows, each row is an array of cells
+      def parse(data)
+        xlsx_file = Roo::Spreadsheet.open(data)
+        xlsx_file.each_with_pagename.flat_map do |_, sheet|
+          sheet.map do |row|
+            row.map { |i| i.to_s.strip }
+          end
+        end
+      end
+    end
+  end
+end
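The new processor turns .xlsx/.xlsm workbooks into arrays of stringified cell rows via the roo gem; EXTENSIONS and CONTENT_TYPES are what Langchain::Loader matches on when picking a processor. A sketch of calling it directly, with an illustrative file name:

    require "langchain"

    # parse returns one array of cell strings per row, across all sheets
    rows = Langchain::Processors::Xlsx.new.parse(File.open("report.xlsx"))
    rows.first # => e.g. ["Name", "Amount", "Date"]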
data/lib/langchain/prompt/base.rb
CHANGED
@@ -2,6 +2,7 @@
 
 require "strscan"
 require "json"
+require "yaml"
 
 module Langchain::Prompt
   class Base
@@ -52,10 +53,13 @@ module Langchain::Prompt
       directory_path = save_path.dirname
       FileUtils.mkdir_p(directory_path) unless directory_path.directory?
 
-
+      case save_path.extname
+      when ".json"
         File.write(file_path, to_h.to_json)
+      when ".yaml", ".yml"
+        File.write(file_path, to_h.to_yaml)
       else
-        raise ArgumentError, "#{file_path} must be json"
+        raise ArgumentError, "#{file_path} must be json or yaml file"
       end
     end
 
@@ -64,9 +68,9 @@ module Langchain::Prompt
     #
     # This method takes a template string and returns an array of input variable names
     # contained within the template. Input variables are defined as text enclosed in
-    # curly braces (e.g.
+    # curly braces (e.g. <code>\{variable_name\}</code>).
     #
-    # Content within two consecutive curly braces (e.g.
+    # Content within two consecutive curly braces (e.g. <code>\{\{ignore_me}}</code>) are ignored.
     #
     # @param template [String] The template string to extract variables from.
     #
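Prompt templates can now be written out as YAML as well as JSON, dispatching on the file extension. A sketch using the PromptTemplate API; the file path is illustrative:

    require "langchain"

    prompt = Langchain::Prompt::PromptTemplate.new(
      template: "Tell me a {adjective} joke about {content}.",
      input_variables: ["adjective", "content"]
    )

    prompt.save(file_path: "prompt_template.yaml") # ".json" behaves as before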
data/lib/langchain/prompt/loading.rb
CHANGED
@@ -2,6 +2,8 @@
 
 require "strscan"
 require "pathname"
+require "json"
+require "yaml"
 
 module Langchain::Prompt
   TYPE_TO_LOADER = {
@@ -22,8 +24,11 @@ module Langchain::Prompt
     def load_from_path(file_path:)
       file_path = file_path.is_a?(String) ? Pathname.new(file_path) : file_path
 
-
+      case file_path.extname
+      when ".json"
         config = JSON.parse(File.read(file_path))
+      when ".yaml", ".yml"
+        config = YAML.safe_load(File.read(file_path))
       else
         raise ArgumentError, "Got unsupported file type #{file_path.extname}"
       end
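The loader gains the matching YAML branch, so a file produced by save above round-trips:

    require "langchain"

    prompt = Langchain::Prompt.load_from_path(file_path: "prompt_template.yaml")
    prompt.format(adjective: "funny", content: "chickens")
    # => "Tell me a funny joke about chickens."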
data/lib/langchain/prompt/prompt_template.rb
CHANGED
@@ -20,7 +20,7 @@ module Langchain::Prompt
     end
 
     #
-    # Format the prompt with the inputs. Double {{}} replaced with single {} to adhere to f-string spec.
+    # Format the prompt with the inputs. Double <code>{{}}</code> replaced with single <code>{}</code> to adhere to f-string spec.
     #
     # @param kwargs [Hash] Any arguments to be passed to the prompt template.
     # @return [String] A formatted string.
data/lib/langchain/tool/base.rb
CHANGED
@@ -2,6 +2,8 @@
 
 module Langchain::Tool
   class Base
+    include Langchain::DependencyHelper
+
     # How to add additional Tools?
     # 1. Create a new file in lib/tool/your_tool_name.rb
     # 2. Add your tool to the TOOLS hash below
@@ -12,7 +14,8 @@ module Langchain::Tool
     TOOLS = {
       "calculator" => "Langchain::Tool::Calculator",
      "search" => "Langchain::Tool::SerpApi",
-      "wikipedia" => "Langchain::Tool::Wikipedia"
+      "wikipedia" => "Langchain::Tool::Wikipedia",
+      "database" => "Langchain::Tool::Database"
     }
 
     def self.description(value)
data/lib/langchain/tool/calculator.rb
CHANGED
@@ -2,6 +2,15 @@
 
 module Langchain::Tool
   class Calculator < Base
+    #
+    # A calculator tool that falls back to the Google calculator widget
+    #
+    # Gem requirements:
+    #     gem "eqn", "~> 1.6.5"
+    #     gem "google_search_results", "~> 2.0.0"
+    # ENV requirements: ENV["SERPAPI_API_KEY"]
+    #
+
     description <<~DESC
       Useful for getting the result of a math expression.
 
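The new comment block documents the tool's dependencies: expressions are evaluated with the eqn gem, with a SerpApi-backed Google calculator fallback (hence the SERPAPI_API_KEY requirement). A sketch, assuming the same execute(input:) interface the Database tool below exposes:

    require "langchain"

    calculator = Langchain::Tool::Calculator.new
    calculator.execute(input: "(4.1 + 2.3) / 2") # => 3.2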
data/lib/langchain/tool/database.rb
ADDED
@@ -0,0 +1,45 @@
+module Langchain::Tool
+  class Database < Base
+    #
+    # Connects to a database, executes SQL queries, and outputs DB schema for Agents to use
+    #
+    # Gem requirements: gem "sequel", "~> 5.68.0"
+    #
+
+    description <<~DESC
+      Useful for getting the result of a database query.
+
+      The input to this tool should be valid SQL.
+    DESC
+
+    # Establish a database connection
+    # @param db_connection_string [String] Database connection info, e.g. 'postgres://user:password@localhost:5432/db_name'
+    def initialize(db_connection_string)
+      depends_on "sequel"
+      require "sequel"
+      require "sequel/extensions/schema_dumper"
+
+      raise StandardError, "db_connection_string parameter cannot be blank" if db_connection_string.empty?
+
+      @db = Sequel.connect(db_connection_string)
+      @db.extension :schema_dumper
+    end
+
+    def schema
+      Langchain.logger.info("[#{self.class.name}]".light_blue + ": Dumping schema")
+      @db.dump_schema_migration(same_db: true, indexes: false) unless @db.adapter_scheme == :mock
+    end
+
+    # Evaluates a sql expression
+    # @param input [String] sql expression
+    # @return [Array] results
+    def execute(input:)
+      Langchain.logger.info("[#{self.class.name}]".light_blue + ": Executing \"#{input}\"")
+      begin
+        @db[input].to_a
+      rescue Sequel::DatabaseError => e
+        Langchain.logger.error("[#{self.class.name}]".light_red + ": #{e.message}")
+      end
+    end
+  end
+end
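A usage sketch for the new tool; the connection URL and table name are illustrative. schema dumps a Sequel schema migration (useful for an agent's prompt) and execute runs raw SQL through the Sequel connection:

    require "langchain"

    db = Langchain::Tool::Database.new("postgres://user:password@localhost:5432/db_name")

    db.schema                                        # Ruby schema dump via Sequel's schema_dumper
    db.execute(input: "SELECT COUNT(*) FROM users")  # => array of row hashes (illustrative table)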
data/lib/langchain/tool/ruby_code_interpreter.rb
CHANGED
@@ -2,6 +2,12 @@
 
 module Langchain::Tool
   class RubyCodeInterpreter < Base
+    #
+    # A tool that execute Ruby code in a sandboxed environment.
+    #
+    # Gem requirements: gem "safe_ruby", "~> 1.0.4"
+    #
+
     description <<~DESC
       A Ruby code interpreter. Use this to execute ruby expressions. Input should be a valid ruby expression. If you want to see the output of the tool, make sure to return a value.
     DESC
data/lib/langchain/tool/serp_api.rb
CHANGED
@@ -2,8 +2,12 @@
 
 module Langchain::Tool
   class SerpApi < Base
+    #
     # Wrapper around SerpAPI
-    #
+    #
+    # Gem requirements: gem "google_search_results", "~> 2.0.0"
+    # ENV requirements: ENV["SERPAPI_API_KEY"] # https://serpapi.com/manage-api-key)
+    #
 
     description <<~DESC
       A wrapper around Google Search.
data/lib/langchain/vectorsearch/base.rb
CHANGED
@@ -4,21 +4,16 @@ require "forwardable"
 
 module Langchain::Vectorsearch
   class Base
+    include Langchain::DependencyHelper
     extend Forwardable
 
-    attr_reader :client, :index_name, :llm
+    attr_reader :client, :index_name, :llm
 
     DEFAULT_METRIC = "cosine"
 
-    # @param llm [
-
-    def initialize(llm:, llm_api_key:)
-      Langchain::LLM::Base.validate_llm!(llm: llm)
-
+    # @param llm [Object] The LLM client to use
+    def initialize(llm:)
       @llm = llm
-      @llm_api_key = llm_api_key
-
-      @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
     end
 
     # Method supported by Vectorsearch DB to create a default schema
@@ -47,7 +42,7 @@ module Langchain::Vectorsearch
       raise NotImplementedError, "#{self.class.name} does not support asking questions"
     end
 
-    def_delegators :
+    def_delegators :llm,
       :default_dimension
 
     def generate_prompt(question:, context:)
@@ -68,11 +63,10 @@ module Langchain::Vectorsearch
       prompt_template.format(question: question)
     end
 
-    def add_data(
-      raise ArgumentError, "
-      raise ArgumentError, "Either path or paths must be provided, not both" if !path.nil? && !paths.nil?
+    def add_data(paths:)
+      raise ArgumentError, "Paths must be provided" if paths.to_a.empty?
 
-      texts = Array(
+      texts = Array(paths)
         .flatten
         .map { |path| Langchain::Loader.new(path)&.load&.value }
        .compact
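Two base-class changes ripple through every adapter below: the constructor now takes an already-instantiated llm: object (the llm_api_key: keyword and symbol lookup are gone), and add_data accepts only paths:, loading each file through Langchain::Loader. A sketch with the Pinecone adapter from this changeset; the index name and glob are illustrative:

    require "langchain"

    openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

    store = Langchain::Vectorsearch::Pinecone.new(
      environment: ENV["PINECONE_ENVIRONMENT"],
      api_key: ENV["PINECONE_API_KEY"],
      index_name: "documents",
      llm: openai
    )

    # add_data(paths:) replaces the old path:/paths: pair
    store.add_data(paths: Dir.glob("docs/**/*.pdf"))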
data/lib/langchain/vectorsearch/chroma.rb
CHANGED
@@ -2,13 +2,21 @@
 
 module Langchain::Vectorsearch
   class Chroma < Base
+    #
+    # Wrapper around Chroma DB
+    #
+    # Gem requirements: gem "chroma-db", "~> 0.3.0"
+    #
+    # Usage:
+    # chroma = Langchain::Vectorsearch::Chroma.new(url:, index_name:, llm:, llm_api_key:, api_key: nil)
+    #
+
     # Initialize the Chroma client
     # @param url [String] The URL of the Qdrant server
     # @param api_key [String] The API key to use
     # @param index_name [String] The name of the index to use
-    # @param llm [
-
-    def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
+    # @param llm [Object] The LLM client to use
+    def initialize(url:, index_name:, llm:, api_key: nil)
       depends_on "chroma-db"
       require "chroma-db"
 
@@ -18,7 +26,7 @@ module Langchain::Vectorsearch
 
       @index_name = index_name
 
-      super(llm: llm
+      super(llm: llm)
     end
 
     # Add a list of texts to the index
@@ -29,7 +37,7 @@ module Langchain::Vectorsearch
         ::Chroma::Resources::Embedding.new(
           # TODO: Add support for passing your own IDs
           id: SecureRandom.uuid,
-          embedding:
+          embedding: llm.embed(text: text),
           # TODO: Add support for passing metadata
           metadata: [], # metadatas[index],
           document: text # Do we actually need to store the whole original document?
@@ -54,7 +62,7 @@ module Langchain::Vectorsearch
       query:,
       k: 4
     )
-      embedding =
+      embedding = llm.embed(text: query)
 
       similarity_search_by_vector(
         embedding: embedding,
@@ -92,7 +100,7 @@ module Langchain::Vectorsearch
 
       prompt = generate_prompt(question: question, context: context)
 
-
+      llm.chat(prompt: prompt)
     end
 
     private
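Within the adapters, the embedding and chat calls now go through the injected llm object (llm.embed, llm.chat); note that the new Usage comment above still mentions llm_api_key:, which the updated constructor no longer accepts. A query-side sketch; index name and texts are illustrative:

    require "langchain"

    chroma = Langchain::Vectorsearch::Chroma.new(
      url: ENV["CHROMA_URL"],
      index_name: "documents",
      llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
    )

    chroma.add_texts(texts: ["Ruby was created by Matz", "Chroma is a vector database"])
    chroma.similarity_search(query: "Who created Ruby?", k: 4)
    chroma.ask(question: "Who created Ruby?") # builds a prompt from the top matches, then llm.chat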
data/lib/langchain/vectorsearch/milvus.rb
CHANGED
@@ -2,14 +2,23 @@
 
 module Langchain::Vectorsearch
   class Milvus < Base
-
+    #
+    # Wrapper around Milvus REST APIs.
+    #
+    # Gem requirements: gem "milvus", "~> 0.9.0"
+    #
+    # Usage:
+    # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, llm_api_key:)
+    #
+
+    def initialize(url:, index_name:, llm:, api_key: nil)
       depends_on "milvus"
       require "milvus"
 
       @client = ::Milvus::Client.new(url: url)
       @index_name = index_name
 
-      super(llm: llm
+      super(llm: llm)
     end
 
     def add_texts(texts:)
@@ -24,7 +33,7 @@ module Langchain::Vectorsearch
         }, {
           field_name: "vectors",
           type: ::Milvus::DATA_TYPES["binary_vector"],
-          field: Array(texts).map { |text|
+          field: Array(texts).map { |text| llm.embed(text: text) }
         }
       ]
     )
@@ -69,7 +78,7 @@ module Langchain::Vectorsearch
     end
 
     def similarity_search(query:, k: 4)
-      embedding =
+      embedding = llm.embed(text: query)
 
       similarity_search_by_vector(
         embedding: embedding,
data/lib/langchain/vectorsearch/pgvector.rb
CHANGED
@@ -1,8 +1,16 @@
 # frozen_string_literal: true
 
 module Langchain::Vectorsearch
-  # The PostgreSQL vector search adapter
   class Pgvector < Base
+    #
+    # The PostgreSQL vector search adapter
+    #
+    # Gem requirements: gem "pgvector", "~> 0.2"
+    #
+    # Usage:
+    # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, llm_api_key:)
+    #
+
     # The operators supported by the PostgreSQL vector search adapter
     OPERATORS = {
       "cosine_distance" => "<=>",
@@ -14,10 +22,9 @@ module Langchain::Vectorsearch
 
     # @param url [String] The URL of the PostgreSQL database
     # @param index_name [String] The name of the table to use for the index
-    # @param llm [
-    # @param llm_api_key [String] The API key for the Language Layer API
+    # @param llm [Object] The LLM client to use
     # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
-    def initialize(url:, index_name:, llm:,
+    def initialize(url:, index_name:, llm:, api_key: nil)
       require "pg"
       require "pgvector"
 
@@ -30,7 +37,7 @@ module Langchain::Vectorsearch
       @quoted_table_name = @client.quote_ident(index_name)
       @operator = OPERATORS[DEFAULT_OPERATOR]
 
-      super(llm: llm
+      super(llm: llm)
     end
 
     # Add a list of texts to the index
@@ -38,7 +45,7 @@ module Langchain::Vectorsearch
     # @return [PG::Result] The response from the database
     def add_texts(texts:)
       data = texts.flat_map do |text|
-        [text,
+        [text, llm.embed(text: text)]
       end
       values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
       client.exec_params(
@@ -67,7 +74,7 @@ module Langchain::Vectorsearch
     # @param k [Integer] The number of top results to return
     # @return [Array<Hash>] The results of the search
     def similarity_search(query:, k: 4)
-      embedding =
+      embedding = llm.embed(text: query)
 
       similarity_search_by_vector(
         embedding: embedding,
@@ -105,7 +112,7 @@ module Langchain::Vectorsearch
 
       prompt = generate_prompt(question: question, context: context)
 
-
+      llm.chat(prompt: prompt)
     end
   end
 end
data/lib/langchain/vectorsearch/pinecone.rb
CHANGED
@@ -2,13 +2,21 @@
 
 module Langchain::Vectorsearch
   class Pinecone < Base
+    #
+    # Wrapper around Pinecone API.
+    #
+    # Gem requirements: gem "pinecone", "~> 0.1.6"
+    #
+    # Usage:
+    # pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm:, llm_api_key:)
+    #
+
     # Initialize the Pinecone client
     # @param environment [String] The environment to use
     # @param api_key [String] The API key to use
     # @param index_name [String] The name of the index to use
-    # @param llm [
-
-    def initialize(environment:, api_key:, index_name:, llm:, llm_api_key:)
+    # @param llm [Object] The LLM client to use
+    def initialize(environment:, api_key:, index_name:, llm:)
       depends_on "pinecone"
       require "pinecone"
 
@@ -20,7 +28,7 @@ module Langchain::Vectorsearch
       @client = ::Pinecone::Client.new
       @index_name = index_name
 
-      super(llm: llm
+      super(llm: llm)
     end
 
     # Add a list of texts to the index
@@ -34,7 +42,7 @@ module Langchain::Vectorsearch
         # TODO: Allows passing in your own IDs
         id: SecureRandom.uuid,
         metadata: metadata || {content: text},
-        values:
+        values: llm.embed(text: text)
       }
     end
 
@@ -65,7 +73,7 @@ module Langchain::Vectorsearch
       namespace: "",
       filter: nil
     )
-      embedding =
+      embedding = llm.embed(text: query)
 
      similarity_search_by_vector(
        embedding: embedding,
@@ -112,7 +120,7 @@ module Langchain::Vectorsearch
 
      prompt = generate_prompt(question: question, context: context)
 
-
+      llm.chat(prompt: prompt)
     end
   end
 end