langchainrb 0.7.5 → 0.12.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +78 -0
- data/README.md +113 -56
- data/lib/langchain/assistants/assistant.rb +213 -0
- data/lib/langchain/assistants/message.rb +58 -0
- data/lib/langchain/assistants/thread.rb +34 -0
- data/lib/langchain/chunker/markdown.rb +37 -0
- data/lib/langchain/chunker/recursive_text.rb +0 -2
- data/lib/langchain/chunker/semantic.rb +1 -3
- data/lib/langchain/chunker/sentence.rb +0 -2
- data/lib/langchain/chunker/text.rb +0 -2
- data/lib/langchain/contextual_logger.rb +1 -1
- data/lib/langchain/data.rb +4 -3
- data/lib/langchain/llm/ai21.rb +1 -1
- data/lib/langchain/llm/anthropic.rb +86 -11
- data/lib/langchain/llm/aws_bedrock.rb +52 -0
- data/lib/langchain/llm/azure.rb +10 -97
- data/lib/langchain/llm/base.rb +3 -2
- data/lib/langchain/llm/cohere.rb +5 -7
- data/lib/langchain/llm/google_palm.rb +4 -2
- data/lib/langchain/llm/google_vertex_ai.rb +151 -0
- data/lib/langchain/llm/hugging_face.rb +1 -1
- data/lib/langchain/llm/llama_cpp.rb +18 -16
- data/lib/langchain/llm/mistral_ai.rb +68 -0
- data/lib/langchain/llm/ollama.rb +209 -27
- data/lib/langchain/llm/openai.rb +138 -170
- data/lib/langchain/llm/prompts/ollama/summarize_template.yaml +9 -0
- data/lib/langchain/llm/replicate.rb +1 -7
- data/lib/langchain/llm/response/anthropic_response.rb +20 -0
- data/lib/langchain/llm/response/base_response.rb +7 -0
- data/lib/langchain/llm/response/google_palm_response.rb +4 -0
- data/lib/langchain/llm/response/google_vertex_ai_response.rb +33 -0
- data/lib/langchain/llm/response/llama_cpp_response.rb +13 -0
- data/lib/langchain/llm/response/mistral_ai_response.rb +39 -0
- data/lib/langchain/llm/response/ollama_response.rb +27 -1
- data/lib/langchain/llm/response/openai_response.rb +8 -0
- data/lib/langchain/loader.rb +3 -2
- data/lib/langchain/output_parsers/base.rb +0 -4
- data/lib/langchain/output_parsers/output_fixing_parser.rb +7 -14
- data/lib/langchain/output_parsers/structured_output_parser.rb +0 -10
- data/lib/langchain/processors/csv.rb +37 -3
- data/lib/langchain/processors/eml.rb +64 -0
- data/lib/langchain/processors/markdown.rb +17 -0
- data/lib/langchain/processors/pptx.rb +29 -0
- data/lib/langchain/prompt/loading.rb +1 -1
- data/lib/langchain/tool/base.rb +21 -53
- data/lib/langchain/tool/calculator/calculator.json +19 -0
- data/lib/langchain/tool/{calculator.rb → calculator/calculator.rb} +8 -16
- data/lib/langchain/tool/database/database.json +46 -0
- data/lib/langchain/tool/database/database.rb +99 -0
- data/lib/langchain/tool/file_system/file_system.json +57 -0
- data/lib/langchain/tool/file_system/file_system.rb +32 -0
- data/lib/langchain/tool/google_search/google_search.json +19 -0
- data/lib/langchain/tool/{google_search.rb → google_search/google_search.rb} +5 -15
- data/lib/langchain/tool/ruby_code_interpreter/ruby_code_interpreter.json +19 -0
- data/lib/langchain/tool/{ruby_code_interpreter.rb → ruby_code_interpreter/ruby_code_interpreter.rb} +8 -4
- data/lib/langchain/tool/vectorsearch/vectorsearch.json +24 -0
- data/lib/langchain/tool/vectorsearch/vectorsearch.rb +36 -0
- data/lib/langchain/tool/weather/weather.json +19 -0
- data/lib/langchain/tool/{weather.rb → weather/weather.rb} +3 -15
- data/lib/langchain/tool/wikipedia/wikipedia.json +19 -0
- data/lib/langchain/tool/{wikipedia.rb → wikipedia/wikipedia.rb} +9 -9
- data/lib/langchain/utils/token_length/ai21_validator.rb +6 -2
- data/lib/langchain/utils/token_length/base_validator.rb +1 -1
- data/lib/langchain/utils/token_length/cohere_validator.rb +6 -2
- data/lib/langchain/utils/token_length/google_palm_validator.rb +5 -1
- data/lib/langchain/utils/token_length/openai_validator.rb +55 -1
- data/lib/langchain/utils/token_length/token_limit_exceeded.rb +1 -1
- data/lib/langchain/vectorsearch/base.rb +11 -4
- data/lib/langchain/vectorsearch/chroma.rb +10 -1
- data/lib/langchain/vectorsearch/elasticsearch.rb +53 -4
- data/lib/langchain/vectorsearch/epsilla.rb +149 -0
- data/lib/langchain/vectorsearch/hnswlib.rb +5 -1
- data/lib/langchain/vectorsearch/milvus.rb +4 -2
- data/lib/langchain/vectorsearch/pgvector.rb +14 -4
- data/lib/langchain/vectorsearch/pinecone.rb +8 -5
- data/lib/langchain/vectorsearch/qdrant.rb +16 -4
- data/lib/langchain/vectorsearch/weaviate.rb +20 -2
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +20 -5
- metadata +182 -45
- data/lib/langchain/agent/agents.md +0 -54
- data/lib/langchain/agent/base.rb +0 -20
- data/lib/langchain/agent/react_agent/react_agent_prompt.yaml +0 -26
- data/lib/langchain/agent/react_agent.rb +0 -131
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml +0 -11
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml +0 -21
- data/lib/langchain/agent/sql_query_agent.rb +0 -82
- data/lib/langchain/conversation/context.rb +0 -8
- data/lib/langchain/conversation/memory.rb +0 -86
- data/lib/langchain/conversation/message.rb +0 -48
- data/lib/langchain/conversation/prompt.rb +0 -8
- data/lib/langchain/conversation/response.rb +0 -8
- data/lib/langchain/conversation.rb +0 -93
- data/lib/langchain/tool/database.rb +0 -90
data/lib/langchain/vectorsearch/elasticsearch.rb CHANGED

@@ -46,6 +46,9 @@ module Langchain::Vectorsearch
       super(llm: llm)
     end
 
+    # Add a list of texts to the index
+    # @param texts [Array<String>] The list of texts to add
+    # @return [Elasticsearch::Response] from the Elasticsearch server
     def add_texts(texts: [])
       body = texts.map do |text|
         [
@@ -57,6 +60,10 @@ module Langchain::Vectorsearch
       es_client.bulk(body: body)
     end
 
+    # Update a list of texts in the index
+    # @param texts [Array<String>] The list of texts to update
+    # @param ids [Array<Integer>] The ids of the texts to update, in the same order
+    # @return [Elasticsearch::Response] from the Elasticsearch server
     def update_texts(texts: [], ids: [])
       body = texts.map.with_index do |text, i|
         [
@@ -68,6 +75,19 @@ module Langchain::Vectorsearch
       es_client.bulk(body: body)
     end
 
+    # Remove a list of texts from the index
+    # @param ids [Array<Integer>] The list of ids to delete
+    # @return [Elasticsearch::Response] from the Elasticsearch server
+    def remove_texts(ids: [])
+      body = ids.map do |id|
+        {delete: {_index: index_name, _id: id}}
+      end
+
+      es_client.bulk(body: body)
+    end
+
+    # Create the index with the default schema
+    # @return [Elasticsearch::Response] Index creation
     def create_default_schema
       es_client.indices.create(
         index: index_name,
@@ -75,6 +95,8 @@ module Langchain::Vectorsearch
       )
     end
 
+    # Deletes the default schema
+    # @return [Elasticsearch::Response] Index deletion
     def delete_default_schema
       es_client.indices.delete(
         index: index_name
@@ -82,7 +104,7 @@ module Langchain::Vectorsearch
     end
 
     def default_vector_settings
-      {type: "dense_vector", dims: llm.default_dimension}
+      {type: "dense_vector", dims: llm.default_dimensions}
     end
 
     def vector_settings
@@ -116,10 +138,32 @@ module Langchain::Vectorsearch
       }
     end
 
-    #
-    #
-    #
+    # Ask a question and return the answer
+    # @param question [String] The question to ask
+    # @param k [Integer] The number of results to have in context
+    # @yield [String] Stream responses back one String at a time
+    # @return [String] The answer to the question
+    def ask(question:, k: 4, &block)
+      search_results = similarity_search(query: question, k: k)
+
+      context = search_results.map do |result|
+        result[:input]
+      end.join("\n---\n")
+
+      prompt = generate_rag_prompt(question: question, context: context)
+
+      messages = [{role: "user", content: prompt}]
+      response = llm.chat(messages: messages, &block)
+
+      response.context = context
+      response
+    end
 
+    # Search for similar texts
+    # @param text [String] The text to search for
+    # @param k [Integer] The number of results to return
+    # @param query [Hash] Elasticsearch query that needs to be used while searching (Optional)
+    # @return [Elasticsearch::Response] The response from the server
     def similarity_search(text: "", k: 10, query: {})
       if text.empty? && query.empty?
         raise "Either text or query should be passed as an argument"
@@ -134,6 +178,11 @@ module Langchain::Vectorsearch
       es_client.search(body: {query: query, size: k}).body
     end
 
+    # Search for similar texts by embedding
+    # @param embedding [Array<Float>] The embedding to search for
+    # @param k [Integer] The number of results to return
+    # @param query [Hash] Elasticsearch query that needs to be used while searching (Optional)
+    # @return [Elasticsearch::Response] The response from the server
     def similarity_search_by_vector(embedding: [], k: 10, query: {})
       if embedding.empty? && query.empty?
         raise "Either embedding or query should be passed as an argument"
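The new ask method is a plain retrieval-augmented generation loop: fetch the top-k similar documents, join them with "\n---\n" into a context string, render the RAG prompt, and send it to the LLM as a single user message, streaming chunks to an optional block. A minimal usage sketch; the LLM setup, URL, and index name below are illustrative, not part of this diff:

    llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
    es = Langchain::Vectorsearch::Elasticsearch.new(
      url: "http://localhost:9200", # illustrative
      index_name: "docs",           # illustrative
      llm: llm
    )

    es.add_texts(texts: ["Ruby was created by Matz.", "Rails is a Ruby web framework."])

    # The block streams the answer one chunk at a time; the return value is the
    # LLM response object with #context set to the retrieved documents.
    response = es.ask(question: "Who created Ruby?", k: 2) { |chunk| print(chunk) }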
data/lib/langchain/vectorsearch/epsilla.rb ADDED

@@ -0,0 +1,149 @@
+# frozen_string_literal: true
+
+require "securerandom"
+require "json"
+require "timeout"
+require "uri"
+
+module Langchain::Vectorsearch
+  class Epsilla < Base
+    #
+    # Wrapper around Epsilla client library
+    #
+    # Gem requirements:
+    #     gem "epsilla-ruby", "~> 0.0.3"
+    #
+    # Usage:
+    #     epsilla = Langchain::Vectorsearch::Epsilla.new(url:, db_name:, db_path:, index_name:, llm:)
+    #
+    # Initialize Epsilla client
+    # @param url [String] URL to connect to the Epsilla db instance, protocol://host:port
+    # @param db_name [String] The name of the database to use
+    # @param db_path [String] The path to the database to use
+    # @param index_name [String] The name of the Epsilla table to use
+    # @param llm [Object] The LLM client to use
+    def initialize(url:, db_name:, db_path:, index_name:, llm:)
+      depends_on "epsilla-ruby", req: "epsilla"
+
+      uri = URI.parse(url)
+      protocol = uri.scheme
+      host = uri.host
+      port = uri.port
+
+      @client = ::Epsilla::Client.new(protocol, host, port)
+
+      Timeout.timeout(5) do
+        status_code, response = @client.database.load_db(db_name, db_path)
+
+        if status_code != 200
+          if status_code == 409 || (status_code == 500 && response["message"].include?("already loaded"))
+            # When db is already loaded, Epsilla may return HTTP 409 Conflict.
+            # This behavior is changed in https://github.com/epsilla-cloud/vectordb/pull/95
+            # Old behavior (HTTP 500) is preserved for backwards compatibility.
+            # It does not prevent us from using the db.
+            Langchain.logger.info("Database already loaded")
+          else
+            raise "Failed to load database: #{response}"
+          end
+        end
+      end
+
+      @client.database.use_db(db_name)
+
+      @db_name = db_name
+      @db_path = db_path
+      @table_name = index_name
+
+      @vector_dimensions = llm.default_dimensions
+
+      super(llm: llm)
+    end
+
+    # Create a table using the index_name passed in the constructor
+    def create_default_schema
+      status_code, response = @client.database.create_table(@table_name, [
+        {"name" => "ID", "dataType" => "STRING", "primaryKey" => true},
+        {"name" => "Doc", "dataType" => "STRING"},
+        {"name" => "Embedding", "dataType" => "VECTOR_FLOAT", "dimensions" => @vector_dimensions}
+      ])
+      raise "Failed to create table: #{response}" if status_code != 200
+
+      response
+    end
+
+    # Drop the table using the index_name passed in the constructor
+    def destroy_default_schema
+      status_code, response = @client.database.drop_table(@table_name)
+      raise "Failed to drop table: #{response}" if status_code != 200
+
+      response
+    end
+
+    # Add a list of texts to the database
+    # @param texts [Array<String>] The list of texts to add
+    # @param ids [Array<String>] The unique ids to add to the index, in the same order as the texts; if nil, it will be random uuids
+    def add_texts(texts:, ids: nil)
+      validated_ids = ids
+      if ids.nil?
+        validated_ids = texts.map { SecureRandom.uuid }
+      elsif ids.length != texts.length
+        raise "The number of ids must match the number of texts"
+      end
+
+      data = texts.map.with_index do |text, idx|
+        {Doc: text, Embedding: llm.embed(text: text).embedding, ID: validated_ids[idx]}
+      end
+
+      status_code, response = @client.database.insert(@table_name, data)
+      raise "Failed to insert texts: #{response}" if status_code != 200
+      response
+    end
+
+    # Search for similar texts
+    # @param query [String] The text to search for
+    # @param k [Integer] The number of results to return
+    # @return [String] The response from the server
+    def similarity_search(query:, k: 4)
+      embedding = llm.embed(text: query).embedding
+
+      similarity_search_by_vector(
+        embedding: embedding,
+        k: k
+      )
+    end
+
+    # Search for entries by embedding
+    # @param embedding [Array<Float>] The embedding to search for
+    # @param k [Integer] The number of results to return
+    # @return [String] The response from the server
+    def similarity_search_by_vector(embedding:, k: 4)
+      status_code, response = @client.database.query(@table_name, "Embedding", embedding, ["Doc"], k, false)
+      raise "Failed to do similarity search: #{response}" if status_code != 200
+
+      data = JSON.parse(response)["result"]
+      data.map { |result| result["Doc"] }
+    end
+
+    # Ask a question and return the answer
+    # @param question [String] The question to ask
+    # @param k [Integer] The number of results to have in context
+    # @yield [String] Stream responses back one String at a time
+    # @return [String] The answer to the question
+    def ask(question:, k: 4, &block)
+      search_results = similarity_search(query: question, k: k)
+
+      context = search_results.map do |result|
+        result.to_s
+      end
+      context = context.join("\n---\n")
+
+      prompt = generate_rag_prompt(question: question, context: context)
+
+      messages = [{role: "user", content: prompt}]
+      response = llm.chat(messages: messages, &block)
+
+      response.context = context
+      response
+    end
+  end
+end
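Because the Epsilla adapter is new in this release, an end-to-end sketch may help; the URL, database name, path, and table name below are placeholders:

    llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
    epsilla = Langchain::Vectorsearch::Epsilla.new(
      url: "http://localhost:8888", # placeholder protocol://host:port
      db_name: "MyDB",              # placeholder
      db_path: "/tmp/mydb",         # placeholder
      index_name: "Docs",           # placeholder table name
      llm: llm
    )

    epsilla.create_default_schema # creates the ID / Doc / Embedding table
    epsilla.add_texts(texts: ["Epsilla is a vector database."])
    epsilla.similarity_search(query: "vector database", k: 1)
    # => ["Epsilla is a vector database."] -- only the Doc column is returned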
data/lib/langchain/vectorsearch/hnswlib.rb CHANGED

@@ -26,7 +26,7 @@ module Langchain::Vectorsearch
 
       super(llm: llm)
 
-      @client = ::Hnswlib::HierarchicalNSW.new(space: DEFAULT_METRIC, dim: llm.default_dimension)
+      @client = ::Hnswlib::HierarchicalNSW.new(space: DEFAULT_METRIC, dim: llm.default_dimensions)
       @path_to_index = path_to_index
 
       initialize_index
@@ -86,6 +86,10 @@ module Langchain::Vectorsearch
       client.search_knn(embedding, k)
     end
 
+    # TODO: Add the ask() method
+    # def ask
+    # end
+
     private
 
     #
data/lib/langchain/vectorsearch/milvus.rb CHANGED

@@ -71,7 +71,7 @@ module Langchain::Vectorsearch
           type_params: [
             {
               key: "dim",
-              value: llm.default_dimension.to_s
+              value: llm.default_dimensions.to_s
             }
           ]
         }
@@ -151,7 +151,9 @@ module Langchain::Vectorsearch
 
       prompt = generate_rag_prompt(question: question, context: context)
 
-      response = llm.chat(prompt: prompt, &block)
+      messages = [{role: "user", content: prompt}]
+      response = llm.chat(messages: messages, &block)
+
       response.context = context
       response
     end
data/lib/langchain/vectorsearch/pgvector.rb CHANGED

@@ -16,7 +16,8 @@ module Langchain::Vectorsearch
     # The operators supported by the PostgreSQL vector search adapter
     OPERATORS = {
       "cosine_distance" => "cosine",
-      "euclidean_distance" => "euclidean"
+      "euclidean_distance" => "euclidean",
+      "inner_product_distance" => "inner_product"
     }
     DEFAULT_OPERATOR = "cosine_distance"
 
@@ -89,15 +90,22 @@ module Langchain::Vectorsearch
       upsert_texts(texts: texts, ids: ids)
     end
 
+    # Remove a list of texts from the index
+    # @param ids [Array<Integer>] The ids of the texts to remove from the index
+    # @return [Integer] The number of texts removed from the index
+    def remove_texts(ids:)
+      @db[table_name.to_sym].where(id: ids).delete
+    end
+
     # Create default schema
     def create_default_schema
       db.run "CREATE EXTENSION IF NOT EXISTS vector"
       namespace_column = @namespace_column
-
+      vector_dimensions = llm.default_dimensions
       db.create_table? table_name.to_sym do
         primary_key :id
         text :content
-        column :vectors, "vector(#{llm.default_dimension})"
+        column :vectors, "vector(#{vector_dimensions})"
         text namespace_column.to_sym, default: nil
       end
     end
@@ -148,7 +156,9 @@ module Langchain::Vectorsearch
 
       prompt = generate_rag_prompt(question: question, context: context)
 
-      response = llm.chat(prompt: prompt, &block)
+      messages = [{role: "user", content: prompt}]
+      response = llm.chat(messages: messages, &block)
+
       response.context = context
       response
     end
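Two pgvector notes: the OPERATORS map now supports inner-product distance alongside cosine and Euclidean, and remove_texts is a straight Sequel delete keyed on primary-key ids, returning the number of rows removed. A small sketch, assuming an already constructed adapter in pg and previously inserted rows:

    # ids are primary-key values of rows in the vectorsearch table.
    deleted_count = pg.remove_texts(ids: [1, 2, 3])
    deleted_count # => 3 when all three rows existed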
data/lib/langchain/vectorsearch/pinecone.rb CHANGED

@@ -17,12 +17,13 @@ module Langchain::Vectorsearch
     # @param api_key [String] The API key to use
     # @param index_name [String] The name of the index to use
     # @param llm [Object] The LLM client to use
-    def initialize(environment:, api_key:, index_name:, llm:)
+    def initialize(environment:, api_key:, index_name:, llm:, base_uri: nil)
       depends_on "pinecone"
 
       ::Pinecone.configure do |config|
         config.api_key = api_key
         config.environment = environment
+        config.base_uri = base_uri if base_uri
       end
 
       @client = ::Pinecone::Client.new
@@ -64,13 +65,13 @@ module Langchain::Vectorsearch
       index.upsert(vectors: vectors, namespace: namespace)
     end
 
-    def add_data(paths:, namespace: "")
+    def add_data(paths:, namespace: "", options: {}, chunker: Langchain::Chunker::Text)
       raise ArgumentError, "Paths must be provided" if Array(paths).empty?
 
       texts = Array(paths)
         .flatten
         .map do |path|
-          data = Langchain::Loader.new(path)&.load&.chunks
+          data = Langchain::Loader.new(path, options, chunker: chunker)&.load&.chunks
           data.map { |chunk| chunk.text }
         end
 
@@ -103,7 +104,7 @@ module Langchain::Vectorsearch
       client.create_index(
         metric: DEFAULT_METRIC,
         name: index_name,
-        dimension: llm.default_dimension
+        dimension: llm.default_dimensions
       )
     end
 
@@ -180,7 +181,9 @@ module Langchain::Vectorsearch
 
       prompt = generate_rag_prompt(question: question, context: context)
 
-      response = llm.chat(prompt: prompt, &block)
+      messages = [{role: "user", content: prompt}]
+      response = llm.chat(messages: messages, &block)
+
       response.context = context
       response
     end
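The new base_uri option lets the client target an explicit Pinecone endpoint rather than deriving one from the environment, and add_data now threads loader options and a chunker through to Langchain::Loader. A sketch; the host, index name, and file path are placeholders:

    pinecone = Langchain::Vectorsearch::Pinecone.new(
      environment: ENV["PINECONE_ENVIRONMENT"],
      api_key: ENV["PINECONE_API_KEY"],
      index_name: "recipes",                             # placeholder
      llm: llm,
      base_uri: "https://recipes-abc123.svc.pinecone.io" # placeholder endpoint
    )

    # Split the source file with the Markdown chunker instead of the default Text chunker.
    pinecone.add_data(paths: ["recipes.md"], chunker: Langchain::Chunker::Markdown)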
data/lib/langchain/vectorsearch/qdrant.rb CHANGED

@@ -44,14 +44,14 @@ module Langchain::Vectorsearch
     # Add a list of texts to the index
     # @param texts [Array<String>] The list of texts to add
     # @return [Hash] The response from the server
-    def add_texts(texts:, ids: [])
+    def add_texts(texts:, ids: [], payload: {})
       batch = {ids: [], vectors: [], payloads: []}
 
       Array(texts).each_with_index do |text, i|
         id = ids[i] || SecureRandom.uuid
         batch[:ids].push(id)
         batch[:vectors].push(llm.embed(text: text).embedding)
-        batch[:payloads].push({content: text})
+        batch[:payloads].push({content: text}.merge(payload))
       end
 
       client.points.upsert(
@@ -64,6 +64,16 @@ module Langchain::Vectorsearch
       add_texts(texts: texts, ids: ids)
     end
 
+    # Remove a list of texts from the index
+    # @param ids [Array<Integer>] The ids to remove
+    # @return [Hash] The response from the server
+    def remove_texts(ids:)
+      client.points.delete(
+        collection_name: index_name,
+        points: ids
+      )
+    end
+
     # Get the default schema
     # @return [Hash] The response from the server
     def get_default_schema
@@ -83,7 +93,7 @@ module Langchain::Vectorsearch
         collection_name: index_name,
         vectors: {
           distance: DEFAULT_METRIC.capitalize,
-          size: llm.default_dimension
+          size: llm.default_dimensions
         }
       )
     end
@@ -137,7 +147,9 @@ module Langchain::Vectorsearch
 
       prompt = generate_rag_prompt(question: question, context: context)
 
-      response = llm.chat(prompt: prompt, &block)
+      messages = [{role: "user", content: prompt}]
+      response = llm.chat(messages: messages, &block)
+
       response.context = context
       response
     end
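The payload keyword merges caller-supplied metadata into every point's payload next to the :content key, and remove_texts deletes points by id. A sketch against an already constructed adapter in qdrant; the id is a placeholder:

    qdrant.add_texts(
      texts: ["chunk one", "chunk two"],
      payload: {source: "manual.pdf"} # stored on both points alongside content
    )

    # Point ids are generated with SecureRandom.uuid unless passed in explicitly.
    qdrant.remove_texts(ids: ["0f7e9f2c-6a1b-4a8e-9b1d-3c5d2a7e4f10"]) # placeholder id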
data/lib/langchain/vectorsearch/weaviate.rb CHANGED

@@ -9,7 +9,7 @@ module Langchain::Vectorsearch
     # gem "weaviate-ruby", "~> 0.8.9"
     #
     # Usage:
-    # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:)
+    # weaviate = Langchain::Vectorsearch::Weaviate.new(url: ENV["WEAVIATE_URL"], api_key: ENV["WEAVIATE_API_KEY"], index_name: "Docs", llm: llm)
     #
 
     # Initialize the Weaviate adapter
@@ -71,6 +71,22 @@ module Langchain::Vectorsearch
       end
     end
 
+    # Deletes a list of texts in the index
+    # @param ids [Array] The ids of texts to delete
+    # @return [Hash] The response from the server
+    def remove_texts(ids:)
+      raise ArgumentError, "ids must be an array" unless ids.is_a?(Array)
+
+      client.objects.batch_delete(
+        class_name: index_name,
+        where: {
+          path: ["__id"],
+          operator: "ContainsAny",
+          valueTextArray: ids
+        }
+      )
+    end
+
     # Create default schema
     # @return [Hash] The response from the server
     def create_default_schema
@@ -137,7 +153,9 @@ module Langchain::Vectorsearch
 
       prompt = generate_rag_prompt(question: question, context: context)
 
-      response = llm.chat(prompt: prompt, &block)
+      messages = [{role: "user", content: prompt}]
+      response = llm.chat(messages: messages, &block)
+
       response.context = context
       response
     end
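Weaviate's remove_texts issues a single batch delete filtered on the internal __id property with the ContainsAny operator, so one call can remove many objects; anything other than an Array raises ArgumentError. A sketch, with a placeholder object id:

    weaviate.remove_texts(
      ids: ["5f9a1e2c-0b1d-4c3e-8a7f-2d4b6c8e0a1f"] # placeholder Weaviate object id
    )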
data/lib/langchain/version.rb CHANGED
data/lib/langchain.rb CHANGED

@@ -16,14 +16,29 @@ loader.inflector.inflect(
   "json" => "JSON",
   "jsonl" => "JSONL",
   "llm" => "LLM",
+  "mistral_ai" => "MistralAI",
+  "mistral_ai_response" => "MistralAIResponse",
   "openai" => "OpenAI",
   "openai_validator" => "OpenAIValidator",
   "openai_response" => "OpenAIResponse",
-  "pdf" => "PDF",
-  "react_agent" => "ReActAgent",
-  "sql_query_agent" => "SQLQueryAgent"
+  "pdf" => "PDF"
 )
 loader.collapse("#{__dir__}/langchain/llm/response")
+loader.collapse("#{__dir__}/langchain/assistants")
+
+loader.collapse("#{__dir__}/langchain/tool/calculator")
+loader.collapse("#{__dir__}/langchain/tool/database")
+loader.collapse("#{__dir__}/langchain/tool/file_system")
+loader.collapse("#{__dir__}/langchain/tool/google_search")
+loader.collapse("#{__dir__}/langchain/tool/ruby_code_interpreter")
+loader.collapse("#{__dir__}/langchain/tool/vectorsearch")
+loader.collapse("#{__dir__}/langchain/tool/weather")
+loader.collapse("#{__dir__}/langchain/tool/wikipedia")
+
+# RubyCodeInterpreter does not work with Ruby 3.3;
+# https://github.com/ukutaht/safe_ruby/issues/4
+loader.ignore("#{__dir__}/langchain/tool/ruby_code_interpreter") if RUBY_VERSION >= "3.3.0"
+
 loader.setup
 
 # Langchain.rb is a library for building LLM-backed Ruby applications. It is an abstraction layer that sits on top of the emerging AI-related tools and makes it easy for developers to consume and string those services together.
@@ -64,7 +79,7 @@ loader.setup
 #
 # = Logging
 #
-#
+# Langchain.rb uses standard logging mechanisms and defaults to :debug level. Most messages are at info level, but we will add debug or warn statements as needed. To show all log messages:
 #
 #     Langchain.logger.level = :info
 module Langchain
@@ -82,7 +97,7 @@ module Langchain
     attr_reader :root
   end
 
-  self.logger ||= ::Logger.new($stdout, level: :
+  self.logger ||= ::Logger.new($stdout, level: :debug)
 
   @root = Pathname.new(__dir__)
 
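The loader.collapse calls keep Zeitwerk from adding the new per-tool directory level to the constant path, so the files moved in this release (for example calculator.rb → calculator/calculator.rb) still define the same constants. An illustration of the mapping; this is standard Zeitwerk collapse behavior, not code from the diff:

    # With loader.collapse("#{__dir__}/langchain/tool/calculator"):
    #   lib/langchain/tool/calculator/calculator.rb defines Langchain::Tool::Calculator
    # Without the collapse, Zeitwerk would instead expect it to define
    #   Langchain::Tool::Calculator::Calculator
    Langchain::Tool::Calculator # still resolves after the file move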