langchainrb 0.6.18 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +3 -3
- data/lib/langchain/vectorsearch/base.rb +3 -6
- data/lib/langchain/vectorsearch/chroma.rb +8 -7
- data/lib/langchain/vectorsearch/elasticsearch.rb +147 -0
- data/lib/langchain/vectorsearch/milvus.rb +3 -2
- data/lib/langchain/vectorsearch/pgvector.rb +5 -3
- data/lib/langchain/vectorsearch/pinecone.rb +4 -3
- data/lib/langchain/vectorsearch/qdrant.rb +4 -3
- data/lib/langchain/vectorsearch/weaviate.rb +3 -2
- data/lib/langchain/version.rb +1 -1
- metadata +21 -8
- data/lib/langchain/active_record/hooks.rb +0 -112
- data/lib/langchain/railtie.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4efc896c1c0fa895ebd11bdb3c4d5604ccd34878aa29472419efca85800072da
|
4
|
+
data.tar.gz: c703f8150c7a6a6cb802260da2eeb95d4f7542cc0c5490794cea844351b4fe7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 18b0c48b747978b2a92ecd8d118f31b199d5f58469a5dd55ccbfc3a56a9044faf42784e4760ff9a9fca94da019107629c107a3fe586b7d55243aa92bd1c5b949
|
7
|
+
data.tar.gz: d8513d2018ce48a60fbecc9ca3efb91411385fd71301e0c79ac3612d91b2b504427f2ea19b41991f2f4b43a8bd3b650035684b02a46ae0b114d83343ef5bce18
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.7.0] - 2023-10-22
|
4
|
+
- BREAKING: Moving Rails-specific code to `langchainrb_rails` gem
|
5
|
+
|
6
|
+
## [0.6.19] - 2023-10-18
|
7
|
+
- Elasticsearch vector search support
|
8
|
+
- Fix `lib/langchain/railtie.rb` not being loaded with the gem
|
9
|
+
|
3
10
|
## [0.6.18] - 2023-10-16
|
4
11
|
- Introduce `Langchain::LLM::Response` object
|
5
12
|
- Introduce `Langchain::Chunk` object
|
data/README.md
CHANGED
@@ -19,11 +19,11 @@ Langchain.rb is a library that's an abstraction layer on top many emergent AI, M
|
|
19
19
|
|
20
20
|
Install the gem and add to the application's Gemfile by executing:
|
21
21
|
|
22
|
-
|
22
|
+
bundle add langchainrb
|
23
23
|
|
24
24
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
25
25
|
|
26
|
-
|
26
|
+
gem install langchainrb
|
27
27
|
|
28
28
|
## Usage
|
29
29
|
|
@@ -37,7 +37,7 @@ require "langchain"
|
|
37
37
|
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
|
38
38
|
| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
39
39
|
| [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
40
|
-
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
40
|
+
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
41
41
|
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
42
42
|
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
43
43
|
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "forwardable"
|
4
|
-
|
5
3
|
module Langchain::Vectorsearch
|
6
4
|
# = Vector Databases
|
7
5
|
# A vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
|
@@ -9,11 +7,13 @@ module Langchain::Vectorsearch
|
|
9
7
|
# == Available vector databases
|
10
8
|
#
|
11
9
|
# - {Langchain::Vectorsearch::Chroma}
|
10
|
+
# - {Langchain::Vectorsearch::Elasticsearch}
|
11
|
+
# - {Langchain::Vectorsearch::Hnswlib}
|
12
12
|
# - {Langchain::Vectorsearch::Milvus}
|
13
|
+
# - {Langchain::Vectorsearch::Pgvector}
|
13
14
|
# - {Langchain::Vectorsearch::Pinecone}
|
14
15
|
# - {Langchain::Vectorsearch::Qdrant}
|
15
16
|
# - {Langchain::Vectorsearch::Weaviate}
|
16
|
-
# - {Langchain::Vectorsearch::Pgvector}
|
17
17
|
#
|
18
18
|
# == Usage
|
19
19
|
#
|
@@ -150,9 +150,6 @@ module Langchain::Vectorsearch
|
|
150
150
|
raise NotImplementedError, "#{self.class.name} does not support asking questions"
|
151
151
|
end
|
152
152
|
|
153
|
-
def_delegators :llm,
|
154
|
-
:default_dimension
|
155
|
-
|
156
153
|
# HyDE-style prompt
|
157
154
|
#
|
158
155
|
# @param [String] User's question
|
@@ -5,7 +5,8 @@ module Langchain::Vectorsearch
|
|
5
5
|
#
|
6
6
|
# Wrapper around Chroma DB
|
7
7
|
#
|
8
|
-
# Gem requirements:
|
8
|
+
# Gem requirements:
|
9
|
+
# gem "chroma-db", "~> 0.6.0"
|
9
10
|
#
|
10
11
|
# Usage:
|
11
12
|
# chroma = Langchain::Vectorsearch::Chroma.new(url:, index_name:, llm:, llm_api_key:, api_key: nil)
|
@@ -29,14 +30,15 @@ module Langchain::Vectorsearch
|
|
29
30
|
|
30
31
|
# Add a list of texts to the index
|
31
32
|
# @param texts [Array<String>] The list of texts to add
|
33
|
+
# @param ids [Array<String>] The list of ids to use for the texts (optional)
|
34
|
+
# @param metadatas [Array<Hash>] The list of metadata to use for the texts (optional)
|
32
35
|
# @return [Hash] The response from the server
|
33
|
-
def add_texts(texts:, ids: [])
|
36
|
+
def add_texts(texts:, ids: [], metadatas: [])
|
34
37
|
embeddings = Array(texts).map.with_index do |text, i|
|
35
38
|
::Chroma::Resources::Embedding.new(
|
36
39
|
id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
|
37
40
|
embedding: llm.embed(text: text).embedding,
|
38
|
-
|
39
|
-
metadata: {}, # metadatas[index],
|
41
|
+
metadata: metadatas[i] || {},
|
40
42
|
document: text # Do we actually need to store the whole original document?
|
41
43
|
)
|
42
44
|
end
|
@@ -45,13 +47,12 @@ module Langchain::Vectorsearch
|
|
45
47
|
collection.add(embeddings)
|
46
48
|
end
|
47
49
|
|
48
|
-
def update_texts(texts:, ids:)
|
50
|
+
def update_texts(texts:, ids:, metadatas: [])
|
49
51
|
embeddings = Array(texts).map.with_index do |text, i|
|
50
52
|
::Chroma::Resources::Embedding.new(
|
51
53
|
id: ids[i].to_s,
|
52
54
|
embedding: llm.embed(text: text).embedding,
|
53
|
-
|
54
|
-
metadata: [], # metadatas[index],
|
55
|
+
metadata: metadatas[i] || {},
|
55
56
|
document: text # Do we actually need to store the whole original document?
|
56
57
|
)
|
57
58
|
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::Vectorsearch
|
4
|
+
class Elasticsearch < Base
|
5
|
+
#
|
6
|
+
# Wrapper around Elasticsearch vector search capabilities.
|
7
|
+
#
|
8
|
+
# Setting up Elasticsearch:
|
9
|
+
# 1. Get Elasticsearch up and running with Docker: https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html
|
10
|
+
# 2. Copy the HTTP CA certificate SHA-256 fingerprint and set the ELASTICSEARCH_CA_FINGERPRINT environment variable
|
11
|
+
# 3. Set the ELASTICSEARCH_URL environment variable
|
12
|
+
#
|
13
|
+
# Gem requirements:
|
14
|
+
# gem "elasticsearch", "~> 8.2.0"
|
15
|
+
#
|
16
|
+
# Usage:
|
17
|
+
# llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
18
|
+
# es = Langchain::Vectorsearch::Elasticsearch.new(
|
19
|
+
# url: ENV["ELASTICSEARCH_URL"],
|
20
|
+
# index_name: "docs",
|
21
|
+
# llm: llm,
|
22
|
+
# es_options: {
|
23
|
+
# transport_options: {ssl: {verify: false}},
|
24
|
+
# ca_fingerprint: ENV["ELASTICSEARCH_CA_FINGERPRINT"]
|
25
|
+
# }
|
26
|
+
# )
|
27
|
+
#
|
28
|
+
# es.create_default_schema
|
29
|
+
# es.add_texts(texts: ["..."])
|
30
|
+
# es.similarity_search(text: "...")
|
31
|
+
#
|
32
|
+
attr_accessor :es_client, :index_name, :options
|
33
|
+
|
34
|
+
def initialize(url:, index_name:, llm:, api_key: nil, es_options: {})
|
35
|
+
require "elasticsearch"
|
36
|
+
|
37
|
+
@options = {
|
38
|
+
url: url,
|
39
|
+
request_timeout: 20,
|
40
|
+
log: false
|
41
|
+
}.merge(es_options)
|
42
|
+
|
43
|
+
@es_client = ::Elasticsearch::Client.new(**options)
|
44
|
+
@index_name = index_name
|
45
|
+
|
46
|
+
super(llm: llm)
|
47
|
+
end
|
48
|
+
|
49
|
+
def add_texts(texts: [])
|
50
|
+
body = texts.map do |text|
|
51
|
+
[
|
52
|
+
{index: {_index: index_name}},
|
53
|
+
{input: text, input_vector: llm.embed(text: text).embedding}
|
54
|
+
]
|
55
|
+
end.flatten
|
56
|
+
|
57
|
+
es_client.bulk(body: body)
|
58
|
+
end
|
59
|
+
|
60
|
+
def update_texts(texts: [], ids: [])
|
61
|
+
body = texts.map.with_index do |text, i|
|
62
|
+
[
|
63
|
+
{index: {_index: index_name, _id: ids[i]}},
|
64
|
+
{input: text, input_vector: llm.embed(text: text).embedding}
|
65
|
+
]
|
66
|
+
end.flatten
|
67
|
+
|
68
|
+
es_client.bulk(body: body)
|
69
|
+
end
|
70
|
+
|
71
|
+
def create_default_schema
|
72
|
+
es_client.indices.create(
|
73
|
+
index: index_name,
|
74
|
+
body: default_schema
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
def delete_default_schema
|
79
|
+
es_client.indices.delete(
|
80
|
+
index: index_name
|
81
|
+
)
|
82
|
+
end
|
83
|
+
|
84
|
+
def default_vector_settings
|
85
|
+
{type: "dense_vector", dims: llm.default_dimension}
|
86
|
+
end
|
87
|
+
|
88
|
+
def vector_settings
|
89
|
+
options[:vector_settings] || default_vector_settings
|
90
|
+
end
|
91
|
+
|
92
|
+
def default_schema
|
93
|
+
{
|
94
|
+
mappings: {
|
95
|
+
properties: {
|
96
|
+
input: {
|
97
|
+
type: "text"
|
98
|
+
},
|
99
|
+
input_vector: vector_settings
|
100
|
+
}
|
101
|
+
}
|
102
|
+
}
|
103
|
+
end
|
104
|
+
|
105
|
+
def default_query(query_vector)
|
106
|
+
{
|
107
|
+
script_score: {
|
108
|
+
query: {match_all: {}},
|
109
|
+
script: {
|
110
|
+
source: "cosineSimilarity(params.query_vector, 'input_vector') + 1.0",
|
111
|
+
params: {
|
112
|
+
query_vector: query_vector
|
113
|
+
}
|
114
|
+
}
|
115
|
+
}
|
116
|
+
}
|
117
|
+
end
|
118
|
+
|
119
|
+
# TODO: Implement this
|
120
|
+
# def ask()
|
121
|
+
# end
|
122
|
+
|
123
|
+
def similarity_search(text: "", k: 10, query: {})
|
124
|
+
if text.empty? && query.empty?
|
125
|
+
raise "Either text or query should pass as an argument"
|
126
|
+
end
|
127
|
+
|
128
|
+
if query.empty?
|
129
|
+
query_vector = llm.embed(text: text).embedding
|
130
|
+
|
131
|
+
query = default_query(query_vector)
|
132
|
+
end
|
133
|
+
|
134
|
+
es_client.search(body: {query: query, size: k}).body
|
135
|
+
end
|
136
|
+
|
137
|
+
def similarity_search_by_vector(embedding: [], k: 10, query: {})
|
138
|
+
if embedding.empty? && query.empty?
|
139
|
+
raise "Either embedding or query should pass as an argument"
|
140
|
+
end
|
141
|
+
|
142
|
+
query = default_query(embedding) if query.empty?
|
143
|
+
|
144
|
+
es_client.search(body: {query: query, size: k}).body
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
@@ -5,7 +5,8 @@ module Langchain::Vectorsearch
|
|
5
5
|
#
|
6
6
|
# Wrapper around Milvus REST APIs.
|
7
7
|
#
|
8
|
-
# Gem requirements:
|
8
|
+
# Gem requirements:
|
9
|
+
# gem "milvus", "~> 0.9.2"
|
9
10
|
#
|
10
11
|
# Usage:
|
11
12
|
# milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
|
@@ -70,7 +71,7 @@ module Langchain::Vectorsearch
|
|
70
71
|
type_params: [
|
71
72
|
{
|
72
73
|
key: "dim",
|
73
|
-
value: default_dimension.to_s
|
74
|
+
value: llm.default_dimension.to_s
|
74
75
|
}
|
75
76
|
]
|
76
77
|
}
|
@@ -5,10 +5,12 @@ module Langchain::Vectorsearch
|
|
5
5
|
#
|
6
6
|
# The PostgreSQL vector search adapter
|
7
7
|
#
|
8
|
-
# Gem requirements:
|
8
|
+
# Gem requirements:
|
9
|
+
# gem "sequel", "~> 5.68.0"
|
10
|
+
# gem "pgvector", "~> 0.2"
|
9
11
|
#
|
10
12
|
# Usage:
|
11
|
-
#
|
13
|
+
# pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
|
12
14
|
#
|
13
15
|
|
14
16
|
# The operators supported by the PostgreSQL vector search adapter
|
@@ -91,7 +93,7 @@ module Langchain::Vectorsearch
|
|
91
93
|
def create_default_schema
|
92
94
|
db.run "CREATE EXTENSION IF NOT EXISTS vector"
|
93
95
|
namespace_column = @namespace_column
|
94
|
-
vector_dimension = default_dimension
|
96
|
+
vector_dimension = llm.default_dimension
|
95
97
|
db.create_table? table_name.to_sym do
|
96
98
|
primary_key :id
|
97
99
|
text :content
|
@@ -5,10 +5,11 @@ module Langchain::Vectorsearch
|
|
5
5
|
#
|
6
6
|
# Wrapper around Pinecone API.
|
7
7
|
#
|
8
|
-
# Gem requirements:
|
8
|
+
# Gem requirements:
|
9
|
+
# gem "pinecone", "~> 0.1.6"
|
9
10
|
#
|
10
11
|
# Usage:
|
11
|
-
#
|
12
|
+
# pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm:, llm_api_key:)
|
12
13
|
#
|
13
14
|
|
14
15
|
# Initialize the Pinecone client
|
@@ -102,7 +103,7 @@ module Langchain::Vectorsearch
|
|
102
103
|
client.create_index(
|
103
104
|
metric: DEFAULT_METRIC,
|
104
105
|
name: index_name,
|
105
|
-
dimension: default_dimension
|
106
|
+
dimension: llm.default_dimension
|
106
107
|
)
|
107
108
|
end
|
108
109
|
|
@@ -5,10 +5,11 @@ module Langchain::Vectorsearch
|
|
5
5
|
#
|
6
6
|
# Wrapper around Qdrant
|
7
7
|
#
|
8
|
-
# Gem requirements:
|
8
|
+
# Gem requirements:
|
9
|
+
# gem "qdrant-ruby", "~> 0.9.3"
|
9
10
|
#
|
10
11
|
# Usage:
|
11
|
-
#
|
12
|
+
# qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm:, llm_api_key:)
|
12
13
|
#
|
13
14
|
|
14
15
|
# Initialize the Qdrant client
|
@@ -82,7 +83,7 @@ module Langchain::Vectorsearch
|
|
82
83
|
collection_name: index_name,
|
83
84
|
vectors: {
|
84
85
|
distance: DEFAULT_METRIC.capitalize,
|
85
|
-
size: default_dimension
|
86
|
+
size: llm.default_dimension
|
86
87
|
}
|
87
88
|
)
|
88
89
|
end
|
@@ -5,10 +5,11 @@ module Langchain::Vectorsearch
|
|
5
5
|
#
|
6
6
|
# Wrapper around Weaviate
|
7
7
|
#
|
8
|
-
# Gem requirements:
|
8
|
+
# Gem requirements:
|
9
|
+
# gem "weaviate-ruby", "~> 0.8.3"
|
9
10
|
#
|
10
11
|
# Usage:
|
11
|
-
#
|
12
|
+
# weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
|
12
13
|
#
|
13
14
|
|
14
15
|
# Initialize the Weaviate adapter
|
data/lib/langchain/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.18
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -70,16 +70,16 @@ dependencies:
|
|
70
70
|
name: zeitwerk
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 2.
|
75
|
+
version: '2.5'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 2.
|
82
|
+
version: '2.5'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: pragmatic_segmenter
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,6 +220,20 @@ dependencies:
|
|
220
220
|
- - "~>"
|
221
221
|
- !ruby/object:Gem::Version
|
222
222
|
version: 0.8.0
|
223
|
+
- !ruby/object:Gem::Dependency
|
224
|
+
name: elasticsearch
|
225
|
+
requirement: !ruby/object:Gem::Requirement
|
226
|
+
requirements:
|
227
|
+
- - "~>"
|
228
|
+
- !ruby/object:Gem::Version
|
229
|
+
version: 8.2.0
|
230
|
+
type: :development
|
231
|
+
prerelease: false
|
232
|
+
version_requirements: !ruby/object:Gem::Requirement
|
233
|
+
requirements:
|
234
|
+
- - "~>"
|
235
|
+
- !ruby/object:Gem::Version
|
236
|
+
version: 8.2.0
|
223
237
|
- !ruby/object:Gem::Dependency
|
224
238
|
name: eqn
|
225
239
|
requirement: !ruby/object:Gem::Requirement
|
@@ -525,7 +539,6 @@ files:
|
|
525
539
|
- LICENSE.txt
|
526
540
|
- README.md
|
527
541
|
- lib/langchain.rb
|
528
|
-
- lib/langchain/active_record/hooks.rb
|
529
542
|
- lib/langchain/agent/base.rb
|
530
543
|
- lib/langchain/agent/react_agent.rb
|
531
544
|
- lib/langchain/agent/react_agent/react_agent_prompt.yaml
|
@@ -587,7 +600,6 @@ files:
|
|
587
600
|
- lib/langchain/prompt/few_shot_prompt_template.rb
|
588
601
|
- lib/langchain/prompt/loading.rb
|
589
602
|
- lib/langchain/prompt/prompt_template.rb
|
590
|
-
- lib/langchain/railtie.rb
|
591
603
|
- lib/langchain/tool/base.rb
|
592
604
|
- lib/langchain/tool/calculator.rb
|
593
605
|
- lib/langchain/tool/database.rb
|
@@ -603,6 +615,7 @@ files:
|
|
603
615
|
- lib/langchain/utils/token_length/token_limit_exceeded.rb
|
604
616
|
- lib/langchain/vectorsearch/base.rb
|
605
617
|
- lib/langchain/vectorsearch/chroma.rb
|
618
|
+
- lib/langchain/vectorsearch/elasticsearch.rb
|
606
619
|
- lib/langchain/vectorsearch/hnswlib.rb
|
607
620
|
- lib/langchain/vectorsearch/milvus.rb
|
608
621
|
- lib/langchain/vectorsearch/pgvector.rb
|
@@ -1,112 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Langchain
|
4
|
-
module ActiveRecord
|
5
|
-
# This module adds the following functionality to your ActiveRecord models:
|
6
|
-
# * `vectorsearch` class method to set the vector search provider
|
7
|
-
# * `similarity_search` class method to search for similar texts
|
8
|
-
# * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
|
9
|
-
#
|
10
|
-
# Usage:
|
11
|
-
# class Recipe < ActiveRecord::Base
|
12
|
-
# vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
|
13
|
-
# api_key: ENV["WEAVIATE_API_KEY"],
|
14
|
-
# url: ENV["WEAVIATE_URL"],
|
15
|
-
# index_name: "Recipes",
|
16
|
-
# llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
17
|
-
# )
|
18
|
-
#
|
19
|
-
# after_save :upsert_to_vectorsearch
|
20
|
-
#
|
21
|
-
# # Overwriting how the model is serialized before it's indexed
|
22
|
-
# def as_vector
|
23
|
-
# [
|
24
|
-
# "Title: #{title}",
|
25
|
-
# "Description: #{description}",
|
26
|
-
# ...
|
27
|
-
# ]
|
28
|
-
# .compact
|
29
|
-
# .join("\n")
|
30
|
-
# end
|
31
|
-
# end
|
32
|
-
#
|
33
|
-
# Create the default schema
|
34
|
-
# Recipe.class_variable_get(:@@provider).create_default_schema
|
35
|
-
# Query the vector search provider
|
36
|
-
# Recipe.similarity_search("carnivore dish")
|
37
|
-
# Delete the default schema to start over
|
38
|
-
# Recipe.class_variable_get(:@@provider).destroy_default_schema
|
39
|
-
#
|
40
|
-
module Hooks
|
41
|
-
def self.included(base)
|
42
|
-
base.extend ClassMethods
|
43
|
-
end
|
44
|
-
|
45
|
-
# Index the text to the vector search provider
|
46
|
-
# You'd typically call this method in an ActiveRecord callback
|
47
|
-
#
|
48
|
-
# @return [Boolean] true
|
49
|
-
# @raise [Error] Indexing to vector search DB failed
|
50
|
-
def upsert_to_vectorsearch
|
51
|
-
if previously_new_record?
|
52
|
-
self.class.class_variable_get(:@@provider).add_texts(
|
53
|
-
texts: [as_vector],
|
54
|
-
ids: [id]
|
55
|
-
)
|
56
|
-
else
|
57
|
-
self.class.class_variable_get(:@@provider).update_texts(
|
58
|
-
texts: [as_vector],
|
59
|
-
ids: [id]
|
60
|
-
)
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
# Used to serialize the DB record to an indexable vector text
|
65
|
-
# Overwrite this method in your model to customize
|
66
|
-
#
|
67
|
-
# @return [String] the text representation of the model
|
68
|
-
def as_vector
|
69
|
-
to_json
|
70
|
-
end
|
71
|
-
|
72
|
-
module ClassMethods
|
73
|
-
# Set the vector search provider
|
74
|
-
#
|
75
|
-
# @param provider [Object] The `Langchain::Vectorsearch::*` instance
|
76
|
-
def vectorsearch(provider:)
|
77
|
-
class_variable_set(:@@provider, provider)
|
78
|
-
end
|
79
|
-
|
80
|
-
# Search for similar texts
|
81
|
-
#
|
82
|
-
# @param query [String] The query to search for
|
83
|
-
# @param k [Integer] The number of results to return
|
84
|
-
# @return [ActiveRecord::Relation] The ActiveRecord relation
|
85
|
-
def similarity_search(query, k: 1)
|
86
|
-
records = class_variable_get(:@@provider).similarity_search(
|
87
|
-
query: query,
|
88
|
-
k: k
|
89
|
-
)
|
90
|
-
|
91
|
-
# We use "__id" when Weaviate is the provider
|
92
|
-
ids = records.map { |record| record.dig("id") || record.dig("__id") }
|
93
|
-
where(id: ids)
|
94
|
-
end
|
95
|
-
|
96
|
-
# Ask a question and return the answer
|
97
|
-
#
|
98
|
-
# @param question [String] The question to ask
|
99
|
-
# @param k [Integer] The number of results to have in context
|
100
|
-
# @yield [String] Stream responses back one String at a time
|
101
|
-
# @return [String] The answer to the question
|
102
|
-
def ask(question:, k: 4, &block)
|
103
|
-
class_variable_get(:@@provider).ask(
|
104
|
-
question: question,
|
105
|
-
k: k,
|
106
|
-
&block
|
107
|
-
)
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
data/lib/langchain/railtie.rb
DELETED