langchainrb 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/Gemfile.lock +4 -4
- data/README.md +4 -4
- data/lib/langchain/active_record/hooks.rb +3 -1
- data/lib/langchain/vectorsearch/base.rb +5 -0
- data/lib/langchain/vectorsearch/chroma.rb +17 -4
- data/lib/langchain/vectorsearch/pinecone.rb +28 -4
- data/lib/langchain/vectorsearch/qdrant.rb +11 -5
- data/lib/langchain/vectorsearch/weaviate.rb +4 -4
- data/lib/langchain/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3df4368be6da730348143ba599d9fba642277a644a6dff9b9dc1b81014d29a8b
|
4
|
+
data.tar.gz: 964c65411f2fe7d1768a65a1700eeb573f56946a60b8cfd04b43fade8faf75d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b032b337b109a953c49a5eafe4ad4161818bd02a2bd0076f7922aad438f4eb79029338dcb1c5d1f267b9f6ba1e0a35e7ba6fabe1369ae9d02b45240837012c8
|
7
|
+
data.tar.gz: e6d03a7bf10aa60ffc833ecda448e75c84edaaf9febece3ec06385857f340f19efd674dfd2f43a7d08c1a6698a13880a0e54dd90b740ce438a4658b08eae37d7
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.6.1)
|
4
|
+
langchainrb (0.6.2)
|
5
5
|
baran (~> 0.1.6)
|
6
6
|
colorize (~> 0.8.1)
|
7
7
|
json-schema (~> 4.0.0)
|
@@ -133,7 +133,7 @@ GEM
|
|
133
133
|
faraday (>= 1.0)
|
134
134
|
faraday_middleware
|
135
135
|
graphql-client
|
136
|
-
graphql (2.0.
|
136
|
+
graphql (2.0.23)
|
137
137
|
graphql-client (0.18.0)
|
138
138
|
activesupport (>= 3.0)
|
139
139
|
graphql
|
@@ -298,7 +298,7 @@ GEM
|
|
298
298
|
tzinfo (2.0.6)
|
299
299
|
concurrent-ruby (~> 1.0)
|
300
300
|
unicode-display_width (2.4.2)
|
301
|
-
weaviate-ruby (0.8.
|
301
|
+
weaviate-ruby (0.8.3)
|
302
302
|
faraday (~> 1)
|
303
303
|
faraday_middleware (~> 1)
|
304
304
|
graphlient (~> 0.6.0)
|
@@ -346,7 +346,7 @@ DEPENDENCIES
|
|
346
346
|
safe_ruby (~> 1.0.4)
|
347
347
|
sequel (~> 5.68.0)
|
348
348
|
standardrb
|
349
|
-
weaviate-ruby (~> 0.8.
|
349
|
+
weaviate-ruby (~> 0.8.3)
|
350
350
|
wikipedia-client (~> 1.17.0)
|
351
351
|
yard
|
352
352
|
|
data/README.md
CHANGED
@@ -35,19 +35,19 @@ require "langchain"
|
|
35
35
|
|
36
36
|
| Database | Querying | Storage | Schema Management | Backups | Rails Integration |
|
37
37
|
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
|
38
|
-
| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
38
|
+
| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
39
39
|
| [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
40
40
|
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
41
|
-
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
41
|
+
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
42
42
|
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
43
|
-
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
43
|
+
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
44
44
|
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
45
45
|
|
46
46
|
### Using Vector Search Databases 🔍
|
47
47
|
|
48
48
|
Choose the LLM provider you'll be using (OpenAI or Cohere) and retrieve the API key.
|
49
49
|
|
50
|
-
Add `gem "weaviate-ruby", "~> 0.8.
|
50
|
+
Add `gem "weaviate-ruby", "~> 0.8.3"` to your Gemfile.
|
51
51
|
|
52
52
|
Pick the vector search database you'll be using and instantiate the client:
|
53
53
|
```ruby
|
@@ -87,7 +87,9 @@ module Langchain
|
|
87
87
|
query: query,
|
88
88
|
k: k
|
89
89
|
)
|
90
|
-
|
90
|
+
|
91
|
+
# We use "__id" when Weaviate is the provider
|
92
|
+
ids = records.map { |record| record.dig("id") || record.dig("__id") }
|
91
93
|
where(id: ids)
|
92
94
|
end
|
93
95
|
end
|
@@ -108,6 +108,11 @@ module Langchain::Vectorsearch
|
|
108
108
|
raise NotImplementedError, "#{self.class.name} does not support adding texts"
|
109
109
|
end
|
110
110
|
|
111
|
+
# Method supported by Vectorsearch DB to update a list of texts to the index
|
112
|
+
def update_texts(...)
|
113
|
+
raise NotImplementedError, "#{self.class.name} does not support updating texts"
|
114
|
+
end
|
115
|
+
|
111
116
|
# Method supported by Vectorsearch DB to search for similar texts in the index
|
112
117
|
def similarity_search(...)
|
113
118
|
raise NotImplementedError, "#{self.class.name} does not support similarity search"
|
@@ -32,11 +32,10 @@ module Langchain::Vectorsearch
|
|
32
32
|
# Add a list of texts to the index
|
33
33
|
# @param texts [Array] The list of texts to add
|
34
34
|
# @return [Hash] The response from the server
|
35
|
-
def add_texts(texts:)
|
36
|
-
embeddings = Array(texts).map do |text|
|
35
|
+
def add_texts(texts:, ids: [])
|
36
|
+
embeddings = Array(texts).map.with_index do |text, i|
|
37
37
|
::Chroma::Resources::Embedding.new(
|
38
|
-
|
39
|
-
id: SecureRandom.uuid,
|
38
|
+
id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
|
40
39
|
embedding: llm.embed(text: text),
|
41
40
|
# TODO: Add support for passing metadata
|
42
41
|
metadata: [], # metadatas[index],
|
@@ -48,6 +47,20 @@ module Langchain::Vectorsearch
|
|
48
47
|
collection.add(embeddings)
|
49
48
|
end
|
50
49
|
|
50
|
+
def update_texts(texts:, ids:)
|
51
|
+
embeddings = Array(texts).map.with_index do |text, i|
|
52
|
+
::Chroma::Resources::Embedding.new(
|
53
|
+
id: ids[i].to_s,
|
54
|
+
embedding: llm.embed(text: text),
|
55
|
+
# TODO: Add support for passing metadata
|
56
|
+
metadata: [], # metadatas[index],
|
57
|
+
document: text # Do we actually need to store the whole original document?
|
58
|
+
)
|
59
|
+
end
|
60
|
+
|
61
|
+
collection.update(embeddings)
|
62
|
+
end
|
63
|
+
|
51
64
|
# Create the collection with the default schema
|
52
65
|
# @return [Hash] The response from the server
|
53
66
|
def create_default_schema
|
@@ -33,14 +33,14 @@ module Langchain::Vectorsearch
|
|
33
33
|
|
34
34
|
# Add a list of texts to the index
|
35
35
|
# @param texts [Array] The list of texts to add
|
36
|
+
# @param ids [Array] The list of IDs to add
|
36
37
|
# @param namespace [String] The namespace to add the texts to
|
37
38
|
# @param metadata [Hash] The metadata to use for the texts
|
38
39
|
# @return [Hash] The response from the server
|
39
|
-
def add_texts(texts:, namespace: "", metadata: nil)
|
40
|
-
vectors = texts.map do |text|
|
40
|
+
def add_texts(texts:, ids: [], namespace: "", metadata: nil)
|
41
|
+
vectors = texts.map.with_index do |text, i|
|
41
42
|
{
|
42
|
-
|
43
|
-
id: SecureRandom.uuid,
|
43
|
+
id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
|
44
44
|
metadata: metadata || {content: text},
|
45
45
|
values: llm.embed(text: text)
|
46
46
|
}
|
@@ -51,6 +51,24 @@ module Langchain::Vectorsearch
|
|
51
51
|
index.upsert(vectors: vectors, namespace: namespace)
|
52
52
|
end
|
53
53
|
|
54
|
+
# Update a list of texts in the index
|
55
|
+
# @param texts [Array] The list of texts to update
|
56
|
+
# @param ids [Array] The list of IDs to update
|
57
|
+
# @param namespace [String] The namespace to update the texts in
|
58
|
+
# @param metadata [Hash] The metadata to use for the texts
|
59
|
+
# @return [Array] The response from the server
|
60
|
+
def update_texts(texts:, ids:, namespace: "", metadata: nil)
|
61
|
+
texts.map.with_index do |text, i|
|
62
|
+
# Pinecone::Vector#update ignore args when it is empty
|
63
|
+
index.update(
|
64
|
+
namespace: namespace,
|
65
|
+
id: ids[i].to_s,
|
66
|
+
values: llm.embed(text: text),
|
67
|
+
set_metadata: metadata
|
68
|
+
)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
54
72
|
# Create the index with the default schema
|
55
73
|
# @return [Hash] The response from the server
|
56
74
|
def create_default_schema
|
@@ -122,5 +140,11 @@ module Langchain::Vectorsearch
|
|
122
140
|
|
123
141
|
llm.chat(prompt: prompt)
|
124
142
|
end
|
143
|
+
|
144
|
+
# Pinecone index
|
145
|
+
# @return [Object] The Pinecone index
|
146
|
+
private def index
|
147
|
+
client.index(index_name)
|
148
|
+
end
|
125
149
|
end
|
126
150
|
end
|
@@ -32,11 +32,11 @@ module Langchain::Vectorsearch
|
|
32
32
|
# Add a list of texts to the index
|
33
33
|
# @param texts [Array] The list of texts to add
|
34
34
|
# @return [Hash] The response from the server
|
35
|
-
def add_texts(texts:)
|
35
|
+
def add_texts(texts:, ids:)
|
36
36
|
batch = {ids: [], vectors: [], payloads: []}
|
37
37
|
|
38
|
-
Array(texts).each do |text|
|
39
|
-
batch[:ids].push(SecureRandom.uuid)
|
38
|
+
Array(texts).each_with_index do |text, i|
|
39
|
+
batch[:ids].push(ids[i] || SecureRandom.uuid)
|
40
40
|
batch[:vectors].push(llm.embed(text: text))
|
41
41
|
batch[:payloads].push({content: text})
|
42
42
|
end
|
@@ -47,6 +47,10 @@ module Langchain::Vectorsearch
|
|
47
47
|
)
|
48
48
|
end
|
49
49
|
|
50
|
+
def update_texts(texts:, ids:)
|
51
|
+
add_texts(texts: texts, ids: ids)
|
52
|
+
end
|
53
|
+
|
50
54
|
# Create the index with the default schema
|
51
55
|
# @return [Hash] The response from the server
|
52
56
|
def create_default_schema
|
@@ -83,12 +87,14 @@ module Langchain::Vectorsearch
|
|
83
87
|
embedding:,
|
84
88
|
k: 4
|
85
89
|
)
|
86
|
-
client.points.search(
|
90
|
+
response = client.points.search(
|
87
91
|
collection_name: index_name,
|
88
92
|
limit: k,
|
89
93
|
vector: embedding,
|
90
|
-
with_payload: true
|
94
|
+
with_payload: true,
|
95
|
+
with_vector: true
|
91
96
|
)
|
97
|
+
response.dig("result")
|
92
98
|
end
|
93
99
|
|
94
100
|
# Ask a question and return the answer
|
@@ -5,7 +5,7 @@ module Langchain::Vectorsearch
|
|
5
5
|
#
|
6
6
|
# Wrapper around Weaviate
|
7
7
|
#
|
8
|
-
# Gem requirements: gem "weaviate-ruby", "~> 0.8.
|
8
|
+
# Gem requirements: gem "weaviate-ruby", "~> 0.8.3"
|
9
9
|
#
|
10
10
|
# Usage:
|
11
11
|
# weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
|
@@ -35,7 +35,7 @@ module Langchain::Vectorsearch
|
|
35
35
|
# Add a list of texts to the index
|
36
36
|
# @param texts [Array] The list of texts to add
|
37
37
|
# @return [Hash] The response from the server
|
38
|
-
def add_texts(texts:, ids:)
|
38
|
+
def add_texts(texts:, ids: [])
|
39
39
|
client.objects.batch_create(
|
40
40
|
objects: weaviate_objects(texts, ids)
|
41
41
|
)
|
@@ -127,13 +127,13 @@ module Langchain::Vectorsearch
|
|
127
127
|
|
128
128
|
private
|
129
129
|
|
130
|
-
def weaviate_objects(texts, ids)
|
130
|
+
def weaviate_objects(texts, ids = [])
|
131
131
|
Array(texts).map.with_index do |text, i|
|
132
132
|
weaviate_object(text, ids[i])
|
133
133
|
end
|
134
134
|
end
|
135
135
|
|
136
|
-
def weaviate_object(text, id)
|
136
|
+
def weaviate_object(text, id = nil)
|
137
137
|
{
|
138
138
|
class: index_name,
|
139
139
|
properties: {
|
data/lib/langchain/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -436,14 +436,14 @@ dependencies:
|
|
436
436
|
requirements:
|
437
437
|
- - "~>"
|
438
438
|
- !ruby/object:Gem::Version
|
439
|
-
version: 0.8.
|
439
|
+
version: 0.8.3
|
440
440
|
type: :development
|
441
441
|
prerelease: false
|
442
442
|
version_requirements: !ruby/object:Gem::Requirement
|
443
443
|
requirements:
|
444
444
|
- - "~>"
|
445
445
|
- !ruby/object:Gem::Version
|
446
|
-
version: 0.8.
|
446
|
+
version: 0.8.3
|
447
447
|
- !ruby/object:Gem::Dependency
|
448
448
|
name: wikipedia-client
|
449
449
|
requirement: !ruby/object:Gem::Requirement
|