langchainrb 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/Gemfile.lock +4 -4
- data/README.md +4 -4
- data/lib/langchain/active_record/hooks.rb +3 -1
- data/lib/langchain/vectorsearch/base.rb +5 -0
- data/lib/langchain/vectorsearch/chroma.rb +17 -4
- data/lib/langchain/vectorsearch/pinecone.rb +28 -4
- data/lib/langchain/vectorsearch/qdrant.rb +11 -5
- data/lib/langchain/vectorsearch/weaviate.rb +4 -4
- data/lib/langchain/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3df4368be6da730348143ba599d9fba642277a644a6dff9b9dc1b81014d29a8b
|
4
|
+
data.tar.gz: 964c65411f2fe7d1768a65a1700eeb573f56946a60b8cfd04b43fade8faf75d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b032b337b109a953c49a5eafe4ad4161818bd02a2bd0076f7922aad438f4eb79029338dcb1c5d1f267b9f6ba1e0a35e7ba6fabe1369ae9d02b45240837012c8
|
7
|
+
data.tar.gz: e6d03a7bf10aa60ffc833ecda448e75c84edaaf9febece3ec06385857f340f19efd674dfd2f43a7d08c1a6698a13880a0e54dd90b740ce438a4658b08eae37d7
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.6.1)
|
4
|
+
langchainrb (0.6.2)
|
5
5
|
baran (~> 0.1.6)
|
6
6
|
colorize (~> 0.8.1)
|
7
7
|
json-schema (~> 4.0.0)
|
@@ -133,7 +133,7 @@ GEM
|
|
133
133
|
faraday (>= 1.0)
|
134
134
|
faraday_middleware
|
135
135
|
graphql-client
|
136
|
-
graphql (2.0.
|
136
|
+
graphql (2.0.23)
|
137
137
|
graphql-client (0.18.0)
|
138
138
|
activesupport (>= 3.0)
|
139
139
|
graphql
|
@@ -298,7 +298,7 @@ GEM
|
|
298
298
|
tzinfo (2.0.6)
|
299
299
|
concurrent-ruby (~> 1.0)
|
300
300
|
unicode-display_width (2.4.2)
|
301
|
-
weaviate-ruby (0.8.
|
301
|
+
weaviate-ruby (0.8.3)
|
302
302
|
faraday (~> 1)
|
303
303
|
faraday_middleware (~> 1)
|
304
304
|
graphlient (~> 0.6.0)
|
@@ -346,7 +346,7 @@ DEPENDENCIES
|
|
346
346
|
safe_ruby (~> 1.0.4)
|
347
347
|
sequel (~> 5.68.0)
|
348
348
|
standardrb
|
349
|
-
weaviate-ruby (~> 0.8.
|
349
|
+
weaviate-ruby (~> 0.8.3)
|
350
350
|
wikipedia-client (~> 1.17.0)
|
351
351
|
yard
|
352
352
|
|
data/README.md
CHANGED
@@ -35,19 +35,19 @@ require "langchain"
|
|
35
35
|
|
36
36
|
| Database | Querying | Storage | Schema Management | Backups | Rails Integration |
|
37
37
|
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
|
38
|
-
| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
38
|
+
| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
39
39
|
| [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
40
40
|
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
41
|
-
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
41
|
+
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
42
42
|
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
43
|
-
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
43
|
+
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
44
44
|
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
45
45
|
|
46
46
|
### Using Vector Search Databases 🔍
|
47
47
|
|
48
48
|
Choose the LLM provider you'll be using (OpenAI or Cohere) and retrieve the API key.
|
49
49
|
|
50
|
-
Add `gem "weaviate-ruby", "~> 0.8.
|
50
|
+
Add `gem "weaviate-ruby", "~> 0.8.3"` to your Gemfile.
|
51
51
|
|
52
52
|
Pick the vector search database you'll be using and instantiate the client:
|
53
53
|
```ruby
|
@@ -87,7 +87,9 @@ module Langchain
|
|
87
87
|
query: query,
|
88
88
|
k: k
|
89
89
|
)
|
90
|
-
|
90
|
+
|
91
|
+
# We use "__id" when Weaviate is the provider
|
92
|
+
ids = records.map { |record| record.dig("id") || record.dig("__id") }
|
91
93
|
where(id: ids)
|
92
94
|
end
|
93
95
|
end
|
@@ -108,6 +108,11 @@ module Langchain::Vectorsearch
|
|
108
108
|
raise NotImplementedError, "#{self.class.name} does not support adding texts"
|
109
109
|
end
|
110
110
|
|
111
|
+
# Method supported by Vectorsearch DB to update a list of texts to the index
|
112
|
+
def update_texts(...)
|
113
|
+
raise NotImplementedError, "#{self.class.name} does not support updating texts"
|
114
|
+
end
|
115
|
+
|
111
116
|
# Method supported by Vectorsearch DB to search for similar texts in the index
|
112
117
|
def similarity_search(...)
|
113
118
|
raise NotImplementedError, "#{self.class.name} does not support similarity search"
|
@@ -32,11 +32,10 @@ module Langchain::Vectorsearch
|
|
32
32
|
# Add a list of texts to the index
|
33
33
|
# @param texts [Array] The list of texts to add
|
34
34
|
# @return [Hash] The response from the server
|
35
|
-
def add_texts(texts:)
|
36
|
-
embeddings = Array(texts).map do |text|
|
35
|
+
def add_texts(texts:, ids: [])
|
36
|
+
embeddings = Array(texts).map.with_index do |text, i|
|
37
37
|
::Chroma::Resources::Embedding.new(
|
38
|
-
|
39
|
-
id: SecureRandom.uuid,
|
38
|
+
id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
|
40
39
|
embedding: llm.embed(text: text),
|
41
40
|
# TODO: Add support for passing metadata
|
42
41
|
metadata: [], # metadatas[index],
|
@@ -48,6 +47,20 @@ module Langchain::Vectorsearch
|
|
48
47
|
collection.add(embeddings)
|
49
48
|
end
|
50
49
|
|
50
|
+
def update_texts(texts:, ids:)
|
51
|
+
embeddings = Array(texts).map.with_index do |text, i|
|
52
|
+
::Chroma::Resources::Embedding.new(
|
53
|
+
id: ids[i].to_s,
|
54
|
+
embedding: llm.embed(text: text),
|
55
|
+
# TODO: Add support for passing metadata
|
56
|
+
metadata: [], # metadatas[index],
|
57
|
+
document: text # Do we actually need to store the whole original document?
|
58
|
+
)
|
59
|
+
end
|
60
|
+
|
61
|
+
collection.update(embeddings)
|
62
|
+
end
|
63
|
+
|
51
64
|
# Create the collection with the default schema
|
52
65
|
# @return [Hash] The response from the server
|
53
66
|
def create_default_schema
|
@@ -33,14 +33,14 @@ module Langchain::Vectorsearch
|
|
33
33
|
|
34
34
|
# Add a list of texts to the index
|
35
35
|
# @param texts [Array] The list of texts to add
|
36
|
+
# @param ids [Array] The list of IDs to add
|
36
37
|
# @param namespace [String] The namespace to add the texts to
|
37
38
|
# @param metadata [Hash] The metadata to use for the texts
|
38
39
|
# @return [Hash] The response from the server
|
39
|
-
def add_texts(texts:, namespace: "", metadata: nil)
|
40
|
-
vectors = texts.map do |text|
|
40
|
+
def add_texts(texts:, ids: [], namespace: "", metadata: nil)
|
41
|
+
vectors = texts.map.with_index do |text, i|
|
41
42
|
{
|
42
|
-
|
43
|
-
id: SecureRandom.uuid,
|
43
|
+
id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
|
44
44
|
metadata: metadata || {content: text},
|
45
45
|
values: llm.embed(text: text)
|
46
46
|
}
|
@@ -51,6 +51,24 @@ module Langchain::Vectorsearch
|
|
51
51
|
index.upsert(vectors: vectors, namespace: namespace)
|
52
52
|
end
|
53
53
|
|
54
|
+
# Update a list of texts in the index
|
55
|
+
# @param texts [Array] The list of texts to update
|
56
|
+
# @param ids [Array] The list of IDs to update
|
57
|
+
# @param namespace [String] The namespace to update the texts in
|
58
|
+
# @param metadata [Hash] The metadata to use for the texts
|
59
|
+
# @return [Array] The response from the server
|
60
|
+
def update_texts(texts:, ids:, namespace: "", metadata: nil)
|
61
|
+
texts.map.with_index do |text, i|
|
62
|
+
# Pinecone::Vector#update ignore args when it is empty
|
63
|
+
index.update(
|
64
|
+
namespace: namespace,
|
65
|
+
id: ids[i].to_s,
|
66
|
+
values: llm.embed(text: text),
|
67
|
+
set_metadata: metadata
|
68
|
+
)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
54
72
|
# Create the index with the default schema
|
55
73
|
# @return [Hash] The response from the server
|
56
74
|
def create_default_schema
|
@@ -122,5 +140,11 @@ module Langchain::Vectorsearch
|
|
122
140
|
|
123
141
|
llm.chat(prompt: prompt)
|
124
142
|
end
|
143
|
+
|
144
|
+
# Pinecone index
|
145
|
+
# @return [Object] The Pinecone index
|
146
|
+
private def index
|
147
|
+
client.index(index_name)
|
148
|
+
end
|
125
149
|
end
|
126
150
|
end
|
@@ -32,11 +32,11 @@ module Langchain::Vectorsearch
|
|
32
32
|
# Add a list of texts to the index
|
33
33
|
# @param texts [Array] The list of texts to add
|
34
34
|
# @return [Hash] The response from the server
|
35
|
-
def add_texts(texts:)
|
35
|
+
def add_texts(texts:, ids:)
|
36
36
|
batch = {ids: [], vectors: [], payloads: []}
|
37
37
|
|
38
|
-
Array(texts).each do |text|
|
39
|
-
batch[:ids].push(SecureRandom.uuid)
|
38
|
+
Array(texts).each_with_index do |text, i|
|
39
|
+
batch[:ids].push(ids[i] || SecureRandom.uuid)
|
40
40
|
batch[:vectors].push(llm.embed(text: text))
|
41
41
|
batch[:payloads].push({content: text})
|
42
42
|
end
|
@@ -47,6 +47,10 @@ module Langchain::Vectorsearch
|
|
47
47
|
)
|
48
48
|
end
|
49
49
|
|
50
|
+
def update_texts(texts:, ids:)
|
51
|
+
add_texts(texts: texts, ids: ids)
|
52
|
+
end
|
53
|
+
|
50
54
|
# Create the index with the default schema
|
51
55
|
# @return [Hash] The response from the server
|
52
56
|
def create_default_schema
|
@@ -83,12 +87,14 @@ module Langchain::Vectorsearch
|
|
83
87
|
embedding:,
|
84
88
|
k: 4
|
85
89
|
)
|
86
|
-
client.points.search(
|
90
|
+
response = client.points.search(
|
87
91
|
collection_name: index_name,
|
88
92
|
limit: k,
|
89
93
|
vector: embedding,
|
90
|
-
with_payload: true
|
94
|
+
with_payload: true,
|
95
|
+
with_vector: true
|
91
96
|
)
|
97
|
+
response.dig("result")
|
92
98
|
end
|
93
99
|
|
94
100
|
# Ask a question and return the answer
|
@@ -5,7 +5,7 @@ module Langchain::Vectorsearch
|
|
5
5
|
#
|
6
6
|
# Wrapper around Weaviate
|
7
7
|
#
|
8
|
-
# Gem requirements: gem "weaviate-ruby", "~> 0.8.
|
8
|
+
# Gem requirements: gem "weaviate-ruby", "~> 0.8.3"
|
9
9
|
#
|
10
10
|
# Usage:
|
11
11
|
# weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
|
@@ -35,7 +35,7 @@ module Langchain::Vectorsearch
|
|
35
35
|
# Add a list of texts to the index
|
36
36
|
# @param texts [Array] The list of texts to add
|
37
37
|
# @return [Hash] The response from the server
|
38
|
-
def add_texts(texts:, ids:)
|
38
|
+
def add_texts(texts:, ids: [])
|
39
39
|
client.objects.batch_create(
|
40
40
|
objects: weaviate_objects(texts, ids)
|
41
41
|
)
|
@@ -127,13 +127,13 @@ module Langchain::Vectorsearch
|
|
127
127
|
|
128
128
|
private
|
129
129
|
|
130
|
-
def weaviate_objects(texts, ids)
|
130
|
+
def weaviate_objects(texts, ids = [])
|
131
131
|
Array(texts).map.with_index do |text, i|
|
132
132
|
weaviate_object(text, ids[i])
|
133
133
|
end
|
134
134
|
end
|
135
135
|
|
136
|
-
def weaviate_object(text, id)
|
136
|
+
def weaviate_object(text, id = nil)
|
137
137
|
{
|
138
138
|
class: index_name,
|
139
139
|
properties: {
|
data/lib/langchain/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -436,14 +436,14 @@ dependencies:
|
|
436
436
|
requirements:
|
437
437
|
- - "~>"
|
438
438
|
- !ruby/object:Gem::Version
|
439
|
-
version: 0.8.
|
439
|
+
version: 0.8.3
|
440
440
|
type: :development
|
441
441
|
prerelease: false
|
442
442
|
version_requirements: !ruby/object:Gem::Requirement
|
443
443
|
requirements:
|
444
444
|
- - "~>"
|
445
445
|
- !ruby/object:Gem::Version
|
446
|
-
version: 0.8.
|
446
|
+
version: 0.8.3
|
447
447
|
- !ruby/object:Gem::Dependency
|
448
448
|
name: wikipedia-client
|
449
449
|
requirement: !ruby/object:Gem::Requirement
|