langchainrb 0.6.1 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a6f4e8bb8ecaba6ff4d53bba384bd6338012429a69a0dc7df0a58a476763e7e
4
- data.tar.gz: 92211a22fca9664831cf4f395a53dedddafc339ab419780932398c07256b737d
3
+ metadata.gz: 3df4368be6da730348143ba599d9fba642277a644a6dff9b9dc1b81014d29a8b
4
+ data.tar.gz: 964c65411f2fe7d1768a65a1700eeb573f56946a60b8cfd04b43fade8faf75d1
5
5
  SHA512:
6
- metadata.gz: b5c84f0a9a54f51799c5318cba243457fcfd6f026c71b8f34e58cf60172d476963f25ea8d24c49b35ed93c893adb9e2844443a22dd9e927ab16318850a11419a
7
- data.tar.gz: 4664927203ea032f737000c27ec5fa04c96ab606ec8377b4673b48638905b458077d4ab3cb7727fcb98be6c607a37bd318395fd96000a734de213c7d9041a219
6
+ metadata.gz: 1b032b337b109a953c49a5eafe4ad4161818bd02a2bd0076f7922aad438f4eb79029338dcb1c5d1f267b9f6ba1e0a35e7ba6fabe1369ae9d02b45240837012c8
7
+ data.tar.gz: e6d03a7bf10aa60ffc833ecda448e75c84edaaf9febece3ec06385857f340f19efd674dfd2f43a7d08c1a6698a13880a0e54dd90b740ce438a4658b08eae37d7
data/CHANGELOG.md CHANGED
@@ -1,5 +1,8 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.2] - 2023-06-25
4
+ - Qdrant, Chroma, and Pinecone are now supported by the ActiveRecord hooks
5
+
3
6
  ## [0.6.1] - 2023-06-24
4
7
  - Adding support to hook vectorsearch into ActiveRecord models
5
8
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.1)
4
+ langchainrb (0.6.2)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
@@ -133,7 +133,7 @@ GEM
133
133
  faraday (>= 1.0)
134
134
  faraday_middleware
135
135
  graphql-client
136
- graphql (2.0.21)
136
+ graphql (2.0.23)
137
137
  graphql-client (0.18.0)
138
138
  activesupport (>= 3.0)
139
139
  graphql
@@ -298,7 +298,7 @@ GEM
298
298
  tzinfo (2.0.6)
299
299
  concurrent-ruby (~> 1.0)
300
300
  unicode-display_width (2.4.2)
301
- weaviate-ruby (0.8.1)
301
+ weaviate-ruby (0.8.3)
302
302
  faraday (~> 1)
303
303
  faraday_middleware (~> 1)
304
304
  graphlient (~> 0.6.0)
@@ -346,7 +346,7 @@ DEPENDENCIES
346
346
  safe_ruby (~> 1.0.4)
347
347
  sequel (~> 5.68.0)
348
348
  standardrb
349
- weaviate-ruby (~> 0.8.0)
349
+ weaviate-ruby (~> 0.8.3)
350
350
  wikipedia-client (~> 1.17.0)
351
351
  yard
352
352
 
data/README.md CHANGED
@@ -35,19 +35,19 @@ require "langchain"
35
35
 
36
36
  | Database | Querying | Storage | Schema Management | Backups | Rails Integration |
37
37
  | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
38
- | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
38
+ | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
39
39
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
40
40
  | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
41
- | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
41
+ | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
42
42
  | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
- | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
+ | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
44
44
  | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
45
45
 
46
46
  ### Using Vector Search Databases 🔍
47
47
 
48
48
  Choose the LLM provider you'll be using (OpenAI or Cohere) and retrieve the API key.
49
49
 
50
- Add `gem "weaviate-ruby", "~> 0.8.0"` to your Gemfile.
50
+ Add `gem "weaviate-ruby", "~> 0.8.3"` to your Gemfile.
51
51
 
52
52
  Pick the vector search database you'll be using and instantiate the client:
53
53
  ```ruby
@@ -87,7 +87,9 @@ module Langchain
87
87
  query: query,
88
88
  k: k
89
89
  )
90
- ids = records.map { |record| record.dig("__id") }
90
+
91
+ # We use "__id" when Weaviate is the provider
92
+ ids = records.map { |record| record.dig("id") || record.dig("__id") }
91
93
  where(id: ids)
92
94
  end
93
95
  end
@@ -108,6 +108,11 @@ module Langchain::Vectorsearch
108
108
  raise NotImplementedError, "#{self.class.name} does not support adding texts"
109
109
  end
110
110
 
111
+ # Method supported by Vectorsearch DB to update a list of texts to the index
112
+ def update_texts(...)
113
+ raise NotImplementedError, "#{self.class.name} does not support updating texts"
114
+ end
115
+
111
116
  # Method supported by Vectorsearch DB to search for similar texts in the index
112
117
  def similarity_search(...)
113
118
  raise NotImplementedError, "#{self.class.name} does not support similarity search"
@@ -32,11 +32,10 @@ module Langchain::Vectorsearch
32
32
  # Add a list of texts to the index
33
33
  # @param texts [Array] The list of texts to add
34
34
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
36
- embeddings = Array(texts).map do |text|
35
+ def add_texts(texts:, ids: [])
36
+ embeddings = Array(texts).map.with_index do |text, i|
37
37
  ::Chroma::Resources::Embedding.new(
38
- # TODO: Add support for passing your own IDs
39
- id: SecureRandom.uuid,
38
+ id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
40
39
  embedding: llm.embed(text: text),
41
40
  # TODO: Add support for passing metadata
42
41
  metadata: [], # metadatas[index],
@@ -48,6 +47,20 @@ module Langchain::Vectorsearch
48
47
  collection.add(embeddings)
49
48
  end
50
49
 
50
+ def update_texts(texts:, ids:)
51
+ embeddings = Array(texts).map.with_index do |text, i|
52
+ ::Chroma::Resources::Embedding.new(
53
+ id: ids[i].to_s,
54
+ embedding: llm.embed(text: text),
55
+ # TODO: Add support for passing metadata
56
+ metadata: [], # metadatas[index],
57
+ document: text # Do we actually need to store the whole original document?
58
+ )
59
+ end
60
+
61
+ collection.update(embeddings)
62
+ end
63
+
51
64
  # Create the collection with the default schema
52
65
  # @return [Hash] The response from the server
53
66
  def create_default_schema
@@ -33,14 +33,14 @@ module Langchain::Vectorsearch
33
33
 
34
34
  # Add a list of texts to the index
35
35
  # @param texts [Array] The list of texts to add
36
+ # @param ids [Array] The list of IDs to add
36
37
  # @param namespace [String] The namespace to add the texts to
37
38
  # @param metadata [Hash] The metadata to use for the texts
38
39
  # @return [Hash] The response from the server
39
- def add_texts(texts:, namespace: "", metadata: nil)
40
- vectors = texts.map do |text|
40
+ def add_texts(texts:, ids: [], namespace: "", metadata: nil)
41
+ vectors = texts.map.with_index do |text, i|
41
42
  {
42
- # TODO: Allows passing in your own IDs
43
- id: SecureRandom.uuid,
43
+ id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
44
44
  metadata: metadata || {content: text},
45
45
  values: llm.embed(text: text)
46
46
  }
@@ -51,6 +51,24 @@ module Langchain::Vectorsearch
51
51
  index.upsert(vectors: vectors, namespace: namespace)
52
52
  end
53
53
 
54
+ # Update a list of texts in the index
55
+ # @param texts [Array] The list of texts to update
56
+ # @param ids [Array] The list of IDs to update
57
+ # @param namespace [String] The namespace to update the texts in
58
+ # @param metadata [Hash] The metadata to use for the texts
59
+ # @return [Array] The response from the server
60
+ def update_texts(texts:, ids:, namespace: "", metadata: nil)
61
+ texts.map.with_index do |text, i|
62
+ # Pinecone::Vector#update ignore args when it is empty
63
+ index.update(
64
+ namespace: namespace,
65
+ id: ids[i].to_s,
66
+ values: llm.embed(text: text),
67
+ set_metadata: metadata
68
+ )
69
+ end
70
+ end
71
+
54
72
  # Create the index with the default schema
55
73
  # @return [Hash] The response from the server
56
74
  def create_default_schema
@@ -122,5 +140,11 @@ module Langchain::Vectorsearch
122
140
 
123
141
  llm.chat(prompt: prompt)
124
142
  end
143
+
144
+ # Pinecone index
145
+ # @return [Object] The Pinecone index
146
+ private def index
147
+ client.index(index_name)
148
+ end
125
149
  end
126
150
  end
@@ -32,11 +32,11 @@ module Langchain::Vectorsearch
32
32
  # Add a list of texts to the index
33
33
  # @param texts [Array] The list of texts to add
34
34
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
35
+ def add_texts(texts:, ids:)
36
36
  batch = {ids: [], vectors: [], payloads: []}
37
37
 
38
- Array(texts).each do |text|
39
- batch[:ids].push(SecureRandom.uuid)
38
+ Array(texts).each_with_index do |text, i|
39
+ batch[:ids].push(ids[i] || SecureRandom.uuid)
40
40
  batch[:vectors].push(llm.embed(text: text))
41
41
  batch[:payloads].push({content: text})
42
42
  end
@@ -47,6 +47,10 @@ module Langchain::Vectorsearch
47
47
  )
48
48
  end
49
49
 
50
+ def update_texts(texts:, ids:)
51
+ add_texts(texts: texts, ids: ids)
52
+ end
53
+
50
54
  # Create the index with the default schema
51
55
  # @return [Hash] The response from the server
52
56
  def create_default_schema
@@ -83,12 +87,14 @@ module Langchain::Vectorsearch
83
87
  embedding:,
84
88
  k: 4
85
89
  )
86
- client.points.search(
90
+ response = client.points.search(
87
91
  collection_name: index_name,
88
92
  limit: k,
89
93
  vector: embedding,
90
- with_payload: true
94
+ with_payload: true,
95
+ with_vector: true
91
96
  )
97
+ response.dig("result")
92
98
  end
93
99
 
94
100
  # Ask a question and return the answer
@@ -5,7 +5,7 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Weaviate
7
7
  #
8
- # Gem requirements: gem "weaviate-ruby", "~> 0.8.0"
8
+ # Gem requirements: gem "weaviate-ruby", "~> 0.8.3"
9
9
  #
10
10
  # Usage:
11
11
  # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
@@ -35,7 +35,7 @@ module Langchain::Vectorsearch
35
35
  # Add a list of texts to the index
36
36
  # @param texts [Array] The list of texts to add
37
37
  # @return [Hash] The response from the server
38
- def add_texts(texts:, ids:)
38
+ def add_texts(texts:, ids: [])
39
39
  client.objects.batch_create(
40
40
  objects: weaviate_objects(texts, ids)
41
41
  )
@@ -127,13 +127,13 @@ module Langchain::Vectorsearch
127
127
 
128
128
  private
129
129
 
130
- def weaviate_objects(texts, ids)
130
+ def weaviate_objects(texts, ids = [])
131
131
  Array(texts).map.with_index do |text, i|
132
132
  weaviate_object(text, ids[i])
133
133
  end
134
134
  end
135
135
 
136
- def weaviate_object(text, id)
136
+ def weaviate_object(text, id = nil)
137
137
  {
138
138
  class: index_name,
139
139
  properties: {
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.1"
4
+ VERSION = "0.6.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-24 00:00:00.000000000 Z
11
+ date: 2023-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -436,14 +436,14 @@ dependencies:
436
436
  requirements:
437
437
  - - "~>"
438
438
  - !ruby/object:Gem::Version
439
- version: 0.8.0
439
+ version: 0.8.3
440
440
  type: :development
441
441
  prerelease: false
442
442
  version_requirements: !ruby/object:Gem::Requirement
443
443
  requirements:
444
444
  - - "~>"
445
445
  - !ruby/object:Gem::Version
446
- version: 0.8.0
446
+ version: 0.8.3
447
447
  - !ruby/object:Gem::Dependency
448
448
  name: wikipedia-client
449
449
  requirement: !ruby/object:Gem::Requirement