langchainrb 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a6f4e8bb8ecaba6ff4d53bba384bd6338012429a69a0dc7df0a58a476763e7e
4
- data.tar.gz: 92211a22fca9664831cf4f395a53dedddafc339ab419780932398c07256b737d
3
+ metadata.gz: 3df4368be6da730348143ba599d9fba642277a644a6dff9b9dc1b81014d29a8b
4
+ data.tar.gz: 964c65411f2fe7d1768a65a1700eeb573f56946a60b8cfd04b43fade8faf75d1
5
5
  SHA512:
6
- metadata.gz: b5c84f0a9a54f51799c5318cba243457fcfd6f026c71b8f34e58cf60172d476963f25ea8d24c49b35ed93c893adb9e2844443a22dd9e927ab16318850a11419a
7
- data.tar.gz: 4664927203ea032f737000c27ec5fa04c96ab606ec8377b4673b48638905b458077d4ab3cb7727fcb98be6c607a37bd318395fd96000a734de213c7d9041a219
6
+ metadata.gz: 1b032b337b109a953c49a5eafe4ad4161818bd02a2bd0076f7922aad438f4eb79029338dcb1c5d1f267b9f6ba1e0a35e7ba6fabe1369ae9d02b45240837012c8
7
+ data.tar.gz: e6d03a7bf10aa60ffc833ecda448e75c84edaaf9febece3ec06385857f340f19efd674dfd2f43a7d08c1a6698a13880a0e54dd90b740ce438a4658b08eae37d7
data/CHANGELOG.md CHANGED
@@ -1,5 +1,8 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.2] - 2023-06-25
4
+ - Qdrant, Chroma, and Pinecone are supported by ActiveRecord hooks
5
+
3
6
  ## [0.6.1] - 2023-06-24
4
7
  - Adding support to hook vectorsearch into ActiveRecord models
5
8
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.1)
4
+ langchainrb (0.6.2)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
@@ -133,7 +133,7 @@ GEM
133
133
  faraday (>= 1.0)
134
134
  faraday_middleware
135
135
  graphql-client
136
- graphql (2.0.21)
136
+ graphql (2.0.23)
137
137
  graphql-client (0.18.0)
138
138
  activesupport (>= 3.0)
139
139
  graphql
@@ -298,7 +298,7 @@ GEM
298
298
  tzinfo (2.0.6)
299
299
  concurrent-ruby (~> 1.0)
300
300
  unicode-display_width (2.4.2)
301
- weaviate-ruby (0.8.1)
301
+ weaviate-ruby (0.8.3)
302
302
  faraday (~> 1)
303
303
  faraday_middleware (~> 1)
304
304
  graphlient (~> 0.6.0)
@@ -346,7 +346,7 @@ DEPENDENCIES
346
346
  safe_ruby (~> 1.0.4)
347
347
  sequel (~> 5.68.0)
348
348
  standardrb
349
- weaviate-ruby (~> 0.8.0)
349
+ weaviate-ruby (~> 0.8.3)
350
350
  wikipedia-client (~> 1.17.0)
351
351
  yard
352
352
 
data/README.md CHANGED
@@ -35,19 +35,19 @@ require "langchain"
35
35
 
36
36
  | Database | Querying | Storage | Schema Management | Backups | Rails Integration |
37
37
  | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
38
- | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
38
+ | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
39
39
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
40
40
  | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
41
- | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
41
+ | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
42
42
  | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
- | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
+ | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
44
44
  | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
45
45
 
46
46
  ### Using Vector Search Databases 🔍
47
47
 
48
48
  Choose the LLM provider you'll be using (OpenAI or Cohere) and retrieve the API key.
49
49
 
50
- Add `gem "weaviate-ruby", "~> 0.8.0"` to your Gemfile.
50
+ Add `gem "weaviate-ruby", "~> 0.8.3"` to your Gemfile.
51
51
 
52
52
  Pick the vector search database you'll be using and instantiate the client:
53
53
  ```ruby
@@ -87,7 +87,9 @@ module Langchain
87
87
  query: query,
88
88
  k: k
89
89
  )
90
- ids = records.map { |record| record.dig("__id") }
90
+
91
+ # We use "__id" when Weaviate is the provider
92
+ ids = records.map { |record| record.dig("id") || record.dig("__id") }
91
93
  where(id: ids)
92
94
  end
93
95
  end
@@ -108,6 +108,11 @@ module Langchain::Vectorsearch
108
108
  raise NotImplementedError, "#{self.class.name} does not support adding texts"
109
109
  end
110
110
 
111
+ # Method supported by Vectorsearch DB to update a list of texts in the index
112
+ def update_texts(...)
113
+ raise NotImplementedError, "#{self.class.name} does not support updating texts"
114
+ end
115
+
111
116
  # Method supported by Vectorsearch DB to search for similar texts in the index
112
117
  def similarity_search(...)
113
118
  raise NotImplementedError, "#{self.class.name} does not support similarity search"
@@ -32,11 +32,10 @@ module Langchain::Vectorsearch
32
32
  # Add a list of texts to the index
33
33
  # @param texts [Array] The list of texts to add
34
34
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
36
- embeddings = Array(texts).map do |text|
35
+ def add_texts(texts:, ids: [])
36
+ embeddings = Array(texts).map.with_index do |text, i|
37
37
  ::Chroma::Resources::Embedding.new(
38
- # TODO: Add support for passing your own IDs
39
- id: SecureRandom.uuid,
38
+ id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
40
39
  embedding: llm.embed(text: text),
41
40
  # TODO: Add support for passing metadata
42
41
  metadata: [], # metadatas[index],
@@ -48,6 +47,20 @@ module Langchain::Vectorsearch
48
47
  collection.add(embeddings)
49
48
  end
50
49
 
50
+ def update_texts(texts:, ids:)
51
+ embeddings = Array(texts).map.with_index do |text, i|
52
+ ::Chroma::Resources::Embedding.new(
53
+ id: ids[i].to_s,
54
+ embedding: llm.embed(text: text),
55
+ # TODO: Add support for passing metadata
56
+ metadata: [], # metadatas[index],
57
+ document: text # Do we actually need to store the whole original document?
58
+ )
59
+ end
60
+
61
+ collection.update(embeddings)
62
+ end
63
+
51
64
  # Create the collection with the default schema
52
65
  # @return [Hash] The response from the server
53
66
  def create_default_schema
@@ -33,14 +33,14 @@ module Langchain::Vectorsearch
33
33
 
34
34
  # Add a list of texts to the index
35
35
  # @param texts [Array] The list of texts to add
36
+ # @param ids [Array] The list of IDs to add
36
37
  # @param namespace [String] The namespace to add the texts to
37
38
  # @param metadata [Hash] The metadata to use for the texts
38
39
  # @return [Hash] The response from the server
39
- def add_texts(texts:, namespace: "", metadata: nil)
40
- vectors = texts.map do |text|
40
+ def add_texts(texts:, ids: [], namespace: "", metadata: nil)
41
+ vectors = texts.map.with_index do |text, i|
41
42
  {
42
- # TODO: Allows passing in your own IDs
43
- id: SecureRandom.uuid,
43
+ id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
44
44
  metadata: metadata || {content: text},
45
45
  values: llm.embed(text: text)
46
46
  }
@@ -51,6 +51,24 @@ module Langchain::Vectorsearch
51
51
  index.upsert(vectors: vectors, namespace: namespace)
52
52
  end
53
53
 
54
+ # Update a list of texts in the index
55
+ # @param texts [Array] The list of texts to update
56
+ # @param ids [Array] The list of IDs to update
57
+ # @param namespace [String] The namespace to update the texts in
58
+ # @param metadata [Hash] The metadata to use for the texts
59
+ # @return [Array] The response from the server
60
+ def update_texts(texts:, ids:, namespace: "", metadata: nil)
61
+ texts.map.with_index do |text, i|
62
+ # Pinecone::Vector#update ignores arguments that are empty
63
+ index.update(
64
+ namespace: namespace,
65
+ id: ids[i].to_s,
66
+ values: llm.embed(text: text),
67
+ set_metadata: metadata
68
+ )
69
+ end
70
+ end
71
+
54
72
  # Create the index with the default schema
55
73
  # @return [Hash] The response from the server
56
74
  def create_default_schema
@@ -122,5 +140,11 @@ module Langchain::Vectorsearch
122
140
 
123
141
  llm.chat(prompt: prompt)
124
142
  end
143
+
144
+ # Pinecone index
145
+ # @return [Object] The Pinecone index
146
+ private def index
147
+ client.index(index_name)
148
+ end
125
149
  end
126
150
  end
@@ -32,11 +32,11 @@ module Langchain::Vectorsearch
32
32
  # Add a list of texts to the index
33
33
  # @param texts [Array] The list of texts to add
34
34
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
35
+ def add_texts(texts:, ids:)
36
36
  batch = {ids: [], vectors: [], payloads: []}
37
37
 
38
- Array(texts).each do |text|
39
- batch[:ids].push(SecureRandom.uuid)
38
+ Array(texts).each_with_index do |text, i|
39
+ batch[:ids].push(ids[i] || SecureRandom.uuid)
40
40
  batch[:vectors].push(llm.embed(text: text))
41
41
  batch[:payloads].push({content: text})
42
42
  end
@@ -47,6 +47,10 @@ module Langchain::Vectorsearch
47
47
  )
48
48
  end
49
49
 
50
+ def update_texts(texts:, ids:)
51
+ add_texts(texts: texts, ids: ids)
52
+ end
53
+
50
54
  # Create the index with the default schema
51
55
  # @return [Hash] The response from the server
52
56
  def create_default_schema
@@ -83,12 +87,14 @@ module Langchain::Vectorsearch
83
87
  embedding:,
84
88
  k: 4
85
89
  )
86
- client.points.search(
90
+ response = client.points.search(
87
91
  collection_name: index_name,
88
92
  limit: k,
89
93
  vector: embedding,
90
- with_payload: true
94
+ with_payload: true,
95
+ with_vector: true
91
96
  )
97
+ response.dig("result")
92
98
  end
93
99
 
94
100
  # Ask a question and return the answer
@@ -5,7 +5,7 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Weaviate
7
7
  #
8
- # Gem requirements: gem "weaviate-ruby", "~> 0.8.0"
8
+ # Gem requirements: gem "weaviate-ruby", "~> 0.8.3"
9
9
  #
10
10
  # Usage:
11
11
  # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
@@ -35,7 +35,7 @@ module Langchain::Vectorsearch
35
35
  # Add a list of texts to the index
36
36
  # @param texts [Array] The list of texts to add
37
37
  # @return [Hash] The response from the server
38
- def add_texts(texts:, ids:)
38
+ def add_texts(texts:, ids: [])
39
39
  client.objects.batch_create(
40
40
  objects: weaviate_objects(texts, ids)
41
41
  )
@@ -127,13 +127,13 @@ module Langchain::Vectorsearch
127
127
 
128
128
  private
129
129
 
130
- def weaviate_objects(texts, ids)
130
+ def weaviate_objects(texts, ids = [])
131
131
  Array(texts).map.with_index do |text, i|
132
132
  weaviate_object(text, ids[i])
133
133
  end
134
134
  end
135
135
 
136
- def weaviate_object(text, id)
136
+ def weaviate_object(text, id = nil)
137
137
  {
138
138
  class: index_name,
139
139
  properties: {
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.1"
4
+ VERSION = "0.6.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-24 00:00:00.000000000 Z
11
+ date: 2023-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -436,14 +436,14 @@ dependencies:
436
436
  requirements:
437
437
  - - "~>"
438
438
  - !ruby/object:Gem::Version
439
- version: 0.8.0
439
+ version: 0.8.3
440
440
  type: :development
441
441
  prerelease: false
442
442
  version_requirements: !ruby/object:Gem::Requirement
443
443
  requirements:
444
444
  - - "~>"
445
445
  - !ruby/object:Gem::Version
446
- version: 0.8.0
446
+ version: 0.8.3
447
447
  - !ruby/object:Gem::Dependency
448
448
  name: wikipedia-client
449
449
  requirement: !ruby/object:Gem::Requirement