langchainrb 0.6.0 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7fba7b5e03ae75aa4ee3c89dd0322a73bbb9d3ced79f48dda8861af1f4f197b8
4
- data.tar.gz: 404e742b0911305beec3bd22575740fc78ed9005e21295e0f7c348c1bede3e7e
3
+ metadata.gz: 3df4368be6da730348143ba599d9fba642277a644a6dff9b9dc1b81014d29a8b
4
+ data.tar.gz: 964c65411f2fe7d1768a65a1700eeb573f56946a60b8cfd04b43fade8faf75d1
5
5
  SHA512:
6
- metadata.gz: c8166375c28abe9bc3a7e02a2ceba2ce1bea8ff4f751beb2c69d6f34aa46ba2b7c6ca34458f79b6dfba6d8908cc5e9b055f4e4e7dc6c4c09ac65f7f589c12eb7
7
- data.tar.gz: fb81a51867575c5fae10b79f0f3ee761b25b75d623f2c071d000b39f84c672d9f7164e3c8442a1f11d7abdff8edd41f7e79c2c3a7cb99fa9952406bc60506ce8
6
+ metadata.gz: 1b032b337b109a953c49a5eafe4ad4161818bd02a2bd0076f7922aad438f4eb79029338dcb1c5d1f267b9f6ba1e0a35e7ba6fabe1369ae9d02b45240837012c8
7
+ data.tar.gz: e6d03a7bf10aa60ffc833ecda448e75c84edaaf9febece3ec06385857f340f19efd674dfd2f43a7d08c1a6698a13880a0e54dd90b740ce438a4658b08eae37d7
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.2] - 2023-06-25
4
+ - Qdrant, Chroma, and Pinecone are supported by ActiveRecord hooks
5
+
6
+ ## [0.6.1] - 2023-06-24
7
+ - Adding support to hook vectorsearch into ActiveRecord models
8
+
3
9
  ## [0.6.0] - 2023-06-22
4
10
  - [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
5
11
  - Implement AI21 token validator
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.0)
4
+ langchainrb (0.6.2)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
@@ -133,7 +133,7 @@ GEM
133
133
  faraday (>= 1.0)
134
134
  faraday_middleware
135
135
  graphql-client
136
- graphql (2.0.21)
136
+ graphql (2.0.23)
137
137
  graphql-client (0.18.0)
138
138
  activesupport (>= 3.0)
139
139
  graphql
@@ -298,7 +298,7 @@ GEM
298
298
  tzinfo (2.0.6)
299
299
  concurrent-ruby (~> 1.0)
300
300
  unicode-display_width (2.4.2)
301
- weaviate-ruby (0.8.1)
301
+ weaviate-ruby (0.8.3)
302
302
  faraday (~> 1)
303
303
  faraday_middleware (~> 1)
304
304
  graphlient (~> 0.6.0)
@@ -346,7 +346,7 @@ DEPENDENCIES
346
346
  safe_ruby (~> 1.0.4)
347
347
  sequel (~> 5.68.0)
348
348
  standardrb
349
- weaviate-ruby (~> 0.8.0)
349
+ weaviate-ruby (~> 0.8.3)
350
350
  wikipedia-client (~> 1.17.0)
351
351
  yard
352
352
 
data/README.md CHANGED
@@ -35,19 +35,19 @@ require "langchain"
35
35
 
36
36
  | Database | Querying | Storage | Schema Management | Backups | Rails Integration |
37
37
  | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
38
- | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
38
+ | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
39
39
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
40
40
  | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
41
- | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
41
+ | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
42
42
  | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
- | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
44
- | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
+ | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
44
+ | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
45
45
 
46
46
  ### Using Vector Search Databases 🔍
47
47
 
48
48
  Choose the LLM provider you'll be using (OpenAI or Cohere) and retrieve the API key.
49
49
 
50
- Add `gem "weaviate-ruby", "~> 0.8.0"` to your Gemfile.
50
+ Add `gem "weaviate-ruby", "~> 0.8.3"` to your Gemfile.
51
51
 
52
52
  Pick the vector search database you'll be using and instantiate the client:
53
53
  ```ruby
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module ActiveRecord
5
+ # This module adds the following functionality to your ActiveRecord models:
6
+ # * `vectorsearch` class method to set the vector search provider
7
+ # * `similarity_search` class method to search for similar texts
8
+ # * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
9
+ #
10
+ # Usage:
11
+ # class Recipe < ActiveRecord::Base
12
+ # vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
13
+ # api_key: ENV["WEAVIATE_API_KEY"],
14
+ # url: ENV["WEAVIATE_URL"],
15
+ # index_name: "Recipes",
16
+ # llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
17
+ # )
18
+ #
19
+ # after_save :upsert_to_vectorsearch
20
+ #
21
+ # # Overwriting how the model is serialized before it's indexed
22
+ # def as_vector
23
+ # [
24
+ # "Title: #{title}",
25
+ # "Description: #{description}",
26
+ # ...
27
+ # ]
28
+ # .compact
29
+ # .join("\n")
30
+ # end
31
+ # end
32
+ #
33
+ # Create the default schema
34
+ # Recipe.class_variable_get(:@@provider).create_default_schema
35
+ # Query the vector search provider
36
+ # Recipe.similarity_search("carnivore dish")
37
+ # Delete the default schema to start over
38
+ # Recipe.class_variable_get(:@@provider).client.schema.delete class_name: "Recipes"
39
+ #
40
+ module Hooks
41
+ def self.included(base)
42
+ base.extend ClassMethods
43
+ end
44
+
45
+ # Index the text to the vector search provider
46
+ # You'd typically call this method in an ActiveRecord callback
47
+ #
48
+ # @return [Boolean] true
49
+ # @raise [Error] Indexing to vector search DB failed
50
+ def upsert_to_vectorsearch
51
+ if previously_new_record?
52
+ self.class.class_variable_get(:@@provider).add_texts(
53
+ texts: [as_vector],
54
+ ids: [id]
55
+ )
56
+ else
57
+ self.class.class_variable_get(:@@provider).update_texts(
58
+ texts: [as_vector],
59
+ ids: [id]
60
+ )
61
+ end
62
+ end
63
+
64
+ # Used to serialize the DB record to an indexable vector text
65
+ # Overwrite this method in your model to customize
66
+ #
67
+ # @return [String] the text representation of the model
68
+ def as_vector
69
+ to_json
70
+ end
71
+
72
+ module ClassMethods
73
+ # Set the vector search provider
74
+ #
75
+ # @param provider [Object] The `Langchain::Vectorsearch::*` instance
76
+ def vectorsearch(provider:)
77
+ class_variable_set(:@@provider, provider)
78
+ end
79
+
80
+ # Search for similar texts
81
+ #
82
+ # @param query [String] The query to search for
83
+ # @param k [Integer] The number of results to return
84
+ # @return [ActiveRecord::Relation] The ActiveRecord relation
85
+ def similarity_search(query, k: 1)
86
+ records = class_variable_get(:@@provider).similarity_search(
87
+ query: query,
88
+ k: k
89
+ )
90
+
91
+ # We use "__id" when Weaviate is the provider
92
+ ids = records.map { |record| record.dig("id") || record.dig("__id") }
93
+ where(id: ids)
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ class Railtie < Rails::Railtie
5
+ initializer "langchain" do
6
+ ActiveSupport.on_load(:active_record) do
7
+ ::ActiveRecord::Base.include Langchain::ActiveRecord::Hooks
8
+ end
9
+ end
10
+ end
11
+ end
@@ -108,6 +108,11 @@ module Langchain::Vectorsearch
108
108
  raise NotImplementedError, "#{self.class.name} does not support adding texts"
109
109
  end
110
110
 
111
+ # Method supported by Vectorsearch DB to update a list of texts in the index
112
+ def update_texts(...)
113
+ raise NotImplementedError, "#{self.class.name} does not support updating texts"
114
+ end
115
+
111
116
  # Method supported by Vectorsearch DB to search for similar texts in the index
112
117
  def similarity_search(...)
113
118
  raise NotImplementedError, "#{self.class.name} does not support similarity search"
@@ -32,11 +32,10 @@ module Langchain::Vectorsearch
32
32
  # Add a list of texts to the index
33
33
  # @param texts [Array] The list of texts to add
34
34
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
36
- embeddings = Array(texts).map do |text|
35
+ def add_texts(texts:, ids: [])
36
+ embeddings = Array(texts).map.with_index do |text, i|
37
37
  ::Chroma::Resources::Embedding.new(
38
- # TODO: Add support for passing your own IDs
39
- id: SecureRandom.uuid,
38
+ id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
40
39
  embedding: llm.embed(text: text),
41
40
  # TODO: Add support for passing metadata
42
41
  metadata: [], # metadatas[index],
@@ -48,6 +47,20 @@ module Langchain::Vectorsearch
48
47
  collection.add(embeddings)
49
48
  end
50
49
 
50
+ def update_texts(texts:, ids:)
51
+ embeddings = Array(texts).map.with_index do |text, i|
52
+ ::Chroma::Resources::Embedding.new(
53
+ id: ids[i].to_s,
54
+ embedding: llm.embed(text: text),
55
+ # TODO: Add support for passing metadata
56
+ metadata: [], # metadatas[index],
57
+ document: text # Do we actually need to store the whole original document?
58
+ )
59
+ end
60
+
61
+ collection.update(embeddings)
62
+ end
63
+
51
64
  # Create the collection with the default schema
52
65
  # @return [Hash] The response from the server
53
66
  def create_default_schema
@@ -33,14 +33,14 @@ module Langchain::Vectorsearch
33
33
 
34
34
  # Add a list of texts to the index
35
35
  # @param texts [Array] The list of texts to add
36
+ # @param ids [Array] The list of IDs to add
36
37
  # @param namespace [String] The namespace to add the texts to
37
38
  # @param metadata [Hash] The metadata to use for the texts
38
39
  # @return [Hash] The response from the server
39
- def add_texts(texts:, namespace: "", metadata: nil)
40
- vectors = texts.map do |text|
40
+ def add_texts(texts:, ids: [], namespace: "", metadata: nil)
41
+ vectors = texts.map.with_index do |text, i|
41
42
  {
42
- # TODO: Allows passing in your own IDs
43
- id: SecureRandom.uuid,
43
+ id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
44
44
  metadata: metadata || {content: text},
45
45
  values: llm.embed(text: text)
46
46
  }
@@ -51,6 +51,24 @@ module Langchain::Vectorsearch
51
51
  index.upsert(vectors: vectors, namespace: namespace)
52
52
  end
53
53
 
54
+ # Update a list of texts in the index
55
+ # @param texts [Array] The list of texts to update
56
+ # @param ids [Array] The list of IDs to update
57
+ # @param namespace [String] The namespace to update the texts in
58
+ # @param metadata [Hash] The metadata to use for the texts
59
+ # @return [Array] The response from the server
60
+ def update_texts(texts:, ids:, namespace: "", metadata: nil)
61
+ texts.map.with_index do |text, i|
62
+ # Pinecone::Vector#update ignores any argument that is empty
63
+ index.update(
64
+ namespace: namespace,
65
+ id: ids[i].to_s,
66
+ values: llm.embed(text: text),
67
+ set_metadata: metadata
68
+ )
69
+ end
70
+ end
71
+
54
72
  # Create the index with the default schema
55
73
  # @return [Hash] The response from the server
56
74
  def create_default_schema
@@ -122,5 +140,11 @@ module Langchain::Vectorsearch
122
140
 
123
141
  llm.chat(prompt: prompt)
124
142
  end
143
+
144
+ # Pinecone index
145
+ # @return [Object] The Pinecone index
146
+ private def index
147
+ client.index(index_name)
148
+ end
125
149
  end
126
150
  end
@@ -32,11 +32,11 @@ module Langchain::Vectorsearch
32
32
  # Add a list of texts to the index
33
33
  # @param texts [Array] The list of texts to add
34
34
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
35
+ def add_texts(texts:, ids:)
36
36
  batch = {ids: [], vectors: [], payloads: []}
37
37
 
38
- Array(texts).each do |text|
39
- batch[:ids].push(SecureRandom.uuid)
38
+ Array(texts).each_with_index do |text, i|
39
+ batch[:ids].push(ids[i] || SecureRandom.uuid)
40
40
  batch[:vectors].push(llm.embed(text: text))
41
41
  batch[:payloads].push({content: text})
42
42
  end
@@ -47,6 +47,10 @@ module Langchain::Vectorsearch
47
47
  )
48
48
  end
49
49
 
50
+ def update_texts(texts:, ids:)
51
+ add_texts(texts: texts, ids: ids)
52
+ end
53
+
50
54
  # Create the index with the default schema
51
55
  # @return [Hash] The response from the server
52
56
  def create_default_schema
@@ -83,12 +87,14 @@ module Langchain::Vectorsearch
83
87
  embedding:,
84
88
  k: 4
85
89
  )
86
- client.points.search(
90
+ response = client.points.search(
87
91
  collection_name: index_name,
88
92
  limit: k,
89
93
  vector: embedding,
90
- with_payload: true
94
+ with_payload: true,
95
+ with_vector: true
91
96
  )
97
+ response.dig("result")
92
98
  end
93
99
 
94
100
  # Ask a question and return the answer
@@ -5,7 +5,7 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Weaviate
7
7
  #
8
- # Gem requirements: gem "weaviate-ruby", "~> 0.8.0"
8
+ # Gem requirements: gem "weaviate-ruby", "~> 0.8.3"
9
9
  #
10
10
  # Usage:
11
11
  # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
@@ -14,7 +14,7 @@ module Langchain::Vectorsearch
14
14
  # Initialize the Weaviate adapter
15
15
  # @param url [String] The URL of the Weaviate instance
16
16
  # @param api_key [String] The API key to use
17
- # @param index_name [String] The name of the index to use
17
+ # @param index_name [String] The capitalized name of the index to use
18
18
  # @param llm [Object] The LLM client to use
19
19
  def initialize(url:, api_key:, index_name:, llm:)
20
20
  depends_on "weaviate-ruby"
@@ -24,6 +24,9 @@ module Langchain::Vectorsearch
24
24
  url: url,
25
25
  api_key: api_key
26
26
  )
27
+
28
+ # Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
29
+ # TODO: Capitalize index_name
27
30
  @index_name = index_name
28
31
 
29
32
  super(llm: llm)
@@ -32,31 +35,51 @@ module Langchain::Vectorsearch
32
35
  # Add a list of texts to the index
33
36
  # @param texts [Array] The list of texts to add
34
37
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
36
- objects = Array(texts).map do |text|
37
- {
38
- class: index_name,
39
- properties: {content: text},
40
- vector: llm.embed(text: text)
41
- }
42
- end
43
-
38
+ def add_texts(texts:, ids: [])
44
39
  client.objects.batch_create(
45
- objects: objects
40
+ objects: weaviate_objects(texts, ids)
46
41
  )
47
42
  end
48
43
 
44
+ # Update a list of texts in the index
45
+ # @param texts [Array] The list of texts to update
46
+ # @return [Hash] The response from the server
47
+ def update_texts(texts:, ids:)
48
+ uuids = []
49
+
50
+ # Retrieve the UUIDs of the objects to update
51
+ Array(texts).map.with_index do |text, i|
52
+ record = client.query.get(
53
+ class_name: index_name,
54
+ fields: "_additional { id }",
55
+ where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{ids[i]}\" }"
56
+ )
57
+ uuids.push record[0].dig("_additional", "id")
58
+ end
59
+
60
+ # Update the objects
61
+ texts.map.with_index do |text, i|
62
+ client.objects.update(
63
+ class_name: index_name,
64
+ id: uuids[i],
65
+ properties: {
66
+ __id: ids[i].to_s,
67
+ content: text
68
+ },
69
+ vector: llm.embed(text: text)
70
+ )
71
+ end
72
+ end
73
+
49
74
  # Create default schema
50
75
  def create_default_schema
51
76
  client.schema.create(
52
77
  class_name: index_name,
53
78
  vectorizer: "none",
54
79
  properties: [
55
- # TODO: Allow passing in your own IDs
56
- {
57
- dataType: ["text"],
58
- name: "content"
59
- }
80
+ # __id is used as a pointer to the original document
81
+ {dataType: ["string"], name: "__id"}, # '_id' is a reserved property name (single underscore)
82
+ {dataType: ["text"], name: "content"}
60
83
  ]
61
84
  )
62
85
  end
@@ -82,7 +105,7 @@ module Langchain::Vectorsearch
82
105
  class_name: index_name,
83
106
  near_vector: near_vector,
84
107
  limit: k.to_s,
85
- fields: "content _additional { id }"
108
+ fields: "__id content _additional { id }"
86
109
  )
87
110
  end
88
111
 
@@ -101,5 +124,24 @@ module Langchain::Vectorsearch
101
124
 
102
125
  llm.chat(prompt: prompt)
103
126
  end
127
+
128
+ private
129
+
130
+ def weaviate_objects(texts, ids = [])
131
+ Array(texts).map.with_index do |text, i|
132
+ weaviate_object(text, ids[i])
133
+ end
134
+ end
135
+
136
+ def weaviate_object(text, id = nil)
137
+ {
138
+ class: index_name,
139
+ properties: {
140
+ __id: id.to_s,
141
+ content: text
142
+ },
143
+ vector: llm.embed(text: text)
144
+ }
145
+ end
104
146
  end
105
147
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.0"
4
+ VERSION = "0.6.2"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -145,6 +145,10 @@ module Langchain
145
145
  autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
146
146
  end
147
147
 
148
+ module ActiveRecord
149
+ autoload :Hooks, "langchain/active_record/hooks"
150
+ end
151
+
148
152
  module OutputParsers
149
153
  autoload :Base, "langchain/output_parsers/base"
150
154
  autoload :StructuredOutputParser, "langchain/output_parsers/structured"
@@ -154,3 +158,5 @@ module Langchain
154
158
  class BaseError < StandardError; end
155
159
  end
156
160
  end
161
+
162
+ require "langchain/railtie" if defined?(Rails)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-23 00:00:00.000000000 Z
11
+ date: 2023-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -436,14 +436,14 @@ dependencies:
436
436
  requirements:
437
437
  - - "~>"
438
438
  - !ruby/object:Gem::Version
439
- version: 0.8.0
439
+ version: 0.8.3
440
440
  type: :development
441
441
  prerelease: false
442
442
  version_requirements: !ruby/object:Gem::Requirement
443
443
  requirements:
444
444
  - - "~>"
445
445
  - !ruby/object:Gem::Version
446
- version: 0.8.0
446
+ version: 0.8.3
447
447
  - !ruby/object:Gem::Dependency
448
448
  name: wikipedia-client
449
449
  requirement: !ruby/object:Gem::Requirement
@@ -483,6 +483,7 @@ files:
483
483
  - examples/store_and_query_with_weaviate.rb
484
484
  - lefthook.yml
485
485
  - lib/langchain.rb
486
+ - lib/langchain/active_record/hooks.rb
486
487
  - lib/langchain/agent/base.rb
487
488
  - lib/langchain/agent/react_agent/react_agent.rb
488
489
  - lib/langchain/agent/react_agent/react_agent_prompt.yaml
@@ -519,6 +520,7 @@ files:
519
520
  - lib/langchain/prompt/few_shot_prompt_template.rb
520
521
  - lib/langchain/prompt/loading.rb
521
522
  - lib/langchain/prompt/prompt_template.rb
523
+ - lib/langchain/railtie.rb
522
524
  - lib/langchain/tool/base.rb
523
525
  - lib/langchain/tool/calculator.rb
524
526
  - lib/langchain/tool/database.rb