langchainrb 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7fba7b5e03ae75aa4ee3c89dd0322a73bbb9d3ced79f48dda8861af1f4f197b8
4
- data.tar.gz: 404e742b0911305beec3bd22575740fc78ed9005e21295e0f7c348c1bede3e7e
3
+ metadata.gz: 3df4368be6da730348143ba599d9fba642277a644a6dff9b9dc1b81014d29a8b
4
+ data.tar.gz: 964c65411f2fe7d1768a65a1700eeb573f56946a60b8cfd04b43fade8faf75d1
5
5
  SHA512:
6
- metadata.gz: c8166375c28abe9bc3a7e02a2ceba2ce1bea8ff4f751beb2c69d6f34aa46ba2b7c6ca34458f79b6dfba6d8908cc5e9b055f4e4e7dc6c4c09ac65f7f589c12eb7
7
- data.tar.gz: fb81a51867575c5fae10b79f0f3ee761b25b75d623f2c071d000b39f84c672d9f7164e3c8442a1f11d7abdff8edd41f7e79c2c3a7cb99fa9952406bc60506ce8
6
+ metadata.gz: 1b032b337b109a953c49a5eafe4ad4161818bd02a2bd0076f7922aad438f4eb79029338dcb1c5d1f267b9f6ba1e0a35e7ba6fabe1369ae9d02b45240837012c8
7
+ data.tar.gz: e6d03a7bf10aa60ffc833ecda448e75c84edaaf9febece3ec06385857f340f19efd674dfd2f43a7d08c1a6698a13880a0e54dd90b740ce438a4658b08eae37d7
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.2] - 2023-06-25
4
+ - Qdrant, Chroma, and Pinecone are supported by ActiveRecord hooks
5
+
6
+ ## [0.6.1] - 2023-06-24
7
+ - Adding support to hook vectorsearch into ActiveRecord models
8
+
3
9
  ## [0.6.0] - 2023-06-22
4
10
  - [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
5
11
  - Implement AI21 token validator
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.0)
4
+ langchainrb (0.6.2)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
@@ -133,7 +133,7 @@ GEM
133
133
  faraday (>= 1.0)
134
134
  faraday_middleware
135
135
  graphql-client
136
- graphql (2.0.21)
136
+ graphql (2.0.23)
137
137
  graphql-client (0.18.0)
138
138
  activesupport (>= 3.0)
139
139
  graphql
@@ -298,7 +298,7 @@ GEM
298
298
  tzinfo (2.0.6)
299
299
  concurrent-ruby (~> 1.0)
300
300
  unicode-display_width (2.4.2)
301
- weaviate-ruby (0.8.1)
301
+ weaviate-ruby (0.8.3)
302
302
  faraday (~> 1)
303
303
  faraday_middleware (~> 1)
304
304
  graphlient (~> 0.6.0)
@@ -346,7 +346,7 @@ DEPENDENCIES
346
346
  safe_ruby (~> 1.0.4)
347
347
  sequel (~> 5.68.0)
348
348
  standardrb
349
- weaviate-ruby (~> 0.8.0)
349
+ weaviate-ruby (~> 0.8.3)
350
350
  wikipedia-client (~> 1.17.0)
351
351
  yard
352
352
 
data/README.md CHANGED
@@ -35,19 +35,19 @@ require "langchain"
35
35
 
36
36
  | Database | Querying | Storage | Schema Management | Backups | Rails Integration |
37
37
  | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
38
- | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
38
+ | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
39
39
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
40
40
  | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
41
- | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
41
+ | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
42
42
  | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
- | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
44
- | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
+ | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
44
+ | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
45
45
 
46
46
  ### Using Vector Search Databases 🔍
47
47
 
48
48
  Choose the LLM provider you'll be using (OpenAI or Cohere) and retrieve the API key.
49
49
 
50
- Add `gem "weaviate-ruby", "~> 0.8.0"` to your Gemfile.
50
+ Add `gem "weaviate-ruby", "~> 0.8.3"` to your Gemfile.
51
51
 
52
52
  Pick the vector search database you'll be using and instantiate the client:
53
53
  ```ruby
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module ActiveRecord
5
+ # This module adds the following functionality to your ActiveRecord models:
6
+ # * `vectorsearch` class method to set the vector search provider
7
+ # * `similarity_search` class method to search for similar texts
8
+ # * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
9
+ #
10
+ # Usage:
11
+ # class Recipe < ActiveRecord::Base
12
+ # vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
13
+ # api_key: ENV["WEAVIATE_API_KEY"],
14
+ # url: ENV["WEAVIATE_URL"],
15
+ # index_name: "Recipes",
16
+ # llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
17
+ # )
18
+ #
19
+ # after_save :upsert_to_vectorsearch
20
+ #
21
+ # # Overwriting how the model is serialized before it's indexed
22
+ # def as_vector
23
+ # [
24
+ # "Title: #{title}",
25
+ # "Description: #{description}",
26
+ # ...
27
+ # ]
28
+ # .compact
29
+ # .join("\n")
30
+ # end
31
+ # end
32
+ #
33
+ # Create the default schema
34
+ # Recipe.class_variable_get(:@@provider).create_default_schema
35
+ # Query the vector search provider
36
+ # Recipe.similarity_search("carnivore dish")
37
+ # Delete the default schema to start over
38
+ # Recipe.class_variable_get(:@@provider).client.schema.delete class_name: "Recipes"
39
+ #
40
+ module Hooks
41
+ def self.included(base)
42
+ base.extend ClassMethods
43
+ end
44
+
45
+ # Index the text to the vector search provider
46
+ # You'd typically call this method in an ActiveRecord callback
47
+ #
48
+ # @return [Object] The response returned by the provider's `add_texts` or `update_texts` call
49
+ # @raise [Error] Indexing to vector search DB failed
50
+ def upsert_to_vectorsearch
51
+ if previously_new_record?
52
+ self.class.class_variable_get(:@@provider).add_texts(
53
+ texts: [as_vector],
54
+ ids: [id]
55
+ )
56
+ else
57
+ self.class.class_variable_get(:@@provider).update_texts(
58
+ texts: [as_vector],
59
+ ids: [id]
60
+ )
61
+ end
62
+ end
63
+
64
+ # Used to serialize the DB record to an indexable vector text
65
+ # Override this method in your model to customize
66
+ #
67
+ # @return [String] the text representation of the model
68
+ def as_vector
69
+ to_json
70
+ end
71
+
72
+ module ClassMethods
73
+ # Set the vector search provider
74
+ #
75
+ # @param provider [Object] The `Langchain::Vectorsearch::*` instance
76
+ def vectorsearch(provider:)
77
+ class_variable_set(:@@provider, provider)
78
+ end
79
+
80
+ # Search for similar texts
81
+ #
82
+ # @param query [String] The query to search for
83
+ # @param k [Integer] The number of results to return
84
+ # @return [ActiveRecord::Relation] The ActiveRecord relation
85
+ def similarity_search(query, k: 1)
86
+ records = class_variable_get(:@@provider).similarity_search(
87
+ query: query,
88
+ k: k
89
+ )
90
+
91
+ # We use "__id" when Weaviate is the provider
92
+ ids = records.map { |record| record.dig("id") || record.dig("__id") }
93
+ where(id: ids)
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ class Railtie < Rails::Railtie
5
+ initializer "langchain" do
6
+ ActiveSupport.on_load(:active_record) do
7
+ ::ActiveRecord::Base.include Langchain::ActiveRecord::Hooks
8
+ end
9
+ end
10
+ end
11
+ end
@@ -108,6 +108,11 @@ module Langchain::Vectorsearch
108
108
  raise NotImplementedError, "#{self.class.name} does not support adding texts"
109
109
  end
110
110
 
111
+ # Method supported by Vectorsearch DB to update a list of texts in the index
112
+ def update_texts(...)
113
+ raise NotImplementedError, "#{self.class.name} does not support updating texts"
114
+ end
115
+
111
116
  # Method supported by Vectorsearch DB to search for similar texts in the index
112
117
  def similarity_search(...)
113
118
  raise NotImplementedError, "#{self.class.name} does not support similarity search"
@@ -32,11 +32,10 @@ module Langchain::Vectorsearch
32
32
  # Add a list of texts to the index
33
33
  # @param texts [Array] The list of texts to add
34
34
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
36
- embeddings = Array(texts).map do |text|
35
+ def add_texts(texts:, ids: [])
36
+ embeddings = Array(texts).map.with_index do |text, i|
37
37
  ::Chroma::Resources::Embedding.new(
38
- # TODO: Add support for passing your own IDs
39
- id: SecureRandom.uuid,
38
+ id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
40
39
  embedding: llm.embed(text: text),
41
40
  # TODO: Add support for passing metadata
42
41
  metadata: [], # metadatas[index],
@@ -48,6 +47,20 @@ module Langchain::Vectorsearch
48
47
  collection.add(embeddings)
49
48
  end
50
49
 
50
+ def update_texts(texts:, ids:)
51
+ embeddings = Array(texts).map.with_index do |text, i|
52
+ ::Chroma::Resources::Embedding.new(
53
+ id: ids[i].to_s,
54
+ embedding: llm.embed(text: text),
55
+ # TODO: Add support for passing metadata
56
+ metadata: [], # metadatas[index],
57
+ document: text # Do we actually need to store the whole original document?
58
+ )
59
+ end
60
+
61
+ collection.update(embeddings)
62
+ end
63
+
51
64
  # Create the collection with the default schema
52
65
  # @return [Hash] The response from the server
53
66
  def create_default_schema
@@ -33,14 +33,14 @@ module Langchain::Vectorsearch
33
33
 
34
34
  # Add a list of texts to the index
35
35
  # @param texts [Array] The list of texts to add
36
+ # @param ids [Array] The list of IDs to add
36
37
  # @param namespace [String] The namespace to add the texts to
37
38
  # @param metadata [Hash] The metadata to use for the texts
38
39
  # @return [Hash] The response from the server
39
- def add_texts(texts:, namespace: "", metadata: nil)
40
- vectors = texts.map do |text|
40
+ def add_texts(texts:, ids: [], namespace: "", metadata: nil)
41
+ vectors = texts.map.with_index do |text, i|
41
42
  {
42
- # TODO: Allows passing in your own IDs
43
- id: SecureRandom.uuid,
43
+ id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
44
44
  metadata: metadata || {content: text},
45
45
  values: llm.embed(text: text)
46
46
  }
@@ -51,6 +51,24 @@ module Langchain::Vectorsearch
51
51
  index.upsert(vectors: vectors, namespace: namespace)
52
52
  end
53
53
 
54
+ # Update a list of texts in the index
55
+ # @param texts [Array] The list of texts to update
56
+ # @param ids [Array] The list of IDs to update
57
+ # @param namespace [String] The namespace to update the texts in
58
+ # @param metadata [Hash] The metadata to use for the texts
59
+ # @return [Array] The response from the server
60
+ def update_texts(texts:, ids:, namespace: "", metadata: nil)
61
+ texts.map.with_index do |text, i|
62
+ # Pinecone::Vector#update ignores an argument when it is empty
63
+ index.update(
64
+ namespace: namespace,
65
+ id: ids[i].to_s,
66
+ values: llm.embed(text: text),
67
+ set_metadata: metadata
68
+ )
69
+ end
70
+ end
71
+
54
72
  # Create the index with the default schema
55
73
  # @return [Hash] The response from the server
56
74
  def create_default_schema
@@ -122,5 +140,11 @@ module Langchain::Vectorsearch
122
140
 
123
141
  llm.chat(prompt: prompt)
124
142
  end
143
+
144
+ # Pinecone index
145
+ # @return [Object] The Pinecone index
146
+ private def index
147
+ client.index(index_name)
148
+ end
125
149
  end
126
150
  end
@@ -32,11 +32,11 @@ module Langchain::Vectorsearch
32
32
  # Add a list of texts to the index
33
33
  # @param texts [Array] The list of texts to add
34
34
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
35
+ def add_texts(texts:, ids:)
36
36
  batch = {ids: [], vectors: [], payloads: []}
37
37
 
38
- Array(texts).each do |text|
39
- batch[:ids].push(SecureRandom.uuid)
38
+ Array(texts).each_with_index do |text, i|
39
+ batch[:ids].push(ids[i] || SecureRandom.uuid)
40
40
  batch[:vectors].push(llm.embed(text: text))
41
41
  batch[:payloads].push({content: text})
42
42
  end
@@ -47,6 +47,10 @@ module Langchain::Vectorsearch
47
47
  )
48
48
  end
49
49
 
50
+ def update_texts(texts:, ids:)
51
+ add_texts(texts: texts, ids: ids)
52
+ end
53
+
50
54
  # Create the index with the default schema
51
55
  # @return [Hash] The response from the server
52
56
  def create_default_schema
@@ -83,12 +87,14 @@ module Langchain::Vectorsearch
83
87
  embedding:,
84
88
  k: 4
85
89
  )
86
- client.points.search(
90
+ response = client.points.search(
87
91
  collection_name: index_name,
88
92
  limit: k,
89
93
  vector: embedding,
90
- with_payload: true
94
+ with_payload: true,
95
+ with_vector: true
91
96
  )
97
+ response.dig("result")
92
98
  end
93
99
 
94
100
  # Ask a question and return the answer
@@ -5,7 +5,7 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Weaviate
7
7
  #
8
- # Gem requirements: gem "weaviate-ruby", "~> 0.8.0"
8
+ # Gem requirements: gem "weaviate-ruby", "~> 0.8.3"
9
9
  #
10
10
  # Usage:
11
11
  # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
@@ -14,7 +14,7 @@ module Langchain::Vectorsearch
14
14
  # Initialize the Weaviate adapter
15
15
  # @param url [String] The URL of the Weaviate instance
16
16
  # @param api_key [String] The API key to use
17
- # @param index_name [String] The name of the index to use
17
+ # @param index_name [String] The capitalized name of the index to use
18
18
  # @param llm [Object] The LLM client to use
19
19
  def initialize(url:, api_key:, index_name:, llm:)
20
20
  depends_on "weaviate-ruby"
@@ -24,6 +24,9 @@ module Langchain::Vectorsearch
24
24
  url: url,
25
25
  api_key: api_key
26
26
  )
27
+
28
+ # Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
29
+ # TODO: Capitalize index_name
27
30
  @index_name = index_name
28
31
 
29
32
  super(llm: llm)
@@ -32,31 +35,51 @@ module Langchain::Vectorsearch
32
35
  # Add a list of texts to the index
33
36
  # @param texts [Array] The list of texts to add
34
37
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
36
- objects = Array(texts).map do |text|
37
- {
38
- class: index_name,
39
- properties: {content: text},
40
- vector: llm.embed(text: text)
41
- }
42
- end
43
-
38
+ def add_texts(texts:, ids: [])
44
39
  client.objects.batch_create(
45
- objects: objects
40
+ objects: weaviate_objects(texts, ids)
46
41
  )
47
42
  end
48
43
 
44
+ # Update a list of texts in the index
45
+ # @param texts [Array] The list of texts to update
46
+ # @return [Hash] The response from the server
47
+ def update_texts(texts:, ids:)
48
+ uuids = []
49
+
50
+ # Retrieve the UUIDs of the objects to update
51
+ Array(texts).map.with_index do |text, i|
52
+ record = client.query.get(
53
+ class_name: index_name,
54
+ fields: "_additional { id }",
55
+ where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{ids[i]}\" }"
56
+ )
57
+ uuids.push record[0].dig("_additional", "id")
58
+ end
59
+
60
+ # Update the objects
61
+ texts.map.with_index do |text, i|
62
+ client.objects.update(
63
+ class_name: index_name,
64
+ id: uuids[i],
65
+ properties: {
66
+ __id: ids[i].to_s,
67
+ content: text
68
+ },
69
+ vector: llm.embed(text: text)
70
+ )
71
+ end
72
+ end
73
+
49
74
  # Create default schema
50
75
  def create_default_schema
51
76
  client.schema.create(
52
77
  class_name: index_name,
53
78
  vectorizer: "none",
54
79
  properties: [
55
- # TODO: Allow passing in your own IDs
56
- {
57
- dataType: ["text"],
58
- name: "content"
59
- }
80
+ # __id is used as a pointer to the original document
81
+ {dataType: ["string"], name: "__id"}, # '_id' is a reserved property name (single underscore)
82
+ {dataType: ["text"], name: "content"}
60
83
  ]
61
84
  )
62
85
  end
@@ -82,7 +105,7 @@ module Langchain::Vectorsearch
82
105
  class_name: index_name,
83
106
  near_vector: near_vector,
84
107
  limit: k.to_s,
85
- fields: "content _additional { id }"
108
+ fields: "__id content _additional { id }"
86
109
  )
87
110
  end
88
111
 
@@ -101,5 +124,24 @@ module Langchain::Vectorsearch
101
124
 
102
125
  llm.chat(prompt: prompt)
103
126
  end
127
+
128
+ private
129
+
130
+ def weaviate_objects(texts, ids = [])
131
+ Array(texts).map.with_index do |text, i|
132
+ weaviate_object(text, ids[i])
133
+ end
134
+ end
135
+
136
+ def weaviate_object(text, id = nil)
137
+ {
138
+ class: index_name,
139
+ properties: {
140
+ __id: id.to_s,
141
+ content: text
142
+ },
143
+ vector: llm.embed(text: text)
144
+ }
145
+ end
104
146
  end
105
147
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.0"
4
+ VERSION = "0.6.2"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -145,6 +145,10 @@ module Langchain
145
145
  autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
146
146
  end
147
147
 
148
+ module ActiveRecord
149
+ autoload :Hooks, "langchain/active_record/hooks"
150
+ end
151
+
148
152
  module OutputParsers
149
153
  autoload :Base, "langchain/output_parsers/base"
150
154
  autoload :StructuredOutputParser, "langchain/output_parsers/structured"
@@ -154,3 +158,5 @@ module Langchain
154
158
  class BaseError < StandardError; end
155
159
  end
156
160
  end
161
+
162
+ require "langchain/railtie" if defined?(Rails)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-23 00:00:00.000000000 Z
11
+ date: 2023-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -436,14 +436,14 @@ dependencies:
436
436
  requirements:
437
437
  - - "~>"
438
438
  - !ruby/object:Gem::Version
439
- version: 0.8.0
439
+ version: 0.8.3
440
440
  type: :development
441
441
  prerelease: false
442
442
  version_requirements: !ruby/object:Gem::Requirement
443
443
  requirements:
444
444
  - - "~>"
445
445
  - !ruby/object:Gem::Version
446
- version: 0.8.0
446
+ version: 0.8.3
447
447
  - !ruby/object:Gem::Dependency
448
448
  name: wikipedia-client
449
449
  requirement: !ruby/object:Gem::Requirement
@@ -483,6 +483,7 @@ files:
483
483
  - examples/store_and_query_with_weaviate.rb
484
484
  - lefthook.yml
485
485
  - lib/langchain.rb
486
+ - lib/langchain/active_record/hooks.rb
486
487
  - lib/langchain/agent/base.rb
487
488
  - lib/langchain/agent/react_agent/react_agent.rb
488
489
  - lib/langchain/agent/react_agent/react_agent_prompt.yaml
@@ -519,6 +520,7 @@ files:
519
520
  - lib/langchain/prompt/few_shot_prompt_template.rb
520
521
  - lib/langchain/prompt/loading.rb
521
522
  - lib/langchain/prompt/prompt_template.rb
523
+ - lib/langchain/railtie.rb
522
524
  - lib/langchain/tool/base.rb
523
525
  - lib/langchain/tool/calculator.rb
524
526
  - lib/langchain/tool/database.rb