langchainrb 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7fba7b5e03ae75aa4ee3c89dd0322a73bbb9d3ced79f48dda8861af1f4f197b8
4
- data.tar.gz: 404e742b0911305beec3bd22575740fc78ed9005e21295e0f7c348c1bede3e7e
3
+ metadata.gz: 5a6f4e8bb8ecaba6ff4d53bba384bd6338012429a69a0dc7df0a58a476763e7e
4
+ data.tar.gz: 92211a22fca9664831cf4f395a53dedddafc339ab419780932398c07256b737d
5
5
  SHA512:
6
- metadata.gz: c8166375c28abe9bc3a7e02a2ceba2ce1bea8ff4f751beb2c69d6f34aa46ba2b7c6ca34458f79b6dfba6d8908cc5e9b055f4e4e7dc6c4c09ac65f7f589c12eb7
7
- data.tar.gz: fb81a51867575c5fae10b79f0f3ee761b25b75d623f2c071d000b39f84c672d9f7164e3c8442a1f11d7abdff8edd41f7e79c2c3a7cb99fa9952406bc60506ce8
6
+ metadata.gz: b5c84f0a9a54f51799c5318cba243457fcfd6f026c71b8f34e58cf60172d476963f25ea8d24c49b35ed93c893adb9e2844443a22dd9e927ab16318850a11419a
7
+ data.tar.gz: 4664927203ea032f737000c27ec5fa04c96ab606ec8377b4673b48638905b458077d4ab3cb7727fcb98be6c607a37bd318395fd96000a734de213c7d9041a219
data/CHANGELOG.md CHANGED
@@ -1,5 +1,8 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.1] - 2023-06-24
4
+ - Adding support to hook vectorsearch into ActiveRecord models
5
+
3
6
  ## [0.6.0] - 2023-06-22
4
7
  - [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
5
8
  - Implement AI21 token validator
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.0)
4
+ langchainrb (0.6.1)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
data/README.md CHANGED
@@ -41,7 +41,7 @@ require "langchain"
41
41
  | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
42
42
  | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
43
  | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
44
- | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
44
+ | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
45
45
 
46
46
  ### Using Vector Search Databases 🔍
47
47
 
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module ActiveRecord
5
+ # This module adds the following functionality to your ActiveRecord models:
6
+ # * `vectorsearch` class method to set the vector search provider
7
+ # * `similarity_search` class method to search for similar texts
8
+ # * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
9
+ #
10
+ # Usage:
11
+ # class Recipe < ActiveRecord::Base
12
+ # vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
13
+ # api_key: ENV["WEAVIATE_API_KEY"],
14
+ # url: ENV["WEAVIATE_URL"],
15
+ # index_name: "Recipes",
16
+ # llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
17
+ # )
18
+ #
19
+ # after_save :upsert_to_vectorsearch
20
+ #
21
+ # # Overriding how the model is serialized before it's indexed
22
+ # def as_vector
23
+ # [
24
+ # "Title: #{title}",
25
+ # "Description: #{description}",
26
+ # ...
27
+ # ]
28
+ # .compact
29
+ # .join("\n")
30
+ # end
31
+ # end
32
+ #
33
+ # Create the default schema
34
+ # Recipe.class_variable_get(:@@provider).create_default_schema
35
+ # Query the vector search provider
36
+ # Recipe.similarity_search("carnivore dish")
37
+ # Delete the default schema to start over
38
+ # Recipe.class_variable_get(:@@provider).client.schema.delete class_name: "Recipes"
39
+ #
40
+ module Hooks
41
+ def self.included(base)
42
+ base.extend ClassMethods
43
+ end
44
+
45
+ # Index the text to the vector search provider
46
+ # You'd typically call this method in an ActiveRecord callback
47
+ #
48
+ # @return [Object] The response from the vector search provider (the result of `add_texts` or `update_texts`)
49
+ # @raise [Error] Indexing to vector search DB failed
50
+ def upsert_to_vectorsearch
51
+ if previously_new_record?
52
+ self.class.class_variable_get(:@@provider).add_texts(
53
+ texts: [as_vector],
54
+ ids: [id]
55
+ )
56
+ else
57
+ self.class.class_variable_get(:@@provider).update_texts(
58
+ texts: [as_vector],
59
+ ids: [id]
60
+ )
61
+ end
62
+ end
63
+
64
+ # Used to serialize the DB record to an indexable vector text
65
+ # Override this method in your model to customize
66
+ #
67
+ # @return [String] the text representation of the model
68
+ def as_vector
69
+ to_json
70
+ end
71
+
72
+ module ClassMethods
73
+ # Set the vector search provider
74
+ #
75
+ # @param provider [Object] The `Langchain::Vectorsearch::*` instance
76
+ def vectorsearch(provider:)
77
+ class_variable_set(:@@provider, provider)
78
+ end
79
+
80
+ # Search for similar texts
81
+ #
82
+ # @param query [String] The query to search for
83
+ # @param k [Integer] The number of results to return
84
+ # @return [ActiveRecord::Relation] The ActiveRecord relation
85
+ def similarity_search(query, k: 1)
86
+ records = class_variable_get(:@@provider).similarity_search(
87
+ query: query,
88
+ k: k
89
+ )
90
+ ids = records.map { |record| record.dig("__id") }
91
+ where(id: ids)
92
+ end
93
+ end
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ class Railtie < Rails::Railtie
5
+ initializer "langchain" do
6
+ ActiveSupport.on_load(:active_record) do
7
+ ::ActiveRecord::Base.include Langchain::ActiveRecord::Hooks
8
+ end
9
+ end
10
+ end
11
+ end
@@ -14,7 +14,7 @@ module Langchain::Vectorsearch
14
14
  # Initialize the Weaviate adapter
15
15
  # @param url [String] The URL of the Weaviate instance
16
16
  # @param api_key [String] The API key to use
17
- # @param index_name [String] The name of the index to use
17
+ # @param index_name [String] The capitalized name of the index to use
18
18
  # @param llm [Object] The LLM client to use
19
19
  def initialize(url:, api_key:, index_name:, llm:)
20
20
  depends_on "weaviate-ruby"
@@ -24,6 +24,9 @@ module Langchain::Vectorsearch
24
24
  url: url,
25
25
  api_key: api_key
26
26
  )
27
+
28
+ # Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
29
+ # TODO: Capitalize index_name
27
30
  @index_name = index_name
28
31
 
29
32
  super(llm: llm)
@@ -32,31 +35,51 @@ module Langchain::Vectorsearch
32
35
  # Add a list of texts to the index
33
36
  # @param texts [Array] The list of texts to add
34
37
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
36
- objects = Array(texts).map do |text|
37
- {
38
- class: index_name,
39
- properties: {content: text},
40
- vector: llm.embed(text: text)
41
- }
42
- end
43
-
38
+ def add_texts(texts:, ids:)
44
39
  client.objects.batch_create(
45
- objects: objects
40
+ objects: weaviate_objects(texts, ids)
46
41
  )
47
42
  end
48
43
 
44
+ # Update a list of texts in the index
45
+ # @param texts [Array] The list of texts to update
46
+ # @return [Hash] The response from the server
47
+ def update_texts(texts:, ids:)
48
+ uuids = []
49
+
50
+ # Retrieve the UUIDs of the objects to update
51
+ Array(texts).map.with_index do |text, i|
52
+ record = client.query.get(
53
+ class_name: index_name,
54
+ fields: "_additional { id }",
55
+ where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{ids[i]}\" }"
56
+ )
57
+ uuids.push record[0].dig("_additional", "id")
58
+ end
59
+
60
+ # Update the objects
61
+ texts.map.with_index do |text, i|
62
+ client.objects.update(
63
+ class_name: index_name,
64
+ id: uuids[i],
65
+ properties: {
66
+ __id: ids[i].to_s,
67
+ content: text
68
+ },
69
+ vector: llm.embed(text: text)
70
+ )
71
+ end
72
+ end
73
+
49
74
  # Create default schema
50
75
  def create_default_schema
51
76
  client.schema.create(
52
77
  class_name: index_name,
53
78
  vectorizer: "none",
54
79
  properties: [
55
- # TODO: Allow passing in your own IDs
56
- {
57
- dataType: ["text"],
58
- name: "content"
59
- }
80
+ # __id to be used as a pointer to the original document
81
+ {dataType: ["string"], name: "__id"}, # '_id' is a reserved property name (single underscore)
82
+ {dataType: ["text"], name: "content"}
60
83
  ]
61
84
  )
62
85
  end
@@ -82,7 +105,7 @@ module Langchain::Vectorsearch
82
105
  class_name: index_name,
83
106
  near_vector: near_vector,
84
107
  limit: k.to_s,
85
- fields: "content _additional { id }"
108
+ fields: "__id content _additional { id }"
86
109
  )
87
110
  end
88
111
 
@@ -101,5 +124,24 @@ module Langchain::Vectorsearch
101
124
 
102
125
  llm.chat(prompt: prompt)
103
126
  end
127
+
128
+ private
129
+
130
+ def weaviate_objects(texts, ids)
131
+ Array(texts).map.with_index do |text, i|
132
+ weaviate_object(text, ids[i])
133
+ end
134
+ end
135
+
136
+ def weaviate_object(text, id)
137
+ {
138
+ class: index_name,
139
+ properties: {
140
+ __id: id.to_s,
141
+ content: text
142
+ },
143
+ vector: llm.embed(text: text)
144
+ }
145
+ end
104
146
  end
105
147
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.0"
4
+ VERSION = "0.6.1"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -145,6 +145,10 @@ module Langchain
145
145
  autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
146
146
  end
147
147
 
148
+ module ActiveRecord
149
+ autoload :Hooks, "langchain/active_record/hooks"
150
+ end
151
+
148
152
  module OutputParsers
149
153
  autoload :Base, "langchain/output_parsers/base"
150
154
  autoload :StructuredOutputParser, "langchain/output_parsers/structured"
@@ -154,3 +158,5 @@ module Langchain
154
158
  class BaseError < StandardError; end
155
159
  end
156
160
  end
161
+
162
+ require "langchain/railtie" if defined?(Rails)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-23 00:00:00.000000000 Z
11
+ date: 2023-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -483,6 +483,7 @@ files:
483
483
  - examples/store_and_query_with_weaviate.rb
484
484
  - lefthook.yml
485
485
  - lib/langchain.rb
486
+ - lib/langchain/active_record/hooks.rb
486
487
  - lib/langchain/agent/base.rb
487
488
  - lib/langchain/agent/react_agent/react_agent.rb
488
489
  - lib/langchain/agent/react_agent/react_agent_prompt.yaml
@@ -519,6 +520,7 @@ files:
519
520
  - lib/langchain/prompt/few_shot_prompt_template.rb
520
521
  - lib/langchain/prompt/loading.rb
521
522
  - lib/langchain/prompt/prompt_template.rb
523
+ - lib/langchain/railtie.rb
522
524
  - lib/langchain/tool/base.rb
523
525
  - lib/langchain/tool/calculator.rb
524
526
  - lib/langchain/tool/database.rb