langchainrb 0.6.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7fba7b5e03ae75aa4ee3c89dd0322a73bbb9d3ced79f48dda8861af1f4f197b8
4
- data.tar.gz: 404e742b0911305beec3bd22575740fc78ed9005e21295e0f7c348c1bede3e7e
3
+ metadata.gz: 5a6f4e8bb8ecaba6ff4d53bba384bd6338012429a69a0dc7df0a58a476763e7e
4
+ data.tar.gz: 92211a22fca9664831cf4f395a53dedddafc339ab419780932398c07256b737d
5
5
  SHA512:
6
- metadata.gz: c8166375c28abe9bc3a7e02a2ceba2ce1bea8ff4f751beb2c69d6f34aa46ba2b7c6ca34458f79b6dfba6d8908cc5e9b055f4e4e7dc6c4c09ac65f7f589c12eb7
7
- data.tar.gz: fb81a51867575c5fae10b79f0f3ee761b25b75d623f2c071d000b39f84c672d9f7164e3c8442a1f11d7abdff8edd41f7e79c2c3a7cb99fa9952406bc60506ce8
6
+ metadata.gz: b5c84f0a9a54f51799c5318cba243457fcfd6f026c71b8f34e58cf60172d476963f25ea8d24c49b35ed93c893adb9e2844443a22dd9e927ab16318850a11419a
7
+ data.tar.gz: 4664927203ea032f737000c27ec5fa04c96ab606ec8377b4673b48638905b458077d4ab3cb7727fcb98be6c607a37bd318395fd96000a734de213c7d9041a219
data/CHANGELOG.md CHANGED
@@ -1,5 +1,8 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.1] - 2023-06-24
4
+ - Adding support to hook vectorsearch into ActiveRecord models
5
+
3
6
  ## [0.6.0] - 2023-06-22
4
7
  - [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
5
8
  - Implement AI21 token validator
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.0)
4
+ langchainrb (0.6.1)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
data/README.md CHANGED
@@ -41,7 +41,7 @@ require "langchain"
41
41
  | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
42
42
  | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
43
43
  | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
44
- | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
44
+ | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
45
45
 
46
46
  ### Using Vector Search Databases 🔍
47
47
 
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module ActiveRecord
5
+ # This module adds the following functionality to your ActiveRecord models:
6
+ # * `vectorsearch` class method to set the vector search provider
7
+ # * `similarity_search` class method to search for similar texts
8
+ # * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
9
+ #
10
+ # Usage:
11
+ # class Recipe < ActiveRecord::Base
12
+ # vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
13
+ # api_key: ENV["WEAVIATE_API_KEY"],
14
+ # url: ENV["WEAVIATE_URL"],
15
+ # index_name: "Recipes",
16
+ # llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
17
+ # )
18
+ #
19
+ # after_save :upsert_to_vectorsearch
20
+ #
21
+ # # Overriding how the model is serialized before it's indexed
22
+ # def as_vector
23
+ # [
24
+ # "Title: #{title}",
25
+ # "Description: #{description}",
26
+ # ...
27
+ # ]
28
+ # .compact
29
+ # .join("\n")
30
+ # end
31
+ # end
32
+ #
33
+ # Create the default schema
34
+ # Recipe.class_variable_get(:@@provider).create_default_schema
35
+ # Query the vector search provider
36
+ # Recipe.similarity_search("carnivore dish")
37
+ # Delete the default schema to start over
38
+ # Recipe.class_variable_get(:@@provider).client.schema.delete class_name: "Recipes"
39
+ #
40
+ module Hooks
41
+ def self.included(base)
42
+ base.extend ClassMethods
43
+ end
44
+
45
+ # Index the text to the vector search provider
46
+ # You'd typically call this method in an ActiveRecord callback
47
+ #
48
+ # @return [Object] the result of the provider's `add_texts` or `update_texts` call
49
+ # @raise [Error] Indexing to vector search DB failed
50
+ def upsert_to_vectorsearch
51
+ if previously_new_record?
52
+ self.class.class_variable_get(:@@provider).add_texts(
53
+ texts: [as_vector],
54
+ ids: [id]
55
+ )
56
+ else
57
+ self.class.class_variable_get(:@@provider).update_texts(
58
+ texts: [as_vector],
59
+ ids: [id]
60
+ )
61
+ end
62
+ end
63
+
64
+ # Used to serialize the DB record to an indexable vector text
65
+ # Override this method in your model to customize
66
+ #
67
+ # @return [String] the text representation of the model
68
+ def as_vector
69
+ to_json
70
+ end
71
+
72
+ module ClassMethods
73
+ # Set the vector search provider
74
+ #
75
+ # @param provider [Object] The `Langchain::Vectorsearch::*` instance
76
+ def vectorsearch(provider:)
77
+ class_variable_set(:@@provider, provider)
78
+ end
79
+
80
+ # Search for similar texts
81
+ #
82
+ # @param query [String] The query to search for
83
+ # @param k [Integer] The number of results to return
84
+ # @return [ActiveRecord::Relation] The ActiveRecord relation
85
+ def similarity_search(query, k: 1)
86
+ records = class_variable_get(:@@provider).similarity_search(
87
+ query: query,
88
+ k: k
89
+ )
90
+ ids = records.map { |record| record.dig("__id") }
91
+ where(id: ids)
92
+ end
93
+ end
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ class Railtie < Rails::Railtie
5
+ initializer "langchain" do
6
+ ActiveSupport.on_load(:active_record) do
7
+ ::ActiveRecord::Base.include Langchain::ActiveRecord::Hooks
8
+ end
9
+ end
10
+ end
11
+ end
@@ -14,7 +14,7 @@ module Langchain::Vectorsearch
14
14
  # Initialize the Weaviate adapter
15
15
  # @param url [String] The URL of the Weaviate instance
16
16
  # @param api_key [String] The API key to use
17
- # @param index_name [String] The name of the index to use
17
+ # @param index_name [String] The capitalized name of the index to use
18
18
  # @param llm [Object] The LLM client to use
19
19
  def initialize(url:, api_key:, index_name:, llm:)
20
20
  depends_on "weaviate-ruby"
@@ -24,6 +24,9 @@ module Langchain::Vectorsearch
24
24
  url: url,
25
25
  api_key: api_key
26
26
  )
27
+
28
+ # Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
29
+ # TODO: Capitalize index_name
27
30
  @index_name = index_name
28
31
 
29
32
  super(llm: llm)
@@ -32,31 +35,51 @@ module Langchain::Vectorsearch
32
35
  # Add a list of texts to the index
33
36
  # @param texts [Array] The list of texts to add
34
37
  # @return [Hash] The response from the server
35
- def add_texts(texts:)
36
- objects = Array(texts).map do |text|
37
- {
38
- class: index_name,
39
- properties: {content: text},
40
- vector: llm.embed(text: text)
41
- }
42
- end
43
-
38
+ def add_texts(texts:, ids:)
44
39
  client.objects.batch_create(
45
- objects: objects
40
+ objects: weaviate_objects(texts, ids)
46
41
  )
47
42
  end
48
43
 
44
+ # Update a list of texts in the index
45
+ # @param texts [Array] The list of texts to update
46
+ # @return [Hash] The response from the server
47
+ def update_texts(texts:, ids:)
48
+ uuids = []
49
+
50
+ # Retrieve the UUIDs of the objects to update
51
+ Array(texts).map.with_index do |text, i|
52
+ record = client.query.get(
53
+ class_name: index_name,
54
+ fields: "_additional { id }",
55
+ where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{ids[i]}\" }"
56
+ )
57
+ uuids.push record[0].dig("_additional", "id")
58
+ end
59
+
60
+ # Update the objects
61
+ texts.map.with_index do |text, i|
62
+ client.objects.update(
63
+ class_name: index_name,
64
+ id: uuids[i],
65
+ properties: {
66
+ __id: ids[i].to_s,
67
+ content: text
68
+ },
69
+ vector: llm.embed(text: text)
70
+ )
71
+ end
72
+ end
73
+
49
74
  # Create default schema
50
75
  def create_default_schema
51
76
  client.schema.create(
52
77
  class_name: index_name,
53
78
  vectorizer: "none",
54
79
  properties: [
55
- # TODO: Allow passing in your own IDs
56
- {
57
- dataType: ["text"],
58
- name: "content"
59
- }
80
+ # __id is used as a pointer to the original document
81
+ {dataType: ["string"], name: "__id"}, # '_id' is a reserved property name (single underscore)
82
+ {dataType: ["text"], name: "content"}
60
83
  ]
61
84
  )
62
85
  end
@@ -82,7 +105,7 @@ module Langchain::Vectorsearch
82
105
  class_name: index_name,
83
106
  near_vector: near_vector,
84
107
  limit: k.to_s,
85
- fields: "content _additional { id }"
108
+ fields: "__id content _additional { id }"
86
109
  )
87
110
  end
88
111
 
@@ -101,5 +124,24 @@ module Langchain::Vectorsearch
101
124
 
102
125
  llm.chat(prompt: prompt)
103
126
  end
127
+
128
+ private
129
+
130
+ def weaviate_objects(texts, ids)
131
+ Array(texts).map.with_index do |text, i|
132
+ weaviate_object(text, ids[i])
133
+ end
134
+ end
135
+
136
+ def weaviate_object(text, id)
137
+ {
138
+ class: index_name,
139
+ properties: {
140
+ __id: id.to_s,
141
+ content: text
142
+ },
143
+ vector: llm.embed(text: text)
144
+ }
145
+ end
104
146
  end
105
147
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.0"
4
+ VERSION = "0.6.1"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -145,6 +145,10 @@ module Langchain
145
145
  autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
146
146
  end
147
147
 
148
+ module ActiveRecord
149
+ autoload :Hooks, "langchain/active_record/hooks"
150
+ end
151
+
148
152
  module OutputParsers
149
153
  autoload :Base, "langchain/output_parsers/base"
150
154
  autoload :StructuredOutputParser, "langchain/output_parsers/structured"
@@ -154,3 +158,5 @@ module Langchain
154
158
  class BaseError < StandardError; end
155
159
  end
156
160
  end
161
+
162
+ require "langchain/railtie" if defined?(Rails)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-23 00:00:00.000000000 Z
11
+ date: 2023-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -483,6 +483,7 @@ files:
483
483
  - examples/store_and_query_with_weaviate.rb
484
484
  - lefthook.yml
485
485
  - lib/langchain.rb
486
+ - lib/langchain/active_record/hooks.rb
486
487
  - lib/langchain/agent/base.rb
487
488
  - lib/langchain/agent/react_agent/react_agent.rb
488
489
  - lib/langchain/agent/react_agent/react_agent_prompt.yaml
@@ -519,6 +520,7 @@ files:
519
520
  - lib/langchain/prompt/few_shot_prompt_template.rb
520
521
  - lib/langchain/prompt/loading.rb
521
522
  - lib/langchain/prompt/prompt_template.rb
523
+ - lib/langchain/railtie.rb
522
524
  - lib/langchain/tool/base.rb
523
525
  - lib/langchain/tool/calculator.rb
524
526
  - lib/langchain/tool/database.rb