langchainrb 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/Gemfile.lock +1 -1
- data/README.md +1 -1
- data/lib/langchain/active_record/hooks.rb +96 -0
- data/lib/langchain/railtie.rb +11 -0
- data/lib/langchain/vectorsearch/weaviate.rb +59 -17
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +6 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a6f4e8bb8ecaba6ff4d53bba384bd6338012429a69a0dc7df0a58a476763e7e
|
4
|
+
data.tar.gz: 92211a22fca9664831cf4f395a53dedddafc339ab419780932398c07256b737d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5c84f0a9a54f51799c5318cba243457fcfd6f026c71b8f34e58cf60172d476963f25ea8d24c49b35ed93c893adb9e2844443a22dd9e927ab16318850a11419a
|
7
|
+
data.tar.gz: 4664927203ea032f737000c27ec5fa04c96ab606ec8377b4673b48638905b458077d4ab3cb7727fcb98be6c607a37bd318395fd96000a734de213c7d9041a219
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -41,7 +41,7 @@ require "langchain"
|
|
41
41
|
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
42
42
|
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
43
43
|
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
44
|
-
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
44
|
+
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
45
45
|
|
46
46
|
### Using Vector Search Databases 🔍
|
47
47
|
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
module ActiveRecord
|
5
|
+
# This module adds the following functionality to your ActiveRecord models:
|
6
|
+
# * `vectorsearch` class method to set the vector search provider
|
7
|
+
# * `similarity_search` class method to search for similar texts
|
8
|
+
# * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
|
9
|
+
#
|
10
|
+
# Usage:
|
11
|
+
# class Recipe < ActiveRecord::Base
|
12
|
+
# vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
|
13
|
+
# api_key: ENV["WEAVIATE_API_KEY"],
|
14
|
+
# url: ENV["WEAVIATE_URL"],
|
15
|
+
# index_name: "Recipes",
|
16
|
+
# llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
17
|
+
# )
|
18
|
+
#
|
19
|
+
# after_save :upsert_to_vectorsearch
|
20
|
+
#
|
21
|
+
# # Overwriting how the model is serialized before it's indexed
|
22
|
+
# def as_vector
|
23
|
+
# [
|
24
|
+
# "Title: #{title}",
|
25
|
+
# "Description: #{description}",
|
26
|
+
# ...
|
27
|
+
# ]
|
28
|
+
# .compact
|
29
|
+
# .join("\n")
|
30
|
+
# end
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# Create the default schema
|
34
|
+
# Recipe.class_variable_get(:@@provider).create_default_schema
|
35
|
+
# Query the vector search provider
|
36
|
+
# Recipe.similarity_search("carnivore dish")
|
37
|
+
# Delete the default schema to start over
|
38
|
+
# Recipe.class_variable_get(:@@provider).client.schema.delete class_name: "Recipes"
|
39
|
+
#
|
40
|
+
module Hooks
  # Ruby callback fired when this module is included: adds the class-level API
  # (`vectorsearch`, `similarity_search`) from ClassMethods to the including class.
  #
  # @param base [Class] The class that included this module
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Index the record in the vector search provider.
  # You'd typically call this method in an ActiveRecord callback.
  #
  # A record for which `previously_new_record?` is true is sent with `add_texts`;
  # every other record is sent with `update_texts`.
  #
  # @return [Object] Whatever the provider's `add_texts` / `update_texts` call returns
  # @raise [NameError] If no provider was registered with `vectorsearch` (raised by `class_variable_get`)
  def upsert_to_vectorsearch
    operation = previously_new_record? ? :add_texts : :update_texts
    provider = self.class.class_variable_get(:@@provider)

    provider.public_send(operation, texts: [as_vector], ids: [id])
  end

  # Used to serialize the DB record to an indexable vector text.
  # Overwrite this method in your model to customize what gets indexed.
  #
  # @return [String] The text representation of the model (its JSON by default)
  def as_vector
    to_json
  end

  module ClassMethods
    # Set the vector search provider.
    # The provider is stored in the `@@provider` class variable of the calling class; note that
    # Ruby shares class variables between a class and its subclasses.
    #
    # @param provider [Object] The `Langchain::Vectorsearch::*` instance
    # @return [Object] The provider that was stored
    def vectorsearch(provider:)
      class_variable_set(:@@provider, provider)
    end

    # Search for similar texts.
    # Asks the provider for the `k` closest matches and loads the records whose
    # primary key equals the `__id` stored with each match.
    #
    # NOTE(review): `where(id: ...)` is presumably not ordered by similarity - confirm before relying on order.
    #
    # @param query [String] The query to search for
    # @param k [Integer] The number of results to return
    # @return [ActiveRecord::Relation] The result of `where(id: ...)` on the model
    def similarity_search(query, k: 1)
      matches = class_variable_get(:@@provider).similarity_search(query: query, k: k)

      where(id: matches.map { |match| match["__id"] })
    end
  end
end
|
95
|
+
end
|
96
|
+
end
|
@@ -14,7 +14,7 @@ module Langchain::Vectorsearch
|
|
14
14
|
# Initialize the Weaviate adapter
|
15
15
|
# @param url [String] The URL of the Weaviate instance
|
16
16
|
# @param api_key [String] The API key to use
|
17
|
-
# @param index_name [String] The name of the index to use
|
17
|
+
# @param index_name [String] The capitalized name of the index to use
|
18
18
|
# @param llm [Object] The LLM client to use
|
19
19
|
def initialize(url:, api_key:, index_name:, llm:)
|
20
20
|
depends_on "weaviate-ruby"
|
@@ -24,6 +24,9 @@ module Langchain::Vectorsearch
|
|
24
24
|
url: url,
|
25
25
|
api_key: api_key
|
26
26
|
)
|
27
|
+
|
28
|
+
# Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
|
29
|
+
# TODO: Capitalize index_name
|
27
30
|
@index_name = index_name
|
28
31
|
|
29
32
|
super(llm: llm)
|
@@ -32,31 +35,51 @@ module Langchain::Vectorsearch
|
|
32
35
|
# Add a list of texts to the index
|
33
36
|
# @param texts [Array] The list of texts to add
|
34
37
|
# @return [Hash] The response from the server
|
35
|
-
def add_texts(texts:)
|
36
|
-
objects = Array(texts).map do |text|
|
37
|
-
{
|
38
|
-
class: index_name,
|
39
|
-
properties: {content: text},
|
40
|
-
vector: llm.embed(text: text)
|
41
|
-
}
|
42
|
-
end
|
43
|
-
|
38
|
+
# @param ids [Array] Optional ids of the source records, stored as each object's `__id`
#   (positions match `texts`). Defaults to an empty list so callers written against the
#   earlier `add_texts(texts:)` signature keep working; a text without a matching id is
#   stored with an empty `__id`.
def add_texts(texts:, ids: [])
  client.objects.batch_create(
    objects: weaviate_objects(texts, ids)
  )
end
|
48
43
|
|
44
|
+
# Update a list of texts in the index
|
45
|
+
# @param texts [Array] The list of texts to update
|
46
|
+
# @return [Hash] The response from the server
|
47
|
+
# @param ids [Array] The `__id` values of the objects to update (positions match `texts`)
# @return [Array] The server response for each updated object, in the order of `texts`
# @raise [ArgumentError] If no object with one of the given `__id` values is found in the index
def update_texts(texts:, ids:)
  # Wrap once so a single String is treated the same way in both passes
  entries = Array(texts).each_with_index.map { |text, index| [text, ids[index]] }

  # First pass: resolve every Weaviate UUID, so a missing object aborts before anything is modified
  uuids = entries.map do |_text, id|
    # Escape backslashes and double quotes so the id cannot end the GraphQL string literal early
    escaped_id = id.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
    records = client.query.get(
      class_name: index_name,
      fields: "_additional { id }",
      where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{escaped_id}\" }"
    )
    first_match = records && records[0]
    uuid = first_match&.dig("_additional", "id")
    raise ArgumentError, "No object with __id \"#{id}\" found in index \"#{index_name}\"" unless uuid

    uuid
  end

  # Second pass: replace the properties and the embedding of each object
  entries.zip(uuids).map do |(text, id), uuid|
    client.objects.update(
      class_name: index_name,
      id: uuid,
      properties: {
        __id: id.to_s,
        content: text
      },
      vector: llm.embed(text: text)
    )
  end
end
|
73
|
+
|
49
74
|
# Create default schema
|
50
75
|
def create_default_schema
  # `__id` is used as a pointer to the original document;
  # '_id' (single underscore) is a reserved property name
  id_property = {dataType: ["string"], name: "__id"}
  # `content` holds the indexed text itself
  content_property = {dataType: ["text"], name: "content"}

  # Vectors are supplied by the caller, hence vectorizer "none"
  client.schema.create(
    class_name: index_name,
    vectorizer: "none",
    properties: [id_property, content_property]
  )
end
|
@@ -82,7 +105,7 @@ module Langchain::Vectorsearch
|
|
82
105
|
class_name: index_name,
|
83
106
|
near_vector: near_vector,
|
84
107
|
limit: k.to_s,
|
85
|
-
fields: "content _additional { id }"
|
108
|
+
fields: "__id content _additional { id }"
|
86
109
|
)
|
87
110
|
end
|
88
111
|
|
@@ -101,5 +124,24 @@ module Langchain::Vectorsearch
|
|
101
124
|
|
102
125
|
llm.chat(prompt: prompt)
|
103
126
|
end
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
# Build one Weaviate object payload per text, pairing each text with the id at the same position.
#
# @param texts [Array, String] The texts to convert (a single value is wrapped with `Array()`)
# @param ids [Array] The ids, looked up by position
# @return [Array] The results of `weaviate_object` for each text
def weaviate_objects(texts, ids)
  Array(texts).each_with_index.map { |text, position| weaviate_object(text, ids[position]) }
end
|
135
|
+
|
136
|
+
# Build the payload for a single Weaviate object.
#
# @param text [String] The text stored in `content` and passed to `llm.embed`
# @param id [Object] The id of the source record; stored as a String in `__id`
# @return [Hash] A hash with the `:class`, `:properties` and `:vector` keys
def weaviate_object(text, id)
  payload = {class: index_name}
  payload[:properties] = {__id: id.to_s, content: text}
  payload[:vector] = llm.embed(text: text)
  payload
end
|
104
146
|
end
|
105
147
|
end
|
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
@@ -145,6 +145,10 @@ module Langchain
|
|
145
145
|
autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
|
146
146
|
end
|
147
147
|
|
148
|
+
module ActiveRecord
|
149
|
+
autoload :Hooks, "langchain/active_record/hooks"
|
150
|
+
end
|
151
|
+
|
148
152
|
module OutputParsers
|
149
153
|
autoload :Base, "langchain/output_parsers/base"
|
150
154
|
autoload :StructuredOutputParser, "langchain/output_parsers/structured"
|
@@ -154,3 +158,5 @@ module Langchain
|
|
154
158
|
class BaseError < StandardError; end
|
155
159
|
end
|
156
160
|
end
|
161
|
+
|
162
|
+
require "langchain/railtie" if defined?(Rails)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -483,6 +483,7 @@ files:
|
|
483
483
|
- examples/store_and_query_with_weaviate.rb
|
484
484
|
- lefthook.yml
|
485
485
|
- lib/langchain.rb
|
486
|
+
- lib/langchain/active_record/hooks.rb
|
486
487
|
- lib/langchain/agent/base.rb
|
487
488
|
- lib/langchain/agent/react_agent/react_agent.rb
|
488
489
|
- lib/langchain/agent/react_agent/react_agent_prompt.yaml
|
@@ -519,6 +520,7 @@ files:
|
|
519
520
|
- lib/langchain/prompt/few_shot_prompt_template.rb
|
520
521
|
- lib/langchain/prompt/loading.rb
|
521
522
|
- lib/langchain/prompt/prompt_template.rb
|
523
|
+
- lib/langchain/railtie.rb
|
522
524
|
- lib/langchain/tool/base.rb
|
523
525
|
- lib/langchain/tool/calculator.rb
|
524
526
|
- lib/langchain/tool/database.rb
|