langchainrb_rails 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +28 -0
- data/.tool-versions +1 -0
- data/CHANGELOG.md +8 -1
- data/Gemfile +4 -0
- data/Gemfile.lock +27 -3
- data/README.md +90 -8
- data/lib/langchainrb_overrides/vectorsearch/pgvector.rb +124 -0
- data/lib/langchainrb_rails/active_record/hooks.rb +22 -5
- data/lib/langchainrb_rails/generators/langchainrb_rails/base_generator.rb +24 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/chroma_generator.rb +54 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/pgvector_generator.rb +75 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/pinecone_generator.rb +3 -19
- data/lib/langchainrb_rails/generators/langchainrb_rails/templates/add_vector_column_template.rb.tt +10 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/templates/chroma_initializer.rb.tt +9 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/templates/enable_vector_extension_template.rb.tt +5 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/templates/pgvector_initializer.rb.tt +7 -0
- data/lib/langchainrb_rails/railtie.rb +2 -0
- data/lib/langchainrb_rails/version.rb +1 -1
- data/lib/langchainrb_rails.rb +5 -0
- metadata +13 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 37760a671cfe732f12ddb79769997ae325240b5c1b0bb96ee3e13786d605cc6a
|
|
4
|
+
data.tar.gz: 89b9aa5c04ffb28823836cfef7428990e27e1d38dc231240fb4e7004ab3db614
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 78c3d36d67ace05932ef36bc9e9136837bb350e5698aad55836f3d6dea0602228f461f2ae1aafc10ec050151d7f6b71e5a7cca21357b1a5f6bf2410f38596a69
|
|
7
|
+
data.tar.gz: fa5a1bdd97e82d71f9b4b35d0c1f15245c87966cdef60117364174f8306a7df72848dda3b52e2635844a5ae05d25a9e1c6166bf7a25b9ccd52565ae787c8fc86
|
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
AllCops:
|
|
2
|
+
TargetRubyVersion: 2.7
|
|
3
|
+
NewCops: enable
|
|
4
|
+
Exclude:
|
|
5
|
+
- 'bin/**/*'
|
|
6
|
+
- 'db/schema.rb'
|
|
7
|
+
- 'vendor/**/*'
|
|
8
|
+
- 'spec/fixtures/**/*'
|
|
9
|
+
|
|
10
|
+
Metrics/LineLength:
|
|
11
|
+
Max: 140
|
|
12
|
+
|
|
13
|
+
Metrics/BlockLength:
|
|
14
|
+
Exclude:
|
|
15
|
+
- 'spec/**/*.rb'
|
|
16
|
+
|
|
17
|
+
Style/Documentation:
|
|
18
|
+
Enabled: false
|
|
19
|
+
|
|
20
|
+
Style/FrozenStringLiteralComment:
|
|
21
|
+
Enabled: true
|
|
22
|
+
|
|
23
|
+
Style/StringLiterals:
|
|
24
|
+
Enabled: false
|
|
25
|
+
|
|
26
|
+
Lint/SuppressedException:
|
|
27
|
+
Exclude:
|
|
28
|
+
- 'spec/**/*.rb'
|
data/.tool-versions
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ruby 3.1.2
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
langchainrb_rails (0.1.
|
|
4
|
+
langchainrb_rails (0.1.3)
|
|
5
5
|
langchainrb (~> 0.7.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
@@ -86,7 +86,11 @@ GEM
|
|
|
86
86
|
baran (0.1.9)
|
|
87
87
|
base64 (0.1.1)
|
|
88
88
|
bigdecimal (3.1.4)
|
|
89
|
+
brakeman (6.0.1)
|
|
89
90
|
builder (3.2.4)
|
|
91
|
+
bundler-audit (0.9.1)
|
|
92
|
+
bundler (>= 1.2.0, < 3)
|
|
93
|
+
thor (~> 1.0)
|
|
90
94
|
byebug (11.1.3)
|
|
91
95
|
coderay (1.1.3)
|
|
92
96
|
colorize (0.8.1)
|
|
@@ -109,12 +113,14 @@ GEM
|
|
|
109
113
|
json (2.6.3)
|
|
110
114
|
json-schema (4.0.0)
|
|
111
115
|
addressable (>= 2.8)
|
|
112
|
-
langchainrb (0.7.
|
|
116
|
+
langchainrb (0.7.1)
|
|
113
117
|
baran (~> 0.1.9)
|
|
114
118
|
colorize (~> 0.8.1)
|
|
115
119
|
json-schema (~> 4.0.0)
|
|
120
|
+
matrix
|
|
116
121
|
pragmatic_segmenter (~> 0.3.0)
|
|
117
122
|
tiktoken_ruby (~> 0.0.5)
|
|
123
|
+
to_bool (~> 2.0.0)
|
|
118
124
|
zeitwerk (~> 2.5)
|
|
119
125
|
language_server-protocol (3.17.0.3)
|
|
120
126
|
lint_roller (1.1.0)
|
|
@@ -127,8 +133,10 @@ GEM
|
|
|
127
133
|
net-pop
|
|
128
134
|
net-smtp
|
|
129
135
|
marcel (1.0.2)
|
|
136
|
+
matrix (0.4.2)
|
|
130
137
|
method_source (1.0.0)
|
|
131
138
|
mini_mime (1.1.5)
|
|
139
|
+
mini_portile2 (2.8.5)
|
|
132
140
|
minitest (5.20.0)
|
|
133
141
|
mutex_m (0.1.2)
|
|
134
142
|
net-imap (0.4.2)
|
|
@@ -141,8 +149,13 @@ GEM
|
|
|
141
149
|
net-smtp (0.4.0)
|
|
142
150
|
net-protocol
|
|
143
151
|
nio4r (2.5.9)
|
|
152
|
+
nokogiri (1.15.4)
|
|
153
|
+
mini_portile2 (~> 2.8.2)
|
|
154
|
+
racc (~> 1.4)
|
|
144
155
|
nokogiri (1.15.4-x86_64-darwin)
|
|
145
156
|
racc (~> 1.4)
|
|
157
|
+
nokogiri (1.15.4-x86_64-linux)
|
|
158
|
+
racc (~> 1.4)
|
|
146
159
|
parallel (1.23.0)
|
|
147
160
|
parser (3.2.2.4)
|
|
148
161
|
ast (~> 2.4.1)
|
|
@@ -198,6 +211,7 @@ GEM
|
|
|
198
211
|
zeitwerk (~> 2.6)
|
|
199
212
|
rainbow (3.1.1)
|
|
200
213
|
rake (13.0.6)
|
|
214
|
+
rb_sys (0.9.82)
|
|
201
215
|
rdoc (6.5.0)
|
|
202
216
|
psych (>= 4.0.0)
|
|
203
217
|
regexp_parser (2.8.2)
|
|
@@ -252,8 +266,12 @@ GEM
|
|
|
252
266
|
standard
|
|
253
267
|
stringio (3.0.8)
|
|
254
268
|
thor (1.3.0)
|
|
269
|
+
tiktoken_ruby (0.0.6)
|
|
270
|
+
rb_sys (~> 0.9.68)
|
|
255
271
|
tiktoken_ruby (0.0.6-x86_64-darwin)
|
|
272
|
+
tiktoken_ruby (0.0.6-x86_64-linux)
|
|
256
273
|
timeout (0.4.0)
|
|
274
|
+
to_bool (2.0.0)
|
|
257
275
|
tzinfo (2.0.6)
|
|
258
276
|
concurrent-ruby (~> 1.0)
|
|
259
277
|
unicode (0.4.4.4)
|
|
@@ -263,18 +281,24 @@ GEM
|
|
|
263
281
|
websocket-extensions (>= 0.1.0)
|
|
264
282
|
websocket-extensions (0.1.5)
|
|
265
283
|
yard (0.9.34)
|
|
266
|
-
zeitwerk (2.6.
|
|
284
|
+
zeitwerk (2.6.12)
|
|
267
285
|
|
|
268
286
|
PLATFORMS
|
|
287
|
+
ruby
|
|
269
288
|
x86_64-darwin-19
|
|
289
|
+
x86_64-darwin-22
|
|
290
|
+
x86_64-linux
|
|
270
291
|
|
|
271
292
|
DEPENDENCIES
|
|
293
|
+
brakeman
|
|
294
|
+
bundler-audit
|
|
272
295
|
langchainrb
|
|
273
296
|
langchainrb_rails!
|
|
274
297
|
pry-byebug (~> 3.10.0)
|
|
275
298
|
rails (> 6.0.0)
|
|
276
299
|
rake (~> 13.0)
|
|
277
300
|
rspec (~> 3.0)
|
|
301
|
+
rubocop
|
|
278
302
|
standardrb
|
|
279
303
|
yard (~> 0.9.34)
|
|
280
304
|
|
data/README.md
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
💎🔗 Langchain.rb for Rails
|
|
2
2
|
---
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
👨💻👩💻 CURRENTLY SEEKING PEOPLE TO FORM THE CORE GROUP OF MAINTAINERS WITH
|
|
3
|
+
The fastest way to sprinkle AI ✨ on top of your Rails app. Add OpenAI-powered question-and-answering in minutes.
|
|
6
4
|
|
|
7
5
|

|
|
8
6
|
[](https://badge.fury.io/rb/langchainrb_rails)
|
|
@@ -10,24 +8,107 @@
|
|
|
10
8
|
[](https://github.com/andreibondarev/langchainrb_rails/blob/main/LICENSE.txt)
|
|
11
9
|
[](https://discord.gg/WDARp7J2n8)
|
|
12
10
|
|
|
11
|
+
## Dependencies
|
|
12
|
+
|
|
13
|
+
* Ruby 3.0+
|
|
14
|
+
* Postgres 11+
|
|
15
|
+
|
|
16
|
+
## Table of Contents
|
|
13
17
|
|
|
14
|
-
|
|
18
|
+
- [Installation](#installation)
|
|
19
|
+
- [Generators](#rails-generators)
|
|
15
20
|
|
|
16
21
|
## Installation
|
|
17
22
|
|
|
18
23
|
Install the gem and add to the application's Gemfile by executing:
|
|
19
|
-
|
|
20
|
-
|
|
24
|
+
```bash
|
|
25
|
+
bundle add langchainrb_rails
|
|
26
|
+
```
|
|
21
27
|
|
|
22
28
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
|
29
|
+
```bash
|
|
30
|
+
gem install langchainrb_rails
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Configuration w/ [Pgvector](https://github.com/pgvector/pgvector) (requires Postgres 11+)
|
|
34
|
+
|
|
35
|
+
1. Run the Rails generator to add vectorsearch to your ActiveRecord model
|
|
36
|
+
```bash
|
|
37
|
+
rails generate langchainrb_rails:pgvector --model=Product --llm=openai
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
This adds required dependencies to your Gemfile, creates the `config/initializers/langchainrb_rails.rb` initializer file, database migrations, and adds the necessary code to the ActiveRecord model to enable vectorsearch.
|
|
23
41
|
|
|
24
|
-
|
|
42
|
+
2. Bundle and migrate
|
|
43
|
+
```bash
|
|
44
|
+
bundle install && rails db:migrate
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
3. Set the env var `OPENAI_API_KEY` to your OpenAI API key: https://platform.openai.com/account/api-keys
|
|
48
|
+
```ruby
|
|
49
|
+
ENV["OPENAI_API_KEY"]=
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
4. Generate embeddings for your model
|
|
53
|
+
```ruby
|
|
54
|
+
Product.embed!
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
This can take a while depending on the number of database records.
|
|
58
|
+
|
|
59
|
+
## Usage
|
|
60
|
+
|
|
61
|
+
### Question and Answering
|
|
62
|
+
```ruby
|
|
63
|
+
Product.ask("list the brands of shoes that are in stock")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Returns a `String` with a natural language answer. The answer is assembled using the following steps:
|
|
67
|
+
|
|
68
|
+
1. An embedding is generated for the passed in `question` using the selected LLM.
|
|
69
|
+
2. We calculate a [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) to find records that most closely match your question's embedding.
|
|
70
|
+
3. A prompt is created using the question, and the above records (their `#as_vector` representation) are added as context.
|
|
71
|
+
4. This prompt is passed to the LLM to generate an answer.
|
|
72
|
+
|
|
73
|
+
### Similarity Search
|
|
74
|
+
```ruby
|
|
75
|
+
Product.similarity_search("t-shirt")
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Returns an ActiveRecord relation of the records that most closely match the `query` using vector search.
|
|
79
|
+
|
|
80
|
+
## Customization
|
|
81
|
+
|
|
82
|
+
### Changing the vector representation of a record
|
|
83
|
+
|
|
84
|
+
By default, embeddings are generated by calling the following method on your model instance:
|
|
85
|
+
```ruby
|
|
86
|
+
to_json(except: :embedding)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
You can override this by defining an `#as_vector` method in your model:
|
|
90
|
+
```ruby
|
|
91
|
+
def as_vector
|
|
92
|
+
{ name: name, description: description, category: category.name, ... }.to_json
|
|
93
|
+
end
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Re-generate embeddings after modifying this method:
|
|
97
|
+
|
|
98
|
+
```ruby
|
|
99
|
+
product.embed!
|
|
100
|
+
```
|
|
25
101
|
|
|
26
102
|
## Rails Generators
|
|
27
103
|
|
|
28
|
-
###
|
|
104
|
+
### Pgvector Generator
|
|
29
105
|
|
|
106
|
+
```bash
|
|
107
|
+
rails generate langchainrb_rails:pgvector --model=Product --llm=openai
|
|
30
108
|
```
|
|
109
|
+
|
|
110
|
+
### Pinecone Generator - adds vectorsearch to your ActiveRecord model
|
|
111
|
+
```bash
|
|
31
112
|
rails generate langchainrb_rails:pinecone --model=Product --llm=openai
|
|
32
113
|
```
|
|
33
114
|
|
|
@@ -39,3 +120,4 @@ Pinecone Generator does the following:
|
|
|
39
120
|
1. Creates the `config/initializers/langchainrb_rails.rb` initializer file
|
|
40
121
|
2. Adds necessary code to the ActiveRecord model to enable vectorsearch
|
|
41
122
|
3. Adds `pinecone` gem to the Gemfile
|
|
123
|
+
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Overriding Langchain.rb's Pgvector implementation to use ActiveRecord.
|
|
4
|
+
# Original implementation: https://github.com/andreibondarev/langchainrb/blob/main/lib/langchain/vectorsearch/pgvector.rb
|
|
5
|
+
|
|
6
|
+
module Langchain::Vectorsearch
|
|
7
|
+
class Pgvector < Base
|
|
8
|
+
#
|
|
9
|
+
# The PostgreSQL vector search adapter
|
|
10
|
+
#
|
|
11
|
+
# Gem requirements:
|
|
12
|
+
# gem "pgvector", "~> 0.2"
|
|
13
|
+
#
|
|
14
|
+
# Usage:
|
|
15
|
+
# pgvector = Langchain::Vectorsearch::Pgvector.new(llm:)
|
|
16
|
+
#
|
|
17
|
+
|
|
18
|
+
# The operators supported by the PostgreSQL vector search adapter
|
|
19
|
+
OPERATORS = [
|
|
20
|
+
"cosine",
|
|
21
|
+
"euclidean",
|
|
22
|
+
"inner_product"
|
|
23
|
+
]
|
|
24
|
+
DEFAULT_OPERATOR = "cosine"
|
|
25
|
+
|
|
26
|
+
attr_reader :operator, :llm
|
|
27
|
+
attr_accessor :model
|
|
28
|
+
|
|
29
|
+
# @param url [String] The URL of the PostgreSQL database
|
|
30
|
+
# @param index_name [String] The name of the table to use for the index
|
|
31
|
+
# @param llm [Object] The LLM client to use
|
|
32
|
+
# @param namespace [String] The namespace to use for the index when inserting/querying
|
|
33
|
+
def initialize(llm:)
|
|
34
|
+
# If the line below is called, the generator fails as calls to
|
|
35
|
+
# LangchainrbRails.config.vectorsearch will generate an exception.
|
|
36
|
+
# These happen in the template files.
|
|
37
|
+
# depends_on "neighbor"
|
|
38
|
+
|
|
39
|
+
@operator = DEFAULT_OPERATOR
|
|
40
|
+
|
|
41
|
+
super(llm: llm)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Add a list of texts to the index
|
|
45
|
+
# @param texts [Array<String>] The texts to add to the index
|
|
46
|
+
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
|
|
47
|
+
# @return [Array<Integer>] The ids of the added texts.
|
|
48
|
+
def add_texts(texts:, ids:)
|
|
49
|
+
embeddings = texts.map do |text|
|
|
50
|
+
llm.embed(text: text).embedding
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# I believe the records returned by #find must be in the
|
|
54
|
+
# same order as the embeddings. I _think_ this works for uuid ids but didn't test
|
|
55
|
+
# deeply.
|
|
56
|
+
# TODO - implement find_each so we don't load all records into memory
|
|
57
|
+
model.find(ids).each.with_index do |record, i|
|
|
58
|
+
record.update_column(:embedding, embeddings[i])
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def update_texts(texts:, ids:)
|
|
63
|
+
add_texts(texts: texts, ids: ids)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Invoke a rake task that will create an initializer (`config/initializers/langchain.rb`) file
|
|
67
|
+
# and db/migrations/* files
|
|
68
|
+
def create_default_schema
|
|
69
|
+
Rake::Task["pgvector"].invoke
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Destroy default schema
|
|
73
|
+
def destroy_default_schema
|
|
74
|
+
# Tell the user to rollback the migration
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Search for similar texts in the index
|
|
78
|
+
# @param query [String] The text to search for
|
|
79
|
+
# @param k [Integer] The number of top results to return
|
|
80
|
+
# @return [Array<Hash>] The results of the search
|
|
81
|
+
# TODO - drop the named "query:" param so it is the same interface as #ask?
|
|
82
|
+
def similarity_search(query:, k: 4)
|
|
83
|
+
embedding = llm.embed(text: query).embedding
|
|
84
|
+
|
|
85
|
+
similarity_search_by_vector(
|
|
86
|
+
embedding: embedding,
|
|
87
|
+
k: k
|
|
88
|
+
)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Search for similar texts in the index by the passed in vector.
|
|
92
|
+
# You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
|
|
93
|
+
# @param embedding [Array<Float>] The vector to search for
|
|
94
|
+
# @param k [Integer] The number of top results to return
|
|
95
|
+
# @return [Array<Hash>] The results of the search
|
|
96
|
+
# TODO - drop the named "embedding:" param so it is the same interface as #ask?
|
|
97
|
+
def similarity_search_by_vector(embedding:, k: 4)
|
|
98
|
+
model
|
|
99
|
+
.nearest_neighbors(:embedding, embedding, distance: operator)
|
|
100
|
+
.limit(k)
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Ask a question and return the answer
|
|
104
|
+
# @param question [String] The question to ask
|
|
105
|
+
# @param k [Integer] The number of results to have in context
|
|
106
|
+
# @yield [String] Stream responses back one String at a time
|
|
107
|
+
# @return [String] The answer to the question
|
|
108
|
+
def ask(question, k: 4, &block)
|
|
109
|
+
# Noisy as the embedding column has a lot of data
|
|
110
|
+
ActiveRecord::Base.logger.silence do
|
|
111
|
+
search_results = similarity_search(query: question, k: k)
|
|
112
|
+
|
|
113
|
+
context = search_results.map do |result|
|
|
114
|
+
result.as_vector
|
|
115
|
+
end
|
|
116
|
+
context = context.join("\n---\n")
|
|
117
|
+
|
|
118
|
+
prompt = generate_rag_prompt(question: question, context: context)
|
|
119
|
+
|
|
120
|
+
llm.chat(prompt: prompt, &block)
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
@@ -61,7 +61,9 @@ module LangchainrbRails
|
|
|
61
61
|
#
|
|
62
62
|
# @return [String] the text representation of the model
|
|
63
63
|
def as_vector
|
|
64
|
-
|
|
64
|
+
# Don't vectorize the embedding ... this would happen if it already exists
|
|
65
|
+
# for a record and we update.
|
|
66
|
+
to_json(except: :embedding)
|
|
65
67
|
end
|
|
66
68
|
|
|
67
69
|
module ClassMethods
|
|
@@ -70,6 +72,21 @@ module LangchainrbRails
|
|
|
70
72
|
# @param provider [Object] The `Langchain::Vectorsearch::*` instance
|
|
71
73
|
def vectorsearch
|
|
72
74
|
class_variable_set(:@@provider, LangchainrbRails.config.vectorsearch)
|
|
75
|
+
|
|
76
|
+
# Pgvector-specific configuration
|
|
77
|
+
if LangchainrbRails.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
|
|
78
|
+
has_neighbors(:embedding)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
LangchainrbRails.config.vectorsearch.model = self
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Iterates over records and generates embeddings.
|
|
85
|
+
# Will re-generate for ALL records (not just records with embeddings).
|
|
86
|
+
def embed!
|
|
87
|
+
find_each do |record|
|
|
88
|
+
record.upsert_to_vectorsearch
|
|
89
|
+
end
|
|
73
90
|
end
|
|
74
91
|
|
|
75
92
|
# Search for similar texts
|
|
@@ -84,7 +101,7 @@ module LangchainrbRails
|
|
|
84
101
|
)
|
|
85
102
|
|
|
86
103
|
# We use "__id" when Weaviate is the provider
|
|
87
|
-
ids = records.map { |record| record.
|
|
104
|
+
ids = records.map { |record| record.try("id") || record.dig("__id") }
|
|
88
105
|
where(id: ids)
|
|
89
106
|
end
|
|
90
107
|
|
|
@@ -94,12 +111,12 @@ module LangchainrbRails
|
|
|
94
111
|
# @param k [Integer] The number of results to have in context
|
|
95
112
|
# @yield [String] Stream responses back one String at a time
|
|
96
113
|
# @return [String] The answer to the question
|
|
97
|
-
def ask(question
|
|
114
|
+
def ask(question, k: 4, &block)
|
|
98
115
|
class_variable_get(:@@provider).ask(
|
|
99
|
-
question
|
|
116
|
+
question,
|
|
100
117
|
k: k,
|
|
101
118
|
&block
|
|
102
|
-
)
|
|
119
|
+
).completion
|
|
103
120
|
end
|
|
104
121
|
end
|
|
105
122
|
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require "rails/generators"
|
|
2
|
+
require "rails/generators/active_record"
|
|
3
|
+
|
|
4
|
+
module LangchainrbRails
|
|
5
|
+
module Generators
|
|
6
|
+
class BaseGenerator < Rails::Generators::Base
|
|
7
|
+
include ::ActiveRecord::Generators::Migration
|
|
8
|
+
|
|
9
|
+
class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
|
|
10
|
+
class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
|
|
11
|
+
|
|
12
|
+
# Available LLM providers to be passed in as --llm option
|
|
13
|
+
LLMS = {
|
|
14
|
+
"cohere" => "Langchain::LLM::Cohere",
|
|
15
|
+
"google_palm" => "Langchain::LLM::GooglePalm",
|
|
16
|
+
"hugging_face" => "Langchain::LLM::HuggingFace",
|
|
17
|
+
"llama_cpp" => "Langchain::LLM::LlamaCpp",
|
|
18
|
+
"ollama" => "Langchain::LLM::Ollama",
|
|
19
|
+
"openai" => "Langchain::LLM::OpenAI",
|
|
20
|
+
"replicate" => "Langchain::LLM::Replicate"
|
|
21
|
+
}
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LangchainrbRails
|
|
4
|
+
module Generators
|
|
5
|
+
#
|
|
6
|
+
# ChromaGenerator does the following:
|
|
7
|
+
# 1. Creates the `langchainrb_rails.rb` initializer file
|
|
8
|
+
# 2. Adds necessary code to the ActiveRecord model to enable vectorsearch
|
|
9
|
+
# 3. Adds `chroma-db` gem to the Gemfile
|
|
10
|
+
#
|
|
11
|
+
# Usage:
|
|
12
|
+
# rails generate langchainrb_rails:chroma --model=Product --llm=openai
|
|
13
|
+
#
|
|
14
|
+
class ChromaGenerator < LangchainrbRails::Generators::BaseGenerator
|
|
15
|
+
desc "This generator adds Chroma vectorsearch integration to your ActiveRecord model"
|
|
16
|
+
source_root File.join(__dir__, "templates")
|
|
17
|
+
|
|
18
|
+
# Creates the `langchainrb_rails.rb` initializer file
|
|
19
|
+
def create_initializer_file
|
|
20
|
+
template "chroma_initializer.rb", "config/initializers/langchainrb_rails.rb"
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Adds `vectorsearch` class method to the model and `after_save` callback that calls `upsert_to_vectorsearch()`
|
|
24
|
+
def add_to_model
|
|
25
|
+
inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
|
|
26
|
+
" vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Adds `chroma-db` gem to the Gemfile
|
|
31
|
+
# TODO: Can we automatically run `bundle install`?
|
|
32
|
+
def add_to_gemfile
|
|
33
|
+
gem "chroma-db", version: "~> 0.6.0"
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
private
|
|
37
|
+
|
|
38
|
+
# @return [String] Name of the model
|
|
39
|
+
def model_name
|
|
40
|
+
options["model"]
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# @return [String] LLM provider to use
|
|
44
|
+
def llm
|
|
45
|
+
options["llm"]
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# @return [Langchain::LLM::*] LLM class
|
|
49
|
+
def llm_class
|
|
50
|
+
Langchain::LLM.const_get(LLMS[llm])
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LangchainrbRails
|
|
4
|
+
module Generators
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# rails generate langchainrb_rails:pgvector --model=Product --llm=openai
|
|
8
|
+
#
|
|
9
|
+
class PgvectorGenerator < LangchainrbRails::Generators::BaseGenerator
|
|
10
|
+
desc "This generator adds Pgvector vectorsearch integration to your ActiveRecord model"
|
|
11
|
+
source_root File.join(__dir__, "templates")
|
|
12
|
+
|
|
13
|
+
def copy_migration
|
|
14
|
+
migration_template "enable_vector_extension_template.rb", "db/migrate/enable_vector_extension.rb", migration_version: migration_version
|
|
15
|
+
migration_template "add_vector_column_template.rb", "db/migrate/add_vector_column_to_#{table_name}.rb", migration_version: migration_version
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def create_initializer_file
|
|
19
|
+
template "pgvector_initializer.rb", "config/initializers/langchainrb_rails.rb"
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def migration_version
|
|
23
|
+
"[#{::ActiveRecord::VERSION::MAJOR}.#{::ActiveRecord::VERSION::MINOR}]"
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def add_to_model
|
|
27
|
+
inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
|
|
28
|
+
" vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def add_to_gemfile
|
|
33
|
+
# Dependency for Langchain PgVector
|
|
34
|
+
gem "neighbor"
|
|
35
|
+
gem "ruby-openai"
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def post_install_message
|
|
39
|
+
say "Please do the following to start Q&A with your #{model_name} records:", :green
|
|
40
|
+
say "1. Run `bundle install` to install the new gems."
|
|
41
|
+
say "2. Set `OPENAI_API_KEY` environment variable to your OpenAI API key."
|
|
42
|
+
say "3. Run `rails db:migrate` to apply the database migrations to enable pgvector and add the embedding column."
|
|
43
|
+
say "4. In Rails console, run `#{model_name}.embed!` to set the embeddings for all records."
|
|
44
|
+
say "5. Ask a question in the Rails console, ie: `#{model_name}.ask('[YOUR QUESTION]')`"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
private
|
|
48
|
+
|
|
49
|
+
# @return [String] Name of the model
|
|
50
|
+
def model_name
|
|
51
|
+
options["model"]
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# @return [String] Table name of the model
|
|
55
|
+
def table_name
|
|
56
|
+
model_name.downcase.pluralize
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# @return [String] LLM provider to use
|
|
60
|
+
def llm
|
|
61
|
+
options["llm"]
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# @return [Langchain::LLM::*] LLM class
|
|
65
|
+
def llm_class
|
|
66
|
+
Langchain::LLM.const_get(LLMS[llm])
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# @return [Integer] Dimension of the vector to be used
|
|
70
|
+
def vector_dimension
|
|
71
|
+
llm_class.default_dimension
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module LangchainrbRails
|
|
4
4
|
module Generators
|
|
@@ -11,26 +11,10 @@ module LangchainrbRails
|
|
|
11
11
|
# Usage:
|
|
12
12
|
# rails generate langchainrb_rails:pinecone --model=Product --llm=openai
|
|
13
13
|
#
|
|
14
|
-
class PineconeGenerator <
|
|
14
|
+
class PineconeGenerator < LangchainrbRails::Generators::BaseGenerator
|
|
15
15
|
desc "This generator adds Pinecone vectorsearch integration to your ActiveRecord model"
|
|
16
|
-
|
|
17
|
-
include ::ActiveRecord::Generators::Migration
|
|
18
16
|
source_root File.join(__dir__, "templates")
|
|
19
17
|
|
|
20
|
-
class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
|
|
21
|
-
class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
|
|
22
|
-
|
|
23
|
-
# Available LLM providers to be passed in as --llm option
|
|
24
|
-
LLMS = {
|
|
25
|
-
"cohere" => "Langchain::LLM::Cohere",
|
|
26
|
-
"google_palm" => "Langchain::LLM::GooglePalm",
|
|
27
|
-
"hugging_face" => "Langchain::LLM::HuggingFace",
|
|
28
|
-
"llama_cpp" => "Langchain::LLM::LlamaCpp",
|
|
29
|
-
"ollama" => "Langchain::LLM::Ollama",
|
|
30
|
-
"openai" => "Langchain::LLM::OpenAI",
|
|
31
|
-
"replicate" => "Langchain::LLM::Replicate"
|
|
32
|
-
}
|
|
33
|
-
|
|
34
18
|
# Creates the `langchainrb_rails.rb` initializer file
|
|
35
19
|
def create_initializer_file
|
|
36
20
|
template "pinecone_initializer.rb", "config/initializers/langchainrb_rails.rb"
|
|
@@ -46,7 +30,7 @@ module LangchainrbRails
|
|
|
46
30
|
# Adds `pinecone` gem to the Gemfile
|
|
47
31
|
# TODO: Can we automatically run `bundle install`?
|
|
48
32
|
def add_to_gemfile
|
|
49
|
-
gem "pinecone"
|
|
33
|
+
gem "pinecone", version: "~> 0.1.6"
|
|
50
34
|
end
|
|
51
35
|
|
|
52
36
|
private
|
|
@@ -9,7 +9,9 @@ module LangchainrbRails
|
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
generators do
|
|
12
|
+
require_relative "generators/langchainrb_rails/chroma_generator"
|
|
12
13
|
require_relative "generators/langchainrb_rails/pinecone_generator"
|
|
14
|
+
require_relative "generators/langchainrb_rails/pgvector_generator"
|
|
13
15
|
end
|
|
14
16
|
end
|
|
15
17
|
end
|
data/lib/langchainrb_rails.rb
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "forwardable"
|
|
3
4
|
require "langchain"
|
|
5
|
+
require "rails"
|
|
4
6
|
require_relative "langchainrb_rails/version"
|
|
5
7
|
require "langchainrb_rails/railtie"
|
|
6
8
|
require "langchainrb_rails/config"
|
|
9
|
+
require_relative "langchainrb_overrides/vectorsearch/pgvector"
|
|
7
10
|
|
|
8
11
|
module LangchainrbRails
|
|
9
12
|
class Error < StandardError; end
|
|
@@ -13,6 +16,8 @@ module LangchainrbRails
|
|
|
13
16
|
end
|
|
14
17
|
|
|
15
18
|
module Generators
|
|
19
|
+
autoload :BaseGenerator, "langchainrb_rails/generators/langchainrb_rails/base_generator"
|
|
20
|
+
autoload :ChromaGenerator, "langchainrb_rails/generators/langchainrb_rails/chroma_generator"
|
|
16
21
|
autoload :PgvectorGenerator, "langchainrb_rails/generators/langchainrb_rails/pgvector_generator"
|
|
17
22
|
end
|
|
18
23
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: langchainrb_rails
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrei Bondarev
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-
|
|
11
|
+
date: 2023-11-01 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: langchainrb
|
|
@@ -74,16 +74,26 @@ extensions: []
|
|
|
74
74
|
extra_rdoc_files: []
|
|
75
75
|
files:
|
|
76
76
|
- ".rspec"
|
|
77
|
+
- ".rubocop.yml"
|
|
78
|
+
- ".tool-versions"
|
|
77
79
|
- CHANGELOG.md
|
|
78
80
|
- Gemfile
|
|
79
81
|
- Gemfile.lock
|
|
80
82
|
- LICENSE.txt
|
|
81
83
|
- README.md
|
|
82
84
|
- Rakefile
|
|
85
|
+
- lib/langchainrb_overrides/vectorsearch/pgvector.rb
|
|
83
86
|
- lib/langchainrb_rails.rb
|
|
84
87
|
- lib/langchainrb_rails/active_record/hooks.rb
|
|
85
88
|
- lib/langchainrb_rails/config.rb
|
|
89
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/base_generator.rb
|
|
90
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/chroma_generator.rb
|
|
91
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/pgvector_generator.rb
|
|
86
92
|
- lib/langchainrb_rails/generators/langchainrb_rails/pinecone_generator.rb
|
|
93
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/templates/add_vector_column_template.rb.tt
|
|
94
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/templates/chroma_initializer.rb.tt
|
|
95
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/templates/enable_vector_extension_template.rb.tt
|
|
96
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/templates/pgvector_initializer.rb.tt
|
|
87
97
|
- lib/langchainrb_rails/generators/langchainrb_rails/templates/pinecone_initializer.rb.tt
|
|
88
98
|
- lib/langchainrb_rails/railtie.rb
|
|
89
99
|
- lib/langchainrb_rails/version.rb
|
|
@@ -111,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
111
121
|
- !ruby/object:Gem::Version
|
|
112
122
|
version: '0'
|
|
113
123
|
requirements: []
|
|
114
|
-
rubygems_version: 3.
|
|
124
|
+
rubygems_version: 3.3.7
|
|
115
125
|
signing_key:
|
|
116
126
|
specification_version: 4
|
|
117
127
|
summary: Rails wrapper for langchainrb gem
|