langchainrb_rails 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +28 -0
- data/.tool-versions +1 -0
- data/CHANGELOG.md +8 -1
- data/Gemfile +4 -0
- data/Gemfile.lock +27 -3
- data/README.md +90 -8
- data/lib/langchainrb_overrides/vectorsearch/pgvector.rb +124 -0
- data/lib/langchainrb_rails/active_record/hooks.rb +22 -5
- data/lib/langchainrb_rails/generators/langchainrb_rails/base_generator.rb +24 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/chroma_generator.rb +54 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/pgvector_generator.rb +75 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/pinecone_generator.rb +3 -19
- data/lib/langchainrb_rails/generators/langchainrb_rails/templates/add_vector_column_template.rb.tt +10 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/templates/chroma_initializer.rb.tt +9 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/templates/enable_vector_extension_template.rb.tt +5 -0
- data/lib/langchainrb_rails/generators/langchainrb_rails/templates/pgvector_initializer.rb.tt +7 -0
- data/lib/langchainrb_rails/railtie.rb +2 -0
- data/lib/langchainrb_rails/version.rb +1 -1
- data/lib/langchainrb_rails.rb +5 -0
- metadata +13 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37760a671cfe732f12ddb79769997ae325240b5c1b0bb96ee3e13786d605cc6a
|
4
|
+
data.tar.gz: 89b9aa5c04ffb28823836cfef7428990e27e1d38dc231240fb4e7004ab3db614
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 78c3d36d67ace05932ef36bc9e9136837bb350e5698aad55836f3d6dea0602228f461f2ae1aafc10ec050151d7f6b71e5a7cca21357b1a5f6bf2410f38596a69
|
7
|
+
data.tar.gz: fa5a1bdd97e82d71f9b4b35d0c1f15245c87966cdef60117364174f8306a7df72848dda3b52e2635844a5ae05d25a9e1c6166bf7a25b9ccd52565ae787c8fc86
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
AllCops:
|
2
|
+
TargetRubyVersion: 2.7
|
3
|
+
NewCops: enable
|
4
|
+
Exclude:
|
5
|
+
- 'bin/**/*'
|
6
|
+
- 'db/schema.rb'
|
7
|
+
- 'vendor/**/*'
|
8
|
+
- 'spec/fixtures/**/*'
|
9
|
+
|
10
|
+
Metrics/LineLength:
|
11
|
+
Max: 140
|
12
|
+
|
13
|
+
Metrics/BlockLength:
|
14
|
+
Exclude:
|
15
|
+
- 'spec/**/*.rb'
|
16
|
+
|
17
|
+
Style/Documentation:
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
Style/FrozenStringLiteralComment:
|
21
|
+
Enabled: true
|
22
|
+
|
23
|
+
Style/StringLiterals:
|
24
|
+
Enabled: false
|
25
|
+
|
26
|
+
Lint/SuppressedException:
|
27
|
+
Exclude:
|
28
|
+
- 'spec/**/*.rb'
|
data/.tool-versions
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby 3.1.2
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb_rails (0.1.2)
|
4
|
+
langchainrb_rails (0.1.3)
|
5
5
|
langchainrb (~> 0.7.0)
|
6
6
|
|
7
7
|
GEM
|
@@ -86,7 +86,11 @@ GEM
|
|
86
86
|
baran (0.1.9)
|
87
87
|
base64 (0.1.1)
|
88
88
|
bigdecimal (3.1.4)
|
89
|
+
brakeman (6.0.1)
|
89
90
|
builder (3.2.4)
|
91
|
+
bundler-audit (0.9.1)
|
92
|
+
bundler (>= 1.2.0, < 3)
|
93
|
+
thor (~> 1.0)
|
90
94
|
byebug (11.1.3)
|
91
95
|
coderay (1.1.3)
|
92
96
|
colorize (0.8.1)
|
@@ -109,12 +113,14 @@ GEM
|
|
109
113
|
json (2.6.3)
|
110
114
|
json-schema (4.0.0)
|
111
115
|
addressable (>= 2.8)
|
112
|
-
langchainrb (0.7.
|
116
|
+
langchainrb (0.7.1)
|
113
117
|
baran (~> 0.1.9)
|
114
118
|
colorize (~> 0.8.1)
|
115
119
|
json-schema (~> 4.0.0)
|
120
|
+
matrix
|
116
121
|
pragmatic_segmenter (~> 0.3.0)
|
117
122
|
tiktoken_ruby (~> 0.0.5)
|
123
|
+
to_bool (~> 2.0.0)
|
118
124
|
zeitwerk (~> 2.5)
|
119
125
|
language_server-protocol (3.17.0.3)
|
120
126
|
lint_roller (1.1.0)
|
@@ -127,8 +133,10 @@ GEM
|
|
127
133
|
net-pop
|
128
134
|
net-smtp
|
129
135
|
marcel (1.0.2)
|
136
|
+
matrix (0.4.2)
|
130
137
|
method_source (1.0.0)
|
131
138
|
mini_mime (1.1.5)
|
139
|
+
mini_portile2 (2.8.5)
|
132
140
|
minitest (5.20.0)
|
133
141
|
mutex_m (0.1.2)
|
134
142
|
net-imap (0.4.2)
|
@@ -141,8 +149,13 @@ GEM
|
|
141
149
|
net-smtp (0.4.0)
|
142
150
|
net-protocol
|
143
151
|
nio4r (2.5.9)
|
152
|
+
nokogiri (1.15.4)
|
153
|
+
mini_portile2 (~> 2.8.2)
|
154
|
+
racc (~> 1.4)
|
144
155
|
nokogiri (1.15.4-x86_64-darwin)
|
145
156
|
racc (~> 1.4)
|
157
|
+
nokogiri (1.15.4-x86_64-linux)
|
158
|
+
racc (~> 1.4)
|
146
159
|
parallel (1.23.0)
|
147
160
|
parser (3.2.2.4)
|
148
161
|
ast (~> 2.4.1)
|
@@ -198,6 +211,7 @@ GEM
|
|
198
211
|
zeitwerk (~> 2.6)
|
199
212
|
rainbow (3.1.1)
|
200
213
|
rake (13.0.6)
|
214
|
+
rb_sys (0.9.82)
|
201
215
|
rdoc (6.5.0)
|
202
216
|
psych (>= 4.0.0)
|
203
217
|
regexp_parser (2.8.2)
|
@@ -252,8 +266,12 @@ GEM
|
|
252
266
|
standard
|
253
267
|
stringio (3.0.8)
|
254
268
|
thor (1.3.0)
|
269
|
+
tiktoken_ruby (0.0.6)
|
270
|
+
rb_sys (~> 0.9.68)
|
255
271
|
tiktoken_ruby (0.0.6-x86_64-darwin)
|
272
|
+
tiktoken_ruby (0.0.6-x86_64-linux)
|
256
273
|
timeout (0.4.0)
|
274
|
+
to_bool (2.0.0)
|
257
275
|
tzinfo (2.0.6)
|
258
276
|
concurrent-ruby (~> 1.0)
|
259
277
|
unicode (0.4.4.4)
|
@@ -263,18 +281,24 @@ GEM
|
|
263
281
|
websocket-extensions (>= 0.1.0)
|
264
282
|
websocket-extensions (0.1.5)
|
265
283
|
yard (0.9.34)
|
266
|
-
zeitwerk (2.6.
|
284
|
+
zeitwerk (2.6.12)
|
267
285
|
|
268
286
|
PLATFORMS
|
287
|
+
ruby
|
269
288
|
x86_64-darwin-19
|
289
|
+
x86_64-darwin-22
|
290
|
+
x86_64-linux
|
270
291
|
|
271
292
|
DEPENDENCIES
|
293
|
+
brakeman
|
294
|
+
bundler-audit
|
272
295
|
langchainrb
|
273
296
|
langchainrb_rails!
|
274
297
|
pry-byebug (~> 3.10.0)
|
275
298
|
rails (> 6.0.0)
|
276
299
|
rake (~> 13.0)
|
277
300
|
rspec (~> 3.0)
|
301
|
+
rubocop
|
278
302
|
standardrb
|
279
303
|
yard (~> 0.9.34)
|
280
304
|
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
💎🔗 Langchain.rb for Rails
|
2
2
|
---
|
3
|
-
|
4
|
-
|
5
|
-
👨💻👩💻 CURRENTLY SEEKING PEOPLE TO FORM THE CORE GROUP OF MAINTAINERS WITH
|
3
|
+
The fastest way to sprinkle AI ✨ on top of your Rails app. Add OpenAI-powered question-and-answering in minutes.
|
6
4
|
|
7
5
|
![Tests status](https://github.com/andreibondarev/langchainrb_rails/actions/workflows/ci.yml/badge.svg?branch=main)
|
8
6
|
[![Gem Version](https://badge.fury.io/rb/langchainrb_rails.svg)](https://badge.fury.io/rb/langchainrb_rails)
|
@@ -10,24 +8,107 @@
|
|
10
8
|
[![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb_rails/blob/main/LICENSE.txt)
|
11
9
|
[![](https://dcbadge.vercel.app/api/server/WDARp7J2n8?compact=true&style=flat)](https://discord.gg/WDARp7J2n8)
|
12
10
|
|
11
|
+
## Dependencies
|
12
|
+
|
13
|
+
* Ruby 3.0+
|
14
|
+
* Postgres 11+
|
15
|
+
|
16
|
+
## Table of Contents
|
13
17
|
|
14
|
-
|
18
|
+
- [Installation](#installation)
|
19
|
+
- [Generators](#rails-generators)
|
15
20
|
|
16
21
|
## Installation
|
17
22
|
|
18
23
|
Install the gem and add to the application's Gemfile by executing:
|
19
|
-
|
20
|
-
|
24
|
+
```bash
|
25
|
+
bundle add langchainrb_rails
|
26
|
+
```
|
21
27
|
|
22
28
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
29
|
+
```bash
|
30
|
+
gem install langchainrb_rails
|
31
|
+
```
|
32
|
+
|
33
|
+
## Configuration w/ [Pgvector](https://github.com/pgvector/pgvector) (requires Postgres 11+)
|
34
|
+
|
35
|
+
1. Run the Rails generator to add vectorsearch to your ActiveRecord model
|
36
|
+
```bash
|
37
|
+
rails generate langchainrb_rails:pgvector --model=Product --llm=openai
|
38
|
+
```
|
39
|
+
|
40
|
+
This adds required dependencies to your Gemfile, creates the `config/initializers/langchainrb_rails.rb` initializer file, database migrations, and adds the necessary code to the ActiveRecord model to enable vectorsearch.
|
23
41
|
|
24
|
-
|
42
|
+
2. Bundle and migrate
|
43
|
+
```bash
|
44
|
+
bundle install && rails db:migrate
|
45
|
+
```
|
46
|
+
|
47
|
+
3. Set the env var `OPENAI_API_KEY` to your OpenAI API key: https://platform.openai.com/account/api-keys
|
48
|
+
```ruby
|
49
|
+
ENV["OPENAI_API_KEY"]=
|
50
|
+
```
|
51
|
+
|
52
|
+
4. Generate embeddings for your model
|
53
|
+
```ruby
|
54
|
+
Product.embed!
|
55
|
+
```
|
56
|
+
|
57
|
+
This can take a while depending on the number of database records.
|
58
|
+
|
59
|
+
## Usage
|
60
|
+
|
61
|
+
### Question and Answering
|
62
|
+
```ruby
|
63
|
+
Product.ask("list the brands of shoes that are in stock")
|
64
|
+
```
|
65
|
+
|
66
|
+
Returns a `String` with a natural language answer. The answer is assembled using the following steps:
|
67
|
+
|
68
|
+
1. An embedding is generated for the passed in `question` using the selected LLM.
|
69
|
+
2. We calculate a [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) to find records that most closely match your question's embedding.
|
70
|
+
3. A prompt is created using the question, and the above records (their `#as_vector` representation) are added as context.
|
71
|
+
4. This prompt is passed to the LLM to generate an answer
|
72
|
+
|
73
|
+
### Similarity Search
|
74
|
+
```ruby
|
75
|
+
Product.similarity_search("t-shirt")
|
76
|
+
```
|
77
|
+
|
78
|
+
Returns an ActiveRecord relation of the records that most closely match the `query` using vector search.
|
79
|
+
|
80
|
+
## Customization
|
81
|
+
|
82
|
+
### Changing the vector representation of a record
|
83
|
+
|
84
|
+
By default, embeddings are generated by calling the following method on your model instance:
|
85
|
+
```ruby
|
86
|
+
to_json(except: :embedding)
|
87
|
+
```
|
88
|
+
|
89
|
+
You can override this by defining an `#as_vector` method in your model:
|
90
|
+
```ruby
|
91
|
+
def as_vector
|
92
|
+
{ name: name, description: description, category: category.name, ... }.to_json
|
93
|
+
end
|
94
|
+
```
|
95
|
+
|
96
|
+
Re-generate embeddings after modifying this method:
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
product.embed!
|
100
|
+
```
|
25
101
|
|
26
102
|
## Rails Generators
|
27
103
|
|
28
|
-
###
|
104
|
+
### Pgvector Generator
|
29
105
|
|
106
|
+
```bash
|
107
|
+
rails generate langchainrb_rails:pgvector --model=Product --llm=openai
|
30
108
|
```
|
109
|
+
|
110
|
+
### Pinecone Generator - adds vectorsearch to your ActiveRecord model
|
111
|
+
```bash
|
31
112
|
rails generate langchainrb_rails:pinecone --model=Product --llm=openai
|
32
113
|
```
|
33
114
|
|
@@ -39,3 +120,4 @@ Pinecone Generator does the following:
|
|
39
120
|
1. Creates the `config/initializers/langchainrb_rails.rb` initializer file
|
40
121
|
2. Adds necessary code to the ActiveRecord model to enable vectorsearch
|
41
122
|
3. Adds `pinecone` gem to the Gemfile
|
123
|
+
|
@@ -0,0 +1,124 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Overriding Langchain.rb's Pgvector implementation to use ActiveRecord.
|
4
|
+
# Original implementation: https://github.com/andreibondarev/langchainrb/blob/main/lib/langchain/vectorsearch/pgvector.rb
|
5
|
+
|
6
|
+
module Langchain::Vectorsearch
|
7
|
+
class Pgvector < Base
|
8
|
+
#
|
9
|
+
# The PostgreSQL vector search adapter
|
10
|
+
#
|
11
|
+
# Gem requirements:
|
12
|
+
# gem "pgvector", "~> 0.2"
|
13
|
+
#
|
14
|
+
# Usage:
|
15
|
+
# pgvector = Langchain::Vectorsearch::Pgvector.new(llm:)
|
16
|
+
#
|
17
|
+
|
18
|
+
# The operators supported by the PostgreSQL vector search adapter
|
19
|
+
OPERATORS = [
|
20
|
+
"cosine",
|
21
|
+
"euclidean",
|
22
|
+
"inner_product"
|
23
|
+
]
|
24
|
+
DEFAULT_OPERATOR = "cosine"
|
25
|
+
|
26
|
+
attr_reader :operator, :llm
|
27
|
+
attr_accessor :model
|
28
|
+
|
29
|
+
# @param url [String] The URL of the PostgreSQL database
|
30
|
+
# @param index_name [String] The name of the table to use for the index
|
31
|
+
# @param llm [Object] The LLM client to use
|
32
|
+
# @param namespace [String] The namespace to use for the index when inserting/querying
|
33
|
+
def initialize(llm:)
|
34
|
+
# If the line below is called, the generator fails as calls to
|
35
|
+
# LangchainrbRails.config.vectorsearch will generate an exception.
|
36
|
+
# These happen in the template files.
|
37
|
+
# depends_on "neighbor"
|
38
|
+
|
39
|
+
@operator = DEFAULT_OPERATOR
|
40
|
+
|
41
|
+
super(llm: llm)
|
42
|
+
end
|
43
|
+
|
44
|
+
# Add a list of texts to the index
|
45
|
+
# @param texts [Array<String>] The texts to add to the index
|
46
|
+
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
|
47
|
+
# @return [Array<Integer>] The ids of the added texts.
|
48
|
+
def add_texts(texts:, ids:)
|
49
|
+
embeddings = texts.map do |text|
|
50
|
+
llm.embed(text: text).embedding
|
51
|
+
end
|
52
|
+
|
53
|
+
# I believe the records returned by #find must be in the
|
54
|
+
# same order as the embeddings. I _think_ this works for uuid ids but didn't test
|
55
|
+
# deeply.
|
56
|
+
# TODO - implement find_each so we don't load all records into memory
|
57
|
+
model.find(ids).each.with_index do |record, i|
|
58
|
+
record.update_column(:embedding, embeddings[i])
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def update_texts(texts:, ids:)
|
63
|
+
add_texts(texts: texts, ids: ids)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Invoke a rake task that will create an initializer (`config/initializers/langchain.rb`) file
|
67
|
+
# and db/migrations/* files
|
68
|
+
def create_default_schema
|
69
|
+
Rake::Task["pgvector"].invoke
|
70
|
+
end
|
71
|
+
|
72
|
+
# Destroy default schema
|
73
|
+
def destroy_default_schema
|
74
|
+
# Tell the user to rollback the migration
|
75
|
+
end
|
76
|
+
|
77
|
+
# Search for similar texts in the index
|
78
|
+
# @param query [String] The text to search for
|
79
|
+
# @param k [Integer] The number of top results to return
|
80
|
+
# @return [Array<Hash>] The results of the search
|
81
|
+
# TODO - drop the named "query:" param so it is the same interface as #ask?
|
82
|
+
def similarity_search(query:, k: 4)
|
83
|
+
embedding = llm.embed(text: query).embedding
|
84
|
+
|
85
|
+
similarity_search_by_vector(
|
86
|
+
embedding: embedding,
|
87
|
+
k: k
|
88
|
+
)
|
89
|
+
end
|
90
|
+
|
91
|
+
# Search for similar texts in the index by the passed in vector.
|
92
|
+
# You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
|
93
|
+
# @param embedding [Array<Float>] The vector to search for
|
94
|
+
# @param k [Integer] The number of top results to return
|
95
|
+
# @return [Array<Hash>] The results of the search
|
96
|
+
# TODO - drop the named "embedding:" param so it is the same interface as #ask?
|
97
|
+
def similarity_search_by_vector(embedding:, k: 4)
|
98
|
+
model
|
99
|
+
.nearest_neighbors(:embedding, embedding, distance: operator)
|
100
|
+
.limit(k)
|
101
|
+
end
|
102
|
+
|
103
|
+
# Ask a question and return the answer
|
104
|
+
# @param question [String] The question to ask
|
105
|
+
# @param k [Integer] The number of results to have in context
|
106
|
+
# @yield [String] Stream responses back one String at a time
|
107
|
+
# @return [String] The answer to the question
|
108
|
+
def ask(question, k: 4, &block)
|
109
|
+
# Noisy as the embedding column has a lot of data
|
110
|
+
ActiveRecord::Base.logger.silence do
|
111
|
+
search_results = similarity_search(query: question, k: k)
|
112
|
+
|
113
|
+
context = search_results.map do |result|
|
114
|
+
result.as_vector
|
115
|
+
end
|
116
|
+
context = context.join("\n---\n")
|
117
|
+
|
118
|
+
prompt = generate_rag_prompt(question: question, context: context)
|
119
|
+
|
120
|
+
llm.chat(prompt: prompt, &block)
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
@@ -61,7 +61,9 @@ module LangchainrbRails
|
|
61
61
|
#
|
62
62
|
# @return [String] the text representation of the model
|
63
63
|
def as_vector
|
64
|
-
|
64
|
+
# Don't vectorize the embedding ... this would happen if it already exists
|
65
|
+
# for a record and we update.
|
66
|
+
to_json(except: :embedding)
|
65
67
|
end
|
66
68
|
|
67
69
|
module ClassMethods
|
@@ -70,6 +72,21 @@ module LangchainrbRails
|
|
70
72
|
# @param provider [Object] The `Langchain::Vectorsearch::*` instance
|
71
73
|
def vectorsearch
|
72
74
|
class_variable_set(:@@provider, LangchainrbRails.config.vectorsearch)
|
75
|
+
|
76
|
+
# Pgvector-specific configuration
|
77
|
+
if LangchainrbRails.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
|
78
|
+
has_neighbors(:embedding)
|
79
|
+
end
|
80
|
+
|
81
|
+
LangchainrbRails.config.vectorsearch.model = self
|
82
|
+
end
|
83
|
+
|
84
|
+
# Iterates over records and generates embeddings.
|
85
|
+
# Will re-generate for ALL records (not just records with embeddings).
|
86
|
+
def embed!
|
87
|
+
find_each do |record|
|
88
|
+
record.upsert_to_vectorsearch
|
89
|
+
end
|
73
90
|
end
|
74
91
|
|
75
92
|
# Search for similar texts
|
@@ -84,7 +101,7 @@ module LangchainrbRails
|
|
84
101
|
)
|
85
102
|
|
86
103
|
# We use "__id" when Weaviate is the provider
|
87
|
-
ids = records.map { |record| record.
|
104
|
+
ids = records.map { |record| record.try("id") || record.dig("__id") }
|
88
105
|
where(id: ids)
|
89
106
|
end
|
90
107
|
|
@@ -94,12 +111,12 @@ module LangchainrbRails
|
|
94
111
|
# @param k [Integer] The number of results to have in context
|
95
112
|
# @yield [String] Stream responses back one String at a time
|
96
113
|
# @return [String] The answer to the question
|
97
|
-
def ask(question
|
114
|
+
def ask(question, k: 4, &block)
|
98
115
|
class_variable_get(:@@provider).ask(
|
99
|
-
question
|
116
|
+
question,
|
100
117
|
k: k,
|
101
118
|
&block
|
102
|
-
)
|
119
|
+
).completion
|
103
120
|
end
|
104
121
|
end
|
105
122
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require "rails/generators"
|
2
|
+
require "rails/generators/active_record"
|
3
|
+
|
4
|
+
module LangchainrbRails
|
5
|
+
module Generators
|
6
|
+
class BaseGenerator < Rails::Generators::Base
|
7
|
+
include ::ActiveRecord::Generators::Migration
|
8
|
+
|
9
|
+
class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
|
10
|
+
class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
|
11
|
+
|
12
|
+
# Available LLM providers to be passed in as --llm option
|
13
|
+
LLMS = {
|
14
|
+
"cohere" => "Langchain::LLM::Cohere",
|
15
|
+
"google_palm" => "Langchain::LLM::GooglePalm",
|
16
|
+
"hugging_face" => "Langchain::LLM::HuggingFace",
|
17
|
+
"llama_cpp" => "Langchain::LLM::LlamaCpp",
|
18
|
+
"ollama" => "Langchain::LLM::Ollama",
|
19
|
+
"openai" => "Langchain::LLM::OpenAI",
|
20
|
+
"replicate" => "Langchain::LLM::Replicate"
|
21
|
+
}
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module LangchainrbRails
|
4
|
+
module Generators
|
5
|
+
#
|
6
|
+
# ChromaGenerator does the following:
|
7
|
+
# 1. Creates the `langchainrb_rails.rb` initializer file
|
8
|
+
# 2. Adds necessary code to the ActiveRecord model to enable vectorsearch
|
9
|
+
# 3. Adds `chroma-db` gem to the Gemfile
|
10
|
+
#
|
11
|
+
# Usage:
|
12
|
+
# rails generate langchainrb_rails:chroma --model=Product --llm=openai
|
13
|
+
#
|
14
|
+
class ChromaGenerator < LangchainrbRails::Generators::BaseGenerator
|
15
|
+
desc "This generator adds Chroma vectorsearch integration to your ActiveRecord model"
|
16
|
+
source_root File.join(__dir__, "templates")
|
17
|
+
|
18
|
+
# Creates the `langchainrb_rails.rb` initializer file
|
19
|
+
def create_initializer_file
|
20
|
+
template "chroma_initializer.rb", "config/initializers/langchainrb_rails.rb"
|
21
|
+
end
|
22
|
+
|
23
|
+
# Adds `vectorsearch` class method to the model and `after_save` callback that calls `upsert_to_vectorsearch()`
|
24
|
+
def add_to_model
|
25
|
+
inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
|
26
|
+
" vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Adds `chroma-db` gem to the Gemfile
|
31
|
+
# TODO: Can we automatically run `bundle install`?
|
32
|
+
def add_to_gemfile
|
33
|
+
gem "chroma-db", version: "~> 0.6.0"
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# @return [String] Name of the model
|
39
|
+
def model_name
|
40
|
+
options["model"]
|
41
|
+
end
|
42
|
+
|
43
|
+
# @return [String] LLM provider to use
|
44
|
+
def llm
|
45
|
+
options["llm"]
|
46
|
+
end
|
47
|
+
|
48
|
+
# @return [Langchain::LLM::*] LLM class
|
49
|
+
def llm_class
|
50
|
+
Langchain::LLM.const_get(LLMS[llm])
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module LangchainrbRails
|
4
|
+
module Generators
|
5
|
+
#
|
6
|
+
# Usage:
|
7
|
+
# rails generate langchainrb_rails:pgvector --model=Product --llm=openai
|
8
|
+
#
|
9
|
+
class PgvectorGenerator < LangchainrbRails::Generators::BaseGenerator
|
10
|
+
desc "This generator adds Pgvector vectorsearch integration to your ActiveRecord model"
|
11
|
+
source_root File.join(__dir__, "templates")
|
12
|
+
|
13
|
+
def copy_migration
|
14
|
+
migration_template "enable_vector_extension_template.rb", "db/migrate/enable_vector_extension.rb", migration_version: migration_version
|
15
|
+
migration_template "add_vector_column_template.rb", "db/migrate/add_vector_column_to_#{table_name}.rb", migration_version: migration_version
|
16
|
+
end
|
17
|
+
|
18
|
+
def create_initializer_file
|
19
|
+
template "pgvector_initializer.rb", "config/initializers/langchainrb_rails.rb"
|
20
|
+
end
|
21
|
+
|
22
|
+
def migration_version
|
23
|
+
"[#{::ActiveRecord::VERSION::MAJOR}.#{::ActiveRecord::VERSION::MINOR}]"
|
24
|
+
end
|
25
|
+
|
26
|
+
def add_to_model
|
27
|
+
inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
|
28
|
+
" vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def add_to_gemfile
|
33
|
+
# Dependency for Langchain PgVector
|
34
|
+
gem "neighbor"
|
35
|
+
gem "ruby-openai"
|
36
|
+
end
|
37
|
+
|
38
|
+
def post_install_message
|
39
|
+
say "Please do the following to start Q&A with your #{model_name} records:", :green
|
40
|
+
say "1. Run `bundle install` to install the new gems."
|
41
|
+
say "2. Set `OPENAI_API_KEY` environment variable to your OpenAI API key."
|
42
|
+
say "3. Run `rails db:migrate` to apply the database migrations to enable pgvector and add the embedding column."
|
43
|
+
say "4. In Rails console, run `#{model_name}.embed!` to set the embeddings for all records."
|
44
|
+
say "5. Ask a question in the Rails console, ie: `#{model_name}.ask('[YOUR QUESTION]')`"
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
# @return [String] Name of the model
|
50
|
+
def model_name
|
51
|
+
options["model"]
|
52
|
+
end
|
53
|
+
|
54
|
+
# @return [String] Table name of the model
|
55
|
+
def table_name
|
56
|
+
model_name.downcase.pluralize
|
57
|
+
end
|
58
|
+
|
59
|
+
# @return [String] LLM provider to use
|
60
|
+
def llm
|
61
|
+
options["llm"]
|
62
|
+
end
|
63
|
+
|
64
|
+
# @return [Langchain::LLM::*] LLM class
|
65
|
+
def llm_class
|
66
|
+
Langchain::LLM.const_get(LLMS[llm])
|
67
|
+
end
|
68
|
+
|
69
|
+
# @return [Integer] Dimension of the vector to be used
|
70
|
+
def vector_dimension
|
71
|
+
llm_class.default_dimension
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module LangchainrbRails
|
4
4
|
module Generators
|
@@ -11,26 +11,10 @@ module LangchainrbRails
|
|
11
11
|
# Usage:
|
12
12
|
# rails generate langchainrb_rails:pinecone --model=Product --llm=openai
|
13
13
|
#
|
14
|
-
class PineconeGenerator <
|
14
|
+
class PineconeGenerator < LangchainrbRails::Generators::BaseGenerator
|
15
15
|
desc "This generator adds Pinecone vectorsearch integration to your ActiveRecord model"
|
16
|
-
|
17
|
-
include ::ActiveRecord::Generators::Migration
|
18
16
|
source_root File.join(__dir__, "templates")
|
19
17
|
|
20
|
-
class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
|
21
|
-
class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
|
22
|
-
|
23
|
-
# Available LLM providers to be passed in as --llm option
|
24
|
-
LLMS = {
|
25
|
-
"cohere" => "Langchain::LLM::Cohere",
|
26
|
-
"google_palm" => "Langchain::LLM::GooglePalm",
|
27
|
-
"hugging_face" => "Langchain::LLM::HuggingFace",
|
28
|
-
"llama_cpp" => "Langchain::LLM::LlamaCpp",
|
29
|
-
"ollama" => "Langchain::LLM::Ollama",
|
30
|
-
"openai" => "Langchain::LLM::OpenAI",
|
31
|
-
"replicate" => "Langchain::LLM::Replicate"
|
32
|
-
}
|
33
|
-
|
34
18
|
# Creates the `langchainrb_rails.rb` initializer file
|
35
19
|
def create_initializer_file
|
36
20
|
template "pinecone_initializer.rb", "config/initializers/langchainrb_rails.rb"
|
@@ -46,7 +30,7 @@ module LangchainrbRails
|
|
46
30
|
# Adds `pinecone` gem to the Gemfile
|
47
31
|
# TODO: Can we automatically run `bundle install`?
|
48
32
|
def add_to_gemfile
|
49
|
-
gem "pinecone"
|
33
|
+
gem "pinecone", version: "~> 0.1.6"
|
50
34
|
end
|
51
35
|
|
52
36
|
private
|
@@ -9,7 +9,9 @@ module LangchainrbRails
|
|
9
9
|
end
|
10
10
|
|
11
11
|
generators do
|
12
|
+
require_relative "generators/langchainrb_rails/chroma_generator"
|
12
13
|
require_relative "generators/langchainrb_rails/pinecone_generator"
|
14
|
+
require_relative "generators/langchainrb_rails/pgvector_generator"
|
13
15
|
end
|
14
16
|
end
|
15
17
|
end
|
data/lib/langchainrb_rails.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "forwardable"
|
3
4
|
require "langchain"
|
5
|
+
require "rails"
|
4
6
|
require_relative "langchainrb_rails/version"
|
5
7
|
require "langchainrb_rails/railtie"
|
6
8
|
require "langchainrb_rails/config"
|
9
|
+
require_relative "langchainrb_overrides/vectorsearch/pgvector"
|
7
10
|
|
8
11
|
module LangchainrbRails
|
9
12
|
class Error < StandardError; end
|
@@ -13,6 +16,8 @@ module LangchainrbRails
|
|
13
16
|
end
|
14
17
|
|
15
18
|
module Generators
|
19
|
+
autoload :BaseGenerator, "langchainrb_rails/generators/langchainrb_rails/base_generator"
|
20
|
+
autoload :ChromaGenerator, "langchainrb_rails/generators/langchainrb_rails/chroma_generator"
|
16
21
|
autoload :PgvectorGenerator, "langchainrb_rails/generators/langchainrb_rails/pgvector_generator"
|
17
22
|
end
|
18
23
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb_rails
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-11-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: langchainrb
|
@@ -74,16 +74,26 @@ extensions: []
|
|
74
74
|
extra_rdoc_files: []
|
75
75
|
files:
|
76
76
|
- ".rspec"
|
77
|
+
- ".rubocop.yml"
|
78
|
+
- ".tool-versions"
|
77
79
|
- CHANGELOG.md
|
78
80
|
- Gemfile
|
79
81
|
- Gemfile.lock
|
80
82
|
- LICENSE.txt
|
81
83
|
- README.md
|
82
84
|
- Rakefile
|
85
|
+
- lib/langchainrb_overrides/vectorsearch/pgvector.rb
|
83
86
|
- lib/langchainrb_rails.rb
|
84
87
|
- lib/langchainrb_rails/active_record/hooks.rb
|
85
88
|
- lib/langchainrb_rails/config.rb
|
89
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/base_generator.rb
|
90
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/chroma_generator.rb
|
91
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/pgvector_generator.rb
|
86
92
|
- lib/langchainrb_rails/generators/langchainrb_rails/pinecone_generator.rb
|
93
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/templates/add_vector_column_template.rb.tt
|
94
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/templates/chroma_initializer.rb.tt
|
95
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/templates/enable_vector_extension_template.rb.tt
|
96
|
+
- lib/langchainrb_rails/generators/langchainrb_rails/templates/pgvector_initializer.rb.tt
|
87
97
|
- lib/langchainrb_rails/generators/langchainrb_rails/templates/pinecone_initializer.rb.tt
|
88
98
|
- lib/langchainrb_rails/railtie.rb
|
89
99
|
- lib/langchainrb_rails/version.rb
|
@@ -111,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
121
|
- !ruby/object:Gem::Version
|
112
122
|
version: '0'
|
113
123
|
requirements: []
|
114
|
-
rubygems_version: 3.
|
124
|
+
rubygems_version: 3.3.7
|
115
125
|
signing_key:
|
116
126
|
specification_version: 4
|
117
127
|
summary: Rails wrapper for langchainrb gem
|