langchainrb_rails 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a931eb26d7f80828dd7c24d5c03e89e2c0050a45653904f9e1b03d8f14a5da6f
4
- data.tar.gz: 0f3aaf4c70b6ded7a81351c1fc39131ea86dec28e2ec411fc408f53a07e1fec0
3
+ metadata.gz: 37760a671cfe732f12ddb79769997ae325240b5c1b0bb96ee3e13786d605cc6a
4
+ data.tar.gz: 89b9aa5c04ffb28823836cfef7428990e27e1d38dc231240fb4e7004ab3db614
5
5
  SHA512:
6
- metadata.gz: f61cfaa83f6d799d22ae02d5ae749f55e0918cb42e568050c7b9ca93d3859bbfcc6d0082ef6181ab2c0a6f3f2b376cbc3c3b983915d98a009b4cc3d05eb33a55
7
- data.tar.gz: 026f538572f9a5b63253ea788f1f76e1982182b8dd931cee435c18de9db2f1d2210684af04cdd597a9f6c76d3c9e30133be0268378d9213d965346f653043bdf
6
+ metadata.gz: 78c3d36d67ace05932ef36bc9e9136837bb350e5698aad55836f3d6dea0602228f461f2ae1aafc10ec050151d7f6b71e5a7cca21357b1a5f6bf2410f38596a69
7
+ data.tar.gz: fa5a1bdd97e82d71f9b4b35d0c1f15245c87966cdef60117364174f8306a7df72848dda3b52e2635844a5ae05d25a9e1c6166bf7a25b9ccd52565ae787c8fc86
data/.rubocop.yml ADDED
@@ -0,0 +1,28 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.7
3
+ NewCops: enable
4
+ Exclude:
5
+ - 'bin/**/*'
6
+ - 'db/schema.rb'
7
+ - 'vendor/**/*'
8
+ - 'spec/fixtures/**/*'
9
+
10
+ Metrics/LineLength:
11
+ Max: 140
12
+
13
+ Metrics/BlockLength:
14
+ Exclude:
15
+ - 'spec/**/*.rb'
16
+
17
+ Style/Documentation:
18
+ Enabled: false
19
+
20
+ Style/FrozenStringLiteralComment:
21
+ Enabled: true
22
+
23
+ Style/StringLiterals:
24
+ Enabled: false
25
+
26
+ Lint/SuppressedException:
27
+ Exclude:
28
+ - 'spec/**/*.rb'
data/.tool-versions ADDED
@@ -0,0 +1 @@
1
+ ruby 3.1.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  ## [Unreleased]
2
2
 
3
- ## [0.1.0] - 2023-10-22
3
+ ## [0.1.3] - 2023-11-01
4
+ - Pgvector vectorsearch generator
5
+
6
+ ## [0.1.2] - 2023-10-27
7
+ - Pinecone vectorsearch generator
4
8
 
9
+ ## [0.1.1] - 2023-10-23
10
+
11
+ ## [0.1.0] - 2023-10-22
5
12
  - Initial release
data/Gemfile CHANGED
@@ -9,6 +9,10 @@ gem "rake", "~> 13.0"
9
9
 
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
+ gem "bundler-audit", require: false
13
+ gem "brakeman", require: false
14
+ gem "rubocop", require: false
15
+
12
16
  gem "standardrb"
13
17
 
14
18
  gem "langchainrb"
data/Gemfile.lock CHANGED
@@ -1,34 +1,161 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb_rails (0.1.1)
4
+ langchainrb_rails (0.1.3)
5
5
  langchainrb (~> 0.7.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
+ actioncable (7.1.1)
11
+ actionpack (= 7.1.1)
12
+ activesupport (= 7.1.1)
13
+ nio4r (~> 2.0)
14
+ websocket-driver (>= 0.6.1)
15
+ zeitwerk (~> 2.6)
16
+ actionmailbox (7.1.1)
17
+ actionpack (= 7.1.1)
18
+ activejob (= 7.1.1)
19
+ activerecord (= 7.1.1)
20
+ activestorage (= 7.1.1)
21
+ activesupport (= 7.1.1)
22
+ mail (>= 2.7.1)
23
+ net-imap
24
+ net-pop
25
+ net-smtp
26
+ actionmailer (7.1.1)
27
+ actionpack (= 7.1.1)
28
+ actionview (= 7.1.1)
29
+ activejob (= 7.1.1)
30
+ activesupport (= 7.1.1)
31
+ mail (~> 2.5, >= 2.5.4)
32
+ net-imap
33
+ net-pop
34
+ net-smtp
35
+ rails-dom-testing (~> 2.2)
36
+ actionpack (7.1.1)
37
+ actionview (= 7.1.1)
38
+ activesupport (= 7.1.1)
39
+ nokogiri (>= 1.8.5)
40
+ rack (>= 2.2.4)
41
+ rack-session (>= 1.0.1)
42
+ rack-test (>= 0.6.3)
43
+ rails-dom-testing (~> 2.2)
44
+ rails-html-sanitizer (~> 1.6)
45
+ actiontext (7.1.1)
46
+ actionpack (= 7.1.1)
47
+ activerecord (= 7.1.1)
48
+ activestorage (= 7.1.1)
49
+ activesupport (= 7.1.1)
50
+ globalid (>= 0.6.0)
51
+ nokogiri (>= 1.8.5)
52
+ actionview (7.1.1)
53
+ activesupport (= 7.1.1)
54
+ builder (~> 3.1)
55
+ erubi (~> 1.11)
56
+ rails-dom-testing (~> 2.2)
57
+ rails-html-sanitizer (~> 1.6)
58
+ activejob (7.1.1)
59
+ activesupport (= 7.1.1)
60
+ globalid (>= 0.3.6)
61
+ activemodel (7.1.1)
62
+ activesupport (= 7.1.1)
63
+ activerecord (7.1.1)
64
+ activemodel (= 7.1.1)
65
+ activesupport (= 7.1.1)
66
+ timeout (>= 0.4.0)
67
+ activestorage (7.1.1)
68
+ actionpack (= 7.1.1)
69
+ activejob (= 7.1.1)
70
+ activerecord (= 7.1.1)
71
+ activesupport (= 7.1.1)
72
+ marcel (~> 1.0)
73
+ activesupport (7.1.1)
74
+ base64
75
+ bigdecimal
76
+ concurrent-ruby (~> 1.0, >= 1.0.2)
77
+ connection_pool (>= 2.2.5)
78
+ drb
79
+ i18n (>= 1.6, < 2)
80
+ minitest (>= 5.1)
81
+ mutex_m
82
+ tzinfo (~> 2.0)
10
83
  addressable (2.8.5)
11
84
  public_suffix (>= 2.0.2, < 6.0)
12
85
  ast (2.4.2)
13
86
  baran (0.1.9)
14
87
  base64 (0.1.1)
88
+ bigdecimal (3.1.4)
89
+ brakeman (6.0.1)
90
+ builder (3.2.4)
91
+ bundler-audit (0.9.1)
92
+ bundler (>= 1.2.0, < 3)
93
+ thor (~> 1.0)
15
94
  byebug (11.1.3)
16
95
  coderay (1.1.3)
17
96
  colorize (0.8.1)
97
+ concurrent-ruby (1.2.2)
98
+ connection_pool (2.4.1)
99
+ crass (1.0.6)
100
+ date (3.3.3)
18
101
  diff-lcs (1.5.0)
102
+ drb (2.1.1)
103
+ ruby2_keywords
104
+ erubi (1.12.0)
105
+ globalid (1.2.1)
106
+ activesupport (>= 6.1)
107
+ i18n (1.14.1)
108
+ concurrent-ruby (~> 1.0)
109
+ io-console (0.6.0)
110
+ irb (1.8.3)
111
+ rdoc
112
+ reline (>= 0.3.8)
19
113
  json (2.6.3)
20
114
  json-schema (4.0.0)
21
115
  addressable (>= 2.8)
22
- langchainrb (0.7.0)
116
+ langchainrb (0.7.1)
23
117
  baran (~> 0.1.9)
24
118
  colorize (~> 0.8.1)
25
119
  json-schema (~> 4.0.0)
120
+ matrix
26
121
  pragmatic_segmenter (~> 0.3.0)
27
122
  tiktoken_ruby (~> 0.0.5)
123
+ to_bool (~> 2.0.0)
28
124
  zeitwerk (~> 2.5)
29
125
  language_server-protocol (3.17.0.3)
30
126
  lint_roller (1.1.0)
127
+ loofah (2.21.4)
128
+ crass (~> 1.0.2)
129
+ nokogiri (>= 1.12.0)
130
+ mail (2.8.1)
131
+ mini_mime (>= 0.1.1)
132
+ net-imap
133
+ net-pop
134
+ net-smtp
135
+ marcel (1.0.2)
136
+ matrix (0.4.2)
31
137
  method_source (1.0.0)
138
+ mini_mime (1.1.5)
139
+ mini_portile2 (2.8.5)
140
+ minitest (5.20.0)
141
+ mutex_m (0.1.2)
142
+ net-imap (0.4.2)
143
+ date
144
+ net-protocol
145
+ net-pop (0.1.2)
146
+ net-protocol
147
+ net-protocol (0.2.1)
148
+ timeout
149
+ net-smtp (0.4.0)
150
+ net-protocol
151
+ nio4r (2.5.9)
152
+ nokogiri (1.15.4)
153
+ mini_portile2 (~> 2.8.2)
154
+ racc (~> 1.4)
155
+ nokogiri (1.15.4-x86_64-darwin)
156
+ racc (~> 1.4)
157
+ nokogiri (1.15.4-x86_64-linux)
158
+ racc (~> 1.4)
32
159
  parallel (1.23.0)
33
160
  parser (3.2.2.4)
34
161
  ast (~> 2.4.1)
@@ -41,11 +168,55 @@ GEM
41
168
  pry-byebug (3.10.1)
42
169
  byebug (~> 11.0)
43
170
  pry (>= 0.13, < 0.15)
171
+ psych (5.1.1.1)
172
+ stringio
44
173
  public_suffix (5.0.3)
45
174
  racc (1.7.1)
175
+ rack (3.0.8)
176
+ rack-session (2.0.0)
177
+ rack (>= 3.0.0)
178
+ rack-test (2.1.0)
179
+ rack (>= 1.3)
180
+ rackup (2.1.0)
181
+ rack (>= 3)
182
+ webrick (~> 1.8)
183
+ rails (7.1.1)
184
+ actioncable (= 7.1.1)
185
+ actionmailbox (= 7.1.1)
186
+ actionmailer (= 7.1.1)
187
+ actionpack (= 7.1.1)
188
+ actiontext (= 7.1.1)
189
+ actionview (= 7.1.1)
190
+ activejob (= 7.1.1)
191
+ activemodel (= 7.1.1)
192
+ activerecord (= 7.1.1)
193
+ activestorage (= 7.1.1)
194
+ activesupport (= 7.1.1)
195
+ bundler (>= 1.15.0)
196
+ railties (= 7.1.1)
197
+ rails-dom-testing (2.2.0)
198
+ activesupport (>= 5.0.0)
199
+ minitest
200
+ nokogiri (>= 1.6)
201
+ rails-html-sanitizer (1.6.0)
202
+ loofah (~> 2.21)
203
+ nokogiri (~> 1.14)
204
+ railties (7.1.1)
205
+ actionpack (= 7.1.1)
206
+ activesupport (= 7.1.1)
207
+ irb
208
+ rackup (>= 1.0.0)
209
+ rake (>= 12.2)
210
+ thor (~> 1.0, >= 1.2.2)
211
+ zeitwerk (~> 2.6)
46
212
  rainbow (3.1.1)
47
213
  rake (13.0.6)
214
+ rb_sys (0.9.82)
215
+ rdoc (6.5.0)
216
+ psych (>= 4.0.0)
48
217
  regexp_parser (2.8.2)
218
+ reline (0.3.9)
219
+ io-console (~> 0.5)
49
220
  rexml (3.2.6)
50
221
  rspec (3.12.0)
51
222
  rspec-core (~> 3.12.0)
@@ -78,6 +249,7 @@ GEM
78
249
  rubocop (>= 1.7.0, < 2.0)
79
250
  rubocop-ast (>= 0.4.0)
80
251
  ruby-progressbar (1.13.0)
252
+ ruby2_keywords (0.0.5)
81
253
  standard (1.31.2)
82
254
  language_server-protocol (~> 3.17.0.2)
83
255
  lint_roller (~> 1.0)
@@ -92,21 +264,41 @@ GEM
92
264
  rubocop-performance (~> 1.19.1)
93
265
  standardrb (1.0.1)
94
266
  standard
267
+ stringio (3.0.8)
268
+ thor (1.3.0)
269
+ tiktoken_ruby (0.0.6)
270
+ rb_sys (~> 0.9.68)
95
271
  tiktoken_ruby (0.0.6-x86_64-darwin)
272
+ tiktoken_ruby (0.0.6-x86_64-linux)
273
+ timeout (0.4.0)
274
+ to_bool (2.0.0)
275
+ tzinfo (2.0.6)
276
+ concurrent-ruby (~> 1.0)
96
277
  unicode (0.4.4.4)
97
278
  unicode-display_width (2.5.0)
279
+ webrick (1.8.1)
280
+ websocket-driver (0.7.6)
281
+ websocket-extensions (>= 0.1.0)
282
+ websocket-extensions (0.1.5)
98
283
  yard (0.9.34)
99
- zeitwerk (2.6.11)
284
+ zeitwerk (2.6.12)
100
285
 
101
286
  PLATFORMS
287
+ ruby
102
288
  x86_64-darwin-19
289
+ x86_64-darwin-22
290
+ x86_64-linux
103
291
 
104
292
  DEPENDENCIES
293
+ brakeman
294
+ bundler-audit
105
295
  langchainrb
106
296
  langchainrb_rails!
107
297
  pry-byebug (~> 3.10.0)
298
+ rails (> 6.0.0)
108
299
  rake (~> 13.0)
109
300
  rspec (~> 3.0)
301
+ rubocop
110
302
  standardrb
111
303
  yard (~> 0.9.34)
112
304
 
data/README.md CHANGED
@@ -1,10 +1,6 @@
1
1
  💎🔗 Langchain.rb for Rails
2
2
  ---
3
- Building applications with LLMs through composability
4
-
5
- 👨‍💻👩‍💻 CURRENTLY SEEKING PEOPLE TO FORM THE CORE GROUP OF MAINTAINERS WITH
6
-
7
- :warning: UNDER ACTIVE AND RAPID DEVELOPMENT (MAY BE BUGGY AND UNTESTED)
3
+ The fastest way to sprinkle AI on top of your Rails app. Add OpenAI-powered question-and-answering in minutes.
8
4
 
9
5
  ![Tests status](https://github.com/andreibondarev/langchainrb_rails/actions/workflows/ci.yml/badge.svg?branch=main)
10
6
  [![Gem Version](https://badge.fury.io/rb/langchainrb_rails.svg)](https://badge.fury.io/rb/langchainrb_rails)
@@ -12,21 +8,116 @@
12
8
  [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb_rails/blob/main/LICENSE.txt)
13
9
  [![](https://dcbadge.vercel.app/api/server/WDARp7J2n8?compact=true&style=flat)](https://discord.gg/WDARp7J2n8)
14
10
 
11
+ ## Dependencies
12
+
13
+ * Ruby 3.0+
14
+ * Postgres 11+
15
15
 
16
- Langchain.rb is a library that's an abstraction layer on top many emergent AI, ML and other DS tools. The goal is to abstract complexity and difficult concepts to make building AI/ML-supercharged applications approachable for traditional software engineers.
16
+ ## Table of Contents
17
+
18
+ - [Installation](#installation)
19
+ - [Generators](#rails-generators)
17
20
 
18
21
  ## Installation
19
22
 
20
23
  Install the gem and add to the application's Gemfile by executing:
21
-
22
- bundle add langchainrb_rails
24
+ ```bash
25
+ bundle add langchainrb_rails
26
+ ```
23
27
 
24
28
  If bundler is not being used to manage dependencies, install the gem by executing:
29
+ ```bash
30
+ gem install langchainrb_rails
31
+ ```
32
+
33
+ ## Configuration w/ [Pgvector](https://github.com/pgvector/pgvector) (requires Postgres 11+)
34
+
35
+ 1. Run the Rails generator to add vectorsearch to your ActiveRecord model
36
+ ```bash
37
+ rails generate langchainrb_rails:pgvector --model=Product --llm=openai
38
+ ```
39
+
40
+ This adds required dependencies to your Gemfile, creates the `config/initializers/langchainrb_rails.rb` initializer file, database migrations, and adds the necessary code to the ActiveRecord model to enable vectorsearch.
41
+
42
+ 2. Bundle and migrate
43
+ ```bash
44
+ bundle install && rails db:migrate
45
+ ```
46
+
47
+ 3. Set the env var `OPENAI_API_KEY` to your OpenAI API key: https://platform.openai.com/account/api-keys
48
+ ```ruby
49
+ ENV["OPENAI_API_KEY"] = "your-openai-api-key"
50
+ ```
51
+
52
+ 4. Generate embeddings for your model
53
+ ```ruby
54
+ Product.embed!
55
+ ```
25
56
 
26
- gem install langchainrb_rails
57
+ This can take a while depending on the number of database records.
27
58
 
28
59
  ## Usage
29
60
 
61
+ ### Question and Answering
62
+ ```ruby
63
+ Product.ask("list the brands of shoes that are in stock")
64
+ ```
65
+
66
+ Returns a `String` with a natural language answer. The answer is assembled using the following steps:
67
+
68
+ 1. An embedding is generated for the passed in `question` using the selected LLM.
69
+ 2. We calculate a [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) to find records that most closely match your question's embedding.
70
+ 3. A prompt is created from the question, with the above records (their `#as_vector` representation) added as context.
71
+ 4. This prompt is passed to the LLM to generate an answer.
72
+
73
+ ### Similarity Search
30
74
  ```ruby
31
- require "langchainrb_rails"
75
+ Product.similarity_search("t-shirt")
32
76
  ```
77
+
78
+ Returns an ActiveRecord relation of the records that most closely match the `query` using vector search.
79
+
80
+ ## Customization
81
+
82
+ ### Changing the vector representation of a record
83
+
84
+ By default, embeddings are generated by calling the following method on your model instance:
85
+ ```ruby
86
+ to_json(except: :embedding)
87
+ ```
88
+
89
+ You can override this by defining an `#as_vector` method in your model:
90
+ ```ruby
91
+ def as_vector
92
+ { name: name, description: description, category: category.name, ... }.to_json
93
+ end
94
+ ```
95
+
96
+ Re-generate embeddings after modifying this method:
97
+
98
+ ```ruby
99
+ Product.embed!
100
+ ```
101
+
102
+ ## Rails Generators
103
+
104
+ ### Pgvector Generator
105
+
106
+ ```bash
107
+ rails generate langchainrb_rails:pgvector --model=Product --llm=openai
108
+ ```
109
+
110
+ ### Pinecone Generator - adds vectorsearch to your ActiveRecord model
111
+ ```bash
112
+ rails generate langchainrb_rails:pinecone --model=Product --llm=openai
113
+ ```
114
+
115
+ Available `--llm` options: `cohere`, `google_palm`, `hugging_face`, `llama_cpp`, `ollama`, `openai`, and `replicate`. The selected LLM will be used to generate embeddings and completions.
116
+
117
+ The `--model` option is used to specify which ActiveRecord model vectorsearch capabilities will be added to.
118
+
119
+ Pinecone Generator does the following:
120
+ 1. Creates the `config/initializers/langchainrb_rails.rb` initializer file
121
+ 2. Adds necessary code to the ActiveRecord model to enable vectorsearch
122
+ 3. Adds `pinecone` gem to the Gemfile
123
+
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Overriding Langchain.rb's Pgvector implementation to use ActiveRecord.
4
+ # Original implementation: https://github.com/andreibondarev/langchainrb/blob/main/lib/langchain/vectorsearch/pgvector.rb
5
+
6
+ module Langchain::Vectorsearch
7
+ class Pgvector < Base
8
+ #
9
+ # The PostgreSQL vector search adapter
10
+ #
11
+ # Gem requirements:
12
+ # gem "pgvector", "~> 0.2"
13
+ #
14
+ # Usage:
15
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(llm:)
16
+ #
17
+
18
+ # The operators supported by the PostgreSQL vector search adapter
19
+ OPERATORS = [
20
+ "cosine",
21
+ "euclidean",
22
+ "inner_product"
23
+ ]
24
+ DEFAULT_OPERATOR = "cosine"
25
+
26
+ attr_reader :operator, :llm
27
+ attr_accessor :model
28
+
29
+ # @param url [String] The URL of the PostgreSQL database
30
+ # @param index_name [String] The name of the table to use for the index
31
+ # @param llm [Object] The LLM client to use
32
+ # @param namespace [String] The namespace to use for the index when inserting/querying
33
+ def initialize(llm:)
34
+ # If the line below is called, the generator fails as calls to
35
+ # LangchainrbRails.config.vectorsearch will generate an exception.
36
+ # These happen in the template files.
37
+ # depends_on "neighbor"
38
+
39
+ @operator = DEFAULT_OPERATOR
40
+
41
+ super(llm: llm)
42
+ end
43
+
44
+ # Add a list of texts to the index
45
+ # @param texts [Array<String>] The texts to add to the index
46
+ # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
47
+ # @return [Array<Integer>] The ids of the added texts.
48
+ def add_texts(texts:, ids:)
49
+ embeddings = texts.map do |text|
50
+ llm.embed(text: text).embedding
51
+ end
52
+
53
+ # I believe the records returned by #find must be in the
54
+ # same order as the embeddings. I _think_ this works for uuid ids but didn't test
55
+ # deeply.
56
+ # TODO - implement find_each so we don't load all records into memory
57
+ model.find(ids).each.with_index do |record, i|
58
+ record.update_column(:embedding, embeddings[i])
59
+ end
60
+ end
61
+
62
+ def update_texts(texts:, ids:)
63
+ add_texts(texts: texts, ids: ids)
64
+ end
65
+
66
+ # Invoke a rake task that will create an initializer (`config/initializers/langchain.rb`) file
67
+ # and db/migrations/* files
68
+ def create_default_schema
69
+ Rake::Task["pgvector"].invoke
70
+ end
71
+
72
+ # Destroy default schema
73
+ def destroy_default_schema
74
+ # Tell the user to rollback the migration
75
+ end
76
+
77
+ # Search for similar texts in the index
78
+ # @param query [String] The text to search for
79
+ # @param k [Integer] The number of top results to return
80
+ # @return [Array<Hash>] The results of the search
81
+ # TODO - drop the named "query:" param so it is the same interface as #ask?
82
+ def similarity_search(query:, k: 4)
83
+ embedding = llm.embed(text: query).embedding
84
+
85
+ similarity_search_by_vector(
86
+ embedding: embedding,
87
+ k: k
88
+ )
89
+ end
90
+
91
+ # Search for similar texts in the index by the passed in vector.
92
+ # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
93
+ # @param embedding [Array<Float>] The vector to search for
94
+ # @param k [Integer] The number of top results to return
95
+ # @return [Array<Hash>] The results of the search
96
+ # TODO - drop the named "embedding:" param so it is the same interface as #ask?
97
+ def similarity_search_by_vector(embedding:, k: 4)
98
+ model
99
+ .nearest_neighbors(:embedding, embedding, distance: operator)
100
+ .limit(k)
101
+ end
102
+
103
+ # Ask a question and return the answer
104
+ # @param question [String] The question to ask
105
+ # @param k [Integer] The number of results to have in context
106
+ # @yield [String] Stream responses back one String at a time
107
+ # @return [String] The answer to the question
108
+ def ask(question, k: 4, &block)
109
+ # Noisy as the embedding column has a lot of data
110
+ ActiveRecord::Base.logger.silence do
111
+ search_results = similarity_search(query: question, k: k)
112
+
113
+ context = search_results.map do |result|
114
+ result.as_vector
115
+ end
116
+ context = context.join("\n---\n")
117
+
118
+ prompt = generate_rag_prompt(question: question, context: context)
119
+
120
+ llm.chat(prompt: prompt, &block)
121
+ end
122
+ end
123
+ end
124
+ end
@@ -9,12 +9,7 @@ module LangchainrbRails
9
9
  #
10
10
  # Usage:
11
11
  # class Recipe < ActiveRecord::Base
12
- # vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
13
- # api_key: ENV["WEAVIATE_API_KEY"],
14
- # url: ENV["WEAVIATE_URL"],
15
- # index_name: "Recipes",
16
- # llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
17
- # )
12
+ # vectorsearch
18
13
  #
19
14
  # after_save :upsert_to_vectorsearch
20
15
  #
@@ -66,15 +61,32 @@ module LangchainrbRails
66
61
  #
67
62
  # @return [String] the text representation of the model
68
63
  def as_vector
69
- to_json
64
+ # Don't vectorize the embedding ... this would happen if it already exists
65
+ # for a record and we update.
66
+ to_json(except: :embedding)
70
67
  end
71
68
 
72
69
  module ClassMethods
73
70
  # Set the vector search provider
74
71
  #
75
72
  # @param provider [Object] The `Langchain::Vectorsearch::*` instance
76
- def vectorsearch(provider:)
77
- class_variable_set(:@@provider, provider)
73
+ def vectorsearch
74
+ class_variable_set(:@@provider, LangchainrbRails.config.vectorsearch)
75
+
76
+ # Pgvector-specific configuration
77
+ if LangchainrbRails.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
78
+ has_neighbors(:embedding)
79
+ end
80
+
81
+ LangchainrbRails.config.vectorsearch.model = self
82
+ end
83
+
84
+ # Iterates over records and generates embeddings.
85
+ # Will re-generate for ALL records (not just records with embeddings).
86
+ def embed!
87
+ find_each do |record|
88
+ record.upsert_to_vectorsearch
89
+ end
78
90
  end
79
91
 
80
92
  # Search for similar texts
@@ -89,7 +101,7 @@ module LangchainrbRails
89
101
  )
90
102
 
91
103
  # We use "__id" when Weaviate is the provider
92
- ids = records.map { |record| record.dig("id") || record.dig("__id") }
104
+ ids = records.map { |record| record.try("id") || record.dig("__id") }
93
105
  where(id: ids)
94
106
  end
95
107
 
@@ -99,12 +111,12 @@ module LangchainrbRails
99
111
  # @param k [Integer] The number of results to have in context
100
112
  # @yield [String] Stream responses back one String at a time
101
113
  # @return [String] The answer to the question
102
- def ask(question:, k: 4, &block)
114
+ def ask(question, k: 4, &block)
103
115
  class_variable_get(:@@provider).ask(
104
- question: question,
116
+ question,
105
117
  k: k,
106
118
  &block
107
- )
119
+ ).completion
108
120
  end
109
121
  end
110
122
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ class Config
5
+ # This class is used to configure the gem config inside Rails apps, in the `config/initializers/langchainrb_rails.rb` file.
6
+ #
7
+ # Langchain is configured in the following way:
8
+ # LangchainrbRails.configure do |config|
9
+ # config.vectorsearch = ...
10
+ # end
11
+ attr_accessor :vectorsearch
12
+
13
+ def initialize
14
+ # Define the defaults for future configuration here
15
+ @vectorsearch = {}
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,24 @@
1
+ require "rails/generators"
2
+ require "rails/generators/active_record"
3
+
4
+ module LangchainrbRails
5
+ module Generators
6
+ class BaseGenerator < Rails::Generators::Base
7
+ include ::ActiveRecord::Generators::Migration
8
+
9
+ class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
10
+ class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
11
+
12
+ # Available LLM providers to be passed in as --llm option
13
+ LLMS = {
14
+ "cohere" => "Langchain::LLM::Cohere",
15
+ "google_palm" => "Langchain::LLM::GooglePalm",
16
+ "hugging_face" => "Langchain::LLM::HuggingFace",
17
+ "llama_cpp" => "Langchain::LLM::LlamaCpp",
18
+ "ollama" => "Langchain::LLM::Ollama",
19
+ "openai" => "Langchain::LLM::OpenAI",
20
+ "replicate" => "Langchain::LLM::Replicate"
21
+ }
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ module Generators
5
+ #
6
+ # ChromaGenerator does the following:
7
+ # 1. Creates the `langchainrb_rails.rb` initializer file
8
+ # 2. Adds necessary code to the ActiveRecord model to enable vectorsearch
9
+ # 3. Adds `chroma-db` gem to the Gemfile
10
+ #
11
+ # Usage:
12
+ # rails generate langchainrb_rails:chroma --model=Product --llm=openai
13
+ #
14
+ class ChromaGenerator < LangchainrbRails::Generators::BaseGenerator
15
+ desc "This generator adds Chroma vectorsearch integration to your ActiveRecord model"
16
+ source_root File.join(__dir__, "templates")
17
+
18
+ # Creates the `langchainrb_rails.rb` initializer file
19
+ def create_initializer_file
20
+ template "chroma_initializer.rb", "config/initializers/langchainrb_rails.rb"
21
+ end
22
+
23
+ # Adds `vectorsearch` class method to the model and `after_save` callback that calls `upsert_to_vectorsearch()`
24
+ def add_to_model
25
+ inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
26
+ " vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
27
+ end
28
+ end
29
+
30
+ # Adds `chroma-db` gem to the Gemfile
31
+ # TODO: Can we automatically run `bundle install`?
32
+ def add_to_gemfile
33
+ gem "chroma-db", version: "~> 0.6.0"
34
+ end
35
+
36
+ private
37
+
38
+ # @return [String] Name of the model
39
+ def model_name
40
+ options["model"]
41
+ end
42
+
43
+ # @return [String] LLM provider to use
44
+ def llm
45
+ options["llm"]
46
+ end
47
+
48
+ # @return [Langchain::LLM::*] LLM class
49
+ def llm_class
50
+ Langchain::LLM.const_get(LLMS[llm])
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ module Generators
5
+ #
6
+ # Usage:
7
+ # rails generate langchainrb_rails:pgvector --model=Product --llm=openai
8
+ #
9
+ class PgvectorGenerator < LangchainrbRails::Generators::BaseGenerator
10
+ desc "This generator adds Pgvector vectorsearch integration to your ActiveRecord model"
11
+ source_root File.join(__dir__, "templates")
12
+
13
+ def copy_migration
14
+ migration_template "enable_vector_extension_template.rb", "db/migrate/enable_vector_extension.rb", migration_version: migration_version
15
+ migration_template "add_vector_column_template.rb", "db/migrate/add_vector_column_to_#{table_name}.rb", migration_version: migration_version
16
+ end
17
+
18
+ def create_initializer_file
19
+ template "pgvector_initializer.rb", "config/initializers/langchainrb_rails.rb"
20
+ end
21
+
22
+ def migration_version
23
+ "[#{::ActiveRecord::VERSION::MAJOR}.#{::ActiveRecord::VERSION::MINOR}]"
24
+ end
25
+
26
+ def add_to_model
27
+ inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
28
+ " vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
29
+ end
30
+ end
31
+
32
+ def add_to_gemfile
33
+ # Dependency for Langchain PgVector
34
+ gem "neighbor"
35
+ gem "ruby-openai"
36
+ end
37
+
38
+ def post_install_message
39
+ say "Please do the following to start Q&A with your #{model_name} records:", :green
40
+ say "1. Run `bundle install` to install the new gems."
41
+ say "2. Set `OPENAI_API_KEY` environment variable to your OpenAI API key."
42
+ say "3. Run `rails db:migrate` to apply the database migrations to enable pgvector and add the embedding column."
43
+ say "4. In Rails console, run `#{model_name}.embed!` to set the embeddings for all records."
44
+ say "5. Ask a question in the Rails console, ie: `#{model_name}.ask('[YOUR QUESTION]')`"
45
+ end
46
+
47
+ private
48
+
49
+ # @return [String] Name of the model
50
+ def model_name
51
+ options["model"]
52
+ end
53
+
54
+ # @return [String] Table name of the model
55
+ def table_name
56
+ model_name.downcase.pluralize
57
+ end
58
+
59
+ # @return [String] LLM provider to use
60
+ def llm
61
+ options["llm"]
62
+ end
63
+
64
+ # @return [Langchain::LLM::*] LLM class
65
+ def llm_class
66
+ Langchain::LLM.const_get(LLMS[llm])
67
+ end
68
+
69
+ # @return [Integer] Dimension of the vector to be used
70
+ def vector_dimension
71
+ llm_class.default_dimension
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ module Generators
5
+ #
6
+ # PineconeGenerator does the following:
7
+ # 1. Creates the `langchainrb_rails.rb` initializer file
8
+ # 2. Adds necessary code to the ActiveRecord model to enable vectorsearch
9
+ # 3. Adds `pinecone` gem to the Gemfile
10
+ #
11
+ # Usage:
12
+ # rails generate langchainrb_rails:pinecone --model=Product --llm=openai
13
+ #
14
+ class PineconeGenerator < LangchainrbRails::Generators::BaseGenerator
15
+ desc "This generator adds Pinecone vectorsearch integration to your ActiveRecord model"
16
+ source_root File.join(__dir__, "templates")
17
+
18
+ # Creates the `langchainrb_rails.rb` initializer file
19
+ def create_initializer_file
20
+ template "pinecone_initializer.rb", "config/initializers/langchainrb_rails.rb"
21
+ end
22
+
23
+ # Adds `vectorsearch` class method to the model and `after_save` callback that calls `upsert_to_vectorsearch()`
24
+ def add_to_model
25
+ inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
26
+ " vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
27
+ end
28
+ end
29
+
30
+ # Adds `pinecone` gem to the Gemfile
31
+ # TODO: Can we automatically run `bundle install`?
32
+ def add_to_gemfile
33
+ gem "pinecone", version: "~> 0.1.6"
34
+ end
35
+
36
+ private
37
+
38
+ # @return [String] Name of the model
39
+ def model_name
40
+ options["model"]
41
+ end
42
+
43
+ # @return [String] LLM provider to use
44
+ def llm
45
+ options["llm"]
46
+ end
47
+
48
+ # @return [Langchain::LLM::*] LLM class
49
+ def llm_class
50
+ Langchain::LLM.const_get(LLMS[llm])
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,10 @@
1
+ class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ add_column :<%= table_name %>, :embedding, :vector,
4
+ limit: LangchainrbRails
5
+ .config
6
+ .vectorsearch
7
+ .llm
8
+ .default_dimension
9
+ end
10
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ LangchainrbRails.configure do |config|
4
+ config.vectorsearch = Langchain::Vectorsearch::Chroma.new(
5
+ llm: <%= llm_class %>.new(api_key: ENV["<%= llm.upcase %>_API_KEY"]),
6
+ url: ENV["CHROMA_URL"],
7
+ index_name: ""
8
+ )
9
+ end
@@ -0,0 +1,5 @@
1
+ class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ enable_extension "vector"
4
+ end
5
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ LangchainrbRails.configure do |config|
4
+ config.vectorsearch = Langchain::Vectorsearch::Pgvector.new(
5
+ llm: <%= llm_class %>.new(api_key: ENV["OPENAI_API_KEY"])
6
+ )
7
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ LangchainrbRails.configure do |config|
4
+ config.vectorsearch = Langchain::Vectorsearch::Pinecone.new(
5
+ llm: <%= llm_class %>.new(api_key: ENV["<%= llm.upcase %>_API_KEY"]),
6
+ environment: "",
7
+ api_key: ENV["PINECONE_API_KEY"],
8
+ index_name: ""
9
+ )
10
+ end
@@ -7,5 +7,11 @@ module LangchainrbRails
7
7
  ::ActiveRecord::Base.include LangchainrbRails::ActiveRecord::Hooks
8
8
  end
9
9
  end
10
+
11
+ generators do
12
+ require_relative "generators/langchainrb_rails/chroma_generator"
13
+ require_relative "generators/langchainrb_rails/pinecone_generator"
14
+ require_relative "generators/langchainrb_rails/pgvector_generator"
15
+ end
10
16
  end
11
17
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LangchainrbRails
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.3"
5
5
  end
@@ -1,8 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "forwardable"
3
4
  require "langchain"
5
+ require "rails"
4
6
  require_relative "langchainrb_rails/version"
5
7
  require "langchainrb_rails/railtie"
8
+ require "langchainrb_rails/config"
9
+ require_relative "langchainrb_overrides/vectorsearch/pgvector"
6
10
 
7
11
  module LangchainrbRails
8
12
  class Error < StandardError; end
@@ -10,4 +14,25 @@ module LangchainrbRails
10
14
  module ActiveRecord
11
15
  autoload :Hooks, "langchainrb_rails/active_record/hooks"
12
16
  end
17
+
18
+ module Generators
19
+ autoload :BaseGenerator, "langchainrb_rails/generators/langchainrb_rails/base_generator"
20
+ autoload :ChromaGenerator, "langchainrb_rails/generators/langchainrb_rails/chroma_generator"
21
+ autoload :PgvectorGenerator, "langchainrb_rails/generators/langchainrb_rails/pgvector_generator"
22
+ end
23
+
24
+ class << self
25
+ # Configures global settings for LangchainrbRails
26
+ # LangchainrbRails.configure do |config|
27
+ # config.vectorsearch = ...
28
+ # end
29
+ def configure
30
+ yield(config)
31
+ end
32
+
33
+ # @return [Config] The global configuration object
34
+ def config
35
+ @_config ||= Config.new
36
+ end
37
+ end
13
38
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb_rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-23 00:00:00.000000000 Z
11
+ date: 2023-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: langchainrb
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.9.34
55
+ - !ruby/object:Gem::Dependency
56
+ name: rails
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">"
60
+ - !ruby/object:Gem::Version
61
+ version: 6.0.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">"
67
+ - !ruby/object:Gem::Version
68
+ version: 6.0.0
55
69
  description: Rails wrapper for langchainrb gem
56
70
  email:
57
71
  - andrei.bondarev13@gmail.com
@@ -60,14 +74,27 @@ extensions: []
60
74
  extra_rdoc_files: []
61
75
  files:
62
76
  - ".rspec"
77
+ - ".rubocop.yml"
78
+ - ".tool-versions"
63
79
  - CHANGELOG.md
64
80
  - Gemfile
65
81
  - Gemfile.lock
66
82
  - LICENSE.txt
67
83
  - README.md
68
84
  - Rakefile
85
+ - lib/langchainrb_overrides/vectorsearch/pgvector.rb
69
86
  - lib/langchainrb_rails.rb
70
87
  - lib/langchainrb_rails/active_record/hooks.rb
88
+ - lib/langchainrb_rails/config.rb
89
+ - lib/langchainrb_rails/generators/langchainrb_rails/base_generator.rb
90
+ - lib/langchainrb_rails/generators/langchainrb_rails/chroma_generator.rb
91
+ - lib/langchainrb_rails/generators/langchainrb_rails/pgvector_generator.rb
92
+ - lib/langchainrb_rails/generators/langchainrb_rails/pinecone_generator.rb
93
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/add_vector_column_template.rb.tt
94
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/chroma_initializer.rb.tt
95
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/enable_vector_extension_template.rb.tt
96
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/pgvector_initializer.rb.tt
97
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/pinecone_initializer.rb.tt
71
98
  - lib/langchainrb_rails/railtie.rb
72
99
  - lib/langchainrb_rails/version.rb
73
100
  - sig/langchainrb_rails.rbs
@@ -94,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
94
121
  - !ruby/object:Gem::Version
95
122
  version: '0'
96
123
  requirements: []
97
- rubygems_version: 3.2.3
124
+ rubygems_version: 3.3.7
98
125
  signing_key:
99
126
  specification_version: 4
100
127
  summary: Rails wrapper for langchainrb gem