langchainrb_rails 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b792e1a7888a17c54fad522042ee7f6935df92e684119d4177f9627abb999dfd
4
- data.tar.gz: 77444b9ed6d52443c890f045c121ba1a58f78b4d532954fc858e8c01999c90a7
3
+ metadata.gz: 54ab2d33d89929b9222202d42cc5616aac73ca65fa81c3e4f2bc3d516a3a05c6
4
+ data.tar.gz: 4ce553994a4219904875168369ab598de11162db2cd6810d202af005acb10638
5
5
  SHA512:
6
- metadata.gz: 5d26a5c5d4a10e6ea4794d809ceb15feb6262deb4754bfc12362e1e0e536b8643f46a9f98d464ae7372d6ef1b3d45ed34cf0d5e4c88977b135b4be1d982d5c85
7
- data.tar.gz: '084a74842710d2b24db00e01344cbde645249671f4f975a9cc8f37763af53597e4cd3d2b5cd0f036320e181cbc20f8be8dca525279550529f9d58d8234fd601b'
6
+ metadata.gz: bd93a8976a54120bcec4f8c10ff5bfc5c36d8bccd9f172b071a7732d675ade00ddc3021f93ecb080f508728fdb06b45e5c086933b0bd5522dd094b08d9334d93
7
+ data.tar.gz: 0fd5d062e69a441c50c4963749e31c6773f38ba3e063d21574983b5fd4579c495c3e1b95d340540754c8f89b110c4e1f6ea3f0399265bcf315a0d26f5b6b8e36
data/.rubocop.yml ADDED
@@ -0,0 +1,28 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.7
3
+ NewCops: enable
4
+ Exclude:
5
+ - 'bin/**/*'
6
+ - 'db/schema.rb'
7
+ - 'vendor/**/*'
8
+ - 'spec/fixtures/**/*'
9
+
10
+ Metrics/LineLength:
11
+ Max: 140
12
+
13
+ Metrics/BlockLength:
14
+ Exclude:
15
+ - 'spec/**/*.rb'
16
+
17
+ Style/Documentation:
18
+ Enabled: false
19
+
20
+ Style/FrozenStringLiteralComment:
21
+ Enabled: true
22
+
23
+ Style/StringLiterals:
24
+ Enabled: false
25
+
26
+ Lint/SuppressedException:
27
+ Exclude:
28
+ - 'spec/**/*.rb'
data/.tool-versions ADDED
@@ -0,0 +1 @@
1
+ ruby 3.1.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  ## [Unreleased]
2
2
 
3
- ## [0.1.0] - 2023-10-22
3
+ ## [0.1.4] - 2023-11-20
4
+ - Bugfix AR integration when using vectorsearch other than Pgvector
5
+
6
+ ## [0.1.3] - 2023-11-01
7
+ - Pgvector vectorsearch generator
8
+
9
+ ## [0.1.2] - 2023-10-27
10
+ - Pinecone vectorsearch generator
4
11
 
12
+ ## [0.1.1] - 2023-10-23
13
+
14
+ ## [0.1.0] - 2023-10-22
5
15
  - Initial release
data/Gemfile CHANGED
@@ -5,10 +5,14 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in langchainrb_rails.gemspec
6
6
  gemspec
7
7
 
8
- gem "rake", "~> 13.0"
8
+ gem "rake", "~> 13.1"
9
9
 
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
+ gem "bundler-audit", require: false
13
+ gem "brakeman", require: false
14
+ gem "rubocop", require: false
15
+
12
16
  gem "standardrb"
13
17
 
14
18
  gem "langchainrb"
data/Gemfile.lock CHANGED
@@ -1,76 +1,77 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb_rails (0.1.2)
4
+ langchainrb_rails (0.1.4)
5
5
  langchainrb (~> 0.7.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- actioncable (7.1.1)
11
- actionpack (= 7.1.1)
12
- activesupport (= 7.1.1)
10
+ actioncable (7.1.2)
11
+ actionpack (= 7.1.2)
12
+ activesupport (= 7.1.2)
13
13
  nio4r (~> 2.0)
14
14
  websocket-driver (>= 0.6.1)
15
15
  zeitwerk (~> 2.6)
16
- actionmailbox (7.1.1)
17
- actionpack (= 7.1.1)
18
- activejob (= 7.1.1)
19
- activerecord (= 7.1.1)
20
- activestorage (= 7.1.1)
21
- activesupport (= 7.1.1)
16
+ actionmailbox (7.1.2)
17
+ actionpack (= 7.1.2)
18
+ activejob (= 7.1.2)
19
+ activerecord (= 7.1.2)
20
+ activestorage (= 7.1.2)
21
+ activesupport (= 7.1.2)
22
22
  mail (>= 2.7.1)
23
23
  net-imap
24
24
  net-pop
25
25
  net-smtp
26
- actionmailer (7.1.1)
27
- actionpack (= 7.1.1)
28
- actionview (= 7.1.1)
29
- activejob (= 7.1.1)
30
- activesupport (= 7.1.1)
26
+ actionmailer (7.1.2)
27
+ actionpack (= 7.1.2)
28
+ actionview (= 7.1.2)
29
+ activejob (= 7.1.2)
30
+ activesupport (= 7.1.2)
31
31
  mail (~> 2.5, >= 2.5.4)
32
32
  net-imap
33
33
  net-pop
34
34
  net-smtp
35
35
  rails-dom-testing (~> 2.2)
36
- actionpack (7.1.1)
37
- actionview (= 7.1.1)
38
- activesupport (= 7.1.1)
36
+ actionpack (7.1.2)
37
+ actionview (= 7.1.2)
38
+ activesupport (= 7.1.2)
39
39
  nokogiri (>= 1.8.5)
40
+ racc
40
41
  rack (>= 2.2.4)
41
42
  rack-session (>= 1.0.1)
42
43
  rack-test (>= 0.6.3)
43
44
  rails-dom-testing (~> 2.2)
44
45
  rails-html-sanitizer (~> 1.6)
45
- actiontext (7.1.1)
46
- actionpack (= 7.1.1)
47
- activerecord (= 7.1.1)
48
- activestorage (= 7.1.1)
49
- activesupport (= 7.1.1)
46
+ actiontext (7.1.2)
47
+ actionpack (= 7.1.2)
48
+ activerecord (= 7.1.2)
49
+ activestorage (= 7.1.2)
50
+ activesupport (= 7.1.2)
50
51
  globalid (>= 0.6.0)
51
52
  nokogiri (>= 1.8.5)
52
- actionview (7.1.1)
53
- activesupport (= 7.1.1)
53
+ actionview (7.1.2)
54
+ activesupport (= 7.1.2)
54
55
  builder (~> 3.1)
55
56
  erubi (~> 1.11)
56
57
  rails-dom-testing (~> 2.2)
57
58
  rails-html-sanitizer (~> 1.6)
58
- activejob (7.1.1)
59
- activesupport (= 7.1.1)
59
+ activejob (7.1.2)
60
+ activesupport (= 7.1.2)
60
61
  globalid (>= 0.3.6)
61
- activemodel (7.1.1)
62
- activesupport (= 7.1.1)
63
- activerecord (7.1.1)
64
- activemodel (= 7.1.1)
65
- activesupport (= 7.1.1)
62
+ activemodel (7.1.2)
63
+ activesupport (= 7.1.2)
64
+ activerecord (7.1.2)
65
+ activemodel (= 7.1.2)
66
+ activesupport (= 7.1.2)
66
67
  timeout (>= 0.4.0)
67
- activestorage (7.1.1)
68
- actionpack (= 7.1.1)
69
- activejob (= 7.1.1)
70
- activerecord (= 7.1.1)
71
- activesupport (= 7.1.1)
68
+ activestorage (7.1.2)
69
+ actionpack (= 7.1.2)
70
+ activejob (= 7.1.2)
71
+ activerecord (= 7.1.2)
72
+ activesupport (= 7.1.2)
72
73
  marcel (~> 1.0)
73
- activesupport (7.1.1)
74
+ activesupport (7.1.2)
74
75
  base64
75
76
  bigdecimal
76
77
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -83,19 +84,23 @@ GEM
83
84
  addressable (2.8.5)
84
85
  public_suffix (>= 2.0.2, < 6.0)
85
86
  ast (2.4.2)
86
- baran (0.1.9)
87
- base64 (0.1.1)
87
+ baran (0.1.10)
88
+ base64 (0.2.0)
88
89
  bigdecimal (3.1.4)
90
+ brakeman (6.0.1)
89
91
  builder (3.2.4)
92
+ bundler-audit (0.9.1)
93
+ bundler (>= 1.2.0, < 3)
94
+ thor (~> 1.0)
90
95
  byebug (11.1.3)
91
96
  coderay (1.1.3)
92
97
  colorize (0.8.1)
93
98
  concurrent-ruby (1.2.2)
94
99
  connection_pool (2.4.1)
95
100
  crass (1.0.6)
96
- date (3.3.3)
101
+ date (3.3.4)
97
102
  diff-lcs (1.5.0)
98
- drb (2.1.1)
103
+ drb (2.2.0)
99
104
  ruby2_keywords
100
105
  erubi (1.12.0)
101
106
  globalid (1.2.1)
@@ -103,18 +108,20 @@ GEM
103
108
  i18n (1.14.1)
104
109
  concurrent-ruby (~> 1.0)
105
110
  io-console (0.6.0)
106
- irb (1.8.3)
111
+ irb (1.9.0)
107
112
  rdoc
108
113
  reline (>= 0.3.8)
109
114
  json (2.6.3)
110
115
  json-schema (4.0.0)
111
116
  addressable (>= 2.8)
112
- langchainrb (0.7.0)
117
+ langchainrb (0.7.5)
113
118
  baran (~> 0.1.9)
114
119
  colorize (~> 0.8.1)
115
120
  json-schema (~> 4.0.0)
121
+ matrix
116
122
  pragmatic_segmenter (~> 0.3.0)
117
123
  tiktoken_ruby (~> 0.0.5)
124
+ to_bool (~> 2.0.0)
118
125
  zeitwerk (~> 2.5)
119
126
  language_server-protocol (3.17.0.3)
120
127
  lint_roller (1.1.0)
@@ -127,22 +134,29 @@ GEM
127
134
  net-pop
128
135
  net-smtp
129
136
  marcel (1.0.2)
137
+ matrix (0.4.2)
130
138
  method_source (1.0.0)
131
139
  mini_mime (1.1.5)
140
+ mini_portile2 (2.8.5)
132
141
  minitest (5.20.0)
133
- mutex_m (0.1.2)
134
- net-imap (0.4.2)
142
+ mutex_m (0.2.0)
143
+ net-imap (0.4.4)
135
144
  date
136
145
  net-protocol
137
146
  net-pop (0.1.2)
138
147
  net-protocol
139
- net-protocol (0.2.1)
148
+ net-protocol (0.2.2)
140
149
  timeout
141
150
  net-smtp (0.4.0)
142
151
  net-protocol
143
152
  nio4r (2.5.9)
153
+ nokogiri (1.15.4)
154
+ mini_portile2 (~> 2.8.2)
155
+ racc (~> 1.4)
144
156
  nokogiri (1.15.4-x86_64-darwin)
145
157
  racc (~> 1.4)
158
+ nokogiri (1.15.4-x86_64-linux)
159
+ racc (~> 1.4)
146
160
  parallel (1.23.0)
147
161
  parser (3.2.2.4)
148
162
  ast (~> 2.4.1)
@@ -157,8 +171,8 @@ GEM
157
171
  pry (>= 0.13, < 0.15)
158
172
  psych (5.1.1.1)
159
173
  stringio
160
- public_suffix (5.0.3)
161
- racc (1.7.1)
174
+ public_suffix (5.0.4)
175
+ racc (1.7.3)
162
176
  rack (3.0.8)
163
177
  rack-session (2.0.0)
164
178
  rack (>= 3.0.0)
@@ -167,20 +181,20 @@ GEM
167
181
  rackup (2.1.0)
168
182
  rack (>= 3)
169
183
  webrick (~> 1.8)
170
- rails (7.1.1)
171
- actioncable (= 7.1.1)
172
- actionmailbox (= 7.1.1)
173
- actionmailer (= 7.1.1)
174
- actionpack (= 7.1.1)
175
- actiontext (= 7.1.1)
176
- actionview (= 7.1.1)
177
- activejob (= 7.1.1)
178
- activemodel (= 7.1.1)
179
- activerecord (= 7.1.1)
180
- activestorage (= 7.1.1)
181
- activesupport (= 7.1.1)
184
+ rails (7.1.2)
185
+ actioncable (= 7.1.2)
186
+ actionmailbox (= 7.1.2)
187
+ actionmailer (= 7.1.2)
188
+ actionpack (= 7.1.2)
189
+ actiontext (= 7.1.2)
190
+ actionview (= 7.1.2)
191
+ activejob (= 7.1.2)
192
+ activemodel (= 7.1.2)
193
+ activerecord (= 7.1.2)
194
+ activestorage (= 7.1.2)
195
+ activesupport (= 7.1.2)
182
196
  bundler (>= 1.15.0)
183
- railties (= 7.1.1)
197
+ railties (= 7.1.2)
184
198
  rails-dom-testing (2.2.0)
185
199
  activesupport (>= 5.0.0)
186
200
  minitest
@@ -188,20 +202,21 @@ GEM
188
202
  rails-html-sanitizer (1.6.0)
189
203
  loofah (~> 2.21)
190
204
  nokogiri (~> 1.14)
191
- railties (7.1.1)
192
- actionpack (= 7.1.1)
193
- activesupport (= 7.1.1)
205
+ railties (7.1.2)
206
+ actionpack (= 7.1.2)
207
+ activesupport (= 7.1.2)
194
208
  irb
195
209
  rackup (>= 1.0.0)
196
210
  rake (>= 12.2)
197
211
  thor (~> 1.0, >= 1.2.2)
198
212
  zeitwerk (~> 2.6)
199
213
  rainbow (3.1.1)
200
- rake (13.0.6)
201
- rdoc (6.5.0)
214
+ rake (13.1.0)
215
+ rb_sys (0.9.83)
216
+ rdoc (6.6.0)
202
217
  psych (>= 4.0.0)
203
218
  regexp_parser (2.8.2)
204
- reline (0.3.9)
219
+ reline (0.4.0)
205
220
  io-console (~> 0.5)
206
221
  rexml (3.2.6)
207
222
  rspec (3.12.0)
@@ -217,29 +232,28 @@ GEM
217
232
  diff-lcs (>= 1.2.0, < 2.0)
218
233
  rspec-support (~> 3.12.0)
219
234
  rspec-support (3.12.1)
220
- rubocop (1.56.4)
221
- base64 (~> 0.1.1)
235
+ rubocop (1.57.2)
222
236
  json (~> 2.3)
223
237
  language_server-protocol (>= 3.17.0)
224
238
  parallel (~> 1.10)
225
- parser (>= 3.2.2.3)
239
+ parser (>= 3.2.2.4)
226
240
  rainbow (>= 2.2.2, < 4.0)
227
241
  regexp_parser (>= 1.8, < 3.0)
228
242
  rexml (>= 3.2.5, < 4.0)
229
243
  rubocop-ast (>= 1.28.1, < 2.0)
230
244
  ruby-progressbar (~> 1.7)
231
245
  unicode-display_width (>= 2.4.0, < 3.0)
232
- rubocop-ast (1.29.0)
246
+ rubocop-ast (1.30.0)
233
247
  parser (>= 3.2.1.0)
234
248
  rubocop-performance (1.19.1)
235
249
  rubocop (>= 1.7.0, < 2.0)
236
250
  rubocop-ast (>= 0.4.0)
237
251
  ruby-progressbar (1.13.0)
238
252
  ruby2_keywords (0.0.5)
239
- standard (1.31.2)
253
+ standard (1.32.0)
240
254
  language_server-protocol (~> 3.17.0.2)
241
255
  lint_roller (~> 1.0)
242
- rubocop (~> 1.56.4)
256
+ rubocop (~> 1.57.2)
243
257
  standard-custom (~> 1.0.0)
244
258
  standard-performance (~> 1.2)
245
259
  standard-custom (1.0.2)
@@ -250,10 +264,14 @@ GEM
250
264
  rubocop-performance (~> 1.19.1)
251
265
  standardrb (1.0.1)
252
266
  standard
253
- stringio (3.0.8)
267
+ stringio (3.0.9)
254
268
  thor (1.3.0)
269
+ tiktoken_ruby (0.0.6)
270
+ rb_sys (~> 0.9.68)
255
271
  tiktoken_ruby (0.0.6-x86_64-darwin)
256
- timeout (0.4.0)
272
+ tiktoken_ruby (0.0.6-x86_64-linux)
273
+ timeout (0.4.1)
274
+ to_bool (2.0.0)
257
275
  tzinfo (2.0.6)
258
276
  concurrent-ruby (~> 1.0)
259
277
  unicode (0.4.4.4)
@@ -263,18 +281,24 @@ GEM
263
281
  websocket-extensions (>= 0.1.0)
264
282
  websocket-extensions (0.1.5)
265
283
  yard (0.9.34)
266
- zeitwerk (2.6.11)
284
+ zeitwerk (2.6.12)
267
285
 
268
286
  PLATFORMS
287
+ ruby
269
288
  x86_64-darwin-19
289
+ x86_64-darwin-22
290
+ x86_64-linux
270
291
 
271
292
  DEPENDENCIES
293
+ brakeman
294
+ bundler-audit
272
295
  langchainrb
273
296
  langchainrb_rails!
274
297
  pry-byebug (~> 3.10.0)
275
298
  rails (> 6.0.0)
276
- rake (~> 13.0)
299
+ rake (~> 13.1)
277
300
  rspec (~> 3.0)
301
+ rubocop
278
302
  standardrb
279
303
  yard (~> 0.9.34)
280
304
 
data/README.md CHANGED
@@ -1,33 +1,117 @@
1
1
  💎🔗 Langchain.rb for Rails
2
2
  ---
3
- Building applications with LLMs through composability
3
+ The fastest way to sprinkle AI on top of your Rails app. Add OpenAI-powered question-and-answering in minutes.
4
4
 
5
- 👨‍💻👩‍💻 CURRENTLY SEEKING PEOPLE TO FORM THE CORE GROUP OF MAINTAINERS WITH
5
+ Available for paid consulting engagements! [Email me](mailto:andrei@sourcelabs.io).
6
6
 
7
7
  ![Tests status](https://github.com/andreibondarev/langchainrb_rails/actions/workflows/ci.yml/badge.svg?branch=main)
8
8
  [![Gem Version](https://badge.fury.io/rb/langchainrb_rails.svg)](https://badge.fury.io/rb/langchainrb_rails)
9
9
  [![Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/gems/langchainrb_rails)
10
10
  [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb_rails/blob/main/LICENSE.txt)
11
11
  [![](https://dcbadge.vercel.app/api/server/WDARp7J2n8?compact=true&style=flat)](https://discord.gg/WDARp7J2n8)
12
+ [![X](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40rushing_andrei)](https://twitter.com/rushing_andrei)
12
13
 
14
+ ## Dependencies
13
15
 
14
- Langchain.rb is a library that's an abstraction layer on top many emergent AI, ML and other DS tools. The goal is to abstract complexity and difficult concepts to make building AI/ML-supercharged applications approachable for traditional software engineers.
16
+ * Ruby 3.0+
17
+ * Postgres 11+
18
+
19
+ ## Table of Contents
20
+
21
+ - [Installation](#installation)
22
+ - [Generators](#rails-generators)
15
23
 
16
24
  ## Installation
17
25
 
18
26
  Install the gem and add to the application's Gemfile by executing:
19
-
20
- bundle add langchainrb_rails
27
+ ```bash
28
+ bundle add langchainrb_rails
29
+ ```
21
30
 
22
31
  If bundler is not being used to manage dependencies, install the gem by executing:
32
+ ```bash
33
+ gem install langchainrb_rails
34
+ ```
35
+
36
+ ## Configuration w/ [Pgvector](https://github.com/pgvector/pgvector) (requires Postgres 11+)
37
+
38
+ 1. Run the Rails generator to add vectorsearch to your ActiveRecord model
39
+ ```bash
40
+ rails generate langchainrb_rails:pgvector --model=Product --llm=openai
41
+ ```
42
+
43
+ This adds required dependencies to your Gemfile, creates the `config/initializers/langchainrb_rails.rb` initializer file, database migrations, and adds the necessary code to the ActiveRecord model to enable vectorsearch.
44
+
45
+ 2. Bundle and migrate
46
+ ```bash
47
+ bundle install && rails db:migrate
48
+ ```
49
+
50
+ 3. Set the env var `OPENAI_API_KEY` to your OpenAI API key: https://platform.openai.com/account/api-keys
51
+ ```ruby
52
+ ENV["OPENAI_API_KEY"]=
53
+ ```
54
+
55
+ 5. Generate embeddings for your model
56
+ ```ruby
57
+ Product.embed!
58
+ ```
59
+
60
+ This can take a while depending on the number of database records.
61
+
62
+ ## Usage
63
+
64
+ ### Question and Answering
65
+ ```ruby
66
+ Product.ask("list the brands of shoes that are in stock")
67
+ ```
68
+
69
+ Returns a `String` with a natural language answer. The answer is assembled using the following steps:
70
+
71
+ 1. An embedding is generated for the passed in `question` using the selected LLM.
72
+ 2. We calculate a [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) to find records that most closely match your question's embedding.
73
+ 3. A prompt is created using the question, and the above records (their `#as_vector` representation) are added as context.
74
+ 4. This prompt is passed to the LLM to generate an answer
75
+
76
+ ### Similarity Search
77
+ ```ruby
78
+ Product.similarity_search("t-shirt")
79
+ ```
80
+
81
+ Returns an ActiveRecord relation of the records that most closely match the `query` using vector search.
82
+
83
+ ## Customization
84
+
85
+ ### Changing the vector representation of a record
86
+
87
+ By default, embeddings are generated by calling the following method on your model instance:
88
+ ```ruby
89
+ to_json(except: :embedding)
90
+ ```
91
+
92
+ You can override this by defining an `#as_vector` method in your model:
93
+ ```ruby
94
+ def as_vector
95
+ { name: name, description: description, category: category.name, ... }.to_json
96
+ end
97
+ ```
23
98
 
24
- gem install langchainrb_rails
99
+ Re-generate embeddings after modifying this method:
100
+
101
+ ```ruby
102
+ Product.embed!
103
+ ```
25
104
 
26
105
  ## Rails Generators
27
106
 
28
- ### Pinecone Generator - adds vectorsearch to your ActiveRecord model
107
+ ### Pgvector Generator
29
108
 
109
+ ```bash
110
+ rails generate langchainrb_rails:pgvector --model=Product --llm=openai
30
111
  ```
112
+
113
+ ### Pinecone Generator - adds vectorsearch to your ActiveRecord model
114
+ ```bash
31
115
  rails generate langchainrb_rails:pinecone --model=Product --llm=openai
32
116
  ```
33
117
 
@@ -39,3 +123,4 @@ Pinecone Generator does the following:
39
123
  1. Creates the `config/initializers/langchainrb_rails.rb` initializer file
40
124
  2. Adds necessary code to the ActiveRecord model to enable vectorsearch
41
125
  3. Adds `pinecone` gem to the Gemfile
126
+
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Overriding Langchain.rb's Pgvector implementation to use ActiveRecord.
4
+ # Original implementation: https://github.com/andreibondarev/langchainrb/blob/main/lib/langchain/vectorsearch/pgvector.rb
5
+
6
+ module Langchain::Vectorsearch
7
+ class Pgvector < Base
8
+ #
9
+ # The PostgreSQL vector search adapter
10
+ #
11
+ # Gem requirements:
12
+ # gem "pgvector", "~> 0.2"
13
+ #
14
+ # Usage:
15
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(llm:)
16
+ #
17
+
18
+ # The operators supported by the PostgreSQL vector search adapter
19
+ OPERATORS = [
20
+ "cosine",
21
+ "euclidean",
22
+ "inner_product"
23
+ ]
24
+ DEFAULT_OPERATOR = "cosine"
25
+
26
+ attr_reader :operator, :llm
27
+ attr_accessor :model
28
+
29
+ # @param url [String] The URL of the PostgreSQL database
30
+ # @param index_name [String] The name of the table to use for the index
31
+ # @param llm [Object] The LLM client to use
32
+ # @param namespace [String] The namespace to use for the index when inserting/querying
33
+ def initialize(llm:)
34
+ # If the line below is called, the generator fails as calls to
35
+ # LangchainrbRails.config.vectorsearch will generate an exception.
36
+ # These happen in the template files.
37
+ # depends_on "neighbor"
38
+
39
+ @operator = DEFAULT_OPERATOR
40
+
41
+ super(llm: llm)
42
+ end
43
+
44
+ # Add a list of texts to the index
45
+ # @param texts [Array<String>] The texts to add to the index
46
+ # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
47
+ # @return [Array<Integer>] The ids of the added texts.
48
+ def add_texts(texts:, ids:)
49
+ embeddings = texts.map do |text|
50
+ llm.embed(text: text).embedding
51
+ end
52
+
53
+ # I believe the records returned by #find must be in the
54
+ # same order as the embeddings. I _think_ this works for uuid ids but didn't test
55
+ # deeply.
56
+ # TODO - implement find_each so we don't load all records into memory
57
+ model.find(ids).each.with_index do |record, i|
58
+ record.update_column(:embedding, embeddings[i])
59
+ end
60
+ end
61
+
62
+ def update_texts(texts:, ids:)
63
+ add_texts(texts: texts, ids: ids)
64
+ end
65
+
66
+ # Invoke a rake task that will create an initializer (`config/initializers/langchain.rb`) file
67
+ # and db/migrations/* files
68
+ def create_default_schema
69
+ Rake::Task["pgvector"].invoke
70
+ end
71
+
72
+ # Destroy default schema
73
+ def destroy_default_schema
74
+ # Tell the user to rollback the migration
75
+ end
76
+
77
+ # Search for similar texts in the index
78
+ # @param query [String] The text to search for
79
+ # @param k [Integer] The number of top results to return
80
+ # @return [Array<Hash>] The results of the search
81
+ # TODO - drop the named "query:" param so it is the same interface as #ask?
82
+ def similarity_search(query:, k: 4)
83
+ embedding = llm.embed(text: query).embedding
84
+
85
+ similarity_search_by_vector(
86
+ embedding: embedding,
87
+ k: k
88
+ )
89
+ end
90
+
91
+ # Search for similar texts in the index by the passed in vector.
92
+ # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
93
+ # @param embedding [Array<Float>] The vector to search for
94
+ # @param k [Integer] The number of top results to return
95
+ # @return [Array<Hash>] The results of the search
96
+ # TODO - drop the named "embedding:" param so it is the same interface as #ask?
97
+ def similarity_search_by_vector(embedding:, k: 4)
98
+ model
99
+ .nearest_neighbors(:embedding, embedding, distance: operator)
100
+ .limit(k)
101
+ end
102
+
103
+ # Ask a question and return the answer
104
+ # @param question [String] The question to ask
105
+ # @param k [Integer] The number of results to have in context
106
+ # @yield [String] Stream responses back one String at a time
107
+ # @return [String] The answer to the question
108
+ def ask(question, k: 4, &block)
109
+ # Noisy as the embedding column has a lot of data
110
+ ActiveRecord::Base.logger.silence do
111
+ search_results = similarity_search(query: question, k: k)
112
+
113
+ context = search_results.map do |result|
114
+ result.as_vector
115
+ end
116
+ context = context.join("\n---\n")
117
+
118
+ prompt = generate_rag_prompt(question: question, context: context)
119
+
120
+ llm.chat(prompt: prompt, &block)
121
+ end
122
+ end
123
+ end
124
+ end
@@ -61,7 +61,9 @@ module LangchainrbRails
61
61
  #
62
62
  # @return [String] the text representation of the model
63
63
  def as_vector
64
- to_json
64
+ # Don't vectorize the embedding ... this would happen if it already exists
65
+ # for a record and we update.
66
+ to_json(except: :embedding)
65
67
  end
66
68
 
67
69
  module ClassMethods
@@ -70,6 +72,20 @@ module LangchainrbRails
70
72
  # @param provider [Object] The `Langchain::Vectorsearch::*` instance
71
73
  def vectorsearch
72
74
  class_variable_set(:@@provider, LangchainrbRails.config.vectorsearch)
75
+
76
+ # Pgvector-specific configuration
77
+ if LangchainrbRails.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
78
+ has_neighbors(:embedding)
79
+ LangchainrbRails.config.vectorsearch.model = self
80
+ end
81
+ end
82
+
83
+ # Iterates over records and generates embeddings.
84
+ # Will re-generate for ALL records (not just records with embeddings).
85
+ def embed!
86
+ find_each do |record|
87
+ record.upsert_to_vectorsearch
88
+ end
73
89
  end
74
90
 
75
91
  # Search for similar texts
@@ -84,7 +100,7 @@ module LangchainrbRails
84
100
  )
85
101
 
86
102
  # We use "__id" when Weaviate is the provider
87
- ids = records.map { |record| record.dig("id") || record.dig("__id") }
103
+ ids = records.map { |record| record.try("id") || record.dig("__id") }
88
104
  where(id: ids)
89
105
  end
90
106
 
@@ -94,12 +110,12 @@ module LangchainrbRails
94
110
  # @param k [Integer] The number of results to have in context
95
111
  # @yield [String] Stream responses back one String at a time
96
112
  # @return [String] The answer to the question
97
- def ask(question:, k: 4, &block)
113
+ def ask(question, k: 4, &block)
98
114
  class_variable_get(:@@provider).ask(
99
- question: question,
115
+ question,
100
116
  k: k,
101
117
  &block
102
- )
118
+ ).completion
103
119
  end
104
120
  end
105
121
  end
@@ -0,0 +1,24 @@
1
+ require "rails/generators"
2
+ require "rails/generators/active_record"
3
+
4
+ module LangchainrbRails
5
+ module Generators
6
+ class BaseGenerator < Rails::Generators::Base
7
+ include ::ActiveRecord::Generators::Migration
8
+
9
+ class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
10
+ class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
11
+
12
+ # Available LLM providers to be passed in as --llm option
13
+ LLMS = {
14
+ "cohere" => "Langchain::LLM::Cohere",
15
+ "google_palm" => "Langchain::LLM::GooglePalm",
16
+ "hugging_face" => "Langchain::LLM::HuggingFace",
17
+ "llama_cpp" => "Langchain::LLM::LlamaCpp",
18
+ "ollama" => "Langchain::LLM::Ollama",
19
+ "openai" => "Langchain::LLM::OpenAI",
20
+ "replicate" => "Langchain::LLM::Replicate"
21
+ }
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ module Generators
5
+ #
6
+ # ChromaGenerator does the following:
7
+ # 1. Creates the `langchainrb_rails.rb` initializer file
8
+ # 2. Adds necessary code to the ActiveRecord model to enable vectorsearch
9
+ # 3. Adds `chroma-db` gem to the Gemfile
10
+ #
11
+ # Usage:
12
+ # rails generate langchainrb_rails:chroma --model=Product --llm=openai
13
+ #
14
+ class ChromaGenerator < LangchainrbRails::Generators::BaseGenerator
15
+ desc "This generator adds Chroma vectorsearch integration to your ActiveRecord model"
16
+ source_root File.join(__dir__, "templates")
17
+
18
+ # Creates the `langchainrb_rails.rb` initializer file
19
+ def create_initializer_file
20
+ template "chroma_initializer.rb", "config/initializers/langchainrb_rails.rb"
21
+ end
22
+
23
+ # Adds `vectorsearch` class method to the model and `after_save` callback that calls `upsert_to_vectorsearch()`
24
+ def add_to_model
25
+ inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
26
+ " vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
27
+ end
28
+ end
29
+
30
+ # Adds `chroma-db` gem to the Gemfile
31
+ # TODO: Can we automatically run `bundle install`?
32
+ def add_to_gemfile
33
+ gem "chroma-db", version: "~> 0.6.0"
34
+ end
35
+
36
+ private
37
+
38
+ # @return [String] Name of the model
39
+ def model_name
40
+ options["model"]
41
+ end
42
+
43
+ # @return [String] LLM provider to use
44
+ def llm
45
+ options["llm"]
46
+ end
47
+
48
+ # @return [Langchain::LLM::*] LLM class
49
+ def llm_class
50
+ Langchain::LLM.const_get(LLMS[llm])
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ module Generators
5
+ #
6
+ # Usage:
7
+ # rails generate langchainrb_rails:pgvector --model=Product --llm=openai
8
+ #
9
+ class PgvectorGenerator < LangchainrbRails::Generators::BaseGenerator
10
+ desc "This generator adds Pgvector vectorsearch integration to your ActiveRecord model"
11
+ source_root File.join(__dir__, "templates")
12
+
13
+ def copy_migration
14
+ migration_template "enable_vector_extension_template.rb", "db/migrate/enable_vector_extension.rb", migration_version: migration_version
15
+ migration_template "add_vector_column_template.rb", "db/migrate/add_vector_column_to_#{table_name}.rb", migration_version: migration_version
16
+ end
17
+
18
+ def create_initializer_file
19
+ template "pgvector_initializer.rb", "config/initializers/langchainrb_rails.rb"
20
+ end
21
+
22
+ def migration_version
23
+ "[#{::ActiveRecord::VERSION::MAJOR}.#{::ActiveRecord::VERSION::MINOR}]"
24
+ end
25
+
26
+ def add_to_model
27
+ inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
28
+ " vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
29
+ end
30
+ end
31
+
32
+ def add_to_gemfile
33
+ # Dependency for Langchain PgVector
34
+ gem "neighbor"
35
+ gem "ruby-openai"
36
+ end
37
+
38
+ def post_install_message
39
+ say "Please do the following to start Q&A with your #{model_name} records:", :green
40
+ say "1. Run `bundle install` to install the new gems."
41
+ say "2. Set `OPENAI_API_KEY` environment variable to your OpenAI API key."
42
+ say "3. Run `rails db:migrate` to apply the database migrations to enable pgvector and add the embedding column."
43
+ say "4. In Rails console, run `#{model_name}.embed!` to set the embeddings for all records."
44
+ say "5. Ask a question in the Rails console, ie: `#{model_name}.ask('[YOUR QUESTION]')`"
45
+ end
46
+
47
+ private
48
+
49
+ # @return [String] Name of the model
50
+ def model_name
51
+ options["model"]
52
+ end
53
+
54
+ # @return [String] Table name of the model
55
+ def table_name
56
+ model_name.downcase.pluralize
57
+ end
58
+
59
+ # @return [String] LLM provider to use
60
+ def llm
61
+ options["llm"]
62
+ end
63
+
64
+ # @return [Langchain::LLM::*] LLM class
65
+ def llm_class
66
+ Langchain::LLM.const_get(LLMS[llm])
67
+ end
68
+
69
+ # @return [Integer] Dimension of the vector to be used
70
+ def vector_dimension
71
+ llm_class.default_dimension
72
+ end
73
+ end
74
+ end
75
+ end
@@ -1,4 +1,4 @@
1
- require "rails/generators/active_record"
1
+ # frozen_string_literal: true
2
2
 
3
3
  module LangchainrbRails
4
4
  module Generators
@@ -11,26 +11,10 @@ module LangchainrbRails
11
11
  # Usage:
12
12
  # rails generate langchainrb_rails:pinecone --model=Product --llm=openai
13
13
  #
14
- class PineconeGenerator < Rails::Generators::Base
14
+ class PineconeGenerator < LangchainrbRails::Generators::BaseGenerator
15
15
  desc "This generator adds Pinecone vectorsearch integration to your ActiveRecord model"
16
-
17
- include ::ActiveRecord::Generators::Migration
18
16
  source_root File.join(__dir__, "templates")
19
17
 
20
- class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
21
- class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
22
-
23
- # Available LLM providers to be passed in as --llm option
24
- LLMS = {
25
- "cohere" => "Langchain::LLM::Cohere",
26
- "google_palm" => "Langchain::LLM::GooglePalm",
27
- "hugging_face" => "Langchain::LLM::HuggingFace",
28
- "llama_cpp" => "Langchain::LLM::LlamaCpp",
29
- "ollama" => "Langchain::LLM::Ollama",
30
- "openai" => "Langchain::LLM::OpenAI",
31
- "replicate" => "Langchain::LLM::Replicate"
32
- }
33
-
34
18
  # Creates the `langchainrb_rails.rb` initializer file
35
19
  def create_initializer_file
36
20
  template "pinecone_initializer.rb", "config/initializers/langchainrb_rails.rb"
@@ -46,7 +30,7 @@ module LangchainrbRails
46
30
  # Adds `pinecone` gem to the Gemfile
47
31
  # TODO: Can we automatically run `bundle install`?
48
32
  def add_to_gemfile
49
- gem "pinecone"
33
+ gem "pinecone", version: "~> 0.1.6"
50
34
  end
51
35
 
52
36
  private
@@ -0,0 +1,10 @@
1
+ class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ add_column :<%= table_name %>, :embedding, :vector,
4
+ limit: LangchainrbRails
5
+ .config
6
+ .vectorsearch
7
+ .llm
8
+ .default_dimension
9
+ end
10
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ LangchainrbRails.configure do |config|
4
+ config.vectorsearch = Langchain::Vectorsearch::Chroma.new(
5
+ llm: <%= llm_class %>.new(api_key: ENV["<%= llm.upcase %>_API_KEY"]),
6
+ url: ENV["CHROMA_URL"],
7
+ index_name: ""
8
+ )
9
+ end
@@ -0,0 +1,5 @@
1
+ class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ enable_extension "vector"
4
+ end
5
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ LangchainrbRails.configure do |config|
4
+ config.vectorsearch = Langchain::Vectorsearch::Pgvector.new(
5
+ llm: <%= llm_class %>.new(api_key: ENV["OPENAI_API_KEY"])
6
+ )
7
+ end
@@ -9,7 +9,9 @@ module LangchainrbRails
9
9
  end
10
10
 
11
11
  generators do
12
+ require_relative "generators/langchainrb_rails/chroma_generator"
12
13
  require_relative "generators/langchainrb_rails/pinecone_generator"
14
+ require_relative "generators/langchainrb_rails/pgvector_generator"
13
15
  end
14
16
  end
15
17
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LangchainrbRails
4
- VERSION = "0.1.2"
4
+ VERSION = "0.1.4"
5
5
  end
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "forwardable"
3
4
  require "langchain"
5
+ require "rails"
4
6
  require_relative "langchainrb_rails/version"
5
7
  require "langchainrb_rails/railtie"
6
8
  require "langchainrb_rails/config"
9
+ require_relative "langchainrb_overrides/vectorsearch/pgvector"
7
10
 
8
11
  module LangchainrbRails
9
12
  class Error < StandardError; end
@@ -13,6 +16,8 @@ module LangchainrbRails
13
16
  end
14
17
 
15
18
  module Generators
19
+ autoload :BaseGenerator, "langchainrb_rails/generators/langchainrb_rails/base_generator"
20
+ autoload :ChromaGenerator, "langchainrb_rails/generators/langchainrb_rails/chroma_generator"
16
21
  autoload :PgvectorGenerator, "langchainrb_rails/generators/langchainrb_rails/pgvector_generator"
17
22
  end
18
23
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb_rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-27 00:00:00.000000000 Z
11
+ date: 2023-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: langchainrb
@@ -74,16 +74,26 @@ extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
76
  - ".rspec"
77
+ - ".rubocop.yml"
78
+ - ".tool-versions"
77
79
  - CHANGELOG.md
78
80
  - Gemfile
79
81
  - Gemfile.lock
80
82
  - LICENSE.txt
81
83
  - README.md
82
84
  - Rakefile
85
+ - lib/langchainrb_overrides/vectorsearch/pgvector.rb
83
86
  - lib/langchainrb_rails.rb
84
87
  - lib/langchainrb_rails/active_record/hooks.rb
85
88
  - lib/langchainrb_rails/config.rb
89
+ - lib/langchainrb_rails/generators/langchainrb_rails/base_generator.rb
90
+ - lib/langchainrb_rails/generators/langchainrb_rails/chroma_generator.rb
91
+ - lib/langchainrb_rails/generators/langchainrb_rails/pgvector_generator.rb
86
92
  - lib/langchainrb_rails/generators/langchainrb_rails/pinecone_generator.rb
93
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/add_vector_column_template.rb.tt
94
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/chroma_initializer.rb.tt
95
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/enable_vector_extension_template.rb.tt
96
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/pgvector_initializer.rb.tt
87
97
  - lib/langchainrb_rails/generators/langchainrb_rails/templates/pinecone_initializer.rb.tt
88
98
  - lib/langchainrb_rails/railtie.rb
89
99
  - lib/langchainrb_rails/version.rb
@@ -111,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
121
  - !ruby/object:Gem::Version
112
122
  version: '0'
113
123
  requirements: []
114
- rubygems_version: 3.2.3
124
+ rubygems_version: 3.3.7
115
125
  signing_key:
116
126
  specification_version: 4
117
127
  summary: Rails wrapper for langchainrb gem