langchainrb_rails 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b792e1a7888a17c54fad522042ee7f6935df92e684119d4177f9627abb999dfd
4
- data.tar.gz: 77444b9ed6d52443c890f045c121ba1a58f78b4d532954fc858e8c01999c90a7
3
+ metadata.gz: 54ab2d33d89929b9222202d42cc5616aac73ca65fa81c3e4f2bc3d516a3a05c6
4
+ data.tar.gz: 4ce553994a4219904875168369ab598de11162db2cd6810d202af005acb10638
5
5
  SHA512:
6
- metadata.gz: 5d26a5c5d4a10e6ea4794d809ceb15feb6262deb4754bfc12362e1e0e536b8643f46a9f98d464ae7372d6ef1b3d45ed34cf0d5e4c88977b135b4be1d982d5c85
7
- data.tar.gz: '084a74842710d2b24db00e01344cbde645249671f4f975a9cc8f37763af53597e4cd3d2b5cd0f036320e181cbc20f8be8dca525279550529f9d58d8234fd601b'
6
+ metadata.gz: bd93a8976a54120bcec4f8c10ff5bfc5c36d8bccd9f172b071a7732d675ade00ddc3021f93ecb080f508728fdb06b45e5c086933b0bd5522dd094b08d9334d93
7
+ data.tar.gz: 0fd5d062e69a441c50c4963749e31c6773f38ba3e063d21574983b5fd4579c495c3e1b95d340540754c8f89b110c4e1f6ea3f0399265bcf315a0d26f5b6b8e36
data/.rubocop.yml ADDED
@@ -0,0 +1,28 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.7
3
+ NewCops: enable
4
+ Exclude:
5
+ - 'bin/**/*'
6
+ - 'db/schema.rb'
7
+ - 'vendor/**/*'
8
+ - 'spec/fixtures/**/*'
9
+
10
+ Metrics/LineLength:
11
+ Max: 140
12
+
13
+ Metrics/BlockLength:
14
+ Exclude:
15
+ - 'spec/**/*.rb'
16
+
17
+ Style/Documentation:
18
+ Enabled: false
19
+
20
+ Style/FrozenStringLiteralComment:
21
+ Enabled: true
22
+
23
+ Style/StringLiterals:
24
+ Enabled: false
25
+
26
+ Lint/SuppressedException:
27
+ Exclude:
28
+ - 'spec/**/*.rb'
data/.tool-versions ADDED
@@ -0,0 +1 @@
1
+ ruby 3.1.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  ## [Unreleased]
2
2
 
3
- ## [0.1.0] - 2023-10-22
3
+ ## [0.1.4] - 2023-11-20
4
+ - Bugfix AR integration when using vectorsearch other than Pgvector
5
+
6
+ ## [0.1.3] - 2023-11-01
7
+ - Pgvector vectorsearch generator
8
+
9
+ ## [0.1.2] - 2023-10-27
10
+ - Pinecone vectorsearch generator
4
11
 
12
+ ## [0.1.1] - 2023-10-23
13
+
14
+ ## [0.1.0] - 2023-10-22
5
15
  - Initial release
data/Gemfile CHANGED
@@ -5,10 +5,14 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in langchainrb_rails.gemspec
6
6
  gemspec
7
7
 
8
- gem "rake", "~> 13.0"
8
+ gem "rake", "~> 13.1"
9
9
 
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
+ gem "bundler-audit", require: false
13
+ gem "brakeman", require: false
14
+ gem "rubocop", require: false
15
+
12
16
  gem "standardrb"
13
17
 
14
18
  gem "langchainrb"
data/Gemfile.lock CHANGED
@@ -1,76 +1,77 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb_rails (0.1.2)
4
+ langchainrb_rails (0.1.4)
5
5
  langchainrb (~> 0.7.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- actioncable (7.1.1)
11
- actionpack (= 7.1.1)
12
- activesupport (= 7.1.1)
10
+ actioncable (7.1.2)
11
+ actionpack (= 7.1.2)
12
+ activesupport (= 7.1.2)
13
13
  nio4r (~> 2.0)
14
14
  websocket-driver (>= 0.6.1)
15
15
  zeitwerk (~> 2.6)
16
- actionmailbox (7.1.1)
17
- actionpack (= 7.1.1)
18
- activejob (= 7.1.1)
19
- activerecord (= 7.1.1)
20
- activestorage (= 7.1.1)
21
- activesupport (= 7.1.1)
16
+ actionmailbox (7.1.2)
17
+ actionpack (= 7.1.2)
18
+ activejob (= 7.1.2)
19
+ activerecord (= 7.1.2)
20
+ activestorage (= 7.1.2)
21
+ activesupport (= 7.1.2)
22
22
  mail (>= 2.7.1)
23
23
  net-imap
24
24
  net-pop
25
25
  net-smtp
26
- actionmailer (7.1.1)
27
- actionpack (= 7.1.1)
28
- actionview (= 7.1.1)
29
- activejob (= 7.1.1)
30
- activesupport (= 7.1.1)
26
+ actionmailer (7.1.2)
27
+ actionpack (= 7.1.2)
28
+ actionview (= 7.1.2)
29
+ activejob (= 7.1.2)
30
+ activesupport (= 7.1.2)
31
31
  mail (~> 2.5, >= 2.5.4)
32
32
  net-imap
33
33
  net-pop
34
34
  net-smtp
35
35
  rails-dom-testing (~> 2.2)
36
- actionpack (7.1.1)
37
- actionview (= 7.1.1)
38
- activesupport (= 7.1.1)
36
+ actionpack (7.1.2)
37
+ actionview (= 7.1.2)
38
+ activesupport (= 7.1.2)
39
39
  nokogiri (>= 1.8.5)
40
+ racc
40
41
  rack (>= 2.2.4)
41
42
  rack-session (>= 1.0.1)
42
43
  rack-test (>= 0.6.3)
43
44
  rails-dom-testing (~> 2.2)
44
45
  rails-html-sanitizer (~> 1.6)
45
- actiontext (7.1.1)
46
- actionpack (= 7.1.1)
47
- activerecord (= 7.1.1)
48
- activestorage (= 7.1.1)
49
- activesupport (= 7.1.1)
46
+ actiontext (7.1.2)
47
+ actionpack (= 7.1.2)
48
+ activerecord (= 7.1.2)
49
+ activestorage (= 7.1.2)
50
+ activesupport (= 7.1.2)
50
51
  globalid (>= 0.6.0)
51
52
  nokogiri (>= 1.8.5)
52
- actionview (7.1.1)
53
- activesupport (= 7.1.1)
53
+ actionview (7.1.2)
54
+ activesupport (= 7.1.2)
54
55
  builder (~> 3.1)
55
56
  erubi (~> 1.11)
56
57
  rails-dom-testing (~> 2.2)
57
58
  rails-html-sanitizer (~> 1.6)
58
- activejob (7.1.1)
59
- activesupport (= 7.1.1)
59
+ activejob (7.1.2)
60
+ activesupport (= 7.1.2)
60
61
  globalid (>= 0.3.6)
61
- activemodel (7.1.1)
62
- activesupport (= 7.1.1)
63
- activerecord (7.1.1)
64
- activemodel (= 7.1.1)
65
- activesupport (= 7.1.1)
62
+ activemodel (7.1.2)
63
+ activesupport (= 7.1.2)
64
+ activerecord (7.1.2)
65
+ activemodel (= 7.1.2)
66
+ activesupport (= 7.1.2)
66
67
  timeout (>= 0.4.0)
67
- activestorage (7.1.1)
68
- actionpack (= 7.1.1)
69
- activejob (= 7.1.1)
70
- activerecord (= 7.1.1)
71
- activesupport (= 7.1.1)
68
+ activestorage (7.1.2)
69
+ actionpack (= 7.1.2)
70
+ activejob (= 7.1.2)
71
+ activerecord (= 7.1.2)
72
+ activesupport (= 7.1.2)
72
73
  marcel (~> 1.0)
73
- activesupport (7.1.1)
74
+ activesupport (7.1.2)
74
75
  base64
75
76
  bigdecimal
76
77
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -83,19 +84,23 @@ GEM
83
84
  addressable (2.8.5)
84
85
  public_suffix (>= 2.0.2, < 6.0)
85
86
  ast (2.4.2)
86
- baran (0.1.9)
87
- base64 (0.1.1)
87
+ baran (0.1.10)
88
+ base64 (0.2.0)
88
89
  bigdecimal (3.1.4)
90
+ brakeman (6.0.1)
89
91
  builder (3.2.4)
92
+ bundler-audit (0.9.1)
93
+ bundler (>= 1.2.0, < 3)
94
+ thor (~> 1.0)
90
95
  byebug (11.1.3)
91
96
  coderay (1.1.3)
92
97
  colorize (0.8.1)
93
98
  concurrent-ruby (1.2.2)
94
99
  connection_pool (2.4.1)
95
100
  crass (1.0.6)
96
- date (3.3.3)
101
+ date (3.3.4)
97
102
  diff-lcs (1.5.0)
98
- drb (2.1.1)
103
+ drb (2.2.0)
99
104
  ruby2_keywords
100
105
  erubi (1.12.0)
101
106
  globalid (1.2.1)
@@ -103,18 +108,20 @@ GEM
103
108
  i18n (1.14.1)
104
109
  concurrent-ruby (~> 1.0)
105
110
  io-console (0.6.0)
106
- irb (1.8.3)
111
+ irb (1.9.0)
107
112
  rdoc
108
113
  reline (>= 0.3.8)
109
114
  json (2.6.3)
110
115
  json-schema (4.0.0)
111
116
  addressable (>= 2.8)
112
- langchainrb (0.7.0)
117
+ langchainrb (0.7.5)
113
118
  baran (~> 0.1.9)
114
119
  colorize (~> 0.8.1)
115
120
  json-schema (~> 4.0.0)
121
+ matrix
116
122
  pragmatic_segmenter (~> 0.3.0)
117
123
  tiktoken_ruby (~> 0.0.5)
124
+ to_bool (~> 2.0.0)
118
125
  zeitwerk (~> 2.5)
119
126
  language_server-protocol (3.17.0.3)
120
127
  lint_roller (1.1.0)
@@ -127,22 +134,29 @@ GEM
127
134
  net-pop
128
135
  net-smtp
129
136
  marcel (1.0.2)
137
+ matrix (0.4.2)
130
138
  method_source (1.0.0)
131
139
  mini_mime (1.1.5)
140
+ mini_portile2 (2.8.5)
132
141
  minitest (5.20.0)
133
- mutex_m (0.1.2)
134
- net-imap (0.4.2)
142
+ mutex_m (0.2.0)
143
+ net-imap (0.4.4)
135
144
  date
136
145
  net-protocol
137
146
  net-pop (0.1.2)
138
147
  net-protocol
139
- net-protocol (0.2.1)
148
+ net-protocol (0.2.2)
140
149
  timeout
141
150
  net-smtp (0.4.0)
142
151
  net-protocol
143
152
  nio4r (2.5.9)
153
+ nokogiri (1.15.4)
154
+ mini_portile2 (~> 2.8.2)
155
+ racc (~> 1.4)
144
156
  nokogiri (1.15.4-x86_64-darwin)
145
157
  racc (~> 1.4)
158
+ nokogiri (1.15.4-x86_64-linux)
159
+ racc (~> 1.4)
146
160
  parallel (1.23.0)
147
161
  parser (3.2.2.4)
148
162
  ast (~> 2.4.1)
@@ -157,8 +171,8 @@ GEM
157
171
  pry (>= 0.13, < 0.15)
158
172
  psych (5.1.1.1)
159
173
  stringio
160
- public_suffix (5.0.3)
161
- racc (1.7.1)
174
+ public_suffix (5.0.4)
175
+ racc (1.7.3)
162
176
  rack (3.0.8)
163
177
  rack-session (2.0.0)
164
178
  rack (>= 3.0.0)
@@ -167,20 +181,20 @@ GEM
167
181
  rackup (2.1.0)
168
182
  rack (>= 3)
169
183
  webrick (~> 1.8)
170
- rails (7.1.1)
171
- actioncable (= 7.1.1)
172
- actionmailbox (= 7.1.1)
173
- actionmailer (= 7.1.1)
174
- actionpack (= 7.1.1)
175
- actiontext (= 7.1.1)
176
- actionview (= 7.1.1)
177
- activejob (= 7.1.1)
178
- activemodel (= 7.1.1)
179
- activerecord (= 7.1.1)
180
- activestorage (= 7.1.1)
181
- activesupport (= 7.1.1)
184
+ rails (7.1.2)
185
+ actioncable (= 7.1.2)
186
+ actionmailbox (= 7.1.2)
187
+ actionmailer (= 7.1.2)
188
+ actionpack (= 7.1.2)
189
+ actiontext (= 7.1.2)
190
+ actionview (= 7.1.2)
191
+ activejob (= 7.1.2)
192
+ activemodel (= 7.1.2)
193
+ activerecord (= 7.1.2)
194
+ activestorage (= 7.1.2)
195
+ activesupport (= 7.1.2)
182
196
  bundler (>= 1.15.0)
183
- railties (= 7.1.1)
197
+ railties (= 7.1.2)
184
198
  rails-dom-testing (2.2.0)
185
199
  activesupport (>= 5.0.0)
186
200
  minitest
@@ -188,20 +202,21 @@ GEM
188
202
  rails-html-sanitizer (1.6.0)
189
203
  loofah (~> 2.21)
190
204
  nokogiri (~> 1.14)
191
- railties (7.1.1)
192
- actionpack (= 7.1.1)
193
- activesupport (= 7.1.1)
205
+ railties (7.1.2)
206
+ actionpack (= 7.1.2)
207
+ activesupport (= 7.1.2)
194
208
  irb
195
209
  rackup (>= 1.0.0)
196
210
  rake (>= 12.2)
197
211
  thor (~> 1.0, >= 1.2.2)
198
212
  zeitwerk (~> 2.6)
199
213
  rainbow (3.1.1)
200
- rake (13.0.6)
201
- rdoc (6.5.0)
214
+ rake (13.1.0)
215
+ rb_sys (0.9.83)
216
+ rdoc (6.6.0)
202
217
  psych (>= 4.0.0)
203
218
  regexp_parser (2.8.2)
204
- reline (0.3.9)
219
+ reline (0.4.0)
205
220
  io-console (~> 0.5)
206
221
  rexml (3.2.6)
207
222
  rspec (3.12.0)
@@ -217,29 +232,28 @@ GEM
217
232
  diff-lcs (>= 1.2.0, < 2.0)
218
233
  rspec-support (~> 3.12.0)
219
234
  rspec-support (3.12.1)
220
- rubocop (1.56.4)
221
- base64 (~> 0.1.1)
235
+ rubocop (1.57.2)
222
236
  json (~> 2.3)
223
237
  language_server-protocol (>= 3.17.0)
224
238
  parallel (~> 1.10)
225
- parser (>= 3.2.2.3)
239
+ parser (>= 3.2.2.4)
226
240
  rainbow (>= 2.2.2, < 4.0)
227
241
  regexp_parser (>= 1.8, < 3.0)
228
242
  rexml (>= 3.2.5, < 4.0)
229
243
  rubocop-ast (>= 1.28.1, < 2.0)
230
244
  ruby-progressbar (~> 1.7)
231
245
  unicode-display_width (>= 2.4.0, < 3.0)
232
- rubocop-ast (1.29.0)
246
+ rubocop-ast (1.30.0)
233
247
  parser (>= 3.2.1.0)
234
248
  rubocop-performance (1.19.1)
235
249
  rubocop (>= 1.7.0, < 2.0)
236
250
  rubocop-ast (>= 0.4.0)
237
251
  ruby-progressbar (1.13.0)
238
252
  ruby2_keywords (0.0.5)
239
- standard (1.31.2)
253
+ standard (1.32.0)
240
254
  language_server-protocol (~> 3.17.0.2)
241
255
  lint_roller (~> 1.0)
242
- rubocop (~> 1.56.4)
256
+ rubocop (~> 1.57.2)
243
257
  standard-custom (~> 1.0.0)
244
258
  standard-performance (~> 1.2)
245
259
  standard-custom (1.0.2)
@@ -250,10 +264,14 @@ GEM
250
264
  rubocop-performance (~> 1.19.1)
251
265
  standardrb (1.0.1)
252
266
  standard
253
- stringio (3.0.8)
267
+ stringio (3.0.9)
254
268
  thor (1.3.0)
269
+ tiktoken_ruby (0.0.6)
270
+ rb_sys (~> 0.9.68)
255
271
  tiktoken_ruby (0.0.6-x86_64-darwin)
256
- timeout (0.4.0)
272
+ tiktoken_ruby (0.0.6-x86_64-linux)
273
+ timeout (0.4.1)
274
+ to_bool (2.0.0)
257
275
  tzinfo (2.0.6)
258
276
  concurrent-ruby (~> 1.0)
259
277
  unicode (0.4.4.4)
@@ -263,18 +281,24 @@ GEM
263
281
  websocket-extensions (>= 0.1.0)
264
282
  websocket-extensions (0.1.5)
265
283
  yard (0.9.34)
266
- zeitwerk (2.6.11)
284
+ zeitwerk (2.6.12)
267
285
 
268
286
  PLATFORMS
287
+ ruby
269
288
  x86_64-darwin-19
289
+ x86_64-darwin-22
290
+ x86_64-linux
270
291
 
271
292
  DEPENDENCIES
293
+ brakeman
294
+ bundler-audit
272
295
  langchainrb
273
296
  langchainrb_rails!
274
297
  pry-byebug (~> 3.10.0)
275
298
  rails (> 6.0.0)
276
- rake (~> 13.0)
299
+ rake (~> 13.1)
277
300
  rspec (~> 3.0)
301
+ rubocop
278
302
  standardrb
279
303
  yard (~> 0.9.34)
280
304
 
data/README.md CHANGED
@@ -1,33 +1,117 @@
1
1
  💎🔗 Langchain.rb for Rails
2
2
  ---
3
- Building applications with LLMs through composability
3
+ The fastest way to sprinkle AI on top of your Rails app. Add OpenAI-powered question-and-answering in minutes.
4
4
 
5
- 👨‍💻👩‍💻 CURRENTLY SEEKING PEOPLE TO FORM THE CORE GROUP OF MAINTAINERS WITH
5
+ Available for paid consulting engagements! [Email me](mailto:andrei@sourcelabs.io).
6
6
 
7
7
  ![Tests status](https://github.com/andreibondarev/langchainrb_rails/actions/workflows/ci.yml/badge.svg?branch=main)
8
8
  [![Gem Version](https://badge.fury.io/rb/langchainrb_rails.svg)](https://badge.fury.io/rb/langchainrb_rails)
9
9
  [![Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/gems/langchainrb_rails)
10
10
  [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb_rails/blob/main/LICENSE.txt)
11
11
  [![](https://dcbadge.vercel.app/api/server/WDARp7J2n8?compact=true&style=flat)](https://discord.gg/WDARp7J2n8)
12
+ [![X](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40rushing_andrei)](https://twitter.com/rushing_andrei)
12
13
 
14
+ ## Dependencies
13
15
 
14
- Langchain.rb is a library that's an abstraction layer on top many emergent AI, ML and other DS tools. The goal is to abstract complexity and difficult concepts to make building AI/ML-supercharged applications approachable for traditional software engineers.
16
+ * Ruby 3.0+
17
+ * Postgres 11+
18
+
19
+ ## Table of Contents
20
+
21
+ - [Installation](#installation)
22
+ - [Generators](#rails-generators)
15
23
 
16
24
  ## Installation
17
25
 
18
26
  Install the gem and add to the application's Gemfile by executing:
19
-
20
- bundle add langchainrb_rails
27
+ ```bash
28
+ bundle add langchainrb_rails
29
+ ```
21
30
 
22
31
  If bundler is not being used to manage dependencies, install the gem by executing:
32
+ ```bash
33
+ gem install langchainrb_rails
34
+ ```
35
+
36
+ ## Configuration w/ [Pgvector](https://github.com/pgvector/pgvector) (requires Postgres 11+)
37
+
38
+ 1. Run the Rails generator to add vectorsearch to your ActiveRecord model
39
+ ```bash
40
+ rails generate langchainrb_rails:pgvector --model=Product --llm=openai
41
+ ```
42
+
43
+ This adds required dependencies to your Gemfile, creates the `config/initializers/langchainrb_rails.rb` initializer file, database migrations, and adds the necessary code to the ActiveRecord model to enable vectorsearch.
44
+
45
+ 2. Bundle and migrate
46
+ ```bash
47
+ bundle install && rails db:migrate
48
+ ```
49
+
50
+ 3. Set the env var `OPENAI_API_KEY` to your OpenAI API key: https://platform.openai.com/account/api-keys
51
+ ```ruby
52
+ ENV["OPENAI_API_KEY"]=
53
+ ```
54
+
55
+ 4. Generate embeddings for your model
56
+ ```ruby
57
+ Product.embed!
58
+ ```
59
+
60
+ This can take a while depending on the number of database records.
61
+
62
+ ## Usage
63
+
64
+ ### Question and Answering
65
+ ```ruby
66
+ Product.ask("list the brands of shoes that are in stock")
67
+ ```
68
+
69
+ Returns a `String` with a natural language answer. The answer is assembled using the following steps:
70
+
71
+ 1. An embedding is generated for the passed in `question` using the selected LLM.
72
+ 2. We calculate a [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) to find records that most closely match your question's embedding.
73
+ 3. A prompt is created using the question and the above records (their `#as_vector` representation) are added as context.
74
+ 4. This prompt is passed to the LLM to generate an answer
75
+
76
+ ### Similarity Search
77
+ ```ruby
78
+ Product.similarity_search("t-shirt")
79
+ ```
80
+
81
+ Returns an ActiveRecord relation of the records that most closely match the `query` using vector search.
82
+
83
+ ## Customization
84
+
85
+ ### Changing the vector representation of a record
86
+
87
+ By default, embeddings are generated by calling the following method on your model instance:
88
+ ```ruby
89
+ to_json(except: :embedding)
90
+ ```
91
+
92
+ You can override this by defining an `#as_vector` method in your model:
93
+ ```ruby
94
+ def as_vector
95
+ { name: name, description: description, category: category.name, ... }.to_json
96
+ end
97
+ ```
23
98
 
24
- gem install langchainrb_rails
99
+ Re-generate embeddings after modifying this method:
100
+
101
+ ```ruby
102
+ Product.embed!
103
+ ```
25
104
 
26
105
  ## Rails Generators
27
106
 
28
- ### Pinecone Generator - adds vectorsearch to your ActiveRecord model
107
+ ### Pgvector Generator
29
108
 
109
+ ```bash
110
+ rails generate langchainrb_rails:pgvector --model=Product --llm=openai
30
111
  ```
112
+
113
+ ### Pinecone Generator - adds vectorsearch to your ActiveRecord model
114
+ ```bash
31
115
  rails generate langchainrb_rails:pinecone --model=Product --llm=openai
32
116
  ```
33
117
 
@@ -39,3 +123,4 @@ Pinecone Generator does the following:
39
123
  1. Creates the `config/initializers/langchainrb_rails.rb` initializer file
40
124
  2. Adds necessary code to the ActiveRecord model to enable vectorsearch
41
125
  3. Adds `pinecone` gem to the Gemfile
126
+
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Overriding Langchain.rb's Pgvector implementation to use ActiveRecord.
4
+ # Original implementation: https://github.com/andreibondarev/langchainrb/blob/main/lib/langchain/vectorsearch/pgvector.rb
5
+
6
+ module Langchain::Vectorsearch
7
+ class Pgvector < Base
8
+ #
9
+ # The PostgreSQL vector search adapter
10
+ #
11
+ # Gem requirements:
12
+ # gem "pgvector", "~> 0.2"
13
+ #
14
+ # Usage:
15
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(llm:)
16
+ #
17
+
18
+ # The operators supported by the PostgreSQL vector search adapter
19
+ OPERATORS = [
20
+ "cosine",
21
+ "euclidean",
22
+ "inner_product"
23
+ ]
24
+ DEFAULT_OPERATOR = "cosine"
25
+
26
+ attr_reader :operator, :llm
27
+ attr_accessor :model
28
+
29
+ # @param url [String] The URL of the PostgreSQL database
30
+ # @param index_name [String] The name of the table to use for the index
31
+ # @param llm [Object] The LLM client to use
32
+ # @param namespace [String] The namespace to use for the index when inserting/querying
33
+ def initialize(llm:)
34
+ # If the line below is called, the generator fails as calls to
35
+ # LangchainrbRails.config.vectorsearch will generate an exception.
36
+ # These happen in the template files.
37
+ # depends_on "neighbor"
38
+
39
+ @operator = DEFAULT_OPERATOR
40
+
41
+ super(llm: llm)
42
+ end
43
+
44
+ # Add a list of texts to the index
45
+ # @param texts [Array<String>] The texts to add to the index
46
+ # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
47
+ # @return [Array<Integer>] The ids of the added texts.
48
+ def add_texts(texts:, ids:)
49
+ embeddings = texts.map do |text|
50
+ llm.embed(text: text).embedding
51
+ end
52
+
53
+ # I believe the records returned by #find must be in the
54
+ # same order as the embeddings. I _think_ this works for uuid ids but didn't test
55
+ # deeply.
56
+ # TODO - implement find_each so we don't load all records into memory
57
+ model.find(ids).each.with_index do |record, i|
58
+ record.update_column(:embedding, embeddings[i])
59
+ end
60
+ end
61
+
62
+ def update_texts(texts:, ids:)
63
+ add_texts(texts: texts, ids: ids)
64
+ end
65
+
66
+ # Invoke a rake task that will create an initializer (`config/initializers/langchain.rb`) file
67
+ # and db/migrations/* files
68
+ def create_default_schema
69
+ Rake::Task["pgvector"].invoke
70
+ end
71
+
72
+ # Destroy default schema
73
+ def destroy_default_schema
74
+ # Tell the user to rollback the migration
75
+ end
76
+
77
+ # Search for similar texts in the index
78
+ # @param query [String] The text to search for
79
+ # @param k [Integer] The number of top results to return
80
+ # @return [Array<Hash>] The results of the search
81
+ # TODO - drop the named "query:" param so it is the same interface as #ask?
82
+ def similarity_search(query:, k: 4)
83
+ embedding = llm.embed(text: query).embedding
84
+
85
+ similarity_search_by_vector(
86
+ embedding: embedding,
87
+ k: k
88
+ )
89
+ end
90
+
91
+ # Search for similar texts in the index by the passed in vector.
92
+ # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
93
+ # @param embedding [Array<Float>] The vector to search for
94
+ # @param k [Integer] The number of top results to return
95
+ # @return [Array<Hash>] The results of the search
96
+ # TODO - drop the named "embedding:" param so it is the same interface as #ask?
97
+ def similarity_search_by_vector(embedding:, k: 4)
98
+ model
99
+ .nearest_neighbors(:embedding, embedding, distance: operator)
100
+ .limit(k)
101
+ end
102
+
103
+ # Ask a question and return the answer
104
+ # @param question [String] The question to ask
105
+ # @param k [Integer] The number of results to have in context
106
+ # @yield [String] Stream responses back one String at a time
107
+ # @return [String] The answer to the question
108
+ def ask(question, k: 4, &block)
109
+ # Noisy as the embedding column has a lot of data
110
+ ActiveRecord::Base.logger.silence do
111
+ search_results = similarity_search(query: question, k: k)
112
+
113
+ context = search_results.map do |result|
114
+ result.as_vector
115
+ end
116
+ context = context.join("\n---\n")
117
+
118
+ prompt = generate_rag_prompt(question: question, context: context)
119
+
120
+ llm.chat(prompt: prompt, &block)
121
+ end
122
+ end
123
+ end
124
+ end
@@ -61,7 +61,9 @@ module LangchainrbRails
61
61
  #
62
62
  # @return [String] the text representation of the model
63
63
  def as_vector
64
- to_json
64
+ # Don't vectorize the embedding ... this would happen if it already exists
65
+ # for a record and we update.
66
+ to_json(except: :embedding)
65
67
  end
66
68
 
67
69
  module ClassMethods
@@ -70,6 +72,20 @@ module LangchainrbRails
70
72
  # @param provider [Object] The `Langchain::Vectorsearch::*` instance
71
73
  def vectorsearch
72
74
  class_variable_set(:@@provider, LangchainrbRails.config.vectorsearch)
75
+
76
+ # Pgvector-specific configuration
77
+ if LangchainrbRails.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
78
+ has_neighbors(:embedding)
79
+ LangchainrbRails.config.vectorsearch.model = self
80
+ end
81
+ end
82
+
83
+ # Iterates over records and generates embeddings.
84
+ # Will re-generate for ALL records (not just records with embeddings).
85
+ def embed!
86
+ find_each do |record|
87
+ record.upsert_to_vectorsearch
88
+ end
73
89
  end
74
90
 
75
91
  # Search for similar texts
@@ -84,7 +100,7 @@ module LangchainrbRails
84
100
  )
85
101
 
86
102
  # We use "__id" when Weaviate is the provider
87
- ids = records.map { |record| record.dig("id") || record.dig("__id") }
103
+ ids = records.map { |record| record.try("id") || record.dig("__id") }
88
104
  where(id: ids)
89
105
  end
90
106
 
@@ -94,12 +110,12 @@ module LangchainrbRails
94
110
  # @param k [Integer] The number of results to have in context
95
111
  # @yield [String] Stream responses back one String at a time
96
112
  # @return [String] The answer to the question
97
- def ask(question:, k: 4, &block)
113
+ def ask(question, k: 4, &block)
98
114
  class_variable_get(:@@provider).ask(
99
- question: question,
115
+ question,
100
116
  k: k,
101
117
  &block
102
- )
118
+ ).completion
103
119
  end
104
120
  end
105
121
  end
@@ -0,0 +1,24 @@
1
+ require "rails/generators"
2
+ require "rails/generators/active_record"
3
+
4
+ module LangchainrbRails
5
+ module Generators
6
+ class BaseGenerator < Rails::Generators::Base
7
+ include ::ActiveRecord::Generators::Migration
8
+
9
+ class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
10
+ class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
11
+
12
+ # Available LLM providers to be passed in as --llm option
13
+ LLMS = {
14
+ "cohere" => "Langchain::LLM::Cohere",
15
+ "google_palm" => "Langchain::LLM::GooglePalm",
16
+ "hugging_face" => "Langchain::LLM::HuggingFace",
17
+ "llama_cpp" => "Langchain::LLM::LlamaCpp",
18
+ "ollama" => "Langchain::LLM::Ollama",
19
+ "openai" => "Langchain::LLM::OpenAI",
20
+ "replicate" => "Langchain::LLM::Replicate"
21
+ }
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ module Generators
5
+ #
6
+ # ChromaGenerator does the following:
7
+ # 1. Creates the `langchainrb_rails.rb` initializer file
8
+ # 2. Adds necessary code to the ActiveRecord model to enable vectorsearch
9
+ # 3. Adds `chroma-db` gem to the Gemfile
10
+ #
11
+ # Usage:
12
+ # rails generate langchainrb_rails:chroma --model=Product --llm=openai
13
+ #
14
+ class ChromaGenerator < LangchainrbRails::Generators::BaseGenerator
15
+ desc "This generator adds Chroma vectorsearch integration to your ActiveRecord model"
16
+ source_root File.join(__dir__, "templates")
17
+
18
+ # Creates the `langchainrb_rails.rb` initializer file
19
+ def create_initializer_file
20
+ template "chroma_initializer.rb", "config/initializers/langchainrb_rails.rb"
21
+ end
22
+
23
+ # Adds `vectorsearch` class method to the model and `after_save` callback that calls `upsert_to_vectorsearch()`
24
+ def add_to_model
25
+ inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
26
+ " vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
27
+ end
28
+ end
29
+
30
+ # Adds `chroma-db` gem to the Gemfile
31
+ # TODO: Can we automatically run `bundle install`?
32
+ def add_to_gemfile
33
+ gem "chroma-db", version: "~> 0.6.0"
34
+ end
35
+
36
+ private
37
+
38
+ # @return [String] Name of the model
39
+ def model_name
40
+ options["model"]
41
+ end
42
+
43
+ # @return [String] LLM provider to use
44
+ def llm
45
+ options["llm"]
46
+ end
47
+
48
+ # @return [Langchain::LLM::*] LLM class
49
+ def llm_class
50
+ Langchain::LLM.const_get(LLMS[llm])
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ module Generators
5
+ #
6
+ # Usage:
7
+ # rails generate langchainrb_rails:pgvector --model=Product --llm=openai
8
+ #
9
+ class PgvectorGenerator < LangchainrbRails::Generators::BaseGenerator
10
+ desc "This generator adds Pgvector vectorsearch integration to your ActiveRecord model"
11
+ source_root File.join(__dir__, "templates")
12
+
13
+ def copy_migration
14
+ migration_template "enable_vector_extension_template.rb", "db/migrate/enable_vector_extension.rb", migration_version: migration_version
15
+ migration_template "add_vector_column_template.rb", "db/migrate/add_vector_column_to_#{table_name}.rb", migration_version: migration_version
16
+ end
17
+
18
+ def create_initializer_file
19
+ template "pgvector_initializer.rb", "config/initializers/langchainrb_rails.rb"
20
+ end
21
+
22
+ def migration_version
23
+ "[#{::ActiveRecord::VERSION::MAJOR}.#{::ActiveRecord::VERSION::MINOR}]"
24
+ end
25
+
26
+ def add_to_model
27
+ inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
28
+ " vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
29
+ end
30
+ end
31
+
32
+ def add_to_gemfile
33
+ # Dependency for Langchain PgVector
34
+ gem "neighbor"
35
+ gem "ruby-openai"
36
+ end
37
+
38
+ def post_install_message
39
+ say "Please do the following to start Q&A with your #{model_name} records:", :green
40
+ say "1. Run `bundle install` to install the new gems."
41
+ say "2. Set `OPENAI_API_KEY` environment variable to your OpenAI API key."
42
+ say "3. Run `rails db:migrate` to apply the database migrations to enable pgvector and add the embedding column."
43
+ say "4. In Rails console, run `#{model_name}.embed!` to set the embeddings for all records."
44
+ say "5. Ask a question in the Rails console, ie: `#{model_name}.ask('[YOUR QUESTION]')`"
45
+ end
46
+
47
+ private
48
+
49
+ # @return [String] Name of the model
50
+ def model_name
51
+ options["model"]
52
+ end
53
+
54
+ # @return [String] Table name of the model
55
+ def table_name
56
+ model_name.downcase.pluralize
57
+ end
58
+
59
+ # @return [String] LLM provider to use
60
+ def llm
61
+ options["llm"]
62
+ end
63
+
64
+ # @return [Langchain::LLM::*] LLM class
65
+ def llm_class
66
+ Langchain::LLM.const_get(LLMS[llm])
67
+ end
68
+
69
+ # @return [Integer] Dimension of the vector to be used
70
+ def vector_dimension
71
+ llm_class.default_dimension
72
+ end
73
+ end
74
+ end
75
+ end
@@ -1,4 +1,4 @@
1
- require "rails/generators/active_record"
1
+ # frozen_string_literal: true
2
2
 
3
3
  module LangchainrbRails
4
4
  module Generators
@@ -11,26 +11,10 @@ module LangchainrbRails
11
11
  # Usage:
12
12
  # rails generate langchainrb_rails:pinecone --model=Product --llm=openai
13
13
  #
14
- class PineconeGenerator < Rails::Generators::Base
14
+ class PineconeGenerator < LangchainrbRails::Generators::BaseGenerator
15
15
  desc "This generator adds Pinecone vectorsearch integration to your ActiveRecord model"
16
-
17
- include ::ActiveRecord::Generators::Migration
18
16
  source_root File.join(__dir__, "templates")
19
17
 
20
- class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
21
- class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
22
-
23
- # Available LLM providers to be passed in as --llm option
24
- LLMS = {
25
- "cohere" => "Langchain::LLM::Cohere",
26
- "google_palm" => "Langchain::LLM::GooglePalm",
27
- "hugging_face" => "Langchain::LLM::HuggingFace",
28
- "llama_cpp" => "Langchain::LLM::LlamaCpp",
29
- "ollama" => "Langchain::LLM::Ollama",
30
- "openai" => "Langchain::LLM::OpenAI",
31
- "replicate" => "Langchain::LLM::Replicate"
32
- }
33
-
34
18
  # Creates the `langchainrb_rails.rb` initializer file
35
19
  def create_initializer_file
36
20
  template "pinecone_initializer.rb", "config/initializers/langchainrb_rails.rb"
@@ -46,7 +30,7 @@ module LangchainrbRails
46
30
  # Adds `pinecone` gem to the Gemfile
47
31
  # TODO: Can we automatically run `bundle install`?
48
32
  def add_to_gemfile
49
- gem "pinecone"
33
+ gem "pinecone", version: "~> 0.1.6"
50
34
  end
51
35
 
52
36
  private
@@ -0,0 +1,10 @@
1
+ class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ add_column :<%= table_name %>, :embedding, :vector,
4
+ limit: LangchainrbRails
5
+ .config
6
+ .vectorsearch
7
+ .llm
8
+ .default_dimension
9
+ end
10
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ LangchainrbRails.configure do |config|
4
+ config.vectorsearch = Langchain::Vectorsearch::Chroma.new(
5
+ llm: <%= llm_class %>.new(api_key: ENV["<%= llm.upcase %>_API_KEY"]),
6
+ url: ENV["CHROMA_URL"],
7
+ index_name: ""
8
+ )
9
+ end
@@ -0,0 +1,5 @@
1
+ class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ enable_extension "vector"
4
+ end
5
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ LangchainrbRails.configure do |config|
4
+ config.vectorsearch = Langchain::Vectorsearch::Pgvector.new(
5
+ llm: <%= llm_class %>.new(api_key: ENV["OPENAI_API_KEY"])
6
+ )
7
+ end
@@ -9,7 +9,9 @@ module LangchainrbRails
9
9
  end
10
10
 
11
11
  generators do
12
+ require_relative "generators/langchainrb_rails/chroma_generator"
12
13
  require_relative "generators/langchainrb_rails/pinecone_generator"
14
+ require_relative "generators/langchainrb_rails/pgvector_generator"
13
15
  end
14
16
  end
15
17
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LangchainrbRails
4
- VERSION = "0.1.2"
4
+ VERSION = "0.1.4"
5
5
  end
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "forwardable"
3
4
  require "langchain"
5
+ require "rails"
4
6
  require_relative "langchainrb_rails/version"
5
7
  require "langchainrb_rails/railtie"
6
8
  require "langchainrb_rails/config"
9
+ require_relative "langchainrb_overrides/vectorsearch/pgvector"
7
10
 
8
11
  module LangchainrbRails
9
12
  class Error < StandardError; end
@@ -13,6 +16,8 @@ module LangchainrbRails
13
16
  end
14
17
 
15
18
  module Generators
19
+ autoload :BaseGenerator, "langchainrb_rails/generators/langchainrb_rails/base_generator"
20
+ autoload :ChromaGenerator, "langchainrb_rails/generators/langchainrb_rails/chroma_generator"
16
21
  autoload :PgvectorGenerator, "langchainrb_rails/generators/langchainrb_rails/pgvector_generator"
17
22
  end
18
23
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb_rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-27 00:00:00.000000000 Z
11
+ date: 2023-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: langchainrb
@@ -74,16 +74,26 @@ extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
76
  - ".rspec"
77
+ - ".rubocop.yml"
78
+ - ".tool-versions"
77
79
  - CHANGELOG.md
78
80
  - Gemfile
79
81
  - Gemfile.lock
80
82
  - LICENSE.txt
81
83
  - README.md
82
84
  - Rakefile
85
+ - lib/langchainrb_overrides/vectorsearch/pgvector.rb
83
86
  - lib/langchainrb_rails.rb
84
87
  - lib/langchainrb_rails/active_record/hooks.rb
85
88
  - lib/langchainrb_rails/config.rb
89
+ - lib/langchainrb_rails/generators/langchainrb_rails/base_generator.rb
90
+ - lib/langchainrb_rails/generators/langchainrb_rails/chroma_generator.rb
91
+ - lib/langchainrb_rails/generators/langchainrb_rails/pgvector_generator.rb
86
92
  - lib/langchainrb_rails/generators/langchainrb_rails/pinecone_generator.rb
93
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/add_vector_column_template.rb.tt
94
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/chroma_initializer.rb.tt
95
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/enable_vector_extension_template.rb.tt
96
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/pgvector_initializer.rb.tt
87
97
  - lib/langchainrb_rails/generators/langchainrb_rails/templates/pinecone_initializer.rb.tt
88
98
  - lib/langchainrb_rails/railtie.rb
89
99
  - lib/langchainrb_rails/version.rb
@@ -111,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
121
  - !ruby/object:Gem::Version
112
122
  version: '0'
113
123
  requirements: []
114
- rubygems_version: 3.2.3
124
+ rubygems_version: 3.3.7
115
125
  signing_key:
116
126
  specification_version: 4
117
127
  summary: Rails wrapper for langchainrb gem