langchainrb_rails 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b792e1a7888a17c54fad522042ee7f6935df92e684119d4177f9627abb999dfd
4
- data.tar.gz: 77444b9ed6d52443c890f045c121ba1a58f78b4d532954fc858e8c01999c90a7
3
+ metadata.gz: 37760a671cfe732f12ddb79769997ae325240b5c1b0bb96ee3e13786d605cc6a
4
+ data.tar.gz: 89b9aa5c04ffb28823836cfef7428990e27e1d38dc231240fb4e7004ab3db614
5
5
  SHA512:
6
- metadata.gz: 5d26a5c5d4a10e6ea4794d809ceb15feb6262deb4754bfc12362e1e0e536b8643f46a9f98d464ae7372d6ef1b3d45ed34cf0d5e4c88977b135b4be1d982d5c85
7
- data.tar.gz: '084a74842710d2b24db00e01344cbde645249671f4f975a9cc8f37763af53597e4cd3d2b5cd0f036320e181cbc20f8be8dca525279550529f9d58d8234fd601b'
6
+ metadata.gz: 78c3d36d67ace05932ef36bc9e9136837bb350e5698aad55836f3d6dea0602228f461f2ae1aafc10ec050151d7f6b71e5a7cca21357b1a5f6bf2410f38596a69
7
+ data.tar.gz: fa5a1bdd97e82d71f9b4b35d0c1f15245c87966cdef60117364174f8306a7df72848dda3b52e2635844a5ae05d25a9e1c6166bf7a25b9ccd52565ae787c8fc86
data/.rubocop.yml ADDED
@@ -0,0 +1,28 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.7
3
+ NewCops: enable
4
+ Exclude:
5
+ - 'bin/**/*'
6
+ - 'db/schema.rb'
7
+ - 'vendor/**/*'
8
+ - 'spec/fixtures/**/*'
9
+
10
+ Metrics/LineLength:
11
+ Max: 140
12
+
13
+ Metrics/BlockLength:
14
+ Exclude:
15
+ - 'spec/**/*.rb'
16
+
17
+ Style/Documentation:
18
+ Enabled: false
19
+
20
+ Style/FrozenStringLiteralComment:
21
+ Enabled: true
22
+
23
+ Style/StringLiterals:
24
+ Enabled: false
25
+
26
+ Lint/SuppressedException:
27
+ Exclude:
28
+ - 'spec/**/*.rb'
data/.tool-versions ADDED
@@ -0,0 +1 @@
1
+ ruby 3.1.2
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  ## [Unreleased]
2
2
 
3
- ## [0.1.0] - 2023-10-22
3
+ ## [0.1.3] - 2023-11-01
4
+ - Pgvector vectorsearch generator
5
+
6
+ ## [0.1.2] - 2023-10-27
7
+ - Pinecone vectorsearch generator
4
8
 
9
+ ## [0.1.1] - 2023-10-23
10
+
11
+ ## [0.1.0] - 2023-10-22
5
12
  - Initial release
data/Gemfile CHANGED
@@ -9,6 +9,10 @@ gem "rake", "~> 13.0"
9
9
 
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
+ gem "bundler-audit", require: false
13
+ gem "brakeman", require: false
14
+ gem "rubocop", require: false
15
+
12
16
  gem "standardrb"
13
17
 
14
18
  gem "langchainrb"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb_rails (0.1.2)
4
+ langchainrb_rails (0.1.3)
5
5
  langchainrb (~> 0.7.0)
6
6
 
7
7
  GEM
@@ -86,7 +86,11 @@ GEM
86
86
  baran (0.1.9)
87
87
  base64 (0.1.1)
88
88
  bigdecimal (3.1.4)
89
+ brakeman (6.0.1)
89
90
  builder (3.2.4)
91
+ bundler-audit (0.9.1)
92
+ bundler (>= 1.2.0, < 3)
93
+ thor (~> 1.0)
90
94
  byebug (11.1.3)
91
95
  coderay (1.1.3)
92
96
  colorize (0.8.1)
@@ -109,12 +113,14 @@ GEM
109
113
  json (2.6.3)
110
114
  json-schema (4.0.0)
111
115
  addressable (>= 2.8)
112
- langchainrb (0.7.0)
116
+ langchainrb (0.7.1)
113
117
  baran (~> 0.1.9)
114
118
  colorize (~> 0.8.1)
115
119
  json-schema (~> 4.0.0)
120
+ matrix
116
121
  pragmatic_segmenter (~> 0.3.0)
117
122
  tiktoken_ruby (~> 0.0.5)
123
+ to_bool (~> 2.0.0)
118
124
  zeitwerk (~> 2.5)
119
125
  language_server-protocol (3.17.0.3)
120
126
  lint_roller (1.1.0)
@@ -127,8 +133,10 @@ GEM
127
133
  net-pop
128
134
  net-smtp
129
135
  marcel (1.0.2)
136
+ matrix (0.4.2)
130
137
  method_source (1.0.0)
131
138
  mini_mime (1.1.5)
139
+ mini_portile2 (2.8.5)
132
140
  minitest (5.20.0)
133
141
  mutex_m (0.1.2)
134
142
  net-imap (0.4.2)
@@ -141,8 +149,13 @@ GEM
141
149
  net-smtp (0.4.0)
142
150
  net-protocol
143
151
  nio4r (2.5.9)
152
+ nokogiri (1.15.4)
153
+ mini_portile2 (~> 2.8.2)
154
+ racc (~> 1.4)
144
155
  nokogiri (1.15.4-x86_64-darwin)
145
156
  racc (~> 1.4)
157
+ nokogiri (1.15.4-x86_64-linux)
158
+ racc (~> 1.4)
146
159
  parallel (1.23.0)
147
160
  parser (3.2.2.4)
148
161
  ast (~> 2.4.1)
@@ -198,6 +211,7 @@ GEM
198
211
  zeitwerk (~> 2.6)
199
212
  rainbow (3.1.1)
200
213
  rake (13.0.6)
214
+ rb_sys (0.9.82)
201
215
  rdoc (6.5.0)
202
216
  psych (>= 4.0.0)
203
217
  regexp_parser (2.8.2)
@@ -252,8 +266,12 @@ GEM
252
266
  standard
253
267
  stringio (3.0.8)
254
268
  thor (1.3.0)
269
+ tiktoken_ruby (0.0.6)
270
+ rb_sys (~> 0.9.68)
255
271
  tiktoken_ruby (0.0.6-x86_64-darwin)
272
+ tiktoken_ruby (0.0.6-x86_64-linux)
256
273
  timeout (0.4.0)
274
+ to_bool (2.0.0)
257
275
  tzinfo (2.0.6)
258
276
  concurrent-ruby (~> 1.0)
259
277
  unicode (0.4.4.4)
@@ -263,18 +281,24 @@ GEM
263
281
  websocket-extensions (>= 0.1.0)
264
282
  websocket-extensions (0.1.5)
265
283
  yard (0.9.34)
266
- zeitwerk (2.6.11)
284
+ zeitwerk (2.6.12)
267
285
 
268
286
  PLATFORMS
287
+ ruby
269
288
  x86_64-darwin-19
289
+ x86_64-darwin-22
290
+ x86_64-linux
270
291
 
271
292
  DEPENDENCIES
293
+ brakeman
294
+ bundler-audit
272
295
  langchainrb
273
296
  langchainrb_rails!
274
297
  pry-byebug (~> 3.10.0)
275
298
  rails (> 6.0.0)
276
299
  rake (~> 13.0)
277
300
  rspec (~> 3.0)
301
+ rubocop
278
302
  standardrb
279
303
  yard (~> 0.9.34)
280
304
 
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  💎🔗 Langchain.rb for Rails
2
2
  ---
3
- Building applications with LLMs through composability
4
-
5
- 👨‍💻👩‍💻 CURRENTLY SEEKING PEOPLE TO FORM THE CORE GROUP OF MAINTAINERS WITH
3
+ The fastest way to sprinkle AI on top of your Rails app. Add OpenAI-powered question-and-answering in minutes.
6
4
 
7
5
  ![Tests status](https://github.com/andreibondarev/langchainrb_rails/actions/workflows/ci.yml/badge.svg?branch=main)
8
6
  [![Gem Version](https://badge.fury.io/rb/langchainrb_rails.svg)](https://badge.fury.io/rb/langchainrb_rails)
@@ -10,24 +8,107 @@
10
8
  [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb_rails/blob/main/LICENSE.txt)
11
9
  [![](https://dcbadge.vercel.app/api/server/WDARp7J2n8?compact=true&style=flat)](https://discord.gg/WDARp7J2n8)
12
10
 
11
+ ## Dependencies
12
+
13
+ * Ruby 3.0+
14
+ * Postgres 11+
15
+
16
+ ## Table of Contents
13
17
 
14
- Langchain.rb is a library that's an abstraction layer on top many emergent AI, ML and other DS tools. The goal is to abstract complexity and difficult concepts to make building AI/ML-supercharged applications approachable for traditional software engineers.
18
+ - [Installation](#installation)
19
+ - [Generators](#rails-generators)
15
20
 
16
21
  ## Installation
17
22
 
18
23
  Install the gem and add to the application's Gemfile by executing:
19
-
20
- bundle add langchainrb_rails
24
+ ```bash
25
+ bundle add langchainrb_rails
26
+ ```
21
27
 
22
28
  If bundler is not being used to manage dependencies, install the gem by executing:
29
+ ```bash
30
+ gem install langchainrb_rails
31
+ ```
32
+
33
+ ## Configuration w/ [Pgvector](https://github.com/pgvector/pgvector) (requires Postgres 11+)
34
+
35
+ 1. Run the Rails generator to add vectorsearch to your ActiveRecord model
36
+ ```bash
37
+ rails generate langchainrb_rails:pgvector --model=Product --llm=openai
38
+ ```
39
+
40
+ This adds required dependencies to your Gemfile, creates the `config/initializers/langchainrb_rails.rb` initializer file, database migrations, and adds the necessary code to the ActiveRecord model to enable vectorsearch.
23
41
 
24
- gem install langchainrb_rails
42
+ 2. Bundle and migrate
43
+ ```bash
44
+ bundle install && rails db:migrate
45
+ ```
46
+
47
+ 3. Set the env var `OPENAI_API_KEY` to your OpenAI API key: https://platform.openai.com/account/api-keys
48
+ ```ruby
49
+ ENV["OPENAI_API_KEY"]=
50
+ ```
51
+
52
+ 4. Generate embeddings for your model
53
+ ```ruby
54
+ Product.embed!
55
+ ```
56
+
57
+ This can take a while depending on the number of database records.
58
+
59
+ ## Usage
60
+
61
+ ### Question and Answering
62
+ ```ruby
63
+ Product.ask("list the brands of shoes that are in stock")
64
+ ```
65
+
66
+ Returns a `String` with a natural language answer. The answer is assembled using the following steps:
67
+
68
+ 1. An embedding is generated for the passed in `question` using the selected LLM.
69
+ 2. We calculate a [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) to find records that most closely match your question's embedding.
70
+ 3. A prompt is created using the question, and the above records (their `#as_vector` representation) are added as context.
71
+ 4. This prompt is passed to the LLM to generate an answer
72
+
73
+ ### Similarity Search
74
+ ```ruby
75
+ Product.similarity_search("t-shirt")
76
+ ```
77
+
78
+ Returns an ActiveRecord relation of the records that most closely match the `query` using vector search.
79
+
80
+ ## Customization
81
+
82
+ ### Changing the vector representation of a record
83
+
84
+ By default, embeddings are generated by calling the following method on your model instance:
85
+ ```ruby
86
+ to_json(except: :embedding)
87
+ ```
88
+
89
+ You can override this by defining an `#as_vector` method in your model:
90
+ ```ruby
91
+ def as_vector
92
+ { name: name, description: description, category: category.name, ... }.to_json
93
+ end
94
+ ```
95
+
96
+ Re-generate embeddings after modifying this method:
97
+
98
+ ```ruby
99
+ product.embed!
100
+ ```
25
101
 
26
102
  ## Rails Generators
27
103
 
28
- ### Pinecone Generator - adds vectorsearch to your ActiveRecord model
104
+ ### Pgvector Generator
29
105
 
106
+ ```bash
107
+ rails generate langchainrb_rails:pgvector --model=Product --llm=openai
30
108
  ```
109
+
110
+ ### Pinecone Generator - adds vectorsearch to your ActiveRecord model
111
+ ```bash
31
112
  rails generate langchainrb_rails:pinecone --model=Product --llm=openai
32
113
  ```
33
114
 
@@ -39,3 +120,4 @@ Pinecone Generator does the following:
39
120
  1. Creates the `config/initializers/langchainrb_rails.rb` initializer file
40
121
  2. Adds necessary code to the ActiveRecord model to enable vectorsearch
41
122
  3. Adds `pinecone` gem to the Gemfile
123
+
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Overriding Langchain.rb's Pgvector implementation to use ActiveRecord.
4
+ # Original implementation: https://github.com/andreibondarev/langchainrb/blob/main/lib/langchain/vectorsearch/pgvector.rb
5
+
6
+ module Langchain::Vectorsearch
7
+ class Pgvector < Base
8
+ #
9
+ # The PostgreSQL vector search adapter
10
+ #
11
+ # Gem requirements:
12
+ # gem "pgvector", "~> 0.2"
13
+ #
14
+ # Usage:
15
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(llm:)
16
+ #
17
+
18
+ # The operators supported by the PostgreSQL vector search adapter
19
+ OPERATORS = [
20
+ "cosine",
21
+ "euclidean",
22
+ "inner_product"
23
+ ]
24
+ DEFAULT_OPERATOR = "cosine"
25
+
26
+ attr_reader :operator, :llm
27
+ attr_accessor :model
28
+
29
+ # @param url [String] The URL of the PostgreSQL database
30
+ # @param index_name [String] The name of the table to use for the index
31
+ # @param llm [Object] The LLM client to use
32
+ # @param namespace [String] The namespace to use for the index when inserting/querying
33
+ def initialize(llm:)
34
+ # If the line below is called, the generator fails as calls to
35
+ # LangchainrbRails.config.vectorsearch will generate an exception.
36
+ # These happen in the template files.
37
+ # depends_on "neighbor"
38
+
39
+ @operator = DEFAULT_OPERATOR
40
+
41
+ super(llm: llm)
42
+ end
43
+
44
+ # Add a list of texts to the index
45
+ # @param texts [Array<String>] The texts to add to the index
46
+ # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
47
+ # @return [Array<Integer>] The ids of the added texts.
48
+ def add_texts(texts:, ids:)
49
+ embeddings = texts.map do |text|
50
+ llm.embed(text: text).embedding
51
+ end
52
+
53
+ # I believe the records returned by #find must be in the
54
+ # same order as the embeddings. I _think_ this works for uuid ids but didn't test
55
+ # deeply.
56
+ # TODO - implement find_each so we don't load all records into memory
57
+ model.find(ids).each.with_index do |record, i|
58
+ record.update_column(:embedding, embeddings[i])
59
+ end
60
+ end
61
+
62
+ def update_texts(texts:, ids:)
63
+ add_texts(texts: texts, ids: ids)
64
+ end
65
+
66
+ # Invoke a rake task that will create an initializer (`config/initializers/langchain.rb`) file
67
+ # and db/migrations/* files
68
+ def create_default_schema
69
+ Rake::Task["pgvector"].invoke
70
+ end
71
+
72
+ # Destroy default schema
73
+ def destroy_default_schema
74
+ # Tell the user to rollback the migration
75
+ end
76
+
77
+ # Search for similar texts in the index
78
+ # @param query [String] The text to search for
79
+ # @param k [Integer] The number of top results to return
80
+ # @return [Array<Hash>] The results of the search
81
+ # TODO - drop the named "query:" param so it is the same interface as #ask?
82
+ def similarity_search(query:, k: 4)
83
+ embedding = llm.embed(text: query).embedding
84
+
85
+ similarity_search_by_vector(
86
+ embedding: embedding,
87
+ k: k
88
+ )
89
+ end
90
+
91
+ # Search for similar texts in the index by the passed in vector.
92
+ # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
93
+ # @param embedding [Array<Float>] The vector to search for
94
+ # @param k [Integer] The number of top results to return
95
+ # @return [Array<Hash>] The results of the search
96
+ # TODO - drop the named "embedding:" param so it is the same interface as #ask?
97
+ def similarity_search_by_vector(embedding:, k: 4)
98
+ model
99
+ .nearest_neighbors(:embedding, embedding, distance: operator)
100
+ .limit(k)
101
+ end
102
+
103
+ # Ask a question and return the answer
104
+ # @param question [String] The question to ask
105
+ # @param k [Integer] The number of results to have in context
106
+ # @yield [String] Stream responses back one String at a time
107
+ # @return [String] The answer to the question
108
+ def ask(question, k: 4, &block)
109
+ # Noisy as the embedding column has a lot of data
110
+ ActiveRecord::Base.logger.silence do
111
+ search_results = similarity_search(query: question, k: k)
112
+
113
+ context = search_results.map do |result|
114
+ result.as_vector
115
+ end
116
+ context = context.join("\n---\n")
117
+
118
+ prompt = generate_rag_prompt(question: question, context: context)
119
+
120
+ llm.chat(prompt: prompt, &block)
121
+ end
122
+ end
123
+ end
124
+ end
@@ -61,7 +61,9 @@ module LangchainrbRails
61
61
  #
62
62
  # @return [String] the text representation of the model
63
63
  def as_vector
64
- to_json
64
+ # Don't vectorize the embedding ... this would happen if it already exists
65
+ # for a record and we update.
66
+ to_json(except: :embedding)
65
67
  end
66
68
 
67
69
  module ClassMethods
@@ -70,6 +72,21 @@ module LangchainrbRails
70
72
  # @param provider [Object] The `Langchain::Vectorsearch::*` instance
71
73
  def vectorsearch
72
74
  class_variable_set(:@@provider, LangchainrbRails.config.vectorsearch)
75
+
76
+ # Pgvector-specific configuration
77
+ if LangchainrbRails.config.vectorsearch.is_a?(Langchain::Vectorsearch::Pgvector)
78
+ has_neighbors(:embedding)
79
+ end
80
+
81
+ LangchainrbRails.config.vectorsearch.model = self
82
+ end
83
+
84
+ # Iterates over records and generate embeddings.
85
+ # Will re-generate for ALL records (not just records with embeddings).
86
+ def embed!
87
+ find_each do |record|
88
+ record.upsert_to_vectorsearch
89
+ end
73
90
  end
74
91
 
75
92
  # Search for similar texts
@@ -84,7 +101,7 @@ module LangchainrbRails
84
101
  )
85
102
 
86
103
  # We use "__id" when Weaviate is the provider
87
- ids = records.map { |record| record.dig("id") || record.dig("__id") }
104
+ ids = records.map { |record| record.try("id") || record.dig("__id") }
88
105
  where(id: ids)
89
106
  end
90
107
 
@@ -94,12 +111,12 @@ module LangchainrbRails
94
111
  # @param k [Integer] The number of results to have in context
95
112
  # @yield [String] Stream responses back one String at a time
96
113
  # @return [String] The answer to the question
97
- def ask(question:, k: 4, &block)
114
+ def ask(question, k: 4, &block)
98
115
  class_variable_get(:@@provider).ask(
99
- question: question,
116
+ question,
100
117
  k: k,
101
118
  &block
102
- )
119
+ ).completion
103
120
  end
104
121
  end
105
122
  end
@@ -0,0 +1,24 @@
1
+ require "rails/generators"
2
+ require "rails/generators/active_record"
3
+
4
+ module LangchainrbRails
5
+ module Generators
6
+ class BaseGenerator < Rails::Generators::Base
7
+ include ::ActiveRecord::Generators::Migration
8
+
9
+ class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
10
+ class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
11
+
12
+ # Available LLM providers to be passed in as --llm option
13
+ LLMS = {
14
+ "cohere" => "Langchain::LLM::Cohere",
15
+ "google_palm" => "Langchain::LLM::GooglePalm",
16
+ "hugging_face" => "Langchain::LLM::HuggingFace",
17
+ "llama_cpp" => "Langchain::LLM::LlamaCpp",
18
+ "ollama" => "Langchain::LLM::Ollama",
19
+ "openai" => "Langchain::LLM::OpenAI",
20
+ "replicate" => "Langchain::LLM::Replicate"
21
+ }
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ module Generators
5
+ #
6
+ # ChromaGenerator does the following:
7
+ # 1. Creates the `langchainrb_rails.rb` initializer file
8
+ # 2. Adds necessary code to the ActiveRecord model to enable vectorsearch
9
+ # 3. Adds `chroma-db` gem to the Gemfile
10
+ #
11
+ # Usage:
12
+ # rails generate langchainrb_rails:chroma --model=Product --llm=openai
13
+ #
14
+ class ChromaGenerator < LangchainrbRails::Generators::BaseGenerator
15
+ desc "This generator adds Chroma vectorsearch integration to your ActiveRecord model"
16
+ source_root File.join(__dir__, "templates")
17
+
18
+ # Creates the `langchainrb_rails.rb` initializer file
19
+ def create_initializer_file
20
+ template "chroma_initializer.rb", "config/initializers/langchainrb_rails.rb"
21
+ end
22
+
23
+ # Adds `vectorsearch` class method to the model and `after_save` callback that calls `upsert_to_vectorsearch()`
24
+ def add_to_model
25
+ inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
26
+ " vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
27
+ end
28
+ end
29
+
30
+ # Adds `chroma-db` gem to the Gemfile
31
+ # TODO: Can we automatically run `bundle install`?
32
+ def add_to_gemfile
33
+ gem "chroma-db", version: "~> 0.6.0"
34
+ end
35
+
36
+ private
37
+
38
+ # @return [String] Name of the model
39
+ def model_name
40
+ options["model"]
41
+ end
42
+
43
+ # @return [String] LLM provider to use
44
+ def llm
45
+ options["llm"]
46
+ end
47
+
48
+ # @return [Langchain::LLM::*] LLM class
49
+ def llm_class
50
+ Langchain::LLM.const_get(LLMS[llm])
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangchainrbRails
4
+ module Generators
5
+ #
6
+ # Usage:
7
+ # rails generate langchainrb_rails:pgvector --model=Product --llm=openai
8
+ #
9
+ class PgvectorGenerator < LangchainrbRails::Generators::BaseGenerator
10
+ desc "This generator adds Pgvector vectorsearch integration to your ActiveRecord model"
11
+ source_root File.join(__dir__, "templates")
12
+
13
+ def copy_migration
14
+ migration_template "enable_vector_extension_template.rb", "db/migrate/enable_vector_extension.rb", migration_version: migration_version
15
+ migration_template "add_vector_column_template.rb", "db/migrate/add_vector_column_to_#{table_name}.rb", migration_version: migration_version
16
+ end
17
+
18
+ def create_initializer_file
19
+ template "pgvector_initializer.rb", "config/initializers/langchainrb_rails.rb"
20
+ end
21
+
22
+ def migration_version
23
+ "[#{::ActiveRecord::VERSION::MAJOR}.#{::ActiveRecord::VERSION::MINOR}]"
24
+ end
25
+
26
+ def add_to_model
27
+ inject_into_class "app/models/#{model_name.downcase}.rb", model_name do
28
+ " vectorsearch\n\n after_save :upsert_to_vectorsearch\n\n"
29
+ end
30
+ end
31
+
32
+ def add_to_gemfile
33
+ # Dependency for Langchain PgVector
34
+ gem "neighbor"
35
+ gem "ruby-openai"
36
+ end
37
+
38
+ def post_install_message
39
+ say "Please do the following to start Q&A with your #{model_name} records:", :green
40
+ say "1. Run `bundle install` to install the new gems."
41
+ say "2. Set `OPENAI_API_KEY` environment variable to your OpenAI API key."
42
+ say "3. Run `rails db:migrate` to apply the database migrations to enable pgvector and add the embedding column."
43
+ say "4. In Rails console, run `#{model_name}.embed!` to set the embeddings for all records."
44
+ say "5. Ask a question in the Rails console, ie: `#{model_name}.ask('[YOUR QUESTION]')`"
45
+ end
46
+
47
+ private
48
+
49
+ # @return [String] Name of the model
50
+ def model_name
51
+ options["model"]
52
+ end
53
+
54
+ # @return [String] Table name of the model
55
+ def table_name
56
+ model_name.downcase.pluralize
57
+ end
58
+
59
+ # @return [String] LLM provider to use
60
+ def llm
61
+ options["llm"]
62
+ end
63
+
64
+ # @return [Langchain::LLM::*] LLM class
65
+ def llm_class
66
+ Langchain::LLM.const_get(LLMS[llm])
67
+ end
68
+
69
+ # @return [Integer] Dimension of the vector to be used
70
+ def vector_dimension
71
+ llm_class.default_dimension
72
+ end
73
+ end
74
+ end
75
+ end
@@ -1,4 +1,4 @@
1
- require "rails/generators/active_record"
1
+ # frozen_string_literal: true
2
2
 
3
3
  module LangchainrbRails
4
4
  module Generators
@@ -11,26 +11,10 @@ module LangchainrbRails
11
11
  # Usage:
12
12
  # rails generate langchainrb_rails:pinecone --model=Product --llm=openai
13
13
  #
14
- class PineconeGenerator < Rails::Generators::Base
14
+ class PineconeGenerator < LangchainrbRails::Generators::BaseGenerator
15
15
  desc "This generator adds Pinecone vectorsearch integration to your ActiveRecord model"
16
-
17
- include ::ActiveRecord::Generators::Migration
18
16
  source_root File.join(__dir__, "templates")
19
17
 
20
- class_option :model, type: :string, required: true, desc: "ActiveRecord Model to add vectorsearch to", aliases: "-m"
21
- class_option :llm, type: :string, required: true, desc: "LLM provider that will be used to generate embeddings and completions"
22
-
23
- # Available LLM providers to be passed in as --llm option
24
- LLMS = {
25
- "cohere" => "Langchain::LLM::Cohere",
26
- "google_palm" => "Langchain::LLM::GooglePalm",
27
- "hugging_face" => "Langchain::LLM::HuggingFace",
28
- "llama_cpp" => "Langchain::LLM::LlamaCpp",
29
- "ollama" => "Langchain::LLM::Ollama",
30
- "openai" => "Langchain::LLM::OpenAI",
31
- "replicate" => "Langchain::LLM::Replicate"
32
- }
33
-
34
18
  # Creates the `langchainrb_rails.rb` initializer file
35
19
  def create_initializer_file
36
20
  template "pinecone_initializer.rb", "config/initializers/langchainrb_rails.rb"
@@ -46,7 +30,7 @@ module LangchainrbRails
46
30
  # Adds `pinecone` gem to the Gemfile
47
31
  # TODO: Can we automatically run `bundle install`?
48
32
  def add_to_gemfile
49
- gem "pinecone"
33
+ gem "pinecone", version: "~> 0.1.6"
50
34
  end
51
35
 
52
36
  private
@@ -0,0 +1,10 @@
1
+ class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ add_column :<%= table_name %>, :embedding, :vector,
4
+ limit: LangchainrbRails
5
+ .config
6
+ .vectorsearch
7
+ .llm
8
+ .default_dimension
9
+ end
10
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ LangchainrbRails.configure do |config|
4
+ config.vectorsearch = Langchain::Vectorsearch::Chroma.new(
5
+ llm: <%= llm_class %>.new(api_key: ENV["<%= llm.upcase %>_API_KEY"]),
6
+ url: ENV["CHROMA_URL"],
7
+ index_name: ""
8
+ )
9
+ end
@@ -0,0 +1,5 @@
1
+ class <%= migration_class_name %> < ActiveRecord::Migration<%= migration_version %>
2
+ def change
3
+ enable_extension "vector"
4
+ end
5
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ LangchainrbRails.configure do |config|
4
+ config.vectorsearch = Langchain::Vectorsearch::Pgvector.new(
5
+ llm: <%= llm_class %>.new(api_key: ENV["OPENAI_API_KEY"])
6
+ )
7
+ end
@@ -9,7 +9,9 @@ module LangchainrbRails
9
9
  end
10
10
 
11
11
  generators do
12
+ require_relative "generators/langchainrb_rails/chroma_generator"
12
13
  require_relative "generators/langchainrb_rails/pinecone_generator"
14
+ require_relative "generators/langchainrb_rails/pgvector_generator"
13
15
  end
14
16
  end
15
17
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LangchainrbRails
4
- VERSION = "0.1.2"
4
+ VERSION = "0.1.3"
5
5
  end
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "forwardable"
3
4
  require "langchain"
5
+ require "rails"
4
6
  require_relative "langchainrb_rails/version"
5
7
  require "langchainrb_rails/railtie"
6
8
  require "langchainrb_rails/config"
9
+ require_relative "langchainrb_overrides/vectorsearch/pgvector"
7
10
 
8
11
  module LangchainrbRails
9
12
  class Error < StandardError; end
@@ -13,6 +16,8 @@ module LangchainrbRails
13
16
  end
14
17
 
15
18
  module Generators
19
+ autoload :BaseGenerator, "langchainrb_rails/generators/langchainrb_rails/base_generator"
20
+ autoload :ChromaGenerator, "langchainrb_rails/generators/langchainrb_rails/chroma_generator"
16
21
  autoload :PgvectorGenerator, "langchainrb_rails/generators/langchainrb_rails/pgvector_generator"
17
22
  end
18
23
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb_rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-27 00:00:00.000000000 Z
11
+ date: 2023-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: langchainrb
@@ -74,16 +74,26 @@ extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
76
  - ".rspec"
77
+ - ".rubocop.yml"
78
+ - ".tool-versions"
77
79
  - CHANGELOG.md
78
80
  - Gemfile
79
81
  - Gemfile.lock
80
82
  - LICENSE.txt
81
83
  - README.md
82
84
  - Rakefile
85
+ - lib/langchainrb_overrides/vectorsearch/pgvector.rb
83
86
  - lib/langchainrb_rails.rb
84
87
  - lib/langchainrb_rails/active_record/hooks.rb
85
88
  - lib/langchainrb_rails/config.rb
89
+ - lib/langchainrb_rails/generators/langchainrb_rails/base_generator.rb
90
+ - lib/langchainrb_rails/generators/langchainrb_rails/chroma_generator.rb
91
+ - lib/langchainrb_rails/generators/langchainrb_rails/pgvector_generator.rb
86
92
  - lib/langchainrb_rails/generators/langchainrb_rails/pinecone_generator.rb
93
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/add_vector_column_template.rb.tt
94
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/chroma_initializer.rb.tt
95
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/enable_vector_extension_template.rb.tt
96
+ - lib/langchainrb_rails/generators/langchainrb_rails/templates/pgvector_initializer.rb.tt
87
97
  - lib/langchainrb_rails/generators/langchainrb_rails/templates/pinecone_initializer.rb.tt
88
98
  - lib/langchainrb_rails/railtie.rb
89
99
  - lib/langchainrb_rails/version.rb
@@ -111,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
121
  - !ruby/object:Gem::Version
112
122
  version: '0'
113
123
  requirements: []
114
- rubygems_version: 3.2.3
124
+ rubygems_version: 3.3.7
115
125
  signing_key:
116
126
  specification_version: 4
117
127
  summary: Rails wrapper for langchainrb gem