bx_builder_chain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +13 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +22 -0
  6. data/Gemfile.lock +120 -0
  7. data/README.md +74 -0
  8. data/Rakefile +12 -0
  9. data/bx_builder_chain.gemspec +35 -0
  10. data/lib/bx_builder_chain/chunker/recursive_text.rb +38 -0
  11. data/lib/bx_builder_chain/chunker/text.rb +38 -0
  12. data/lib/bx_builder_chain/configuration.rb +21 -0
  13. data/lib/bx_builder_chain/data.rb +28 -0
  14. data/lib/bx_builder_chain/dependency_helper.rb +22 -0
  15. data/lib/bx_builder_chain/llm/base.rb +64 -0
  16. data/lib/bx_builder_chain/llm/open_ai.rb +191 -0
  17. data/lib/bx_builder_chain/loader.rb +144 -0
  18. data/lib/bx_builder_chain/processors/base.rb +21 -0
  19. data/lib/bx_builder_chain/processors/csv.rb +27 -0
  20. data/lib/bx_builder_chain/processors/docx.rb +25 -0
  21. data/lib/bx_builder_chain/processors/html.rb +29 -0
  22. data/lib/bx_builder_chain/processors/json.rb +17 -0
  23. data/lib/bx_builder_chain/processors/pdf.rb +26 -0
  24. data/lib/bx_builder_chain/processors/text.rb +17 -0
  25. data/lib/bx_builder_chain/processors/xlsx.rb +31 -0
  26. data/lib/bx_builder_chain/utils/token_data/cl100k_base.tiktoken +100256 -0
  27. data/lib/bx_builder_chain/utils/token_length/base_validator.rb +45 -0
  28. data/lib/bx_builder_chain/utils/token_length/open_ai_validator.rb +70 -0
  29. data/lib/bx_builder_chain/utils/tokenization/byte_pair_encoding.rb +72 -0
  30. data/lib/bx_builder_chain/utils/tokenization/open_ai_encodings.rb +44 -0
  31. data/lib/bx_builder_chain/vectorsearch/base.rb +160 -0
  32. data/lib/bx_builder_chain/vectorsearch/pgvector.rb +228 -0
  33. data/lib/bx_builder_chain/version.rb +5 -0
  34. data/lib/bx_builder_chain.rb +38 -0
  35. data/lib/generators/bx_builder_chain/install_generator.rb +42 -0
  36. data/lib/generators/bx_builder_chain/templates/app/admin/bx_builder_chain_document.rb +65 -0
  37. data/lib/generators/bx_builder_chain/templates/app/controllers/bx_builder_chain/documents_controller.rb +65 -0
  38. data/lib/generators/bx_builder_chain/templates/app/controllers/bx_builder_chain/questions_controller.rb +33 -0
  39. data/lib/generators/bx_builder_chain/templates/app/controllers/bx_builder_chain/test_controller.rb +10 -0
  40. data/lib/generators/bx_builder_chain/templates/app/models/bx_builder_chain/document.rb +26 -0
  41. data/lib/generators/bx_builder_chain/templates/app/models/bx_builder_chain/document_chunk.rb +9 -0
  42. data/lib/generators/bx_builder_chain/templates/app/models/bx_builder_chain/embedding.rb +9 -0
  43. data/lib/generators/bx_builder_chain/templates/app/services/bx_builder_chain/document_upload_service.rb +47 -0
  44. data/lib/generators/bx_builder_chain/templates/app/services/bx_builder_chain/question_asking_service.rb +35 -0
  45. data/lib/generators/bx_builder_chain/templates/app/views/bx_builder_chain/test/form.html.erb +164 -0
  46. data/lib/generators/bx_builder_chain/templates/app/workers/bx_builder_chain/document_processor_worker.rb +32 -0
  47. data/lib/generators/bx_builder_chain/templates/initializer.rb +12 -0
  48. data/lib/generators/bx_builder_chain/templates/migration.rb +33 -0
  49. data/lib/pgvector/pg/binary_decoder/vector.rb +14 -0
  50. data/lib/pgvector/pg/text_decoder/vector.rb +12 -0
  51. data/lib/pgvector/pg.rb +10 -0
  52. data/lib/pgvector.rb +11 -0
  53. data/lib/sequel/plugins/pgvector/class_methods.rb +47 -0
  54. data/lib/sequel/plugins/pgvector/instance_methods.rb +34 -0
  55. data/lib/sequel/plugins/pgvector.rb +12 -0
  56. data/sig/bx_langchain_chat.rbs +4 -0
  57. metadata +238 -0
@@ -0,0 +1,42 @@
1
+ require 'rails/generators'
2
+ require 'rails/generators/active_record'
3
+
module BxBuilderChain
  module Generators
    # Rails generator that installs BxBuilderChain into the host application.
    #
    # Each public method below is one generator step: it writes the
    # initializer, writes the schema migration, copies the bundled `app`
    # templates and injects the engine routes into config/routes.rb.
    class InstallGenerator < Rails::Generators::Base
      # File name (without the timestamp prefix) of the generated migration.
      MIGRATION_BASENAME = "create_bx_builder_chain_schema.rb"

      source_root File.expand_path('templates', __dir__)

      desc "Creates BxBuilderChain initializer, migration, and copies app templates for your application"

      # Writes config/initializers/bx_builder_chain.rb from the template.
      def copy_initializer
        template "initializer.rb", "config/initializers/bx_builder_chain.rb"
      end

      # Writes the schema migration into db/migrate.
      #
      # The step is skipped when a migration with the same base name already
      # exists, so running the generator twice does not produce two copies of
      # the same migration class.
      def copy_migration
        existing = Dir.glob(File.join(destination_root, "db/migrate/*_#{MIGRATION_BASENAME}"))
        if existing.any?
          say_status :skip, "migration already exists: #{File.basename(existing.first)}", :yellow
          return
        end

        # Rails numbers migrations with UTC timestamps; local time could sort
        # this migration out of order relative to the application's own.
        timestamp_number = Time.now.utc.strftime("%Y%m%d%H%M%S")
        template "migration.rb", "db/migrate/#{timestamp_number}_#{MIGRATION_BASENAME}"
      end

      # Copies the bundled templates/app tree into the application's app/ directory.
      def copy_app_templates
        directory "app", Rails.root.join("app")
      end

      # Injects the bx_builder_chain routes right after the opening line of
      # the routes block in config/routes.rb.
      def add_routes
        inject_into_file 'config/routes.rb', after: "Rails.application.routes.draw do\n" do
          <<~ROUTES
            namespace :bx_builder_chain do
              get 'test_form', to: 'test#form' # remove before production

              post 'documents/upload', to: 'documents#upload_and_process'
              post 'documents/upload_for_later', to: 'documents#upload_and_process_later'
              get 'documents/list', to: 'documents#namespace_documents'
              delete 'documents/delete', to: 'documents#delete_documents'

              post 'ask', to: 'questions#ask'
            end
          ROUTES
        end
      end
    end
  end
end
@@ -0,0 +1,65 @@
# ActiveAdmin screen for BxBuilderChain documents: list, filter, and upload
# new files through BxBuilderChain::DocumentUploadService.
ActiveAdmin.register BxBuilderChain::Document, as: "Document Embeddings" do
  permit_params :name, :namespace

  controller do
    # Eager-loads chunks so the "Document Chunks" index column does not
    # issue one query per row.
    def scoped_collection
      super.includes(:document_chunks)
    end

    # Uploads the submitted files instead of creating a record directly.
    # The comma-separated namespace field is split into the list of groups
    # handed to the upload service.
    def create
      # NOTE(review): the param key and the redirect helper below assume the
      # default resource naming; confirm they still match with the `as:` option.
      document_params = params[:bx_builder_chain_document] || {}
      user_groups = document_params[:namespace].to_s.split(',').reject(&:blank?)

      service = BxBuilderChain::DocumentUploadService.new(
        files: document_params[:files],
        # The service keyword is `user_groups:`; `current_user_groups:` is
        # not accepted by its initializer and raised ArgumentError.
        user_groups: user_groups,
        client_class_name: 'BxBuilderChain::Vectorsearch::Pgvector',
        llm_class_name: 'BxBuilderChain::Llm::OpenAi'
      )
      result = service.upload_and_process

      if result[:error]
        # The error comes from the service result; nothing has been written
        # to the HTTP response body at this point.
        flash.now[:error] = result[:error]
        render :new
      else
        redirect_to admin_bx_builder_chain_documents_path, notice: 'Document uploaded and processed successfully.'
      end
    end
  end

  index do
    selectable_column
    id_column
    column :name
    column :namespace
    column "Document Chunks" do |document|
      document.document_chunks.size
    end
    column :created_at

    # # Custom actions
    # actions defaults: false do |document|
    #   link_to 'Delete', admin_bx_builder_chain_document_path(document), method: :delete, data: { confirm: 'Are you sure?' }
    # end
    actions
  end

  filter :name
  filter :namespace
  filter :created_at

  form html: { multipart: true } do |f|
    f.inputs do
      # New records upload files; existing records only show their name.
      if f.object.new_record?
        f.input :files, as: :file, input_html: { multiple: true }
      else
        f.input :name, as: :string, input_html: { disabled: true }
      end
      f.input :namespace, as: :string
    end
    f.actions
  end
end
@@ -0,0 +1,65 @@
module BxBuilderChain
  # JSON endpoints for listing, deleting and uploading documents.
  class DocumentsController < ::ApplicationController
    # NOTE(review): CSRF verification is disabled for every action here;
    # confirm these endpoints are protected some other way before production.
    skip_before_action :verify_authenticity_token

    LLM_CLASS_NAME = 'BxBuilderChain::Llm::OpenAi'
    CLIENT_CLASS_NAME = 'BxBuilderChain::Vectorsearch::Pgvector'

    # Lists the documents stored under the first of the caller's groups.
    def namespace_documents
      documents = BxBuilderChain::Document.where(namespace: current_user_document_groups.first)
      render json: documents
    end

    # Deletes the documents whose ids are given in params[:ids], restricted
    # to the first of the caller's groups. Responds 400 when no ids are sent.
    def delete_documents
      return render json: { error: 'No document IDs provided' }, status: :bad_request unless params[:ids].present?

      # Filter documents by the provided IDs and the namespace from the current user groups
      documents_to_delete = BxBuilderChain::Document.where(id: params[:ids], namespace: current_user_document_groups.first)

      # destroy_all (not delete_all) so each document's before_destroy callback runs
      documents_to_delete.destroy_all

      render json: { success: 'Documents deleted successfully' }
    end

    # Uploads params[:files] and processes them within the request.
    def upload_and_process
      render_service_result(build_upload_service.upload_and_process)
    end

    # Uploads params[:files] and queues them for background processing.
    def upload_and_process_later
      render_service_result(build_upload_service.upload_and_process_later)
    end

    private

    # Builds the upload service for the current request. Both upload actions
    # share this so they cannot drift apart; the groups keyword is
    # `user_groups:`, which is the name the service initializer accepts.
    def build_upload_service
      DocumentUploadService.new(
        files: params[:files],
        user_groups: current_user_document_groups, # optional defaults to ['public']
        client_class_name: CLIENT_CLASS_NAME,      # optional defaults to 'BxBuilderChain::Vectorsearch::Pgvector'
        llm_class_name: LLM_CLASS_NAME             # optional defaults to 'BxBuilderChain::Llm::OpenAi'
      )
    end

    # Renders a service result hash: 400 with the error message when the
    # result carries :error, otherwise the result itself.
    def render_service_result(result)
      if result[:error]
        render json: { error: result[:error] }, status: :bad_request
      else
        render json: result
      end
    end

    # Groups are read from the comma-separated `current_user_groups` param.
    # replace this with the actual user document permission groups
    def current_user_document_groups
      params[:current_user_groups].to_s.split(',').reject(&:blank?)
    end
  end
end
@@ -0,0 +1,33 @@
module BxBuilderChain
  # JSON endpoint that forwards a question to QuestionAskingService.
  class QuestionsController < ::ApplicationController
    skip_before_action :verify_authenticity_token

    LLM_CLASS_NAME = 'BxBuilderChain::Llm::OpenAi'
    CLIENT_CLASS_NAME = 'BxBuilderChain::Vectorsearch::Pgvector'

    # POST /bx_builder_chain/ask
    #
    # Responds with the service result; the status is 400 when the result
    # carries an :error key.
    def ask
      outcome = QuestionAskingService.new(
        question: params[:question],
        user_groups: current_user_document_groups, # optional defaults to ['public']
        client_class_name: CLIENT_CLASS_NAME,      # optional defaults to 'BxBuilderChain::Vectorsearch::Pgvector'
        llm_class_name: LLM_CLASS_NAME,            # optional defaults to 'BxBuilderChain::Llm::OpenAi'
        context_results: 10                        # optional defaults to 6
      ).ask

      return render(json: outcome, status: :bad_request) if outcome[:error]

      render json: outcome
    end

    private

    # Splits the comma-separated `current_user_groups` param, dropping blanks.
    # replace this with the actual user document permission groups
    def current_user_document_groups
      raw_groups = params[:current_user_groups].to_s
      raw_groups.split(',').reject { |group| group.blank? }
    end
  end
end
@@ -0,0 +1,10 @@
module BxBuilderChain
  # Controller behind the manual test form route.
  class TestController < ::ApplicationController
    skip_before_action :verify_authenticity_token

    # Intentionally empty: the action has no setup of its own.
    def form; end
  end
end
10
+
@@ -0,0 +1,26 @@
module BxBuilderChain
  # A stored document; linked to its embeddings through document chunks.
  class Document < ::ApplicationRecord
    self.table_name = "bx_builder_chain_documents"

    has_many :document_chunks, class_name: 'BxBuilderChain::DocumentChunk'
    has_many :embeddings, through: :document_chunks

    before_destroy :destroy_associated_data
    after_update :update_associated_embedding_namespaces, if: :saved_change_to_namespace?

    private

    # Copies the document's namespace onto all of its embeddings.
    def update_associated_embedding_namespaces
      embeddings.update_all(namespace: namespace)
    end

    # Removes this document's embeddings and chunks with bulk DELETEs,
    # without instantiating the records.
    def destroy_associated_data
      chunk_scope = BxBuilderChain::DocumentChunk.where(document_id: id)

      # Embeddings first: their ids are read from the chunk rows.
      BxBuilderChain::Embedding.where(id: chunk_scope.pluck(:embedding_id)).delete_all

      # Then the chunk rows themselves.
      chunk_scope.delete_all
    end
  end
end
@@ -0,0 +1,9 @@
module BxBuilderChain
  # Join record tying one document to one embedding.
  class DocumentChunk < ::ApplicationRecord
    self.table_name = "bx_builder_chain_document_chunks"

    # Each chunk row points at exactly one document and one embedding.
    belongs_to :document, class_name: "BxBuilderChain::Document"
    belongs_to :embedding, class_name: "BxBuilderChain::Embedding"
  end
end
9
+
@@ -0,0 +1,9 @@
module BxBuilderChain
  # An embedding row; reachable from documents through document chunks.
  class Embedding < ::ApplicationRecord
    self.table_name = "bx_builder_chain_embeddings"

    # Destroying an embedding also destroys the chunk rows that reference it.
    has_many :document_chunks, class_name: "BxBuilderChain::DocumentChunk", dependent: :destroy
    has_many :documents, through: :document_chunks
  end
end
9
+
@@ -0,0 +1,47 @@
module BxBuilderChain
  # Adds uploaded files to the document store, either immediately or by
  # moving them to a waiting folder and queueing a background job per file.
  class DocumentUploadService
    attr_reader :files, :user_groups, :llm_class_name, :client_class_name

    WAITING_FOLDER = 'path/to/waiting_folder' # Update this path

    # @param files [Array, nil] uploaded-file objects (responding to
    #   `tempfile` and `original_filename`) or hashes with path/filename keys
    # @param user_groups [Array<String>] namespaces passed to the client
    # @param client_class_name [String] name of the vector search client class
    # @param llm_class_name [String] name of the LLM class
    def initialize(files:, user_groups: ['public'], client_class_name: 'BxBuilderChain::Vectorsearch::Pgvector', llm_class_name: 'BxBuilderChain::Llm::OpenAi')
      @files = files
      @user_groups = user_groups
      @client_class_name = client_class_name
      @llm_class_name = llm_class_name
    end

    # Hands every file to the client's `add_data`.
    #
    # @return [Hash] `{ success: ... }`, or `{ error: ... }` when no files were given
    def upload_and_process
      return { error: 'No files provided' } unless files&.any?

      client.add_data(paths: files.map { |file| file_descriptor(file) })

      { success: 'Files added to document store' }
    end

    # Moves every uploaded file into WAITING_FOLDER and enqueues a
    # DocumentProcessorWorker job for it.
    #
    # @return [Hash] `{ success: ... }`, or `{ error: ... }` when no files were given
    def upload_and_process_later
      return { error: 'No files provided' } unless files&.any?

      files.each do |file|
        # Keep only the base name so a client-supplied name cannot place the
        # file outside the waiting folder.
        filename = File.basename(file.original_filename.to_s)
        new_path = File.join(WAITING_FOLDER, filename)
        FileUtils.mv(file.tempfile.path, new_path)

        # Sidekiq serializes job arguments as JSON, so only string-keyed
        # hashes of plain values are passed.
        BxBuilderChain::DocumentProcessorWorker.perform_async(
          { 'path' => new_path, 'filename' => filename },
          { 'llm_class' => llm_class_name, 'client_class' => client_class_name, 'namespaces' => user_groups }
        )
      end

      { success: 'Files queued for processing' }
    end

    private

    # Normalizes one input file into the `{ path:, filename: }` hash given to
    # the client. Accepts an uploaded-file object or a hash with symbol or
    # string keys (the shape a background job receives after JSON round-trip).
    def file_descriptor(file)
      if file.respond_to?(:tempfile)
        { path: file.tempfile.path, filename: file.original_filename }
      else
        { path: file[:path] || file['path'], filename: file[:filename] || file['filename'] }
      end
    end

    # Lazily builds the vector search client from the configured class names.
    def client
      @client ||= client_class_name.constantize.new(
        llm: llm_class_name.constantize.new,
        namespaces: user_groups
      )
    end
  end
end
47
+
@@ -0,0 +1,35 @@
module BxBuilderChain
  # Answers a question through the configured vector search client.
  class QuestionAskingService
    attr_reader :question, :user_groups, :llm_class_name, :client_class_name, :context_results

    # @param question [String] the question text
    # @param user_groups [Array<String>] namespaces passed to the client
    # @param client_class_name [String] name of the vector search client class
    # @param llm_class_name [String] name of the LLM class
    # @param context_results [Integer] forwarded to the client's `ask`
    def initialize(question:, user_groups: ['public'], client_class_name: 'BxBuilderChain::Vectorsearch::Pgvector', llm_class_name: 'BxBuilderChain::Llm::OpenAi', context_results: 6)
      @question = question
      @user_groups = user_groups
      @client_class_name = client_class_name
      @llm_class_name = llm_class_name
      @context_results = context_results
    end

    # @return [Hash] `{ answer: ... }`, or `{ error: ... }` when the question is blank
    def ask
      if question.present?
        { answer: client.ask(question: question, context_results: context_results) }
      else
        { error: 'No question provided' }
      end
    end

    private

    # Memoized client, built on first use from the configured class names.
    def client
      return @client if @client

      llm_options = {
        chat_completion_model_name: "gpt-3.5-turbo",
        temperature: 0.2
      }
      llm = llm_class_name.constantize.new(default_options: llm_options)
      @client = client_class_name.constantize.new(llm: llm, namespaces: user_groups)
    end
  end
end
35
+
@@ -0,0 +1,164 @@
<style>
  /* Page chrome */
  body {
    font-family: 'Arial', sans-serif;
    background-color: #f5f5f5;
  }

  /* Centered card that holds both forms and the console */
  div.page_wrapper {
    max-width: 1100px;
    margin: 5% auto;
    padding: 20px;
    background-color: #fff;
    box-shadow: 0 3px 6px rgba(0, 0, 0, 0.1);
    border-radius: 5px;
  }

  /* Two floated half-width columns, one per form */
  div.form_wrapper {
    width: 50%;
    padding: 1%;
    float: left;
    background-color: #f9f9f9;
    border-radius: 5px;
    box-sizing: border-box;
    height: 340px;
  }

  form {
    width: 100%;
  }

  h2 {
    font-size: 1.5em;
    margin-bottom: 20px;
    color: #333;
  }

  /* Form fields */
  .form-group {
    margin-bottom: 15px;
  }

  .form-control {
    width: 93%;
    padding: 8px 12px;
    border: 1px solid #ccc;
    border-radius: 4px;
  }

  /* Submit buttons */
  .btn-primary {
    background-color: #007bff;
    color: #fff;
    padding: 10px 15px;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    transition: background-color 0.3s ease;
  }

  .btn-primary:hover {
    background-color: #0056b3;
  }

  /* Terminal-style output area below the forms */
  div.console_wrapper {
    width: 100%;
    margin-top: 20px;
    padding: 15px;
    background-color: #000;
    color: #0f0;
    font-family: 'Courier New', monospace;
    border-radius: 5px;
    min-height: 150px;
    box-sizing: border-box;
    overflow: auto;
  }

  .console_message {
    white-space: pre-line; /* To respect new lines in the content */
  }
</style>
77
+
<%# Manual test page: an upload form, a question form, and a console area
    that the page script fills with the JSON response. %>
<div class="page_wrapper">
  <div class="form_wrapper">
    <h2>Upload Files</h2>
    <%= form_tag(bx_builder_chain_documents_upload_path, multipart: true, method: :post, id: 'upload_form') do %>
      <%# One multi-select file input; the explicit id lets the label target it. %>
      <div class="form-group">
        <label for="files">Choose files to upload:</label>
        <%= file_field_tag 'files[]', multiple: true, id: 'files', class: 'form-control' %>
      </div>

      <%# Both forms submit `current_user_groups`; ids differ so they stay unique on the page. %>
      <div class="form-group">
        <label for="upload_current_user_groups">User Groups:</label>
        <%= text_field_tag 'current_user_groups', 1, id: 'upload_current_user_groups', class: 'form-control' %>
      </div>

      <div class="actions">
        <%= submit_tag "Upload", class: 'btn btn-primary' %>
      </div>
    <% end %>
  </div>

  <div class="form_wrapper">
    <h2>Ask Question</h2>

    <%= form_tag(bx_builder_chain_ask_path, method: :post, id: 'ask_form') do %>
      <div class="form-group">
        <label for="question">Ask a question:</label>
        <%= text_area_tag 'question', nil, class: 'form-control', rows: 3 %>
      </div>

      <div class="form-group">
        <label for="ask_current_user_groups">User Groups:</label>
        <%= text_field_tag 'current_user_groups', 1, id: 'ask_current_user_groups', class: 'form-control' %>
      </div>

      <div class="actions">
        <%= submit_tag "Submit Question", class: 'btn btn-primary' %>
      </div>
    <% end %>
  </div>
  <div style="clear: both;"></div>

  <!-- Console box -->
  <div class="console_wrapper">
    <div class="console_message">
      <%= flash[:console_message] || "Awaiting response..." %>
    </div>
  </div>
</div>
130
+ <script>
131
+ document.addEventListener('DOMContentLoaded', function () { // Selecting both forms
132
+ const uploadForm = document.querySelector('#upload_form');
133
+ const askForm = document.querySelector('#ask_form');
134
+ const consoleBox = document.querySelector('.console_message');
135
+
136
+ // Attach event listener to both forms
137
+ [uploadForm, askForm].forEach(form => {
138
+ form.addEventListener('submit', async function (event) {
139
+ event.preventDefault();
140
+ // Prevent default form submission
141
+
142
+ // Show loading message
143
+ consoleBox.textContent = "Processing...";
144
+
145
+ try {
146
+ let response = await fetch(event.target.action, {
147
+ method: 'POST',
148
+ body: new FormData(event.target) // This gathers form data
149
+ });
150
+
151
+ if (response.ok) {
152
+ let data = await response.json();
153
+ consoleBox.textContent = data.answer || data.success || "Success!";
154
+ } else {
155
+ let errorData = await response.json();
156
+ consoleBox.textContent = errorData.error || "An error occurred.";
157
+ }
158
+ } catch (error) {
159
+ consoleBox.textContent = "Failed to submit. Please try again.";
160
+ }
161
+ });
162
+ });
163
+ });
164
+ </script>
@@ -0,0 +1,32 @@
1
+ require 'sidekiq'
2
+
module BxBuilderChain
  # Sidekiq job that processes one queued file through DocumentUploadService.
  class DocumentProcessorWorker
    include Sidekiq::Worker

    sidekiq_options retry: 2

    # Adapter giving a queued path the two readers the upload service uses
    # on an uploaded file: `tempfile.path` and `original_filename`.
    QueuedFile = Struct.new(:path, :original_filename) do
      def tempfile
        self
      end
    end

    # @param file_data [Hash] path and filename of the queued file
    # @param options [Hash] llm_class, client_class and namespaces
    #
    # Sidekiq delivers arguments after a JSON round-trip, so hashes arrive
    # with string keys and never as Ruby keyword arguments. Both hashes are
    # therefore positional and their keys are normalized to strings; a direct
    # call using `llm_class:`/`client_class:`/`namespaces:` keywords still
    # works because Ruby collects them into `options`.
    #
    # @raise [KeyError] when llm_class or client_class is missing
    def perform(file_data, options = {})
      file_data = file_data.to_h.transform_keys(&:to_s)
      options = options.to_h.transform_keys(&:to_s)
      filename = file_data['filename']
      namespaces = Array(options['namespaces'])

      # Create a new instance of the service class
      service = DocumentUploadService.new(
        files: [QueuedFile.new(file_data['path'], filename)],
        user_groups: namespaces,
        client_class_name: options.fetch('client_class'),
        llm_class_name: options.fetch('llm_class')
      )

      # Use the service method to process the file
      result = service.upload_and_process

      # Log errors if they occur
      log_failure(result[:error], filename, namespaces) if result[:error]
    rescue => e
      log_failure(e.message, filename, namespaces)
      # Re-raise the exception to let Sidekiq handle retries
      raise
    end

    private

    # Writes one error entry naming the file and namespaces involved.
    def log_failure(detail, filename, namespaces)
      BxBuilderChain.logger.error("BxBuilderChain::DocumentProcessorWorker Error: #{detail}\n File: #{filename}\nNameSpace: #{Array(namespaces).join(', ')}")
    end
  end
end
@@ -0,0 +1,12 @@
# BxBuilderChain settings for this application. Replace the placeholder
# values below before use.
BxBuilderChain.configure do |config|
  config.pg_url = "your_database_url" # eg 'postgres://postgres:password@localhost:5432/my_db'
  config.openai_api_key = "your_openai_api_key" # eg ENV['OPENAI_API_KEY']
  config.public_namespace = "public"
  config.threshold = 0.25

  # %{context} and %{question} are placeholders left in the template text.
  config.default_prompt_template = <<~PROMPT.chomp
    Context information is below
    --------------------
    %{context}
    --------------------
    Given the context information and not prior knowledge
    answer the question: %{question}
  PROMPT
end
@@ -0,0 +1,33 @@
# Creates the three BxBuilderChain tables: embeddings, documents, and the
# document_chunks join table between them.
class CreateBxBuilderChainSchema < ActiveRecord::Migration[6.1]
  def change
    enable_extension 'vector' unless extension_enabled?('vector')

    create_embeddings_table
    create_documents_table
    create_document_chunks_table

    # Unique constraint for combination of document_id and embedding_id
    add_index :bx_builder_chain_document_chunks, [:document_id, :embedding_id], unique: true, name: 'index_document_embedding_unique'
  end

  private

  # bx_builder_chain_embeddings table
  def create_embeddings_table
    create_table :bx_builder_chain_embeddings do |t|
      t.text :content
      t.column :vectors, "vector(1536)"
      t.text :namespace, default: 'public'

      t.index :namespace
    end
  end

  # bx_builder_chain_documents table
  def create_documents_table
    create_table :bx_builder_chain_documents do |t|
      t.text :name
      t.text :namespace, default: 'public'

      t.index [:name, :namespace], unique: true
      t.index :namespace
      t.timestamps
    end
  end

  # bx_builder_chain_document_chunks table; both foreign keys cascade on delete
  def create_document_chunks_table
    create_table :bx_builder_chain_document_chunks do |t|
      t.references :document, null: false, foreign_key: { to_table: :bx_builder_chain_documents, on_delete: :cascade }
      t.references :embedding, null: false, foreign_key: { to_table: :bx_builder_chain_embeddings, on_delete: :cascade }
    end
  end
end
@@ -0,0 +1,14 @@
module Pgvector
  module PG
    module BinaryDecoder
      # Decodes a binary-format vector value into an Array of Floats.
      class Vector < ::PG::SimpleDecoder
        # The first four bytes hold two big-endian 16-bit integers: the
        # dimension count and a field that must be zero. The remaining bytes
        # are unpacked as `dim` big-endian single-precision floats.
        def decode(string, tuple = nil, field = nil)
          header = string[0, 4]
          dim, unused = header.unpack("nn")
          raise "expected unused to be 0" unless unused == 0

          payload = string[4..-1]
          payload.unpack("g#{dim}")
        end
      end
    end
  end
end
14
+
@@ -0,0 +1,12 @@
module Pgvector
  module PG
    module TextDecoder
      # Decodes a text-format vector value by delegating to Pgvector.decode.
      class Vector < ::PG::SimpleDecoder
        # `tuple` and `field` are accepted but not used.
        def decode(string, tuple = nil, field = nil)
          ::Pgvector.decode(string)
        end
      end
    end
  end
end
12
+
@@ -0,0 +1,10 @@
1
+ require "pg"
2
+
module Pgvector
  module PG
    class << self
      # Registers the "vector" type on the given registry twice: with the
      # text decoder under format code 0 and the binary decoder under 1.
      def register_vector(registry)
        text_format = 0
        binary_format = 1

        registry.register_type(text_format, "vector", nil, TextDecoder::Vector)
        registry.register_type(binary_format, "vector", nil, BinaryDecoder::Vector)
      end
    end
  end
end
data/lib/pgvector.rb ADDED
@@ -0,0 +1,11 @@
# Text encoding and decoding of vectors in the bracketed, comma-separated
# form, e.g. "[1.0,2.0,3.0]".
module Pgvector
  autoload :PG, "pgvector/pg"

  class << self
    # Converts anything responding to `to_a` into the bracketed text form,
    # coercing every element with `to_f`.
    def encode(data)
      floats = data.to_a.map { |value| value.to_f }
      "[" + floats.join(",") + "]"
    end

    # Drops the first and last characters (the brackets) and parses the
    # comma-separated remainder into Floats.
    def decode(string)
      inner = string[1..-2]
      inner.split(",").map { |part| part.to_f }
    end
  end
end