bx_builder_chain 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +13 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +120 -0
- data/README.md +74 -0
- data/Rakefile +12 -0
- data/bx_builder_chain.gemspec +35 -0
- data/lib/bx_builder_chain/chunker/recursive_text.rb +38 -0
- data/lib/bx_builder_chain/chunker/text.rb +38 -0
- data/lib/bx_builder_chain/configuration.rb +21 -0
- data/lib/bx_builder_chain/data.rb +28 -0
- data/lib/bx_builder_chain/dependency_helper.rb +22 -0
- data/lib/bx_builder_chain/llm/base.rb +64 -0
- data/lib/bx_builder_chain/llm/open_ai.rb +191 -0
- data/lib/bx_builder_chain/loader.rb +144 -0
- data/lib/bx_builder_chain/processors/base.rb +21 -0
- data/lib/bx_builder_chain/processors/csv.rb +27 -0
- data/lib/bx_builder_chain/processors/docx.rb +25 -0
- data/lib/bx_builder_chain/processors/html.rb +29 -0
- data/lib/bx_builder_chain/processors/json.rb +17 -0
- data/lib/bx_builder_chain/processors/pdf.rb +26 -0
- data/lib/bx_builder_chain/processors/text.rb +17 -0
- data/lib/bx_builder_chain/processors/xlsx.rb +31 -0
- data/lib/bx_builder_chain/utils/token_data/cl100k_base.tiktoken +100256 -0
- data/lib/bx_builder_chain/utils/token_length/base_validator.rb +45 -0
- data/lib/bx_builder_chain/utils/token_length/open_ai_validator.rb +70 -0
- data/lib/bx_builder_chain/utils/tokenization/byte_pair_encoding.rb +72 -0
- data/lib/bx_builder_chain/utils/tokenization/open_ai_encodings.rb +44 -0
- data/lib/bx_builder_chain/vectorsearch/base.rb +160 -0
- data/lib/bx_builder_chain/vectorsearch/pgvector.rb +228 -0
- data/lib/bx_builder_chain/version.rb +5 -0
- data/lib/bx_builder_chain.rb +38 -0
- data/lib/generators/bx_builder_chain/install_generator.rb +42 -0
- data/lib/generators/bx_builder_chain/templates/app/admin/bx_builder_chain_document.rb +65 -0
- data/lib/generators/bx_builder_chain/templates/app/controllers/bx_builder_chain/documents_controller.rb +65 -0
- data/lib/generators/bx_builder_chain/templates/app/controllers/bx_builder_chain/questions_controller.rb +33 -0
- data/lib/generators/bx_builder_chain/templates/app/controllers/bx_builder_chain/test_controller.rb +10 -0
- data/lib/generators/bx_builder_chain/templates/app/models/bx_builder_chain/document.rb +26 -0
- data/lib/generators/bx_builder_chain/templates/app/models/bx_builder_chain/document_chunk.rb +9 -0
- data/lib/generators/bx_builder_chain/templates/app/models/bx_builder_chain/embedding.rb +9 -0
- data/lib/generators/bx_builder_chain/templates/app/services/bx_builder_chain/document_upload_service.rb +47 -0
- data/lib/generators/bx_builder_chain/templates/app/services/bx_builder_chain/question_asking_service.rb +35 -0
- data/lib/generators/bx_builder_chain/templates/app/views/bx_builder_chain/test/form.html.erb +164 -0
- data/lib/generators/bx_builder_chain/templates/app/workers/bx_builder_chain/document_processor_worker.rb +32 -0
- data/lib/generators/bx_builder_chain/templates/initializer.rb +12 -0
- data/lib/generators/bx_builder_chain/templates/migration.rb +33 -0
- data/lib/pgvector/pg/binary_decoder/vector.rb +14 -0
- data/lib/pgvector/pg/text_decoder/vector.rb +12 -0
- data/lib/pgvector/pg.rb +10 -0
- data/lib/pgvector.rb +11 -0
- data/lib/sequel/plugins/pgvector/class_methods.rb +47 -0
- data/lib/sequel/plugins/pgvector/instance_methods.rb +34 -0
- data/lib/sequel/plugins/pgvector.rb +12 -0
- data/sig/bx_langchain_chat.rbs +4 -0
- metadata +238 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 550493404afe4cddc9999dc3071a0bc507052438409d320ee143a2711b6711a0
|
4
|
+
data.tar.gz: 75238a1638cc5970a110c14b3cdc914a9628c5c338d50317c698910a167f1a6f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 819f64ad09e864b4f008dd60b90857140a2d853b3adc145477f0e22da235ebfc558ad8f42c32345be8621c9ea1a7d6b00b3bedf06ca4b5dac8fb28fbb2a5e279
|
7
|
+
data.tar.gz: 40903aae567fc836e12ae05f0120ff44a3bbd36b213612abf35a20e7169236c5aff58655ccd65a452297d450798bf2e8d8187e54efac58784558e359e71e1699
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source "https://rubygems.org"
|
4
|
+
|
5
|
+
# Specify your gem's dependencies in bx_builder_chain.gemspec
|
6
|
+
gemspec
|
7
|
+
|
8
|
+
gem "rake", "~> 13.0"
|
9
|
+
gem "rspec", "~> 3.0"
|
10
|
+
gem "rubocop", "~> 1.21"
|
11
|
+
gem "sequel", "~> 5.71"
|
12
|
+
gem "pg"
|
13
|
+
gem 'dotenv'
|
14
|
+
# gem "pgvector", path: '../pgvector-ruby'
|
15
|
+
gem "ruby-openai"
|
16
|
+
gem "pdf-reader"
|
17
|
+
gem "nokogiri"
|
18
|
+
gem "docx"
|
19
|
+
gem "roo"
|
20
|
+
gem "baran"
|
21
|
+
|
22
|
+
gem "pry"
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
bx_builder_chain (0.1.0)
|
5
|
+
baran (= 0.1.7)
|
6
|
+
docx
|
7
|
+
dotenv
|
8
|
+
nokogiri
|
9
|
+
pdf-reader
|
10
|
+
pg
|
11
|
+
roo
|
12
|
+
ruby-openai
|
13
|
+
sequel (~> 5.71)
|
14
|
+
zeitwerk (= 2.6.11)
|
15
|
+
|
16
|
+
GEM
|
17
|
+
remote: https://rubygems.org/
|
18
|
+
specs:
|
19
|
+
Ascii85 (1.1.0)
|
20
|
+
afm (0.2.2)
|
21
|
+
ast (2.4.2)
|
22
|
+
baran (0.1.7)
|
23
|
+
coderay (1.1.3)
|
24
|
+
diff-lcs (1.5.0)
|
25
|
+
docx (0.8.0)
|
26
|
+
nokogiri (~> 1.13, >= 1.13.0)
|
27
|
+
rubyzip (~> 2.0)
|
28
|
+
dotenv (2.8.1)
|
29
|
+
faraday (2.7.10)
|
30
|
+
faraday-net_http (>= 2.0, < 3.1)
|
31
|
+
ruby2_keywords (>= 0.0.4)
|
32
|
+
faraday-multipart (1.0.4)
|
33
|
+
multipart-post (~> 2)
|
34
|
+
faraday-net_http (3.0.2)
|
35
|
+
hashery (2.1.2)
|
36
|
+
json (2.6.3)
|
37
|
+
method_source (1.0.0)
|
38
|
+
multipart-post (2.3.0)
|
39
|
+
nokogiri (1.13.10-arm64-darwin)
|
40
|
+
racc (~> 1.4)
|
41
|
+
parallel (1.23.0)
|
42
|
+
parser (3.2.2.3)
|
43
|
+
ast (~> 2.4.1)
|
44
|
+
racc
|
45
|
+
pdf-reader (2.11.0)
|
46
|
+
Ascii85 (~> 1.0)
|
47
|
+
afm (~> 0.2.1)
|
48
|
+
hashery (~> 2.0)
|
49
|
+
ruby-rc4
|
50
|
+
ttfunk
|
51
|
+
pg (1.5.3)
|
52
|
+
pry (0.14.2)
|
53
|
+
coderay (~> 1.1)
|
54
|
+
method_source (~> 1.0)
|
55
|
+
racc (1.7.1)
|
56
|
+
rainbow (3.1.1)
|
57
|
+
rake (13.0.6)
|
58
|
+
regexp_parser (2.8.1)
|
59
|
+
rexml (3.2.6)
|
60
|
+
roo (2.8.3)
|
61
|
+
nokogiri (~> 1)
|
62
|
+
rubyzip (>= 1.3.0, < 3.0.0)
|
63
|
+
rspec (3.12.0)
|
64
|
+
rspec-core (~> 3.12.0)
|
65
|
+
rspec-expectations (~> 3.12.0)
|
66
|
+
rspec-mocks (~> 3.12.0)
|
67
|
+
rspec-core (3.12.2)
|
68
|
+
rspec-support (~> 3.12.0)
|
69
|
+
rspec-expectations (3.12.3)
|
70
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
71
|
+
rspec-support (~> 3.12.0)
|
72
|
+
rspec-mocks (3.12.6)
|
73
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
74
|
+
rspec-support (~> 3.12.0)
|
75
|
+
rspec-support (3.12.1)
|
76
|
+
rubocop (1.50.2)
|
77
|
+
json (~> 2.3)
|
78
|
+
parallel (~> 1.10)
|
79
|
+
parser (>= 3.2.0.0)
|
80
|
+
rainbow (>= 2.2.2, < 4.0)
|
81
|
+
regexp_parser (>= 1.8, < 3.0)
|
82
|
+
rexml (>= 3.2.5, < 4.0)
|
83
|
+
rubocop-ast (>= 1.28.0, < 2.0)
|
84
|
+
ruby-progressbar (~> 1.7)
|
85
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
86
|
+
rubocop-ast (1.29.0)
|
87
|
+
parser (>= 3.2.1.0)
|
88
|
+
ruby-openai (5.0.0)
|
89
|
+
faraday (>= 1)
|
90
|
+
faraday-multipart (>= 1)
|
91
|
+
ruby-progressbar (1.13.0)
|
92
|
+
ruby-rc4 (0.1.5)
|
93
|
+
ruby2_keywords (0.0.5)
|
94
|
+
rubyzip (2.3.2)
|
95
|
+
sequel (5.71.0)
|
96
|
+
ttfunk (1.7.0)
|
97
|
+
unicode-display_width (2.4.2)
|
98
|
+
zeitwerk (2.6.11)
|
99
|
+
|
100
|
+
PLATFORMS
|
101
|
+
arm64-darwin-22
|
102
|
+
|
103
|
+
DEPENDENCIES
|
104
|
+
baran
|
105
|
+
bx_builder_chain!
|
106
|
+
docx
|
107
|
+
dotenv
|
108
|
+
nokogiri
|
109
|
+
pdf-reader
|
110
|
+
pg
|
111
|
+
pry
|
112
|
+
rake (~> 13.0)
|
113
|
+
roo
|
114
|
+
rspec (~> 3.0)
|
115
|
+
rubocop (~> 1.21)
|
116
|
+
ruby-openai
|
117
|
+
sequel (~> 5.71)
|
118
|
+
|
119
|
+
BUNDLED WITH
|
120
|
+
2.4.7
|
data/README.md
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# BxBuilderChain
|
2
|
+
|
3
|
+
This gem / building block allows Builder apps to use OpenAI.
|
4
|
+
The following features are included:
|
5
|
+
- OpenAi completion with GPT 3.5 and GPT 4
|
6
|
+
- Q&A / additional prompt context with a user's private documents
|
7
|
+
- Documents can be stored on a global and user/group level
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Install the gem and add to the application's Gemfile by executing:
|
12
|
+
|
13
|
+
$ bundle add bx_builder_chain
|
14
|
+
|
15
|
+
followed by
|
16
|
+
|
17
|
+
$ bundle install
|
18
|
+
|
19
|
+
If bundler is not being used to manage dependencies, install the gem by executing:
|
20
|
+
|
21
|
+
$ gem install bx_builder_chain
|
22
|
+
|
23
|
+
TODO: add rake task to create db structure
|
24
|
+
### Optional
|
25
|
+
|
26
|
+
Generate the endpoint and ActiveAdmin controllers for Builder Chain:
|
27
|
+
|
28
|
+
$ rails generate builder_chain:endpoints
|
29
|
+
|
30
|
+
this will add the following endpoint controllers
|
31
|
+
- File upload
|
32
|
+
- OpenAi ask / completion
|
33
|
+
- ActiveAdmin documents controller
|
34
|
+
|
35
|
+
## Usage
|
36
|
+
```ruby
|
37
|
+
require "bx_builder_chain"
|
38
|
+
```
|
39
|
+
|
40
|
+
create the llm and client
|
41
|
+
```ruby
|
42
|
+
llm = BxBuilderChain::Llm::OpenAi.new(api_key: 'open-ai-api-key')
|
43
|
+
client = BxBuilderChain::Vectorsearch::Pgvector.new(
|
44
|
+
url: 'postgres://postgres:password@localhost:5432/test', # postgres db url
|
45
|
+
table_name: "embeddings", # table name for the documents to be stored
|
46
|
+
llm: llm,
|
47
|
+
namespace: user_id # default is nil, nil is used for global public documents
|
48
|
+
)
|
49
|
+
```
|
50
|
+
|
51
|
+
Add documents to the Vector Store
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
# Store plain texts in your vector search database
|
55
|
+
client.add_texts(
|
56
|
+
texts: [
|
57
|
+
"Begin by preheating your oven to 375°F (190°C). Prepare four boneless, skinless chicken breasts by cutting a pocket into the side of each breast, being careful not to cut all the way through. Season the chicken with salt and pepper to taste. In a large skillet, melt 2 tablespoons of unsalted butter over medium heat. Add 1 small diced onion and 2 minced garlic cloves, and cook until softened, about 3-4 minutes. Add 8 ounces of fresh spinach and cook until wilted, about 3 minutes. Remove the skillet from heat and let the mixture cool slightly.",
|
58
|
+
"In a bowl, combine the spinach mixture with 4 ounces of softened cream cheese, 1/4 cup of grated Parmesan cheese, 1/4 cup of shredded mozzarella cheese, and 1/4 teaspoon of red pepper flakes. Mix until well combined. Stuff each chicken breast pocket with an equal amount of the spinach mixture. Seal the pocket with a toothpick if necessary. In the same skillet, heat 1 tablespoon of olive oil over medium-high heat. Add the stuffed chicken breasts and sear on each side for 3-4 minutes, or until golden brown."
|
59
|
+
]
|
60
|
+
)
|
61
|
+
```
|
62
|
+
```ruby
|
63
|
+
# Store the contents of your files in your vector search database
|
64
|
+
my_pdf = "path/to/my.pdf"
|
65
|
+
my_text = "path/to/my.txt"
|
66
|
+
my_docx = "path/to/my.docx"
|
67
|
+
|
68
|
+
client.add_data(paths: [my_pdf, my_text, my_docx])
|
69
|
+
```
|
70
|
+
|
71
|
+
Then ask the question
|
72
|
+
```ruby
|
73
|
+
client.ask(question: "What is Frogger?")
|
74
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/bx_builder_chain/version"
|
4
|
+
|
5
|
+
# Gem specification for bx_builder_chain: identity metadata, the list of
# packaged files, and the runtime dependencies.
Gem::Specification.new do |spec|
  spec.name = "bx_builder_chain"
  spec.version = BxBuilderChain::VERSION
  spec.authors = ["Paul Ketelle"]
  spec.email = ["paul.ketelle@builder.ai"]

  # Replaces the gem-template placeholder text that was previously published.
  spec.summary = "Building block that allows Builder apps to use OpenAI."
  spec.description = "OpenAI completion with GPT-3.5 and GPT-4, plus Q&A / additional prompt context " \
                     "built from a user's private documents stored at a global or user/group level."
  spec.license = "MIT"
  spec.required_ruby_version = ">= 2.6.0"

  # Package every git-tracked file except this gemspec itself and paths that
  # start with bin/, test/, spec/, features/, .git, .circleci or appveyor.
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (File.expand_path(f) == __FILE__) ||
        f.match?(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|circleci)|appveyor)})
    end
  end
  spec.bindir = "exe"
  # Anything tracked under exe/ is exposed as an executable, by basename.
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "zeitwerk", "2.6.11"
  spec.add_dependency "baran", "0.1.7"
  spec.add_dependency "sequel", "~> 5.71"
  spec.add_dependency "pg", "~> 1.5.3"
  spec.add_dependency "dotenv", "~> 2.8"
  spec.add_dependency "ruby-openai", "~> 5.1.0"
  spec.add_dependency "pdf-reader", "~> 2.11.0"
  spec.add_dependency "nokogiri", "~> 1.8"
  spec.add_dependency "docx", "~> 0.8.0"
  spec.add_dependency "roo", "~> 2.8.3"
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "baran"
|
4
|
+
|
5
|
+
module BxBuilderChain
  module Chunker
    #
    # Chunker that hands the text to Baran::RecursiveCharacterTextSplitter,
    # configured with an ordered list of separators.
    #
    # Usage:
    #     BxBuilderChain::Chunker::RecursiveText.new(text).chunks
    #
    class RecursiveText
      attr_reader :text, :chunk_size, :chunk_overlap, :separators

      # @param text [String] the text to split
      # @param chunk_size [Integer] forwarded to the splitter as +chunk_size+
      # @param chunk_overlap [Integer] forwarded to the splitter as +chunk_overlap+
      # @param separators [Array<String>] forwarded to the splitter as +separators+
      def initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n", "\n", ".", " ", ""])
        @separators = separators
        @chunk_overlap = chunk_overlap
        @chunk_size = chunk_size
        @text = text
      end

      # Builds a fresh splitter on every call and runs it over {#text}.
      #
      # @return [Array] the splitter's result, passed through untouched
      #   NOTE(review): previously documented as Array<String>; confirm the
      #   element type against the baran gem.
      def chunks
        Baran::RecursiveCharacterTextSplitter
          .new(chunk_size: chunk_size, chunk_overlap: chunk_overlap, separators: separators)
          .chunks(text)
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "baran"
|
4
|
+
|
5
|
+
module BxBuilderChain
  module Chunker
    #
    # Single-separator chunker that hands the text to
    # Baran::CharacterTextSplitter.
    #
    # Usage:
    #     BxBuilderChain::Chunker::Text.new(text).chunks
    #
    class Text
      attr_reader :text, :chunk_size, :chunk_overlap, :separator

      # @param text [String] the text to split
      # @param chunk_size [Integer] forwarded to the splitter as +chunk_size+
      # @param chunk_overlap [Integer] forwarded to the splitter as +chunk_overlap+
      # @param separator [String] forwarded to the splitter as +separator+
      def initialize(text, chunk_size: 1024, chunk_overlap: 64, separator: "\n\n")
        @separator = separator
        @chunk_overlap = chunk_overlap
        @chunk_size = chunk_size
        @text = text
      end

      # Builds a fresh splitter on every call and runs it over {#text}.
      #
      # @return [Array] the splitter's result, passed through untouched
      #   NOTE(review): previously documented as Array<String>; confirm the
      #   element type against the baran gem.
      def chunks
        Baran::CharacterTextSplitter
          .new(chunk_size: chunk_size, chunk_overlap: chunk_overlap, separator: separator)
          .chunks(text)
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Inside your gem (e.g., in `lib/bx_builder_chain/configuration.rb`)
|
2
|
+
|
3
|
+
module BxBuilderChain
  # Settings object for the gem. Each setting is a plain read/write attribute
  # that starts at the default assigned in {#initialize}.
  class Configuration
    attr_accessor :pg_url, :openai_api_key, :public_namespace, :threshold, :default_prompt_template

    # Seeds the defaults:
    # - +pg_url+ and +openai_api_key+ come from the DB_URL and OPENAI_API_KEY
    #   environment variables (nil when unset)
    # - +public_namespace+ is 'public'
    # - +threshold+ is 0.25 (NOTE(review): presumably a similarity cut-off
    #   used by the vector search; confirm against its caller)
    # - +default_prompt_template+ carries %{context} and %{question}
    #   placeholders
    def initialize
      @pg_url = ENV['DB_URL']
      @openai_api_key = ENV['OPENAI_API_KEY']
      @public_namespace = 'public'
      @threshold = 0.25
      # chomp drops the heredoc's final newline so the template ends right
      # after the question placeholder.
      @default_prompt_template = <<~PROMPT.chomp
        Context information is below
        --------------------
        %{context}
        --------------------
        Given the context information and not prior knowledge
        answer the question: %{question}
      PROMPT
    end
  end
end
|
21
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BxBuilderChain
  # Value wrapper for content produced by a {BxBuilderChain::Loader}, paired
  # with the location it was loaded from.
  class Data
    # @return [String, nil] URL or path of the data source; nil when no
    #   :source option was given
    attr_reader :source

    # @param data [String] the loaded content
    # @param options [Hash] only the :source key is read
    # @option options [String] :source URL or path of the data source
    def initialize(data, options = {})
      @data = data
      @source = options[:source]
    end

    # @return [String] the loaded content exactly as it was passed in
    def value
      @data
    end

    # Splits the content with {BxBuilderChain::Chunker::RecursiveText}.
    #
    # @param opts [Hash] keyword options forwarded to the chunker's constructor
    # @return [Array] whatever the chunker's +chunks+ returns
    def chunks(opts = {})
      chunker = BxBuilderChain::Chunker::RecursiveText.new(@data, **opts)
      chunker.chunks
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BxBuilderChain
  # Mixin for checking that an optional gem is available before it is used.
  module DependencyHelper
    # Declared for version-mismatch failures. Nothing in this module raises it:
    # {#depends_on} does not compare versions.
    class VersionError < ScriptError; end

    # Checks that a gem named +gem_name+ is installed by looking up its
    # specification. The gem is NOT required/loaded here and its version is
    # NOT checked; callers still need their own +require+.
    #
    # @param gem_name [String] The name of the gem to look up
    # @return [Boolean] true when a specification for the gem was found
    # @raise [LoadError] If the gem is not installed
    #
    def depends_on(gem_name)
      Gem::Specification.find_by_name(gem_name)
      true
    rescue LoadError
      # RubyGems signals a missing gem with Gem::MissingSpecError, a LoadError
      # subclass; re-raise with a message that names the gem to install.
      raise LoadError, "!Could not load #{gem_name}. Please ensure that the #{gem_name} gem is installed."
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module BxBuilderChain::Llm
  # Error type for failures reported by an LLM provider's API.
  class ApiError < StandardError; end

  # A LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
  #
  # BxBuilderChain provides a common interface to interact with all supported LLMs:
  #
  # - {BxBuilderChain::Llm::OpenAi}
  #
  # Subclasses override the operations they support; every operation defined
  # here raises NotImplementedError.
  #
  # @abstract
  class Base
    include BxBuilderChain::DependencyHelper

    # A client for communicating with the LLM
    attr_reader :client

    # Reads +:dimension+ from the concrete class's +DEFAULTS+ constant.
    #
    # @return [Object, nil] the value stored under +:dimension+
    # @raise [NameError] if the concrete class defines no +DEFAULTS+ constant
    def default_dimension
      self.class.const_get(:DEFAULTS)[:dimension]
    end

    #
    # Generate a chat completion for a given prompt. Parameters will depend on the LLM
    #
    # @raise NotImplementedError if not supported by the LLM
    def chat(**kwargs)
      raise NotImplementedError, "#{self.class.name} does not support chat"
    end

    #
    # Generate a completion for a given prompt. Parameters will depend on the LLM.
    #
    # @raise NotImplementedError if not supported by the LLM
    def complete(**kwargs)
      raise NotImplementedError, "#{self.class.name} does not support completion"
    end

    #
    # Generate an embedding for a given text. Parameters depend on the LLM.
    #
    # @raise NotImplementedError if not supported by the LLM
    #
    def embed(**kwargs)
      raise NotImplementedError, "#{self.class.name} does not support generating embeddings"
    end

    #
    # Generate a summary for a given text. Parameters depend on the LLM.
    #
    # @raise NotImplementedError if not supported by the LLM
    #
    def summarize(**kwargs)
      raise NotImplementedError, "#{self.class.name} does not support summarization"
    end

    # Rough token estimate that needs no tokenizer: counts each run of word
    # characters and each single non-word character as one piece, then scales
    # the count by 1.07 and truncates.
    #
    # The previous second pass, which split every piece on /(?<=[\W])$/, was
    # dropped because it could never change the list: a piece is either all
    # word characters or exactly one non-word character.
    #
    # @param string [String] text to measure
    # @return [Integer] estimated number of tokens
    def count_tokens(string)
      pieces = string.scan(/\w+|\W/)
      (pieces.length * 1.07).to_i
    end
  end
end
|