langchainrb 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +179 -0
- data/LICENSE.txt +21 -0
- data/README.md +127 -0
- data/Rakefile +8 -0
- data/examples/.keep +0 -0
- data/examples/store_and_query_with_pinecone.rb +43 -0
- data/examples/store_and_query_with_qdrant.rb +36 -0
- data/examples/store_and_query_with_weaviate.rb +30 -0
- data/lib/langchain.rb +17 -0
- data/lib/llm/base.rb +18 -0
- data/lib/llm/cohere.rb +45 -0
- data/lib/llm/openai.rb +50 -0
- data/lib/vectorsearch/base.rb +70 -0
- data/lib/vectorsearch/milvus.rb +111 -0
- data/lib/vectorsearch/pinecone.rb +112 -0
- data/lib/vectorsearch/qdrant.rb +109 -0
- data/lib/vectorsearch/weaviate.rb +135 -0
- data/lib/version.rb +5 -0
- data/sig/langchain.rbs +4 -0
- metadata +180 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1124e5c3ad459df63509c1682a713ecdf3952904df4685a3e0583c4e3c3a79dc
|
4
|
+
data.tar.gz: 0ebb4c9c0774101b32f2f679f95b1c99d425462407d7b28bd07273f9610c42fe
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f5c3fb67e7562df5b3e317f8f0c1d75e3948ce53812d10b5291236e4c068d8755781cc220add9eea686af189e77354b69daa45cb775353f3a3434dca1314920e
|
7
|
+
data.tar.gz: 0fb89fc2d4f1535b4af8ba5d3c9956da23ed3dd1174cb332f9c285838d9b1bfc3526552025ea1deca1b91e66275cef84b2c973c4b6118d3387074057ad192e67
|
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,179 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
langchainrb (0.1.3)
|
5
|
+
cohere-ruby (~> 0.9.1)
|
6
|
+
milvus (~> 0.9.0)
|
7
|
+
pinecone (~> 0.1.6)
|
8
|
+
qdrant-ruby (~> 0.9.0)
|
9
|
+
ruby-openai (~> 4.0.0)
|
10
|
+
weaviate-ruby (~> 0.8.0)
|
11
|
+
|
12
|
+
GEM
|
13
|
+
remote: https://rubygems.org/
|
14
|
+
specs:
|
15
|
+
actionpack (7.0.4.3)
|
16
|
+
actionview (= 7.0.4.3)
|
17
|
+
activesupport (= 7.0.4.3)
|
18
|
+
rack (~> 2.0, >= 2.2.0)
|
19
|
+
rack-test (>= 0.6.3)
|
20
|
+
rails-dom-testing (~> 2.0)
|
21
|
+
rails-html-sanitizer (~> 1.0, >= 1.2.0)
|
22
|
+
actionview (7.0.4.3)
|
23
|
+
activesupport (= 7.0.4.3)
|
24
|
+
builder (~> 3.1)
|
25
|
+
erubi (~> 1.4)
|
26
|
+
rails-dom-testing (~> 2.0)
|
27
|
+
rails-html-sanitizer (~> 1.1, >= 1.2.0)
|
28
|
+
activesupport (7.0.4.3)
|
29
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
30
|
+
i18n (>= 1.6, < 2)
|
31
|
+
minitest (>= 5.1)
|
32
|
+
tzinfo (~> 2.0)
|
33
|
+
builder (3.2.4)
|
34
|
+
byebug (11.1.3)
|
35
|
+
coderay (1.1.3)
|
36
|
+
cohere-ruby (0.9.1)
|
37
|
+
faraday (~> 2.7.0)
|
38
|
+
concurrent-ruby (1.2.2)
|
39
|
+
crass (1.0.6)
|
40
|
+
diff-lcs (1.5.0)
|
41
|
+
dotenv (2.7.6)
|
42
|
+
dotenv-rails (2.7.6)
|
43
|
+
dotenv (= 2.7.6)
|
44
|
+
railties (>= 3.2)
|
45
|
+
dry-configurable (1.0.1)
|
46
|
+
dry-core (~> 1.0, < 2)
|
47
|
+
zeitwerk (~> 2.6)
|
48
|
+
dry-core (1.0.0)
|
49
|
+
concurrent-ruby (~> 1.0)
|
50
|
+
zeitwerk (~> 2.6)
|
51
|
+
dry-inflector (1.0.0)
|
52
|
+
dry-initializer (3.1.1)
|
53
|
+
dry-logic (1.5.0)
|
54
|
+
concurrent-ruby (~> 1.0)
|
55
|
+
dry-core (~> 1.0, < 2)
|
56
|
+
zeitwerk (~> 2.6)
|
57
|
+
dry-schema (1.13.1)
|
58
|
+
concurrent-ruby (~> 1.0)
|
59
|
+
dry-configurable (~> 1.0, >= 1.0.1)
|
60
|
+
dry-core (~> 1.0, < 2)
|
61
|
+
dry-initializer (~> 3.0)
|
62
|
+
dry-logic (>= 1.4, < 2)
|
63
|
+
dry-types (>= 1.7, < 2)
|
64
|
+
zeitwerk (~> 2.6)
|
65
|
+
dry-struct (1.6.0)
|
66
|
+
dry-core (~> 1.0, < 2)
|
67
|
+
dry-types (>= 1.7, < 2)
|
68
|
+
ice_nine (~> 0.11)
|
69
|
+
zeitwerk (~> 2.6)
|
70
|
+
dry-types (1.7.1)
|
71
|
+
concurrent-ruby (~> 1.0)
|
72
|
+
dry-core (~> 1.0)
|
73
|
+
dry-inflector (~> 1.0)
|
74
|
+
dry-logic (~> 1.4)
|
75
|
+
zeitwerk (~> 2.6)
|
76
|
+
dry-validation (1.10.0)
|
77
|
+
concurrent-ruby (~> 1.0)
|
78
|
+
dry-core (~> 1.0, < 2)
|
79
|
+
dry-initializer (~> 3.0)
|
80
|
+
dry-schema (>= 1.12, < 2)
|
81
|
+
zeitwerk (~> 2.6)
|
82
|
+
erubi (1.12.0)
|
83
|
+
faraday (2.7.4)
|
84
|
+
faraday-net_http (>= 2.0, < 3.1)
|
85
|
+
ruby2_keywords (>= 0.0.4)
|
86
|
+
faraday-multipart (1.0.4)
|
87
|
+
multipart-post (~> 2)
|
88
|
+
faraday-net_http (3.0.2)
|
89
|
+
graphlient (0.7.0)
|
90
|
+
faraday (~> 2.0)
|
91
|
+
graphql-client
|
92
|
+
graphql (2.0.21)
|
93
|
+
graphql-client (0.18.0)
|
94
|
+
activesupport (>= 3.0)
|
95
|
+
graphql
|
96
|
+
httparty (0.21.0)
|
97
|
+
mini_mime (>= 1.0.0)
|
98
|
+
multi_xml (>= 0.5.2)
|
99
|
+
i18n (1.13.0)
|
100
|
+
concurrent-ruby (~> 1.0)
|
101
|
+
ice_nine (0.11.2)
|
102
|
+
loofah (2.20.0)
|
103
|
+
crass (~> 1.0.2)
|
104
|
+
nokogiri (>= 1.5.9)
|
105
|
+
method_source (1.0.0)
|
106
|
+
milvus (0.9.0)
|
107
|
+
faraday (~> 2.7.0)
|
108
|
+
mini_mime (1.1.2)
|
109
|
+
minitest (5.18.0)
|
110
|
+
multi_xml (0.6.0)
|
111
|
+
multipart-post (2.3.0)
|
112
|
+
nokogiri (1.14.3-x86_64-darwin)
|
113
|
+
racc (~> 1.4)
|
114
|
+
pinecone (0.1.71)
|
115
|
+
dry-struct (~> 1.6.0)
|
116
|
+
dry-validation (~> 1.10.0)
|
117
|
+
httparty (~> 0.21.0)
|
118
|
+
pry (0.14.2)
|
119
|
+
coderay (~> 1.1)
|
120
|
+
method_source (~> 1.0)
|
121
|
+
pry-byebug (3.10.1)
|
122
|
+
byebug (~> 11.0)
|
123
|
+
pry (>= 0.13, < 0.15)
|
124
|
+
qdrant-ruby (0.9.0)
|
125
|
+
faraday (~> 2.7)
|
126
|
+
racc (1.6.2)
|
127
|
+
rack (2.2.7)
|
128
|
+
rack-test (2.1.0)
|
129
|
+
rack (>= 1.3)
|
130
|
+
rails-dom-testing (2.0.3)
|
131
|
+
activesupport (>= 4.2.0)
|
132
|
+
nokogiri (>= 1.6)
|
133
|
+
rails-html-sanitizer (1.5.0)
|
134
|
+
loofah (~> 2.19, >= 2.19.1)
|
135
|
+
railties (7.0.4.3)
|
136
|
+
actionpack (= 7.0.4.3)
|
137
|
+
activesupport (= 7.0.4.3)
|
138
|
+
method_source
|
139
|
+
rake (>= 12.2)
|
140
|
+
thor (~> 1.0)
|
141
|
+
zeitwerk (~> 2.5)
|
142
|
+
rake (13.0.6)
|
143
|
+
rspec (3.12.0)
|
144
|
+
rspec-core (~> 3.12.0)
|
145
|
+
rspec-expectations (~> 3.12.0)
|
146
|
+
rspec-mocks (~> 3.12.0)
|
147
|
+
rspec-core (3.12.2)
|
148
|
+
rspec-support (~> 3.12.0)
|
149
|
+
rspec-expectations (3.12.3)
|
150
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
151
|
+
rspec-support (~> 3.12.0)
|
152
|
+
rspec-mocks (3.12.5)
|
153
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
154
|
+
rspec-support (~> 3.12.0)
|
155
|
+
rspec-support (3.12.0)
|
156
|
+
ruby-openai (4.0.0)
|
157
|
+
faraday (>= 1)
|
158
|
+
faraday-multipart (>= 1)
|
159
|
+
ruby2_keywords (0.0.5)
|
160
|
+
thor (1.2.1)
|
161
|
+
tzinfo (2.0.6)
|
162
|
+
concurrent-ruby (~> 1.0)
|
163
|
+
weaviate-ruby (0.8.0)
|
164
|
+
faraday (~> 2.7)
|
165
|
+
graphlient (~> 0.7.0)
|
166
|
+
zeitwerk (2.6.8)
|
167
|
+
|
168
|
+
PLATFORMS
|
169
|
+
x86_64-darwin-19
|
170
|
+
|
171
|
+
DEPENDENCIES
|
172
|
+
dotenv-rails (~> 2.7.6)
|
173
|
+
langchainrb!
|
174
|
+
pry-byebug (~> 3.10.0)
|
175
|
+
rake (~> 13.0)
|
176
|
+
rspec (~> 3.0)
|
177
|
+
|
178
|
+
BUNDLED WITH
|
179
|
+
2.4.0
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2023 Andrei Bondarev
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
🦜️🔗 LangChain.rb
|
2
|
+
---
|
3
|
+
⚡ Building applications with LLMs through composability ⚡
|
4
|
+
|
5
|
+
:warning: UNDER ACTIVE AND RAPID DEVELOPMENT (MAY BE BUGGY AND UNTESTED)
|
6
|
+
|
7
|
+
![Tests status](https://github.com/andreibondarev/langchainrb/actions/workflows/ci.yml/badge.svg) [![Gem Version](https://badge.fury.io/rb/langchainrb.svg)](https://badge.fury.io/rb/langchainrb)
|
8
|
+
|
9
|
+
The Vectorsearch library is an abstraction layer on top of many popular vector search databases. It is a modern ORM that allows developers to easily chunk data, generate embeddings, store, search, query and retrieve data from vector search databases. Vectorsearch offers a straightforward DSL and abstracts away overly complicated machine learning/data science-specific configurations and concepts.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Install the gem and add to the application's Gemfile by executing:
|
14
|
+
|
15
|
+
$ bundle add langchainrb
|
16
|
+
|
17
|
+
If bundler is not being used to manage dependencies, install the gem by executing:
|
18
|
+
|
19
|
+
$ gem install langchainrb
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
require "langchain"
|
25
|
+
```
|
26
|
+
|
27
|
+
List of currently supported vector search databases and features:
|
28
|
+
|
29
|
+
| Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
|
30
|
+
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
|
31
|
+
| Weaviate | :white_check_mark: | WIP | WIP | WIP | | |
|
32
|
+
| Qdrant | :white_check_mark: | WIP | WIP | WIP | | |
|
33
|
+
| Milvus | :white_check_mark: | WIP | WIP | WIP | | |
|
34
|
+
| Pinecone | :white_check_mark: | WIP | WIP | WIP | | |
|
35
|
+
|
36
|
+
### Using Vector Search Databases
|
37
|
+
|
38
|
+
Choose the LLM provider you'll be using (OpenAI or Cohere) and retrieve the API key.
|
39
|
+
|
40
|
+
Pick the vector search database you'll be using and instantiate the client:
|
41
|
+
```ruby
|
42
|
+
client = Vectorsearch::Weaviate.new(
|
43
|
+
url: ENV["WEAVIATE_URL"],
|
44
|
+
api_key: ENV["WEAVIATE_API_KEY"],
|
45
|
+
llm: :openai, # or :cohere
|
46
|
+
llm_api_key: ENV["OPENAI_API_KEY"]
|
47
|
+
)
|
48
|
+
|
49
|
+
# You can instantiate any other supported vector search database:
|
50
|
+
client = Vectorsearch::Milvus.new(...)
|
51
|
+
client = Vectorsearch::Qdrant.new(...)
|
52
|
+
client = Vectorsearch::Pinecone.new(...)
|
53
|
+
```
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
# Creating the default schema
|
57
|
+
client.create_default_schema
|
58
|
+
```
|
59
|
+
|
60
|
+
```ruby
|
61
|
+
# Store your documents in your vector search database
|
62
|
+
client.add_texts(
|
63
|
+
texts: [
|
64
|
+
"Begin by preheating your oven to 375°F (190°C). Prepare four boneless, skinless chicken breasts by cutting a pocket into the side of each breast, being careful not to cut all the way through. Season the chicken with salt and pepper to taste. In a large skillet, melt 2 tablespoons of unsalted butter over medium heat. Add 1 small diced onion and 2 minced garlic cloves, and cook until softened, about 3-4 minutes. Add 8 ounces of fresh spinach and cook until wilted, about 3 minutes. Remove the skillet from heat and let the mixture cool slightly.",
|
65
|
+
"In a bowl, combine the spinach mixture with 4 ounces of softened cream cheese, 1/4 cup of grated Parmesan cheese, 1/4 cup of shredded mozzarella cheese, and 1/4 teaspoon of red pepper flakes. Mix until well combined. Stuff each chicken breast pocket with an equal amount of the spinach mixture. Seal the pocket with a toothpick if necessary. In the same skillet, heat 1 tablespoon of olive oil over medium-high heat. Add the stuffed chicken breasts and sear on each side for 3-4 minutes, or until golden brown."
|
66
|
+
]
|
67
|
+
)
|
68
|
+
```
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
# Retrieve similar documents based on the query string passed in
|
72
|
+
client.similarity_search(
|
73
|
+
query:,
|
74
|
+
k: # number of results to be retrieved
|
75
|
+
)
|
76
|
+
```
|
77
|
+
```ruby
|
78
|
+
# Retrieve similar documents based on the embedding passed in
|
79
|
+
client.similarity_search_by_vector(
|
80
|
+
embedding:,
|
81
|
+
k: # number of results to be retrieved
|
82
|
+
)
|
83
|
+
```
|
84
|
+
```ruby
|
85
|
+
# Q&A-style querying based on the question passed in
|
86
|
+
client.ask(
|
87
|
+
question:
|
88
|
+
)
|
89
|
+
```
|
90
|
+
|
91
|
+
### Using Standalone LLMs
|
92
|
+
|
93
|
+
#### OpenAI
|
94
|
+
```ruby
|
95
|
+
openai = LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
96
|
+
```
|
97
|
+
```ruby
|
98
|
+
openai.embed(text: "foo bar")
|
99
|
+
```
|
100
|
+
```ruby
|
101
|
+
openai.complete(prompt: "What is the meaning of life?")
|
102
|
+
```
|
103
|
+
|
104
|
+
#### Cohere
|
105
|
+
```ruby
|
106
|
+
cohere = LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
|
107
|
+
```
|
108
|
+
```ruby
|
109
|
+
cohere.embed(text: "foo bar")
|
110
|
+
```
|
111
|
+
```ruby
|
112
|
+
cohere.complete(prompt: "What is the meaning of life?")
|
113
|
+
```
|
114
|
+
|
115
|
+
## Development
|
116
|
+
|
117
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
118
|
+
|
119
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
120
|
+
|
121
|
+
## Contributing
|
122
|
+
|
123
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/langchainrb.
|
124
|
+
|
125
|
+
## License
|
126
|
+
|
127
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/examples/.keep
ADDED
File without changes
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require "langchain"
|
2
|
+
|
3
|
+
# Instantiate the Pinecone client
|
4
|
+
pinecone = Vectorsearch::Pinecone.new(
|
5
|
+
environment: ENV["PINECONE_ENVIRONMENT"],
|
6
|
+
api_key: ENV["PINECONE_API_KEY"],
|
7
|
+
index_name: "recipes",
|
8
|
+
llm: :openai,
|
9
|
+
llm_api_key: ENV["OPENAI_API_KEY"]
|
10
|
+
)
|
11
|
+
|
12
|
+
# Create the default schema.
|
13
|
+
pinecone.create_default_schema
|
14
|
+
|
15
|
+
# Set up an array of text strings
|
16
|
+
recipes = [
|
17
|
+
"Preheat oven to 400 degrees F (200 degrees C). Cut the top off the head of garlic. Arrange the garlic, carrots, celery, onion, pepper, and tomato on a large baking sheet in a single layer. Drizzle the olive oil over the vegetables; season with salt and pepper. Roast the vegetables in the preheated oven, turning every 20 minutes, until tender and browned, about 1 hour. Combine the water, thyme, parsley, and bay leaves in a large stock pot over medium-high heat. Squeeze the head of garlic into the stock pot, and discard the outer husk. Place the carrots, celery, onion, pepper, and tomato in the stock pot. Bring the water to a boil; reduce heat to low and simmer for 1 1/2 hours; strain and cool.",
|
18
|
+
"Heat oven to 190C/fan 170C/gas 5. Heat 1 tbsp oil and the butter in a frying pan, then add the onion and fry for 5 mins until softened. Cool slightly. Tip the sausagemeat, lemon zest, breadcrumbs, apricots, chestnuts and thyme into a bowl. Add the onion and cranberries, and mix everything together with your hands, adding plenty of pepper and a little salt. Cut each chicken breast into three fillets lengthwise and season all over with salt and pepper. Heat the remaining oil in the frying pan, and fry the chicken fillets quickly until browned, about 6-8 mins. Roll out two-thirds of the pastry to line a 20-23cm springform or deep loose-based tart tin. Press in half the sausage mix and spread to level. Then add the chicken pieces in one layer and cover with the rest of the sausage. Press down lightly. Roll out the remaining pastry. Brush the edges of the pastry with beaten egg and cover with the pastry lid. Pinch the edges to seal, then trim. Brush the top of the pie with egg, then roll out the trimmings to make holly leaf shapes and berries. Decorate the pie and brush again with egg. Set the tin on a baking sheet and bake for 50-60 mins, then cool in the tin for 15 mins. Remove and leave to cool completely. Serve with a winter salad and pickles."
|
19
|
+
]
|
20
|
+
|
21
|
+
# Add data to the index. OpenAI will be used to generate the embeddings behind the scenes.
|
22
|
+
pinecone.add_texts(
|
23
|
+
texts: recipes
|
24
|
+
)
|
25
|
+
|
26
|
+
# Query your data
|
27
|
+
pinecone.similarity_search(
|
28
|
+
query: "chicken",
|
29
|
+
k: 1
|
30
|
+
)
|
31
|
+
|
32
|
+
# Interact with your index through Q&A
|
33
|
+
pinecone.ask(
|
34
|
+
question: "What is the best recipe for chicken?"
|
35
|
+
)
|
36
|
+
|
37
|
+
# Generate an embedding and search by it
|
38
|
+
openai = LLM::OpenAI.new(api_key: ENV['OPENAI_API_KEY'])
|
39
|
+
embedding = openai.embed(text: "veggie")
|
40
|
+
|
41
|
+
pinecone.similarity_search_by_vector(
|
42
|
+
embedding: embedding
|
43
|
+
)
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require "langchain"
|
2
|
+
|
3
|
+
# Instantiate the Qdrant client
|
4
|
+
qdrant = Vectorsearch::Qdrant.new(
|
5
|
+
url: ENV["QDRANT_URL"],
|
6
|
+
api_key: ENV["QDRANT_API_KEY"],
|
7
|
+
index_name: "recipes",
|
8
|
+
llm: :cohere,
|
9
|
+
llm_api_key: ENV["COHERE_API_KEY"]
|
10
|
+
)
|
11
|
+
|
12
|
+
|
13
|
+
# Create the default schema.
|
14
|
+
qdrant.create_default_schema
|
15
|
+
|
16
|
+
# Set up an array of text strings
|
17
|
+
recipes = [
|
18
|
+
"Preheat oven to 400 degrees F (200 degrees C). Cut the top off the head of garlic. Arrange the garlic, carrots, celery, onion, pepper, and tomato on a large baking sheet in a single layer. Drizzle the olive oil over the vegetables; season with salt and pepper. Roast the vegetables in the preheated oven, turning every 20 minutes, until tender and browned, about 1 hour. Combine the water, thyme, parsley, and bay leaves in a large stock pot over medium-high heat. Squeeze the head of garlic into the stock pot, and discard the outer husk. Place the carrots, celery, onion, pepper, and tomato in the stock pot. Bring the water to a boil; reduce heat to low and simmer for 1 1/2 hours; strain and cool.",
|
19
|
+
"Heat oven to 190C/fan 170C/gas 5. Heat 1 tbsp oil and the butter in a frying pan, then add the onion and fry for 5 mins until softened. Cool slightly. Tip the sausagemeat, lemon zest, breadcrumbs, apricots, chestnuts and thyme into a bowl. Add the onion and cranberries, and mix everything together with your hands, adding plenty of pepper and a little salt. Cut each chicken breast into three fillets lengthwise and season all over with salt and pepper. Heat the remaining oil in the frying pan, and fry the chicken fillets quickly until browned, about 6-8 mins. Roll out two-thirds of the pastry to line a 20-23cm springform or deep loose-based tart tin. Press in half the sausage mix and spread to level. Then add the chicken pieces in one layer and cover with the rest of the sausage. Press down lightly. Roll out the remaining pastry. Brush the edges of the pastry with beaten egg and cover with the pastry lid. Pinch the edges to seal, then trim. Brush the top of the pie with egg, then roll out the trimmings to make holly leaf shapes and berries. Decorate the pie and brush again with egg. Set the tin on a baking sheet and bake for 50-60 mins, then cool in the tin for 15 mins. Remove and leave to cool completely. Serve with a winter salad and pickles."
|
20
|
+
]
|
21
|
+
|
22
|
+
# Add data to the index. Cohere will be used to generate the embeddings behind the scenes.
|
23
|
+
qdrant.add_texts(
|
24
|
+
texts: recipes
|
25
|
+
)
|
26
|
+
|
27
|
+
# Query your data
|
28
|
+
qdrant.similarity_search(
|
29
|
+
query: "chicken",
|
30
|
+
k: 1
|
31
|
+
)
|
32
|
+
|
33
|
+
# Interact with your index through Q&A
|
34
|
+
qdrant.ask(
|
35
|
+
question: "What is the best recipe for chicken?"
|
36
|
+
)
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require "langchain"
|
2
|
+
|
3
|
+
# Instantiate the Weaviate client
|
4
|
+
weaviate = Vectorsearch::Weaviate.new(
|
5
|
+
url: ENV["WEAVIATE_URL"],
|
6
|
+
api_key: ENV["WEAVIATE_API_KEY"],
|
7
|
+
index_name: "Recipes",
|
8
|
+
llm: :openai,
|
9
|
+
llm_api_key: ENV["OPENAI_API_KEY"]
|
10
|
+
)
|
11
|
+
|
12
|
+
# Create the default schema. A text field `content` will be used.
|
13
|
+
weaviate.create_default_schema
|
14
|
+
|
15
|
+
# Set up an array of text strings
|
16
|
+
recipes = [
|
17
|
+
"Preheat oven to 400 degrees F (200 degrees C). Cut the top off the head of garlic. Arrange the garlic, carrots, celery, onion, pepper, and tomato on a large baking sheet in a single layer. Drizzle the olive oil over the vegetables; season with salt and pepper. Roast the vegetables in the preheated oven, turning every 20 minutes, until tender and browned, about 1 hour. Combine the water, thyme, parsley, and bay leaves in a large stock pot over medium-high heat. Squeeze the head of garlic into the stock pot, and discard the outer husk. Place the carrots, celery, onion, pepper, and tomato in the stock pot. Bring the water to a boil; reduce heat to low and simmer for 1 1/2 hours; strain and cool.",
|
18
|
+
"Heat oven to 190C/fan 170C/gas 5. Heat 1 tbsp oil and the butter in a frying pan, then add the onion and fry for 5 mins until softened. Cool slightly. Tip the sausagemeat, lemon zest, breadcrumbs, apricots, chestnuts and thyme into a bowl. Add the onion and cranberries, and mix everything together with your hands, adding plenty of pepper and a little salt. Cut each chicken breast into three fillets lengthwise and season all over with salt and pepper. Heat the remaining oil in the frying pan, and fry the chicken fillets quickly until browned, about 6-8 mins. Roll out two-thirds of the pastry to line a 20-23cm springform or deep loose-based tart tin. Press in half the sausage mix and spread to level. Then add the chicken pieces in one layer and cover with the rest of the sausage. Press down lightly. Roll out the remaining pastry. Brush the edges of the pastry with beaten egg and cover with the pastry lid. Pinch the edges to seal, then trim. Brush the top of the pie with egg, then roll out the trimmings to make holly leaf shapes and berries. Decorate the pie and brush again with egg. Set the tin on a baking sheet and bake for 50-60 mins, then cool in the tin for 15 mins. Remove and leave to cool completely. Serve with a winter salad and pickles."
|
19
|
+
]
|
20
|
+
|
21
|
+
# Add data to the index. Weaviate will use OpenAI to generate embeddings behind the scenes.
|
22
|
+
weaviate.add_texts(
|
23
|
+
texts: recipes
|
24
|
+
)
|
25
|
+
|
26
|
+
# Query your data
|
27
|
+
weaviate.similarity_search(
|
28
|
+
query: "chicken",
|
29
|
+
k: 1
|
30
|
+
)
|
data/lib/langchain.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "./version"
|
4
|
+
|
5
|
+
# Namespace for the vector search database adapters. Each constant is
# registered with `autoload`, so its file is only loaded the first time the
# constant is referenced.
module Vectorsearch
  {
    Base: "vectorsearch/base",
    Milvus: "vectorsearch/milvus",
    Pinecone: "vectorsearch/pinecone",
    Qdrant: "vectorsearch/qdrant",
    Weaviate: "vectorsearch/weaviate"
  }.each do |const_name, feature|
    autoload const_name, feature
  end
end
|
12
|
+
|
13
|
+
# Namespace for the LLM provider wrappers. Each constant is registered with
# `autoload`, so its file is only loaded the first time the constant is
# referenced.
module LLM
  {
    Base: "llm/base",
    Cohere: "llm/cohere",
    OpenAI: "llm/openai"
  }.each do |const_name, feature|
    autoload const_name, feature
  end
end
|
data/lib/llm/base.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module LLM
  # Common parent class of the LLM provider wrappers.
  class Base
    # Reader for @client; this class never assigns it itself.
    attr_reader :client

    # Currently supported LLMs: the symbol a caller passes in, mapped to the
    # name of the matching class inside the LLM namespace.
    # TODO: Add support for HuggingFace and other LLMs
    LLMS = {openai: "OpenAI", cohere: "Cohere"}.freeze

    # Read the :dimension entry of the DEFAULTS constant reachable from the
    # receiver's class.
    # @return [Object, nil] The value stored under :dimension
    def default_dimension
      defaults = self.class.const_get(:DEFAULTS)
      defaults[:dimension]
    end
  end
end
|
data/lib/llm/cohere.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cohere"
|
4
|
+
|
5
|
+
module LLM
  # Wrapper around ::Cohere::Client exposing the embed/complete interface.
  class Cohere < Base
    # Model names, sampling temperature and embedding dimension used for
    # every request made through this wrapper.
    DEFAULTS = {
      temperature: 0.0,
      completion_model_name: "base",
      embeddings_model_name: "small",
      dimension: 1024
    }.freeze

    # @param api_key [String] The API key handed to ::Cohere::Client
    def initialize(api_key:)
      @client = ::Cohere::Client.new(api_key: api_key)
    end

    # Generate an embedding for a given text
    # @param text [String] The text to generate an embedding for
    # @return [Object] The first entry of the response's "embeddings" list
    #   (presumably an Array of floats -- confirm against the Cohere API)
    def embed(text:)
      request = {texts: [text], model: DEFAULTS[:embeddings_model_name]}
      client.embed(**request).dig("embeddings").first
    end

    # Generate a completion for a given prompt
    # @param prompt [String] The prompt to generate a completion for
    # @return [Object] The "text" value of the first entry under "generations"
    #   (presumably a String -- confirm against the Cohere API)
    def complete(prompt:)
      first_generation = client.generate(
        prompt: prompt,
        temperature: DEFAULTS[:temperature],
        model: DEFAULTS[:completion_model_name]
      ).dig("generations").first
      first_generation.dig("text")
    end

    # Alternative names for #complete and #embed.
    alias generate_completion complete
    alias generate_embedding embed
  end
end
|
data/lib/llm/openai.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "openai"
|
4
|
+
|
5
|
+
module LLM
  # Wrapper around ::OpenAI::Client exposing the embed/complete interface.
  class OpenAI < Base
    # Model names, sampling temperature and embedding dimension used for
    # every request made through this wrapper.
    DEFAULTS = {
      temperature: 0.0,
      completion_model_name: "text-davinci-003",
      embeddings_model_name: "text-embedding-ada-002",
      dimension: 1536
    }.freeze

    # @param api_key [String] Passed to ::OpenAI::Client as `access_token:`
    def initialize(api_key:)
      # TODO: Add support to pass `organization_id:`
      @client = ::OpenAI::Client.new(access_token: api_key)
    end

    # Generate an embedding for a given text
    # @param text [String] The text to generate an embedding for
    # @return [Array] The "embedding" value of the first entry under "data"
    def embed(text:)
      request = {model: DEFAULTS[:embeddings_model_name], input: text}
      client.embeddings(parameters: request).dig("data").first.dig("embedding")
    end

    # Generate a completion for a given prompt
    # @param prompt [String] The prompt to generate a completion for
    # @return [String] The "text" value of the first entry under "choices"
    def complete(prompt:)
      request = {
        model: DEFAULTS[:completion_model_name],
        temperature: DEFAULTS[:temperature],
        prompt: prompt
      }
      client.completions(parameters: request).dig("choices").first.dig("text")
    end

    # Alternative names for #complete and #embed.
    alias generate_completion complete
    alias generate_embedding embed
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Forwardable is extended below; require it here so this file does not depend
# on another library having loaded it first.
require "forwardable"

module Vectorsearch
  # Shared behaviour of the vector search adapters: validates the requested
  # LLM, builds the matching LLM client and forwards embedding/completion
  # calls to it. Subclasses provide the database-specific operations.
  class Base
    extend Forwardable

    attr_reader :client, :index_name, :llm, :llm_api_key, :llm_client

    DEFAULT_METRIC = "cosine".freeze

    # @param llm [Symbol] The LLM to use; must be a key of LLM::Base::LLMS
    # @param llm_api_key [String] The API key for the LLM
    # @raise [ArgumentError] If `llm` is not a supported LLM
    def initialize(llm:, llm_api_key:)
      validate_llm!(llm: llm)

      @llm = llm
      @llm_api_key = llm_api_key

      # Resolve the wrapper class by name (e.g. :openai -> LLM::OpenAI).
      @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
    end

    # Create the default schema/index in the database.
    # @raise [NotImplementedError] Unless overridden by the subclass
    def create_default_schema
      raise NotImplementedError
    end

    # Store a list of texts in the database.
    # @param texts [Array] The texts to store
    # @raise [NotImplementedError] Unless overridden by the subclass
    def add_texts(texts:)
      raise NotImplementedError
    end

    # Q&A-style querying.
    # @param question [String] The question to answer
    # @raise [NotImplementedError] Unless overridden by the subclass
    def ask(question:)
      raise NotImplementedError
    end

    # generate_embedding, generate_completion and default_dimension are
    # answered by the LLM client under the same method names.
    def_delegators :llm_client,
      :generate_embedding,
      :generate_completion,
      :default_dimension

    # Build the prompt sent to the LLM for Q&A-style querying.
    # @param question [String] The question to ask
    # @param context [String] The context the answer should be based on
    # @return [String] The prompt, ending in "Answer:" without a trailing newline
    def generate_prompt(question:, context:)
      <<~PROMPT.chomp
        Context:
        #{context}
        ---
        Question: #{question}
        ---
        Answer:
      PROMPT
    end

    private

    # Ensure the requested LLM is one of the supported ones.
    # @param llm [Symbol] The LLM to validate
    # @raise [ArgumentError] If `llm` is not a key of LLM::Base::LLMS
    def validate_llm!(llm:)
      # TODO: Make this work when `llm` is passed in as a String instead of a Symbol
      return if LLM::Base::LLMS.key?(llm)

      # The constant must be fully qualified: a bare `LLMS` is not resolvable
      # from this class and raised NameError instead of ArgumentError.
      raise ArgumentError, "LLM must be one of #{LLM::Base::LLMS.keys}"
    end
  end
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "milvus"
|
4
|
+
|
5
|
+
module Vectorsearch
  # Adapter that stores and searches texts in a Milvus collection.
  class Milvus < Base
    # @param url [String] The URL handed to ::Milvus::Client
    # @param api_key [String, nil] Accepted but not used by this adapter
    # @param index_name [String] The name of the Milvus collection to use
    # @param llm [Symbol] The LLM to use
    # @param llm_api_key [String] The API key for the LLM
    def initialize(url:, api_key: nil, index_name:, llm:, llm_api_key:)
      @client = ::Milvus::Client.new(url: url)
      @index_name = index_name

      super(llm: llm, llm_api_key: llm_api_key)
    end

    # Insert the texts and one generated embedding per text into the collection.
    # @param texts [Array] The list of texts to add
    # @return [Object] Whatever the Milvus client returns for the insert
    def add_texts(texts:)
      # NOTE(review): the embeddings are sent as "binary_vector" -- confirm
      # this matches the kind of vector the LLM returns.
      client.entities.insert(
        collection_name: index_name,
        num_rows: texts.count,
        fields_data: [
          {field_name: "content", type: ::Milvus::DATA_TYPES["varchar"], field: texts},
          {
            field_name: "vectors",
            type: ::Milvus::DATA_TYPES["binary_vector"],
            field: texts.map { |entry| generate_embedding(text: entry) }
          }
        ]
      )
    end

    # Create default schema: an auto-generated int64 primary key "id", a
    # varchar "content" field and a "vectors" field whose "dim" is the LLM's
    # default dimension.
    # @return [Hash] The response from the server
    def create_default_schema
      client.collections.create(
        auto_id: true,
        collection_name: index_name,
        description: "Default schema created by Vectorsearch",
        fields: [
          {
            name: "id",
            is_primary_key: true,
            autoID: true,
            data_type: ::Milvus::DATA_TYPES["int64"]
          },
          {
            name: "content",
            is_primary_key: false,
            data_type: ::Milvus::DATA_TYPES["varchar"],
            # Largest allowed value
            type_params: [{key: "max_length", value: "32768"}]
          },
          {
            name: "vectors",
            data_type: ::Milvus::DATA_TYPES["binary_vector"],
            is_primary_key: false,
            type_params: [{key: "dim", value: default_dimension.to_s}]
          }
        ]
      )
    end

    # Embed the query text and search by the resulting vector.
    # @param query [String] The text to search for
    # @param k [Integer] The number of results to return
    # @return [Object] The result of #similarity_search_by_vector
    def similarity_search(query:, k: 4)
      similarity_search_by_vector(embedding: generate_embedding(text: query), k: k)
    end

    # Search the collection by an already generated embedding.
    # @param embedding [Array] The embedding to search with
    # @param k [Integer] The number of results to return
    # @return [Object] Whatever the Milvus client returns for the search
    def similarity_search_by_vector(embedding:, k: 4)
      # NOTE(review): anns_field points at "content", which the default schema
      # declares as the varchar field, while the embeddings are stored in
      # "vectors" -- confirm this is intended.
      client.search(
        collection_name: index_name,
        top_k: k.to_s,
        vectors: [embedding],
        dsl_type: 1,
        params: '{"nprobe": 10}',
        anns_field: "content",
        metric_type: "L2"
      )
    end

    # Q&A-style querying is not available for this adapter yet.
    # @raise [NotImplementedError] Always
    def ask(question:)
      raise NotImplementedError
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true

require "pinecone"
# SecureRandom is used to generate vector IDs; require it explicitly instead
# of relying on another library having loaded it.
require "securerandom"

module Vectorsearch
  # Vector search adapter backed by a Pinecone index.
  class Pinecone < Base
    # Initialize the Pinecone client
    # @param environment [String] The environment to use
    # @param api_key [String] The API key to use
    # @param index_name [String] The name of the index to use
    # @param llm [Symbol] The LLM to use
    # @param llm_api_key [String] The API key for the LLM
    def initialize(
      environment:,
      api_key:,
      index_name:,
      llm:,
      llm_api_key:
    )
      # The Pinecone gem is configured globally before a client is created.
      ::Pinecone.configure do |config|
        config.api_key = api_key
        config.environment = environment
      end

      @client = ::Pinecone::Client.new
      @index_name = index_name

      super(llm: llm, llm_api_key: llm_api_key)
    end

    # Add a list of texts to the index
    # Each text is embedded and upserted under a freshly generated UUID, with
    # the original text kept in the vector's metadata under "content".
    # @param texts [Array] The list of texts to add
    # @return [Hash] The response from the server
    def add_texts(
      texts:
    )
      vectors = texts.map do |text|
        {
          # TODO: Allows passing in your own IDs
          id: SecureRandom.uuid,
          metadata: {content: text},
          values: generate_embedding(text: text)
        }
      end

      index = client.index(index_name)

      index.upsert(vectors: vectors)
    end

    # Create the index with the default schema
    # @return [Hash] The response from the server
    def create_default_schema
      client.create_index(
        metric: DEFAULT_METRIC,
        name: index_name,
        dimension: default_dimension
      )
    end

    # Search for similar texts
    # @param query [String] The text to search for
    # @param k [Integer] The number of results to return
    # @return [Array] The list of results
    def similarity_search(
      query:,
      k: 4
    )
      embedding = generate_embedding(text: query)

      similarity_search_by_vector(
        embedding: embedding,
        k: k
      )
    end

    # Search for similar texts by embedding
    # @param embedding [Array] The embedding to search for
    # @param k [Integer] The number of results to return
    # @return [Array] The list of results (the "matches" entry of the response)
    def similarity_search_by_vector(
      embedding:,
      k: 4
    )
      index = client.index(index_name)

      response = index.query(
        vector: embedding,
        top_k: k,
        include_values: true,
        include_metadata: true
      )
      response.dig("matches")
    end

    # Ask a question and return the answer
    # The prompt context is built from the metadata of the closest matches,
    # one entry per match, separated by "---" lines.
    # @param question [String] The question to ask
    # @return [String] The answer to the question
    def ask(question:)
      search_results = similarity_search(query: question)

      context = search_results.map do |result|
        result.dig("metadata").to_s
      end
      context = context.join("\n---\n")

      prompt = generate_prompt(question: question, context: context)

      generate_completion(prompt: prompt)
    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true

require "qdrant"
# SecureRandom is used to generate point IDs; require it explicitly instead
# of relying on another library having loaded it.
require "securerandom"

module Vectorsearch
  # Vector search adapter backed by a Qdrant collection.
  class Qdrant < Base
    # Initialize the Qdrant client
    # @param url [String] The URL of the Qdrant server
    # @param api_key [String] The API key to use
    # @param index_name [String] The name of the index to use
    # @param llm [Symbol] The LLM to use
    # @param llm_api_key [String] The API key for the LLM
    def initialize(
      url:,
      api_key:,
      index_name:,
      llm:,
      llm_api_key:
    )
      @client = ::Qdrant::Client.new(
        url: url,
        api_key: api_key
      )
      @index_name = index_name

      super(llm: llm, llm_api_key: llm_api_key)
    end

    # Add a list of texts to the index
    # The texts are sent as one batch of parallel arrays: a generated UUID,
    # the embedding, and a payload holding the original text under "content".
    # @param texts [Array] The list of texts to add
    # @return [Hash] The response from the server
    def add_texts(
      texts:
    )
      batch = {ids: [], vectors: [], payloads: []}

      texts.each do |text|
        batch[:ids].push(SecureRandom.uuid)
        batch[:vectors].push(generate_embedding(text: text))
        batch[:payloads].push({content: text})
      end

      client.points.upsert(
        collection_name: index_name,
        batch: batch
      )
    end

    # Create the index with the default schema
    # @return [Hash] The response from the server
    def create_default_schema
      client.collections.create(
        collection_name: index_name,
        vectors: {
          # The distance name is sent capitalized.
          distance: DEFAULT_METRIC.capitalize,
          size: default_dimension
        }
      )
    end

    # Search for similar texts
    # @param query [String] The text to search for
    # @param k [Integer] The number of results to return
    # @return [Hash] The response from the server
    def similarity_search(
      query:,
      k: 4
    )
      embedding = generate_embedding(text: query)

      similarity_search_by_vector(
        embedding: embedding,
        k: k
      )
    end

    # Search for similar texts by embedding
    # @param embedding [Array] The embedding to search for
    # @param k [Integer] The number of results to return
    # @return [Hash] The response from the server
    def similarity_search_by_vector(
      embedding:,
      k: 4
    )
      client.points.search(
        collection_name: index_name,
        limit: k,
        vector: embedding,
        with_payload: true
      )
    end

    # Ask a question and return the answer
    # The prompt context is built from the payloads of the closest points,
    # one entry per point, separated by "---" lines.
    # @param question [String] The question to ask
    # @return [String] The answer to the question
    def ask(question:)
      search_results = similarity_search(query: question)

      context = search_results.dig("result").map do |result|
        result.dig("payload").to_s
      end
      context = context.join("\n---\n")

      prompt = generate_prompt(question: question, context: context)

      generate_completion(prompt: prompt)
    end
  end
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
# frozen_string_literal: true

# JSON is used to escape user-supplied text embedded in GraphQL arguments.
require "json"
require "weaviate"

module Vectorsearch
  # Vector search adapter backed by a Weaviate class.
  class Weaviate < Base
    # Initialize the Weaviate adapter
    # @param url [String] The URL of the Weaviate instance
    # @param api_key [String] The API key to use
    # @param index_name [String] The name of the index to use
    # @param llm [Symbol] The LLM to use
    # @param llm_api_key [String] The API key for the LLM
    def initialize(
      url:,
      api_key:,
      index_name:,
      llm:,
      llm_api_key:
    )
      @client = ::Weaviate::Client.new(
        url: url,
        api_key: api_key,
        model_service: llm,
        model_service_api_key: llm_api_key
      )
      @index_name = index_name

      super(llm: llm, llm_api_key: llm_api_key)
    end

    # Add a list of texts to the index
    # No embedding is computed here; each text is sent as the "content"
    # property of an object of the index class.
    # @param texts [Array] The list of texts to add
    # @return [Hash] The response from the server
    def add_texts(
      texts:
    )
      objects = texts.map do |text|
        {
          class: index_name,
          properties: {content: text}
        }
      end

      client.objects.batch_create(
        objects: objects
      )
    end

    # Create default schema
    # The class gets a single text property, "content", and a vectorizer
    # named after the configured LLM ("text2vec-<llm>").
    def create_default_schema
      client.schema.create(
        class_name: index_name,
        vectorizer: "text2vec-#{llm}",
        # TODO: Figure out a way to optionally enable it
        # "module_config": {
        #   "qna-openai": {}
        # },
        properties: [
          # TODO: Allow passing in your own IDs
          {
            dataType: ["text"],
            name: "content"
          }
        ]
      )
    end

    # Return documents similar to the query
    # @param query [String] The query to search for
    # @param k [Integer|String] The number of results to return
    # @return [Hash] The search results
    def similarity_search(
      query:,
      k: 4
    )
      # The query is escaped so quotes or backslashes in it cannot break out
      # of the GraphQL string literal.
      near_text = "{ concepts: [#{graphql_string(query)}] }"

      client.query.get(
        class_name: index_name,
        near_text: near_text,
        limit: k.to_s,
        fields: "content _additional { id }"
      )
    end

    # Return documents similar to the vector
    # @param embedding [Array] The vector to search for
    # @param k [Integer|String] The number of results to return
    # @return [Hash] The search results
    def similarity_search_by_vector(
      embedding:,
      k: 4
    )
      near_vector = "{ vector: #{embedding} }"

      client.query.get(
        class_name: index_name,
        near_vector: near_vector,
        limit: k.to_s,
        fields: "content _additional { id }"
      )
    end

    # Ask a question and return the answer
    # @param question [String] The question to ask
    # @return [Hash] The answer
    def ask(
      question:
    )
      # Weaviate currently supports the `ask:` parameter only for the OpenAI LLM (with `qna-openai` module enabled).
      # The Cohere support is on the way: https://github.com/weaviate/weaviate/pull/2600
      if llm == :openai
        # Escaped for the same reason as the query in #similarity_search.
        ask_object = "{ question: #{graphql_string(question)} }"

        client.query.get(
          class_name: index_name,
          ask: ask_object,
          limit: "1",
          fields: "_additional { answer { result } }"
        )
      elsif llm == :cohere
        # Fallback: retrieve similar documents and let the LLM answer from them.
        search_results = similarity_search(query: question)

        context = search_results.map do |result|
          result.dig("content").to_s
        end
        context = context.join("\n---\n")

        prompt = generate_prompt(question: question, context: context)

        generate_completion(prompt: prompt)
      end
    end

    private

    # Render a value as a double-quoted, escaped string literal that can be
    # embedded in a GraphQL argument. For text without special characters the
    # result is simply the text wrapped in double quotes.
    # @param value [#to_s] The text to quote
    # @return [String] The quoted and escaped literal
    def graphql_string(value)
      JSON.generate(value.to_s)
    end
  end
end
|
data/lib/version.rb
ADDED
data/sig/langchain.rbs
ADDED
metadata
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: langchainrb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrei Bondarev
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-05-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: pry-byebug
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.10.0
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.10.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: dotenv-rails
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.7.6
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 2.7.6
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: cohere-ruby
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.9.1
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.9.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: milvus
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.9.0
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.9.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: pinecone
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.1.6
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.1.6
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: ruby-openai
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 4.0.0
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 4.0.0
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: qdrant-ruby
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 0.9.0
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.9.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: weaviate-ruby
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.8.0
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.8.0
|
125
|
+
description: Build ML/AI-powered applications with Ruby's LangChain
|
126
|
+
email:
|
127
|
+
- andrei.bondarev13@gmail.com
|
128
|
+
executables: []
|
129
|
+
extensions: []
|
130
|
+
extra_rdoc_files: []
|
131
|
+
files:
|
132
|
+
- ".rspec"
|
133
|
+
- CHANGELOG.md
|
134
|
+
- Gemfile
|
135
|
+
- Gemfile.lock
|
136
|
+
- LICENSE.txt
|
137
|
+
- README.md
|
138
|
+
- Rakefile
|
139
|
+
- examples/.keep
|
140
|
+
- examples/store_and_query_with_pinecone.rb
|
141
|
+
- examples/store_and_query_with_qdrant.rb
|
142
|
+
- examples/store_and_query_with_weaviate.rb
|
143
|
+
- lib/langchain.rb
|
144
|
+
- lib/llm/base.rb
|
145
|
+
- lib/llm/cohere.rb
|
146
|
+
- lib/llm/openai.rb
|
147
|
+
- lib/vectorsearch/base.rb
|
148
|
+
- lib/vectorsearch/milvus.rb
|
149
|
+
- lib/vectorsearch/pinecone.rb
|
150
|
+
- lib/vectorsearch/qdrant.rb
|
151
|
+
- lib/vectorsearch/weaviate.rb
|
152
|
+
- lib/version.rb
|
153
|
+
- sig/langchain.rbs
|
154
|
+
homepage: https://rubygems.org/gems/langchainrb
|
155
|
+
licenses:
|
156
|
+
- MIT
|
157
|
+
metadata:
|
158
|
+
homepage_uri: https://rubygems.org/gems/langchainrb
|
159
|
+
source_code_uri: https://github.com/andreibondarev/langchainrb
|
160
|
+
changelog_uri: https://github.com/andreibondarev/langchainrb/blob/main/CHANGELOG.md
|
161
|
+
post_install_message:
|
162
|
+
rdoc_options: []
|
163
|
+
require_paths:
|
164
|
+
- lib
|
165
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
166
|
+
requirements:
|
167
|
+
- - ">="
|
168
|
+
- !ruby/object:Gem::Version
|
169
|
+
version: 2.6.0
|
170
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
|
+
requirements:
|
172
|
+
- - ">="
|
173
|
+
- !ruby/object:Gem::Version
|
174
|
+
version: '0'
|
175
|
+
requirements: []
|
176
|
+
rubygems_version: 3.2.3
|
177
|
+
signing_key:
|
178
|
+
specification_version: 4
|
179
|
+
summary: Build ML/AI-powered applications with Ruby's LangChain
|
180
|
+
test_files: []
|