rag_embeddings 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +186 -31
- data/ext/rag_embeddings/embedding.c +122 -19
- data/lib/rag_embeddings/engine.rb +12 -5
- data/lib/rag_embeddings/version.rb +1 -1
- data/lib/rag_embeddings.rb +5 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 80367e6f32ddd36828010b2f3f53339e50b06efc6e58dc1f708e08980e774053
|
4
|
+
data.tar.gz: 3e9c9febe99e5ec54fb5288727749b3a5ce282061ff8bda438a256659fbcbbff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 16de62dff09874f0f062699657758a58b930678bef3a58ab9c18269616e2a6b47aa9d2c85c3698de94ddefb6160adde8a29cda66dbd0e15d72ed2b7d3eb9eb7d
|
7
|
+
data.tar.gz: '092cc593c1957e6fca731fa41c33a6e0ccee55f2d7bb691b0f6f4b684c71e676061546f1ceeea5bb7a98f49d6466d3e9cd9e09b8ff3f1dff0022f74aeca8fabb'
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# 💎 Rag Embeddings
|
2
2
|
|
3
|
-
[](https://badge.fury.io/rb/rag_embeddings)
|
3
|
+
[](https://badge.fury.io/rb/rag_embeddings)
|
4
4
|
|
5
5
|
**rag_embeddings** is a native Ruby library for efficient storage and comparison of AI-generated embedding vectors (float arrays) using high-performance C extensions. It is designed for seamless integration with external LLMs (Ollama, OpenAI, Mistral, etc) and works perfectly for RAG (Retrieval-Augmented Generation) applications.
|
6
6
|
|
@@ -22,27 +22,35 @@
|
|
22
22
|
|
23
23
|
---
|
24
24
|
|
25
|
-
##
|
25
|
+
## 🌍 Real-world Use Cases
|
26
26
|
|
27
|
-
|
27
|
+
- **Question Answering over Documents:** Instantly search and retrieve the most relevant document snippets from thousands of articles, FAQs, or customer support logs in your Ruby app.
|
28
|
+
- **Semantic Search for E-commerce:** Power product search with semantic understanding, returning items similar in meaning, not just keywords.
|
29
|
+
- **Personalized Recommendations:** Find related content (articles, products, videos) by comparing user preferences and content embeddings.
|
30
|
+
- **Knowledge Base Augmentation:** Use with OpenAI or Ollama to enhance chatbots, letting them ground answers in your company’s internal documentation or wiki.
|
31
|
+
- **Fast Prototyping for AI Products:** Effortlessly build MVPs for RAG-enabled chatbots, semantic search tools, and AI-driven discovery apps—all in native Ruby.
|
28
32
|
|
29
|
-
|
30
|
-
gem "rag_embeddings"
|
31
|
-
gem "langchainrb"
|
32
|
-
gem "faraday"
|
33
|
-
gem "sqlite3"
|
34
|
-
```
|
33
|
+
---
|
35
34
|
|
36
|
-
|
37
|
-
rake compile
|
35
|
+
## 👷 Requirements
|
38
36
|
|
39
|
-
|
37
|
+
- Ruby >= 3.3
|
38
|
+
- `langchainrb` (for embedding)
|
39
|
+
- At the moment `ollama` is used as the LLM provider, so it must be installed and running (although there are some workarounds)
|
40
|
+
- `sqlite3` (for storage)
|
40
41
|
|
41
|
-
##
|
42
|
+
## 🔧 Installation
|
42
43
|
|
43
|
-
|
44
|
+
Requires a working C compiler in order to build the native extension
|
45
|
+
|
46
|
+
`gem install rag_embeddings`
|
47
|
+
|
48
|
+
If you'd rather install it using bundler, add a line for it in your Gemfile (but set the require option to false, as it is a standalone tool):
|
49
|
+
|
50
|
+
```ruby
|
51
|
+
gem "rag_embeddings", require: false
|
52
|
+
```
|
44
53
|
|
45
|
-
`bundle exec rspec`
|
46
54
|
|
47
55
|
## 🧪 Practical examples
|
48
56
|
|
@@ -50,17 +58,24 @@ To run all specs (RSpec required):
|
|
50
58
|
|
51
59
|
```ruby
|
52
60
|
require "rag_embeddings"
|
53
|
-
|
54
|
-
text = "Hello world, this is RAG!"
|
55
|
-
embedding = RagEmbeddings.embed(text)
|
61
|
+
embedding = RagEmbeddings.embed("Hello world, this is RAG!")
|
56
62
|
# embedding is a float array
|
57
63
|
```
|
58
64
|
|
65
|
+
The default model is `llama3.2`, but you can set another one (restart the console first: the LLM client is memoized on first use, so a different `model:` passed later is otherwise ignored):
|
66
|
+
|
67
|
+
```ruby
|
68
|
+
embedding = RagEmbeddings.embed("Hello world, this is RAG!", model: 'qwen3:0.6b')
|
69
|
+
```
|
70
|
+
|
59
71
|
### 2. Create a C embedding object
|
60
72
|
|
61
73
|
```ruby
|
62
74
|
c_embedding = RagEmbeddings::Embedding.from_array(embedding)
|
63
75
|
puts "Dimension: #{c_embedding.dim}"
|
76
|
+
# Dimension: 1024 # qwen3:0.6b
|
77
|
+
# Dimension: 3072 # llama3.2
|
78
|
+
|
64
79
|
puts "Ruby array: #{c_embedding.to_a.inspect}"
|
65
80
|
```
|
66
81
|
|
@@ -87,30 +102,170 @@ result = db.top_k_similar("Hello!", k: 1)
|
|
87
102
|
puts "Most similar text: #{result.first[1]}, score: #{result.first[2]}"
|
88
103
|
```
|
89
104
|
|
105
|
+
### 5. Batch-index a folder of documents
|
106
|
+
|
107
|
+
```ruby
|
108
|
+
# load all .txt files
|
109
|
+
files = Dir["./docs/*.txt"].map { |f| [File.basename(f), File.read(f)] }
|
110
|
+
|
111
|
+
db = RagEmbeddings::Database.new("knowledge_base.db")
|
112
|
+
files.each do |name, text|
|
113
|
+
vector = RagEmbeddings.embed(text)
|
114
|
+
db.insert(name, vector)
|
115
|
+
end
|
116
|
+
|
117
|
+
puts "Indexed #{files.size} documents."
|
118
|
+
```
|
119
|
+
|
120
|
+
### 6. Simple Retrieval-Augmented Generation (RAG) loop
|
121
|
+
|
122
|
+
```ruby
|
123
|
+
require "openai" # or your favorite LLM client
|
124
|
+
|
125
|
+
# 1) build or open your vector store
|
126
|
+
db = RagEmbeddings::Database.new("knowledge_base.db")
|
127
|
+
|
128
|
+
# 2) embed your user question
|
129
|
+
client = OpenAI::Client.new(api_key: ENV.fetch("OPENAI_API_KEY"))
|
130
|
+
q_embedding = client.embeddings(
|
131
|
+
parameters: {
|
132
|
+
model: "text-embedding-ada-002",
|
133
|
+
input: "What are the benefits of retrieval-augmented generation?"
|
134
|
+
}
|
135
|
+
).dig("data", 0, "embedding")
|
136
|
+
|
137
|
+
# 3) retrieve top-3 relevant passages
|
138
|
+
results = db.top_k_similar(q_embedding, k: 3)
|
139
|
+
|
140
|
+
# 4) build a prompt for your LLM
|
141
|
+
context = results.map { |id, text, score| text }.join("\n\n---\n\n")
|
142
|
+
prompt = <<~PROMPT
|
143
|
+
You are an expert.
|
144
|
+
Use the following context to answer the question:
|
145
|
+
|
146
|
+
CONTEXT:
|
147
|
+
#{context}
|
148
|
+
|
149
|
+
QUESTION:
|
150
|
+
What are the benefits of retrieval-augmented generation?
|
151
|
+
PROMPT
|
152
|
+
|
153
|
+
# 5) call the LLM for final answer
|
154
|
+
response = client.chat(
|
155
|
+
parameters: {
|
156
|
+
model: "gpt-4o",
|
157
|
+
messages: [{ role: "user", content: prompt }]
|
158
|
+
}
|
159
|
+
)
|
160
|
+
puts response.dig("choices", 0, "message", "content")
|
161
|
+
|
162
|
+
```
|
163
|
+
|
164
|
+
### 7. In-memory store for fast prototyping
|
165
|
+
|
166
|
+
```ruby
|
167
|
+
# use SQLite :memory: for ephemeral experiments
|
168
|
+
db = RagEmbeddings::Database.new(":memory:")
|
169
|
+
|
170
|
+
# insert & search exactly as with a file-backed DB
|
171
|
+
db.insert("Quick test", RagEmbeddings.embed("Quick test"))
|
172
|
+
db.top_k_similar("Test", k: 1)
|
173
|
+
```
|
174
|
+
|
175
|
+
---
|
176
|
+
|
90
177
|
## 🏗️ How it works
|
91
178
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
179
|
+
**rag_embeddings** combines the simplicity of Ruby with the performance of C to deliver fast vector operations for RAG applications.
|
180
|
+
|
181
|
+
### Architecture Overview
|
182
|
+
|
183
|
+
The library uses a **hybrid memory-storage approach**:
|
184
|
+
|
185
|
+
1. **In-Memory Processing**: All vector operations (cosine similarity calculations, embedding manipulations) happen entirely in memory using optimized C code
|
186
|
+
2. **Persistent Storage**: SQLite serves as a simple, portable storage layer for embeddings and associated text
|
187
|
+
3. **Dynamic C Objects**: Embeddings are managed as native C structures with automatic memory management
|
188
|
+
|
189
|
+
### Key Components
|
190
|
+
|
191
|
+
**C Extension (`embedding.c`)**
|
192
|
+
- Handles all computationally intensive operations
|
193
|
+
- Manages dynamic vector dimensions (adapts to any LLM output size)
|
194
|
+
- Performs cosine similarity calculations with optimized algorithms
|
195
|
+
- Ensures memory-safe operations with proper garbage collection integration
|
196
|
+
|
197
|
+
**Ruby Interface**
|
198
|
+
- Provides an intuitive API for vector operations
|
199
|
+
- Integrates seamlessly with LLM providers via langchainrb
|
200
|
+
- Handles database operations and query orchestration
|
201
|
+
|
202
|
+
**SQLite Storage**
|
203
|
+
- Stores embeddings as BLOBs alongside their associated text
|
204
|
+
- Provides persistent storage without requiring external databases
|
205
|
+
- Supports both file-based and in-memory (`:memory:`) databases
|
206
|
+
- Enables portable, self-contained applications
|
207
|
+
|
208
|
+
### Processing Flow
|
209
|
+
|
210
|
+
1. **Text → Embedding**: Generate vectors using your preferred LLM (Ollama, OpenAI, etc.)
|
211
|
+
2. **Memory Allocation**: Create C embedding objects with `Embedding.from_array()`
|
212
|
+
3. **Storage**: Persist embeddings and text to SQLite for later retrieval
|
213
|
+
4. **Query Processing**:
|
214
|
+
- Load query embedding into memory
|
215
|
+
- Compare against stored embeddings using fast C-based cosine similarity
|
216
|
+
- Return top-K most similar results ranked by similarity score
|
217
|
+
|
218
|
+
### Why This Design?
|
219
|
+
|
220
|
+
**Performance**: Critical operations run in optimized C code, delivering significant speed improvements over pure Ruby implementations.
|
221
|
+
|
222
|
+
**Memory Efficiency**: While embeddings are stored in SQLite, all vector computations happen in memory, avoiding I/O bottlenecks during similarity calculations.
|
223
|
+
|
224
|
+
**Simplicity**: SQLite eliminates the need for complex vector database setups while maintaining good performance for moderate-scale applications.
|
225
|
+
|
226
|
+
**Portability**: The entire knowledge base fits in a single SQLite file, making deployment and backup trivial.
|
227
|
+
|
228
|
+
### Performance Characteristics
|
229
|
+
|
230
|
+
- **Embedding creation**: ~82ms for 10,000 operations
|
231
|
+
- **Cosine similarity**: ~107ms for 10,000 calculations
|
232
|
+
- **Memory usage**: ~34MB for 10,000 embeddings
|
233
|
+
- **Scalability**: Suitable for thousands to tens of thousands of vectors
|
234
|
+
|
235
|
+
For applications requiring millions of vectors, consider specialized vector databases (Faiss, sqlite-vss) while using this library for prototyping and smaller-scale production use.
|
96
236
|
|
97
237
|
## 🎛️ Customization
|
98
238
|
|
99
239
|
- Embedding provider: switch model/provider in engine.rb (Ollama, OpenAI, etc)
|
100
240
|
- Database: set the SQLite file path as desired
|
101
241
|
|
102
|
-
|
242
|
+
If you need to customize the C extension (`ext/rag_embeddings/embedding.c`), recompile it with:
|
103
243
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
- A working C compiler
|
244
|
+
`rake compile`
|
245
|
+
|
246
|
+
---
|
108
247
|
|
109
|
-
##
|
248
|
+
## 🏁 Running the test suite
|
110
249
|
|
111
|
-
|
112
|
-
|
113
|
-
|
250
|
+
To run all specs (RSpec required):
|
251
|
+
|
252
|
+
`bundle exec rspec`
|
253
|
+
|
254
|
+
## ⚡️ Performance
|
255
|
+
|
256
|
+
`bundle exec rspec spec/performance_spec.rb`
|
257
|
+
|
258
|
+
```bash
|
259
|
+
Embedding creation (10000 times): 82 ms
|
260
|
+
Cosine similarity (10000 times): 107 ms
|
261
|
+
RSS: 186.7 MB
|
262
|
+
.
|
263
|
+
Memory usage delta: 33.97 MB for 10000 embeddings
|
264
|
+
.
|
265
|
+
|
266
|
+
Finished in 0.42577 seconds (files took 0.06832 seconds to load)
|
267
|
+
2 examples, 0 failures
|
268
|
+
```
|
114
269
|
|
115
270
|
## 📬 Contact & Issues
|
116
271
|
Open an issue or contact the maintainer for questions, suggestions, or bugs.
|
@@ -12,7 +12,9 @@ typedef struct {
|
|
12
12
|
|
13
13
|
// Callback for freeing memory when Ruby's GC collects our object
|
14
14
|
static void embedding_free(void *ptr) {
|
15
|
-
|
15
|
+
if (ptr) {
|
16
|
+
xfree(ptr); // Ruby's memory free function (with null check)
|
17
|
+
}
|
16
18
|
}
|
17
19
|
|
18
20
|
// Callback to report memory usage to Ruby's GC
|
@@ -27,22 +29,46 @@ static const rb_data_type_t embedding_type = {
|
|
27
29
|
"RagEmbeddings/Embedding", // Type name
|
28
30
|
{0, embedding_free, embedding_memsize,}, // Functions: mark, free, size
|
29
31
|
0, 0, // Parent type, data
|
30
|
-
RUBY_TYPED_FREE_IMMEDIATELY // Flags
|
32
|
+
RUBY_TYPED_FREE_IMMEDIATELY // Flags for immediate cleanup
|
31
33
|
};
|
32
34
|
|
33
35
|
// Class method: RagEmbeddings::Embedding.from_array([1.0, 2.0, ...])
|
34
36
|
// Creates a new embedding from a Ruby array
|
35
37
|
static VALUE embedding_from_array(VALUE klass, VALUE rb_array) {
|
36
38
|
Check_Type(rb_array, T_ARRAY); // Ensure argument is a Ruby array
|
37
|
-
|
39
|
+
|
40
|
+
long array_len = RARRAY_LEN(rb_array);
|
41
|
+
|
42
|
+
// Validate array length fits in uint16_t (max 65535 dimensions)
|
43
|
+
if (array_len > UINT16_MAX) {
|
44
|
+
rb_raise(rb_eArgError, "Array too large: maximum %d dimensions allowed", UINT16_MAX);
|
45
|
+
}
|
46
|
+
|
47
|
+
// Prevent zero-length embeddings
|
48
|
+
if (array_len == 0) {
|
49
|
+
rb_raise(rb_eArgError, "Cannot create embedding from empty array");
|
50
|
+
}
|
51
|
+
|
52
|
+
uint16_t dim = (uint16_t)array_len;
|
38
53
|
|
39
54
|
// Allocate memory for struct + array of floats
|
40
55
|
embedding_t *ptr = xmalloc(sizeof(embedding_t) + dim * sizeof(float));
|
41
56
|
ptr->dim = dim;
|
42
57
|
|
43
58
|
// Copy values from Ruby array to our C array
|
44
|
-
|
45
|
-
|
59
|
+
// Using RARRAY_CONST_PTR for better performance when available
|
60
|
+
const VALUE *array_ptr = RARRAY_CONST_PTR(rb_array);
|
61
|
+
for (uint16_t i = 0; i < dim; ++i) {
|
62
|
+
VALUE val = array_ptr[i];
|
63
|
+
|
64
|
+
// Ensure the value is numeric
|
65
|
+
if (!RB_FLOAT_TYPE_P(val) && !RB_INTEGER_TYPE_P(val)) {
|
66
|
+
xfree(ptr); // Clean up allocated memory before raising exception
|
67
|
+
rb_raise(rb_eTypeError, "Array element at index %d is not numeric", i);
|
68
|
+
}
|
69
|
+
|
70
|
+
ptr->values[i] = (float)NUM2DBL(val);
|
71
|
+
}
|
46
72
|
|
47
73
|
// Wrap our C struct in a Ruby object
|
48
74
|
VALUE obj = TypedData_Wrap_Struct(klass, &embedding_type, ptr);
|
@@ -65,17 +91,19 @@ static VALUE embedding_to_a(VALUE self) {
|
|
65
91
|
TypedData_Get_Struct(self, embedding_t, &embedding_type, ptr);
|
66
92
|
|
67
93
|
// Create a new Ruby array with pre-allocated capacity
|
68
|
-
VALUE arr =
|
94
|
+
VALUE arr = rb_ary_new_capa(ptr->dim);
|
69
95
|
|
70
96
|
// Copy each float value to the Ruby array
|
71
|
-
|
72
|
-
|
97
|
+
// Using rb_ary_store for better performance than rb_ary_push
|
98
|
+
for (uint16_t i = 0; i < ptr->dim; ++i) {
|
99
|
+
rb_ary_store(arr, i, DBL2NUM(ptr->values[i]));
|
100
|
+
}
|
73
101
|
|
74
102
|
return arr;
|
75
103
|
}
|
76
104
|
|
77
105
|
// Instance method: embedding.cosine_similarity(other_embedding)
|
78
|
-
// Calculate cosine similarity between two embeddings
|
106
|
+
// Calculate cosine similarity between two embeddings using optimized algorithm
|
79
107
|
static VALUE embedding_cosine_similarity(VALUE self, VALUE other) {
|
80
108
|
embedding_t *a, *b;
|
81
109
|
// Get C structs for both embeddings
|
@@ -83,21 +111,90 @@ static VALUE embedding_cosine_similarity(VALUE self, VALUE other) {
|
|
83
111
|
TypedData_Get_Struct(other, embedding_t, &embedding_type, b);
|
84
112
|
|
85
113
|
// Ensure dimensions match
|
86
|
-
if (a->dim != b->dim)
|
87
|
-
rb_raise(rb_eArgError, "Dimension mismatch");
|
114
|
+
if (a->dim != b->dim) {
|
115
|
+
rb_raise(rb_eArgError, "Dimension mismatch: %d vs %d", a->dim, b->dim);
|
116
|
+
}
|
88
117
|
|
89
|
-
|
118
|
+
// Use double precision for intermediate calculations to reduce accumulation errors
|
119
|
+
double dot = 0.0, norm_a = 0.0, norm_b = 0.0;
|
90
120
|
|
91
|
-
// Calculate dot product and vector magnitudes
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
121
|
+
// Calculate dot product and vector magnitudes in a single loop
|
122
|
+
// This is more cache-friendly than separate loops
|
123
|
+
const float *va = a->values;
|
124
|
+
const float *vb = b->values;
|
125
|
+
|
126
|
+
for (uint16_t i = 0; i < a->dim; ++i) {
|
127
|
+
float ai = va[i];
|
128
|
+
float bi = vb[i];
|
129
|
+
|
130
|
+
dot += (double)ai * bi; // Dot product
|
131
|
+
norm_a += (double)ai * ai; // Square of magnitude for vector a
|
132
|
+
norm_b += (double)bi * bi; // Square of magnitude for vector b
|
133
|
+
}
|
134
|
+
|
135
|
+
// Check for zero vectors to avoid division by zero
|
136
|
+
if (norm_a == 0.0 || norm_b == 0.0) {
|
137
|
+
return DBL2NUM(0.0); // Return 0 similarity for zero vectors
|
96
138
|
}
|
97
139
|
|
98
140
|
// Apply cosine similarity formula: dot(a,b)/(|a|*|b|)
|
99
|
-
//
|
100
|
-
|
141
|
+
// Using sqrt for better numerical stability
|
142
|
+
double magnitude_product = sqrt(norm_a * norm_b);
|
143
|
+
double similarity = dot / magnitude_product;
|
144
|
+
|
145
|
+
// Clamp result to [-1, 1] to handle floating point precision errors
|
146
|
+
if (similarity > 1.0) similarity = 1.0;
|
147
|
+
if (similarity < -1.0) similarity = -1.0;
|
148
|
+
|
149
|
+
return DBL2NUM(similarity);
|
150
|
+
}
|
151
|
+
|
152
|
+
// Instance method: embedding.magnitude
|
153
|
+
// Calculate the magnitude (L2 norm) of the embedding vector
|
154
|
+
static VALUE embedding_magnitude(VALUE self) {
|
155
|
+
embedding_t *ptr;
|
156
|
+
TypedData_Get_Struct(self, embedding_t, &embedding_type, ptr);
|
157
|
+
|
158
|
+
double sum_squares = 0.0;
|
159
|
+
const float *values = ptr->values;
|
160
|
+
|
161
|
+
for (uint16_t i = 0; i < ptr->dim; ++i) {
|
162
|
+
float val = values[i];
|
163
|
+
sum_squares += (double)val * val;
|
164
|
+
}
|
165
|
+
|
166
|
+
return DBL2NUM(sqrt(sum_squares));
|
167
|
+
}
|
168
|
+
|
169
|
+
// Instance method: embedding.normalize!
|
170
|
+
// Normalize the embedding vector in-place (destructive operation)
|
171
|
+
static VALUE embedding_normalize_bang(VALUE self) {
|
172
|
+
embedding_t *ptr;
|
173
|
+
TypedData_Get_Struct(self, embedding_t, &embedding_type, ptr);
|
174
|
+
|
175
|
+
// Calculate magnitude
|
176
|
+
double sum_squares = 0.0;
|
177
|
+
float *values = ptr->values;
|
178
|
+
|
179
|
+
for (uint16_t i = 0; i < ptr->dim; ++i) {
|
180
|
+
float val = values[i];
|
181
|
+
sum_squares += (double)val * val;
|
182
|
+
}
|
183
|
+
|
184
|
+
double magnitude = sqrt(sum_squares);
|
185
|
+
|
186
|
+
// Avoid division by zero
|
187
|
+
if (magnitude == 0.0) {
|
188
|
+
rb_raise(rb_eZeroDivError, "Cannot normalize zero vector");
|
189
|
+
}
|
190
|
+
|
191
|
+
// Normalize each component
|
192
|
+
float inv_magnitude = (float)(1.0 / magnitude);
|
193
|
+
for (uint16_t i = 0; i < ptr->dim; ++i) {
|
194
|
+
values[i] *= inv_magnitude;
|
195
|
+
}
|
196
|
+
|
197
|
+
return self; // Return self for method chaining
|
101
198
|
}
|
102
199
|
|
103
200
|
// Ruby extension initialization function
|
@@ -107,6 +204,10 @@ void Init_embedding(void) {
|
|
107
204
|
VALUE mRag = rb_define_module("RagEmbeddings");
|
108
205
|
VALUE cEmbedding = rb_define_class_under(mRag, "Embedding", rb_cObject);
|
109
206
|
|
207
|
+
// IMPORTANT: Undefine the default allocator to prevent the warning
|
208
|
+
// This is necessary when using TypedData_Wrap_Struct
|
209
|
+
rb_undef_alloc_func(cEmbedding);
|
210
|
+
|
110
211
|
// Register class methods
|
111
212
|
rb_define_singleton_method(cEmbedding, "from_array", embedding_from_array, 1);
|
112
213
|
|
@@ -114,4 +215,6 @@ void Init_embedding(void) {
|
|
114
215
|
rb_define_method(cEmbedding, "dim", embedding_dim, 0);
|
115
216
|
rb_define_method(cEmbedding, "to_a", embedding_to_a, 0);
|
116
217
|
rb_define_method(cEmbedding, "cosine_similarity", embedding_cosine_similarity, 1);
|
218
|
+
rb_define_method(cEmbedding, "magnitude", embedding_magnitude, 0);
|
219
|
+
rb_define_method(cEmbedding, "normalize!", embedding_normalize_bang, 0);
|
117
220
|
}
|
@@ -1,13 +1,20 @@
|
|
1
1
|
require "langchainrb"
|
2
2
|
|
3
3
|
module RagEmbeddings
|
4
|
-
|
4
|
+
DEFAULT_MODEL = "llama3.2".freeze
|
5
5
|
|
6
|
-
def self.llm
|
7
|
-
@llm ||= Langchain::LLM::Ollama.new(url: "http://localhost:11434",
|
6
|
+
def self.llm(model: DEFAULT_MODEL)
|
7
|
+
@llm ||= Langchain::LLM::Ollama.new(url: "http://localhost:11434",
|
8
|
+
default_options: {
|
9
|
+
temperature: 0.1,
|
10
|
+
chat_model: model,
|
11
|
+
completion_model: model,
|
12
|
+
embedding_model: model,
|
13
|
+
}
|
14
|
+
)
|
8
15
|
end
|
9
16
|
|
10
|
-
def self.embed(text)
|
11
|
-
llm.embed(text:
|
17
|
+
def self.embed(text, model: DEFAULT_MODEL)
|
18
|
+
llm(model:).embed(text:).embedding
|
12
19
|
end
|
13
20
|
end
|
data/lib/rag_embeddings.rb
CHANGED
@@ -1,4 +1,8 @@
|
|
1
1
|
require_relative "rag_embeddings/version"
|
2
2
|
require_relative "rag_embeddings/engine"
|
3
3
|
require_relative "rag_embeddings/database"
|
4
|
-
|
4
|
+
|
5
|
+
# Loads the compiled C extension
|
6
|
+
require "rag_embeddings/embedding"
|
7
|
+
|
8
|
+
require "faraday"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rag_embeddings
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marco Mastrodonato
|
@@ -77,6 +77,7 @@ metadata:
|
|
77
77
|
rdoc_options: []
|
78
78
|
require_paths:
|
79
79
|
- lib
|
80
|
+
- ext
|
80
81
|
required_ruby_version: !ruby/object:Gem::Requirement
|
81
82
|
requirements:
|
82
83
|
- - ">="
|