rag_embeddings 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cd1dc0ac570ef83c9c79142cb516411454137919c649ab7f446e18748f7f7717
4
- data.tar.gz: d21f7e9b2eee1324b4a0f7a5d892d3f55f1767bb42a965e0828225ba823461b8
3
+ metadata.gz: 3bebe9dc8527ed47e0d7b48534911e97ee7549bb73763c1dd59db063e41558c8
4
+ data.tar.gz: fdb272ee4dd12f52f33fb2cd33b9ff22f5207911c68c09b976af59346f666039
5
5
  SHA512:
6
- metadata.gz: 3fd776d3ff4b3082eac778534f2fc56aa541ea048d70d69ddbfb2738da9fb252012d6d7ffa4af0f2a1fbed229ac86960ada844cd062a4c8294ecd4999b4cd0ed
7
- data.tar.gz: b2682f1d217d9689a73e78fdbcfd74973de598f323dc12a35868530cb95e0833a584d29468d91c2e6e01afcf66585fe18f8388e54cee04b71b8e7dc955a10181
6
+ metadata.gz: a106f044b23d4438110516ee57dc6079b177861c8e09da4eeffc7e842c3aa9d506b96b6141d6e033a74de1ca61ecf1e53490682f906e14915d76fd5fe4d81103
7
+ data.tar.gz: 6be68129ca5338a99f3d55816cbb06785cff2530e39784d5202c4602bb73aec783e5180b7cf09a1f1ef962bb5d6a2070af3db080f677369b49a5463d7c466be3
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # 💎 Rag Embeddings
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/rag_embeddings.svg)](https://badge.fury.io/rb/rag_embeddings)
3
+ [![Gem Version](https://badge.fury.io/rb/rag_embeddings.svg?icon=si%3Arubygems)](https://badge.fury.io/rb/rag_embeddings)
4
4
 
5
5
  **rag_embeddings** is a native Ruby library for efficient storage and comparison of AI-generated embedding vectors (float arrays) using high-performance C extensions. It is designed for seamless integration with external LLMs (Ollama, OpenAI, Mistral, etc) and works perfectly for RAG (Retrieval-Augmented Generation) applications.
6
6
 
@@ -49,18 +49,25 @@ To run all specs (RSpec required):
49
49
  ### 1. Generate an embedding from text
50
50
 
51
51
  ```ruby
52
- require "rag_embeddings"
53
-
54
52
  text = "Hello world, this is RAG!"
55
53
  embedding = RagEmbeddings.embed(text)
56
54
  # embedding is a float array
57
55
  ```
58
56
 
57
+ The default model is llama3.2 but you can set another one (reload the console as the llm is memoized):
58
+
59
+ ```ruby
60
+ embedding = RagEmbeddings.embed(text, model: 'qwen3:0.6b')
61
+ ```
62
+
59
63
  ### 2. Create a C embedding object
60
64
 
61
65
  ```ruby
62
66
  c_embedding = RagEmbeddings::Embedding.from_array(embedding)
63
67
  puts "Dimension: #{c_embedding.dim}"
68
+ # Dimension: 1024 # qwen3:0.6b
69
+ # Dimension: 3072 # llama3.2
70
+
64
71
  puts "Ruby array: #{c_embedding.to_a.inspect}"
65
72
  ```
66
73
 
@@ -99,6 +106,22 @@ puts "Most similar text: #{result.first[1]}, score: #{result.first[2]}"
99
106
  - Embedding provider: switch model/provider in engine.rb (Ollama, OpenAI, etc)
100
107
  - Database: set the SQLite file path as desired
101
108
 
109
+ ## 🔢 Embeddings dimension
110
+
111
+ The embedding dimension is dynamic and matches whatever the LLM provides.
112
+
113
+ ## ⚡️ Performance
114
+
115
+ Embedding creation (10000 times): 82 ms
116
+ Cosine similarity (10000 times): 107 ms
117
+ RSS: 186.7 MB
118
+ .
119
+ Memory usage delta: 33.97 MB for 10000 embeddings
120
+ .
121
+
122
+ Finished in 0.42577 seconds (files took 0.06832 seconds to load)
123
+ 2 examples, 0 failures
124
+
102
125
  ## 👷 Requirements
103
126
 
104
127
  - Ruby >= 3.3
@@ -12,7 +12,9 @@ typedef struct {
12
12
 
13
13
  // Callback for freeing memory when Ruby's GC collects our object
14
14
  static void embedding_free(void *ptr) {
15
- xfree(ptr); // Ruby's memory free function
15
+ if (ptr) {
16
+ xfree(ptr); // Ruby's memory free function (with null check)
17
+ }
16
18
  }
17
19
 
18
20
  // Callback to report memory usage to Ruby's GC
@@ -27,22 +29,46 @@ static const rb_data_type_t embedding_type = {
27
29
  "RagEmbeddings/Embedding", // Type name
28
30
  {0, embedding_free, embedding_memsize,}, // Functions: mark, free, size
29
31
  0, 0, // Parent type, data
30
- RUBY_TYPED_FREE_IMMEDIATELY // Flags
32
+ RUBY_TYPED_FREE_IMMEDIATELY // Flags for immediate cleanup
31
33
  };
32
34
 
33
35
  // Class method: RagEmbeddings::Embedding.from_array([1.0, 2.0, ...])
34
36
  // Creates a new embedding from a Ruby array
35
37
  static VALUE embedding_from_array(VALUE klass, VALUE rb_array) {
36
38
  Check_Type(rb_array, T_ARRAY); // Ensure argument is a Ruby array
37
- uint16_t dim = (uint16_t)RARRAY_LEN(rb_array);
39
+
40
+ long array_len = RARRAY_LEN(rb_array);
41
+
42
+ // Validate array length fits in uint16_t (max 65535 dimensions)
43
+ if (array_len > UINT16_MAX) {
44
+ rb_raise(rb_eArgError, "Array too large: maximum %d dimensions allowed", UINT16_MAX);
45
+ }
46
+
47
+ // Prevent zero-length embeddings
48
+ if (array_len == 0) {
49
+ rb_raise(rb_eArgError, "Cannot create embedding from empty array");
50
+ }
51
+
52
+ uint16_t dim = (uint16_t)array_len;
38
53
 
39
54
  // Allocate memory for struct + array of floats
40
55
  embedding_t *ptr = xmalloc(sizeof(embedding_t) + dim * sizeof(float));
41
56
  ptr->dim = dim;
42
57
 
43
58
  // Copy values from Ruby array to our C array
44
- for (int i = 0; i < dim; ++i)
45
- ptr->values[i] = (float)NUM2DBL(rb_ary_entry(rb_array, i));
59
+ // Using RARRAY_CONST_PTR for better performance when available
60
+ const VALUE *array_ptr = RARRAY_CONST_PTR(rb_array);
61
+ for (uint16_t i = 0; i < dim; ++i) {
62
+ VALUE val = array_ptr[i];
63
+
64
+ // Ensure the value is numeric
65
+ if (!RB_FLOAT_TYPE_P(val) && !RB_INTEGER_TYPE_P(val)) {
66
+ xfree(ptr); // Clean up allocated memory before raising exception
67
+ rb_raise(rb_eTypeError, "Array element at index %d is not numeric", i);
68
+ }
69
+
70
+ ptr->values[i] = (float)NUM2DBL(val);
71
+ }
46
72
 
47
73
  // Wrap our C struct in a Ruby object
48
74
  VALUE obj = TypedData_Wrap_Struct(klass, &embedding_type, ptr);
@@ -65,17 +91,19 @@ static VALUE embedding_to_a(VALUE self) {
65
91
  TypedData_Get_Struct(self, embedding_t, &embedding_type, ptr);
66
92
 
67
93
  // Create a new Ruby array with pre-allocated capacity
68
- VALUE arr = rb_ary_new2(ptr->dim);
94
+ VALUE arr = rb_ary_new_capa(ptr->dim);
69
95
 
70
96
  // Copy each float value to the Ruby array
71
- for (int i = 0; i < ptr->dim; ++i)
72
- rb_ary_push(arr, DBL2NUM(ptr->values[i]));
97
+ // Using rb_ary_store for better performance than rb_ary_push
98
+ for (uint16_t i = 0; i < ptr->dim; ++i) {
99
+ rb_ary_store(arr, i, DBL2NUM(ptr->values[i]));
100
+ }
73
101
 
74
102
  return arr;
75
103
  }
76
104
 
77
105
  // Instance method: embedding.cosine_similarity(other_embedding)
78
- // Calculate cosine similarity between two embeddings
106
+ // Calculate cosine similarity between two embeddings using optimized algorithm
79
107
  static VALUE embedding_cosine_similarity(VALUE self, VALUE other) {
80
108
  embedding_t *a, *b;
81
109
  // Get C structs for both embeddings
@@ -83,21 +111,90 @@ static VALUE embedding_cosine_similarity(VALUE self, VALUE other) {
83
111
  TypedData_Get_Struct(other, embedding_t, &embedding_type, b);
84
112
 
85
113
  // Ensure dimensions match
86
- if (a->dim != b->dim)
87
- rb_raise(rb_eArgError, "Dimension mismatch");
114
+ if (a->dim != b->dim) {
115
+ rb_raise(rb_eArgError, "Dimension mismatch: %d vs %d", a->dim, b->dim);
116
+ }
88
117
 
89
- float dot = 0.0f, norm_a = 0.0f, norm_b = 0.0f;
118
+ // Use double precision for intermediate calculations to reduce accumulation errors
119
+ double dot = 0.0, norm_a = 0.0, norm_b = 0.0;
90
120
 
91
- // Calculate dot product and vector magnitudes
92
- for (int i = 0; i < a->dim; ++i) {
93
- dot += a->values[i] * b->values[i]; // Dot product
94
- norm_a += a->values[i] * a->values[i]; // Square of magnitude for vector a
95
- norm_b += b->values[i] * b->values[i]; // Square of magnitude for vector b
121
+ // Calculate dot product and vector magnitudes in a single loop
122
+ // This is more cache-friendly than separate loops
123
+ const float *va = a->values;
124
+ const float *vb = b->values;
125
+
126
+ for (uint16_t i = 0; i < a->dim; ++i) {
127
+ float ai = va[i];
128
+ float bi = vb[i];
129
+
130
+ dot += (double)ai * bi; // Dot product
131
+ norm_a += (double)ai * ai; // Square of magnitude for vector a
132
+ norm_b += (double)bi * bi; // Square of magnitude for vector b
133
+ }
134
+
135
+ // Check for zero vectors to avoid division by zero
136
+ if (norm_a == 0.0 || norm_b == 0.0) {
137
+ return DBL2NUM(0.0); // Return 0 similarity for zero vectors
96
138
  }
97
139
 
98
140
  // Apply cosine similarity formula: dot(a,b)/(|a|*|b|)
99
- // Small epsilon (1e-8) added to prevent division by zero
100
- return DBL2NUM(dot / (sqrt(norm_a) * sqrt(norm_b) + 1e-8));
141
+ // Using sqrt for better numerical stability
142
+ double magnitude_product = sqrt(norm_a * norm_b);
143
+ double similarity = dot / magnitude_product;
144
+
145
+ // Clamp result to [-1, 1] to handle floating point precision errors
146
+ if (similarity > 1.0) similarity = 1.0;
147
+ if (similarity < -1.0) similarity = -1.0;
148
+
149
+ return DBL2NUM(similarity);
150
+ }
151
+
152
+ // Instance method: embedding.magnitude
153
+ // Calculate the magnitude (L2 norm) of the embedding vector
154
+ static VALUE embedding_magnitude(VALUE self) {
155
+ embedding_t *ptr;
156
+ TypedData_Get_Struct(self, embedding_t, &embedding_type, ptr);
157
+
158
+ double sum_squares = 0.0;
159
+ const float *values = ptr->values;
160
+
161
+ for (uint16_t i = 0; i < ptr->dim; ++i) {
162
+ float val = values[i];
163
+ sum_squares += (double)val * val;
164
+ }
165
+
166
+ return DBL2NUM(sqrt(sum_squares));
167
+ }
168
+
169
+ // Instance method: embedding.normalize!
170
+ // Normalize the embedding vector in-place (destructive operation)
171
+ static VALUE embedding_normalize_bang(VALUE self) {
172
+ embedding_t *ptr;
173
+ TypedData_Get_Struct(self, embedding_t, &embedding_type, ptr);
174
+
175
+ // Calculate magnitude
176
+ double sum_squares = 0.0;
177
+ float *values = ptr->values;
178
+
179
+ for (uint16_t i = 0; i < ptr->dim; ++i) {
180
+ float val = values[i];
181
+ sum_squares += (double)val * val;
182
+ }
183
+
184
+ double magnitude = sqrt(sum_squares);
185
+
186
+ // Avoid division by zero
187
+ if (magnitude == 0.0) {
188
+ rb_raise(rb_eZeroDivError, "Cannot normalize zero vector");
189
+ }
190
+
191
+ // Normalize each component
192
+ float inv_magnitude = (float)(1.0 / magnitude);
193
+ for (uint16_t i = 0; i < ptr->dim; ++i) {
194
+ values[i] *= inv_magnitude;
195
+ }
196
+
197
+ return self; // Return self for method chaining
101
198
  }
102
199
 
103
200
  // Ruby extension initialization function
@@ -107,6 +204,10 @@ void Init_embedding(void) {
107
204
  VALUE mRag = rb_define_module("RagEmbeddings");
108
205
  VALUE cEmbedding = rb_define_class_under(mRag, "Embedding", rb_cObject);
109
206
 
207
+ // IMPORTANT: Undefine the default allocator to prevent the warning
208
+ // This is necessary when using TypedData_Wrap_Struct
209
+ rb_undef_alloc_func(cEmbedding);
210
+
110
211
  // Register class methods
111
212
  rb_define_singleton_method(cEmbedding, "from_array", embedding_from_array, 1);
112
213
 
@@ -114,4 +215,6 @@ void Init_embedding(void) {
114
215
  rb_define_method(cEmbedding, "dim", embedding_dim, 0);
115
216
  rb_define_method(cEmbedding, "to_a", embedding_to_a, 0);
116
217
  rb_define_method(cEmbedding, "cosine_similarity", embedding_cosine_similarity, 1);
218
+ rb_define_method(cEmbedding, "magnitude", embedding_magnitude, 0);
219
+ rb_define_method(cEmbedding, "normalize!", embedding_normalize_bang, 0);
117
220
  }
@@ -1,13 +1,20 @@
1
1
  require "langchainrb"
2
2
 
3
3
  module RagEmbeddings
4
- MODEL = "gemma3".freeze
4
+ DEFAULT_MODEL = "llama3.2".freeze
5
5
 
6
- def self.llm
7
- @llm ||= Langchain::LLM::Ollama.new(url: "http://localhost:11434", default_options: { temperature: 0.1, model: MODEL })
6
+ def self.llm(model: DEFAULT_MODEL)
7
+ @llm ||= Langchain::LLM::Ollama.new(url: "http://localhost:11434",
8
+ default_options: {
9
+ temperature: 0.1,
10
+ chat_model: model,
11
+ completion_model: model,
12
+ embedding_model: model,
13
+ }
14
+ )
8
15
  end
9
16
 
10
- def self.embed(text)
11
- llm.embed(text: text).embedding
17
+ def self.embed(text, model: DEFAULT_MODEL)
18
+ llm(model:).embed(text:).embedding
12
19
  end
13
20
  end
@@ -1,3 +1,3 @@
1
1
  module RagEmbeddings
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rag_embeddings
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marco Mastrodonato