prometheus-client-mmap 1.2.4-x86_64-linux-gnu

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/.tool-versions +1 -0
  3. data/README.md +281 -0
  4. data/ext/fast_mmaped_file_rs/Cargo.toml +35 -0
  5. data/ext/fast_mmaped_file_rs/README.md +52 -0
  6. data/ext/fast_mmaped_file_rs/build.rs +5 -0
  7. data/ext/fast_mmaped_file_rs/extconf.rb +28 -0
  8. data/ext/fast_mmaped_file_rs/src/error.rs +174 -0
  9. data/ext/fast_mmaped_file_rs/src/file_entry.rs +784 -0
  10. data/ext/fast_mmaped_file_rs/src/file_info.rs +240 -0
  11. data/ext/fast_mmaped_file_rs/src/lib.rs +78 -0
  12. data/ext/fast_mmaped_file_rs/src/macros.rs +14 -0
  13. data/ext/fast_mmaped_file_rs/src/map.rs +492 -0
  14. data/ext/fast_mmaped_file_rs/src/mmap/inner.rs +704 -0
  15. data/ext/fast_mmaped_file_rs/src/mmap.rs +891 -0
  16. data/ext/fast_mmaped_file_rs/src/raw_entry.rs +473 -0
  17. data/ext/fast_mmaped_file_rs/src/testhelper.rs +222 -0
  18. data/ext/fast_mmaped_file_rs/src/util.rs +121 -0
  19. data/lib/3.1/fast_mmaped_file_rs.so +0 -0
  20. data/lib/3.2/fast_mmaped_file_rs.so +0 -0
  21. data/lib/3.3/fast_mmaped_file_rs.so +0 -0
  22. data/lib/3.4/fast_mmaped_file_rs.so +0 -0
  23. data/lib/prometheus/client/configuration.rb +23 -0
  24. data/lib/prometheus/client/counter.rb +27 -0
  25. data/lib/prometheus/client/formats/text.rb +85 -0
  26. data/lib/prometheus/client/gauge.rb +40 -0
  27. data/lib/prometheus/client/helper/entry_parser.rb +132 -0
  28. data/lib/prometheus/client/helper/file_locker.rb +50 -0
  29. data/lib/prometheus/client/helper/json_parser.rb +23 -0
  30. data/lib/prometheus/client/helper/metrics_processing.rb +45 -0
  31. data/lib/prometheus/client/helper/metrics_representation.rb +51 -0
  32. data/lib/prometheus/client/helper/mmaped_file.rb +64 -0
  33. data/lib/prometheus/client/helper/plain_file.rb +29 -0
  34. data/lib/prometheus/client/histogram.rb +80 -0
  35. data/lib/prometheus/client/label_set_validator.rb +85 -0
  36. data/lib/prometheus/client/metric.rb +80 -0
  37. data/lib/prometheus/client/mmaped_dict.rb +79 -0
  38. data/lib/prometheus/client/mmaped_value.rb +154 -0
  39. data/lib/prometheus/client/page_size.rb +17 -0
  40. data/lib/prometheus/client/push.rb +203 -0
  41. data/lib/prometheus/client/rack/collector.rb +88 -0
  42. data/lib/prometheus/client/rack/exporter.rb +96 -0
  43. data/lib/prometheus/client/registry.rb +65 -0
  44. data/lib/prometheus/client/simple_value.rb +31 -0
  45. data/lib/prometheus/client/summary.rb +69 -0
  46. data/lib/prometheus/client/support/puma.rb +44 -0
  47. data/lib/prometheus/client/support/unicorn.rb +35 -0
  48. data/lib/prometheus/client/uses_value_type.rb +20 -0
  49. data/lib/prometheus/client/version.rb +5 -0
  50. data/lib/prometheus/client.rb +58 -0
  51. data/lib/prometheus.rb +3 -0
  52. metadata +249 -0
@@ -0,0 +1,891 @@
1
+ use magnus::exception::*;
2
+ use magnus::prelude::*;
3
+ use magnus::rb_sys::{AsRawValue, FromRawValue};
4
+ use magnus::typed_data::Obj;
5
+ use magnus::value::Fixnum;
6
+ use magnus::{eval, scan_args, Error, Integer, RArray, RClass, RHash, RString, Value};
7
+ use nix::libc::{c_char, c_long, c_ulong};
8
+ use rb_sys::rb_str_new_static;
9
+ use std::fs::File;
10
+ use std::io::{prelude::*, SeekFrom};
11
+ use std::mem;
12
+ use std::path::Path;
13
+ use std::ptr::NonNull;
14
+ use std::sync::RwLock;
15
+
16
+ use crate::err;
17
+ use crate::error::MmapError;
18
+ use crate::file_entry::FileEntry;
19
+ use crate::map::EntryMap;
20
+ use crate::raw_entry::RawEntry;
21
+ use crate::util::{self, CheckedOps};
22
+ use crate::Result;
23
+ use crate::HEADER_SIZE;
24
+ use inner::InnerMmap;
25
+
26
+ mod inner;
27
+
28
+ #[cfg(ruby_gte_3_4)]
29
+ /// The Ruby `STR_SHARED` flag, aka `FL_USER0`.
30
+ /// This was changed from `FL_USER2` in https://github.com/ruby/ruby/commit/6deeec5d459ecff5ec4628523b14ac7379fd942e.
31
+ const STR_SHARED: c_ulong = 1 << (12);
32
+
33
+ #[cfg(ruby_lte_3_3)]
34
+ /// The Ruby `STR_SHARED` flag, aka `FL_USER2`.
35
+ const STR_SHARED: c_ulong = 1 << (14);
36
+
37
+ /// The Ruby `STR_NOEMBED` flag, aka `FL_USER1`.
38
+ const STR_NOEMBED: c_ulong = 1 << (13);
39
+
40
+ /// A Rust struct wrapped in a Ruby object, providing access to a memory-mapped
41
+ /// file used to store, update, and read out Prometheus metrics.
42
+ ///
43
+ /// - File format:
44
+ /// - Header:
45
+ /// - 4 bytes: u32 - total size of metrics in file.
46
+ /// - 4 bytes: NUL byte padding.
47
+ /// - Repeating metrics entries:
48
+ /// - 4 bytes: u32 - entry JSON string size.
49
+ /// - `N` bytes: UTF-8 encoded JSON string used as entry key.
50
+ /// - (8 - (4 + `N`) % 8) bytes: 1 to 8 padding space (0x20) bytes to
51
+ /// reach 8-byte alignment.
52
+ /// - 8 bytes: f64 - entry value.
53
+ ///
54
+ /// All numbers are saved in native-endian format.
55
+ ///
56
+ /// Generated via [luismartingarcia/protocol](https://github.com/luismartingarcia/protocol):
57
+ ///
58
+ ///
59
+ /// ```
60
+ /// protocol "Used:4,Pad:4,K1 Size:4,K1 Name:4,K1 Value:8,K2 Size:4,K2 Name:4,K2 Value:8"
61
+ ///
62
+ /// 0 1 2 3
63
+ /// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
64
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
65
+ /// | Used | Pad |K1 Size|K1 Name| K1 Value |K2 Size|K2 Name|
66
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
67
+ /// | K2 Value |
68
+ /// +-+-+-+-+-+-+-+
69
+ /// ```
70
+ //
71
+ // The API imposed by `magnus` requires all methods to use shared borrows.
72
+ // This means we can't store any mutable state in the top-level struct,
73
+ // and must store the interior data behind a `RwLock`, which adds run-time
74
+ // checks that mutable operations have no concurrent read or writes.
75
+ //
76
+ // We are further limited by the need to support subclassing in Ruby, which
77
+ // requires us to define an allocation function for the class, the
78
+ // `magnus::class::define_alloc_func()` function. This needs to support the
79
+ // `Default` trait, so a `File` cannot be directly held by the object being
80
+ // constructed. Having the `RwLock` hold an `Option` of the interior object
81
+ // resolves this.
82
+ #[derive(Debug, Default)]
83
+ #[magnus::wrap(class = "FastMmapedFileRs", free_immediately, size)]
84
+ pub struct MmapedFile(RwLock<Option<InnerMmap>>);
85
+
86
+ impl MmapedFile {
87
+ /// call-seq:
88
+ /// new(file)
89
+ ///
90
+ /// create a new Mmap object
91
+ ///
92
+ /// * <em>file</em>
93
+ ///
94
+ ///
95
+ /// Creates a mapping that's shared with all other processes
96
+ /// mapping the same area of the file.
97
+ pub fn new(klass: RClass, args: &[Value]) -> magnus::error::Result<Obj<Self>> {
98
+ let args = scan_args::scan_args::<(RString,), (), (), (), (), ()>(args)?;
99
+ let path = args.required.0;
100
+
101
+ let lock = MmapedFile(RwLock::new(None));
102
+ let obj = Obj::wrap_as(lock, klass);
103
+
104
+ let _: Value = obj.funcall("initialize", (path,))?;
105
+
106
+ Ok(obj)
107
+ }
108
+
109
+ /// Initialize a new `FastMmapedFileRs` object. This must be defined in
110
+ /// order for inheritance to work.
111
+ pub fn initialize(rb_self: Obj<Self>, fname: String) -> magnus::error::Result<()> {
112
+ let file = File::options()
113
+ .read(true)
114
+ .write(true)
115
+ .open(&fname)
116
+ .map_err(|_| err!(arg_error(), "Can't open {}", fname))?;
117
+
118
+ let inner = InnerMmap::new(fname.into(), file)?;
119
+ rb_self.insert_inner(inner)?;
120
+
121
+ let weak_klass = RClass::from_value(eval("ObjectSpace::WeakMap")?)
122
+ .ok_or_else(|| err!(no_method_error(), "unable to create WeakMap"))?;
123
+ let weak_obj_tracker = weak_klass.new_instance(())?;
124
+
125
+ // We will need to iterate over strings backed by the mmapped file, but
126
+ // don't want to prevent the GC from reaping them when the Ruby code
127
+ // has finished with them. `ObjectSpace::WeakMap` allows us to track
128
+ // them without extending their lifetime.
129
+ //
130
+ // https://ruby-doc.org/core-3.0.0/ObjectSpace/WeakMap.html
131
+ rb_self.ivar_set("@weak_obj_tracker", weak_obj_tracker)?;
132
+
133
+ Ok(())
134
+ }
135
+
136
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
137
+ /// metrics String.
138
+ pub fn to_metrics(file_list: RArray) -> magnus::error::Result<String> {
139
+ let mut map = EntryMap::new();
140
+ map.aggregate_files(file_list)?;
141
+
142
+ let sorted = map.into_sorted()?;
143
+
144
+ FileEntry::entries_to_string(sorted).map_err(|e| e.into())
145
+ }
146
+
147
+ /// Document-method: []
148
+ /// Document-method: slice
149
+ ///
150
+ /// call-seq: [](args)
151
+ ///
152
+ /// Element reference - with the following syntax:
153
+ ///
154
+ /// self[nth]
155
+ ///
156
+ /// retrieve the <em>nth</em> character
157
+ ///
158
+ /// self[start..last]
159
+ ///
160
+ /// return a substring from <em>start</em> to <em>last</em>
161
+ ///
162
+ /// self[start, length]
163
+ ///
164
+ /// return a substring of <em>length</em> characters from <em>start</em>
165
+ pub fn slice(rb_self: Obj<Self>, args: &[Value]) -> magnus::error::Result<RString> {
166
+ // The C implementation would trigger a GC cycle via `rb_gc_force_recycle`
167
+ // if the `MM_PROTECT` flag is set, but in practice this is never used.
168
+ // We omit this logic, particularly because `rb_gc_force_recycle` is a
169
+ // no-op as of Ruby 3.1.
170
+ let rs_self = &*rb_self;
171
+
172
+ let str = rs_self.str(rb_self)?;
173
+ rs_self._slice(rb_self, str, args)
174
+ }
175
+
176
+ fn _slice(
177
+ &self,
178
+ rb_self: Obj<Self>,
179
+ str: RString,
180
+ args: &[Value],
181
+ ) -> magnus::error::Result<RString> {
182
+ let substr: RString = str.funcall("[]", args)?;
183
+
184
+ // Track shared child strings which use the same backing storage.
185
+ if Self::rb_string_is_shared(substr) {
186
+ (*rb_self).track_rstring(rb_self, substr)?;
187
+ }
188
+
189
+ // The C implementation does this, perhaps to validate that the len we
190
+ // provided is actually being used.
191
+ (*rb_self).inner_mut(|inner| {
192
+ inner.set_len(str.len());
193
+ Ok(())
194
+ })?;
195
+
196
+ Ok(substr)
197
+ }
198
+
199
+ /// Document-method: msync
200
+ /// Document-method: sync
201
+ /// Document-method: flush
202
+ ///
203
+ /// call-seq: msync
204
+ ///
205
+ /// flush the file
206
+ pub fn sync(&self, args: &[Value]) -> magnus::error::Result<()> {
207
+ use nix::sys::mman::MsFlags;
208
+
209
+ let mut ms_async = false;
210
+ let args = scan_args::scan_args::<(), (Option<i32>,), (), (), (), ()>(args)?;
211
+
212
+ if let Some(flag) = args.optional.0 {
213
+ let flag = MsFlags::from_bits(flag).unwrap_or(MsFlags::empty());
214
+ ms_async = flag.contains(MsFlags::MS_ASYNC);
215
+ }
216
+
217
+ // The `memmap2` crate does not support the `MS_INVALIDATE` flag. We ignore that
218
+ // flag if passed in, checking only for `MS_ASYNC`. In practice no arguments are ever
219
+ // passed to this function, but we do this to maintain compatibility with the
220
+ // C implementation.
221
+ self.inner_mut(|inner| inner.flush(ms_async))
222
+ .map_err(|e| e.into())
223
+ }
224
+
225
+ /// Document-method: munmap
226
+ /// Document-method: unmap
227
+ ///
228
+ /// call-seq: munmap
229
+ ///
230
+ /// terminate the association
231
+ pub fn munmap(rb_self: Obj<Self>) -> magnus::error::Result<()> {
232
+ let rs_self = &*rb_self;
233
+
234
+ rs_self.inner_mut(|inner| {
235
+ // We are about to release the backing mmap for Ruby's String
236
+ // objects. If Ruby attempts to read from them the program will
237
+ // segfault. We update the length of all Strings to zero so Ruby
238
+ // does not attempt to access the now invalid address between now
239
+ // and when GC eventually reaps the objects.
240
+ //
241
+ // See the following for more detail:
242
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/39
243
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/41
244
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/merge_requests/80
245
+ inner.set_len(0);
246
+ Ok(())
247
+ })?;
248
+
249
+ // Update each String object to be zero-length.
250
+ let cap = util::cast_chk::<_, c_long>(rs_self.capacity(), "capacity")?;
251
+ rs_self.update_weak_map(rb_self, rs_self.as_mut_ptr(), cap)?;
252
+
253
+ // Remove the `InnerMmap` from the `RwLock`. It is dropped at the end of
254
+ // this statement (`let _` does not bind it), unmapping and closing the file.
255
+ let _ = rs_self.take_inner()?;
256
+ Ok(())
257
+ }
258
+
259
+ /// Fetch the `used` header from the `.db` file, the length
260
+ /// in bytes of the data written to the file.
261
+ pub fn load_used(&self) -> magnus::error::Result<Integer> {
262
+ let used = self.inner(|inner| inner.load_used())?;
263
+
264
+ Ok(Integer::from_u64(used as u64))
265
+ }
266
+
267
+ /// Update the `used` header for the `.db` file, the length
268
+ /// in bytes of the data written to the file.
269
+ pub fn save_used(rb_self: Obj<Self>, used: Fixnum) -> magnus::error::Result<Fixnum> {
270
+ let rs_self = &*rb_self;
271
+ let used_uint = used.to_u32()?;
272
+
273
+ // If the underlying mmap is smaller than the header, then resize to fit.
274
+ // The file has already been expanded to page size when first opened, so
275
+ // even if the map is less than HEADER_SIZE, we're not at risk of a
276
+ // SIGBUS.
277
+ if rs_self.capacity() < HEADER_SIZE {
278
+ rs_self.expand_to_fit(rb_self, HEADER_SIZE)?;
279
+ }
280
+
281
+ rs_self.inner_mut(|inner| inner.save_used(used_uint))?;
282
+
283
+ Ok(used)
284
+ }
285
+
286
+ /// Fetch the value associated with a key from the mmap.
287
+ /// If no entry is present, initialize with the default
288
+ /// value provided.
289
+ pub fn fetch_entry(
290
+ rb_self: Obj<Self>,
291
+ positions: RHash,
292
+ key: RString,
293
+ default_value: f64,
294
+ ) -> magnus::error::Result<f64> {
295
+ let rs_self = &*rb_self;
296
+ let position: Option<Fixnum> = positions.lookup(key)?;
297
+
298
+ if let Some(pos) = position {
299
+ let pos = pos.to_usize()?;
300
+ return rs_self
301
+ .inner(|inner| inner.load_value(pos))
302
+ .map_err(|e| e.into());
303
+ }
304
+
305
+ rs_self.check_expand(rb_self, key.len())?;
306
+
307
+ let value_offset: usize = rs_self.inner_mut(|inner| {
308
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
309
+ unsafe { inner.initialize_entry(key.as_slice(), default_value) }
310
+ })?;
311
+
312
+ // CAST: no-op on 64-bit, widening on 32-bit.
313
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
314
+
315
+ rs_self.load_value(value_offset)
316
+ }
317
+
318
+ /// Update the value of an existing entry, if present. Otherwise create a new entry
319
+ /// for the key.
320
+ pub fn upsert_entry(
321
+ rb_self: Obj<Self>,
322
+ positions: RHash,
323
+ key: RString,
324
+ value: f64,
325
+ ) -> magnus::error::Result<f64> {
326
+ let rs_self = &*rb_self;
327
+ let position: Option<Fixnum> = positions.lookup(key)?;
328
+
329
+ if let Some(pos) = position {
330
+ let pos = pos.to_usize()?;
331
+ return rs_self
332
+ .inner_mut(|inner| {
333
+ inner.save_value(pos, value)?;
334
+
335
+ // TODO just return `value` here instead of loading it?
336
+ // This is how the C implementation did it, but I don't
337
+ // see what the extra load gains us.
338
+ inner.load_value(pos)
339
+ })
340
+ .map_err(|e| e.into());
341
+ }
342
+
343
+ rs_self.check_expand(rb_self, key.len())?;
344
+
345
+ let value_offset: usize = rs_self.inner_mut(|inner| {
346
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
347
+ unsafe { inner.initialize_entry(key.as_slice(), value) }
348
+ })?;
349
+
350
+ // CAST: no-op on 64-bit, widening on 32-bit.
351
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
352
+
353
+ rs_self.load_value(value_offset)
354
+ }
355
+
356
+ /// Creates a Ruby String containing the section of the mmapped file that
357
+ /// has been written to.
358
+ fn str(&self, rb_self: Obj<Self>) -> magnus::error::Result<RString> {
359
+ let val_id = (*rb_self).inner(|inner| {
360
+ let ptr = inner.as_ptr();
361
+ let len = inner.len();
362
+
363
+ // SAFETY: This is safe so long as the data provided to Ruby meets its
364
+ // requirements. When unmapping the file this will no longer be the
365
+ // case, see the comment on `munmap` for how we handle this.
366
+ Ok(unsafe { rb_str_new_static(ptr as _, len as _) })
367
+ })?;
368
+
369
+ // SAFETY: We know that rb_str_new_static returns a VALUE.
370
+ let val = unsafe { Value::from_raw(val_id) };
371
+
372
+ // UNWRAP: We created this value as a string above.
373
+ let str = RString::from_value(val).unwrap();
374
+
375
+ // Freeze the root string so it can't be mutated out from under any
376
+ // substrings created. This object is never exposed to callers.
377
+ str.freeze();
378
+
379
+ // Track the RString in our `WeakMap` so we can update its address if
380
+ // we re-mmap the backing file.
381
+ (*rb_self).track_rstring(rb_self, str)?;
382
+
383
+ Ok(str)
384
+ }
385
+
386
+ /// If we reallocate, any live Ruby strings provided by the `str()` method
387
+ /// will be invalidated. We need to iterate over them using the `WeakMap` and update their
388
+ /// heap pointers to the newly allocated memory region.
389
+ fn update_weak_map(
390
+ &self,
391
+ rb_self: Obj<Self>,
392
+ old_ptr: *const c_char,
393
+ old_cap: c_long,
394
+ ) -> magnus::error::Result<()> {
395
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
396
+
397
+ let new_len = self.inner(|inner| util::cast_chk::<_, c_long>(inner.len(), "mmap len"))?;
398
+
399
+ // Iterate over the values of the `WeakMap`.
400
+ for val in tracker.enumeratorize("each_value", ()) {
401
+ let rb_string = val?;
402
+ let str = RString::from_value(rb_string)
403
+ .ok_or_else(|| err!(arg_error(), "weakmap value was not a string"))?;
404
+
405
+ // SAFETY: We're messing with Ruby's internals here, YOLO.
406
+ unsafe {
407
+ // Convert the magnus wrapper type to a raw string exposed by `rb_sys`,
408
+ // which provides access to its internals.
409
+ let mut raw_str = Self::rb_string_internal(str);
410
+
411
+ // Shared strings have their own `ptr` and `len` values, but `aux`
412
+ // is the id of the parent string so the GC can track this
413
+ // dependency. The `ptr` will always be an offset from the base
414
+ // address of the mmap, and `len` will be the length of the mmap
415
+ // less the offset from the base.
416
+ if Self::rb_string_is_shared(str) && new_len > 0 {
417
+ // Calculate how far into the original mmap the shared string
418
+ // started and update to the equivalent address in the new
419
+ // one.
420
+ let substr_ptr = raw_str.as_ref().as_.heap.ptr;
421
+ let offset = substr_ptr.offset_from(old_ptr);
422
+
423
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr().offset(offset);
424
+
425
+ let current_len = str.len() as c_long;
426
+ let new_shared_len = old_cap + current_len;
427
+
428
+ self.update_rstring_len(raw_str, new_shared_len);
429
+ continue;
430
+ }
431
+
432
+ // Update the string to point to the new mmapped file.
433
+ // We're matching the behavior of Ruby's `str_new_static` function.
434
+ // See https://github.com/ruby/ruby/blob/e51014f9c05aa65cbf203442d37fef7c12390015/string.c#L1030-L1053
435
+ //
436
+ // We deliberately do _NOT_ increment the `capa` field of the
437
+ // string to match the new `len`. We were initially doing this,
438
+ // but consistently triggered GCs in the middle of updating the
439
+ // string pointers, causing a segfault.
440
+ //
441
+ // See https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/45
442
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr();
443
+ self.update_rstring_len(raw_str, new_len);
444
+ }
445
+ }
446
+
447
+ Ok(())
448
+ }
449
+
450
+ /// Check that the mmap is large enough to contain the value to be added,
451
+ /// and expand it to fit if necessary.
452
+ fn check_expand(&self, rb_self: Obj<Self>, key_len: usize) -> magnus::error::Result<()> {
453
+ // CAST: no-op on 32-bit, widening on 64-bit.
454
+ let used = self.inner(|inner| inner.load_used())? as usize;
455
+ let entry_len = RawEntry::calc_total_len(key_len)?;
456
+
457
+ // We need the mmapped region to contain at least one byte beyond the
458
+ // written data to create a NUL-terminated C string. Validate that the
459
+ // new length does not exactly match or exceed the length of the mmap.
460
+ while self.capacity() <= used.add_chk(entry_len)? {
461
+ self.expand_to_fit(rb_self, self.capacity().mul_chk(2)?)?;
462
+ }
463
+
464
+ Ok(())
465
+ }
466
+
467
+ /// Expand the underlying file until it is long enough to fit `target_cap`.
468
+ /// This will remove the existing mmap, expand the file, then update any
469
+ /// strings held by the `WeakMap` to point to the newly mmapped address.
470
+ fn expand_to_fit(&self, rb_self: Obj<Self>, target_cap: usize) -> magnus::error::Result<()> {
471
+ if target_cap < self.capacity() {
472
+ return Err(err!(arg_error(), "Can't reduce the size of mmap"));
473
+ }
474
+
475
+ let mut new_cap = self.capacity();
476
+ while new_cap < target_cap {
477
+ new_cap = new_cap.mul_chk(2)?;
478
+ }
479
+
480
+ if new_cap != self.capacity() {
481
+ let old_ptr = self.as_mut_ptr();
482
+ let old_cap = util::cast_chk::<_, c_long>(self.capacity(), "capacity")?;
483
+
484
+ // Drop the old mmap.
485
+ let (mut file, path) = self.take_inner()?.munmap();
486
+
487
+ self.expand_file(&mut file, &path, target_cap)?;
488
+
489
+ // Re-mmap the expanded file.
490
+ let new_inner = InnerMmap::reestablish(path, file, target_cap)?;
491
+
492
+ self.insert_inner(new_inner)?;
493
+
494
+ return self.update_weak_map(rb_self, old_ptr, old_cap);
495
+ }
496
+
497
+ Ok(())
498
+ }
499
+
500
+ /// Use lseek(2) to seek past the end of the file and write a NUL byte. This
501
+ /// creates a file hole that expands the size of the file without consuming
502
+ /// disk space until it is actually written to.
503
+ fn expand_file(&self, file: &mut File, path: &Path, len: usize) -> Result<()> {
504
+ if len == 0 {
505
+ return Err(MmapError::overflowed(0, -1, "adding"));
506
+ }
507
+
508
+ // CAST: no-op on 64-bit, widening on 32-bit.
509
+ let len = len as u64;
510
+
511
+ match file.seek(SeekFrom::Start(len - 1)) {
512
+ Ok(_) => {}
513
+ Err(_) => {
514
+ return Err(MmapError::with_errno(format!("Can't lseek {}", len - 1)));
515
+ }
516
+ }
517
+
518
+ match file.write(&[0x0]) {
519
+ Ok(1) => {}
520
+ _ => {
521
+ return Err(MmapError::with_errno(format!(
522
+ "Can't extend {}",
523
+ path.display()
524
+ )));
525
+ }
526
+ }
527
+
528
+ Ok(())
529
+ }
530
+
531
+ fn track_rstring(&self, rb_self: Obj<Self>, str: RString) -> magnus::error::Result<()> {
532
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
533
+
534
+ // Use the string's Id as the key in the `WeakMap`.
535
+ let key = str.as_raw();
536
+ let _: Value = tracker.funcall("[]=", (key, str))?;
537
+ Ok(())
538
+ }
539
+
540
+ /// The total capacity of the underlying mmap.
541
+ #[inline]
542
+ fn capacity(&self) -> usize {
543
+ // UNWRAP: This is actually infallible, but we need to
544
+ // wrap it in a `Result` for use with `inner()`.
545
+ self.inner(|inner| Ok(inner.capacity())).unwrap()
546
+ }
547
+
548
+ fn load_value(&self, position: usize) -> magnus::error::Result<f64> {
549
+ self.inner(|inner| inner.load_value(position))
550
+ .map_err(|e| e.into())
551
+ }
552
+
553
+ fn as_mut_ptr(&self) -> *mut c_char {
554
+ // UNWRAP: This is actually infallible, but we need to
555
+ // wrap it in a `Result` for use with `inner()`.
556
+ self.inner(|inner| Ok(inner.as_mut_ptr() as *mut c_char))
557
+ .unwrap()
558
+ }
559
+
560
+ /// Takes a closure with immutable access to InnerMmap. Will fail if the inner
561
+ /// object has a mutable borrow or has been dropped.
562
+ fn inner<F, T>(&self, func: F) -> Result<T>
563
+ where
564
+ F: FnOnce(&InnerMmap) -> Result<T>,
565
+ {
566
+ let inner_opt = self.0.try_read().map_err(|_| MmapError::ConcurrentAccess)?;
567
+
568
+ let inner = inner_opt.as_ref().ok_or(MmapError::UnmappedFile)?;
569
+
570
+ func(inner)
571
+ }
572
+
573
+ /// Takes a closure with mutable access to InnerMmap. Will fail if the inner
574
+ /// object has an existing mutable borrow, or has been dropped.
575
+ fn inner_mut<F, T>(&self, func: F) -> Result<T>
576
+ where
577
+ F: FnOnce(&mut InnerMmap) -> Result<T>,
578
+ {
579
+ let mut inner_opt = self
580
+ .0
581
+ .try_write()
582
+ .map_err(|_| MmapError::ConcurrentAccess)?;
583
+
584
+ let inner = inner_opt.as_mut().ok_or(MmapError::UnmappedFile)?;
585
+
586
+ func(inner)
587
+ }
588
+
589
+ /// Take ownership of the `InnerMmap` from the `RwLock`.
590
+ /// Will fail if a mutable borrow is already held or the inner
591
+ /// object has been dropped.
592
+ fn take_inner(&self) -> Result<InnerMmap> {
593
+ let mut inner_opt = self
594
+ .0
595
+ .try_write()
596
+ .map_err(|_| MmapError::ConcurrentAccess)?;
597
+ match (*inner_opt).take() {
598
+ Some(i) => Ok(i),
599
+ None => Err(MmapError::UnmappedFile),
600
+ }
601
+ }
602
+
603
+ /// Move `new_inner` into the `RwLock`.
604
+ /// Will return an error if a mutable borrow is already held.
605
+ fn insert_inner(&self, new_inner: InnerMmap) -> Result<()> {
606
+ let mut inner_opt = self
607
+ .0
608
+ .try_write()
609
+ .map_err(|_| MmapError::ConcurrentAccess)?;
610
+ (*inner_opt).replace(new_inner);
611
+
612
+ Ok(())
613
+ }
614
+
615
+ /// Check if an RString is shared. Shared strings use the same underlying
616
+ /// storage as their parent, taking an offset from the start. By default
617
+ /// they must run to the end of the parent string.
618
+ fn rb_string_is_shared(rb_str: RString) -> bool {
619
+ // SAFETY: We only hold a reference to the raw object for the duration
620
+ // of this function, and no Ruby code is called.
621
+ let flags = unsafe {
622
+ let raw_str = Self::rb_string_internal(rb_str);
623
+ raw_str.as_ref().basic.flags
624
+ };
625
+ let shared_flags = STR_SHARED | STR_NOEMBED;
626
+
627
+ flags & shared_flags == shared_flags
628
+ }
629
+
630
+ /// Convert `magnus::RString` into the raw binding used by `rb_sys::RString`.
631
+ /// We need this to manually change the pointer and length values for strings
632
+ /// when moving the mmap to a new file.
633
+ ///
634
+ /// SAFETY: Calling Ruby code while the returned object is held may result
635
+ /// in it being mutated or dropped.
636
+ unsafe fn rb_string_internal(rb_str: RString) -> NonNull<rb_sys::RString> {
637
+ mem::transmute::<RString, NonNull<rb_sys::RString>>(rb_str)
638
+ }
639
+
640
+ #[cfg(ruby_lte_3_2)]
641
+ unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
642
+ raw_str.as_mut().as_.heap.len = new_len;
643
+ }
644
+
645
+ #[cfg(ruby_gte_3_3)]
646
+ unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
647
+ raw_str.as_mut().len = new_len;
648
+ }
649
+ }
650
+
651
+ #[cfg(test)]
652
+ mod test {
653
+ use magnus::error::Error;
654
+ use magnus::eval;
655
+ use magnus::Range;
656
+ use nix::unistd::{sysconf, SysconfVar};
657
+ use std::mem::size_of;
658
+
659
+ use super::*;
660
+ use crate::raw_entry::RawEntry;
661
+ use crate::testhelper::TestFile;
662
+
663
+ /// Create a wrapped MmapedFile object.
664
+ fn create_obj() -> Obj<MmapedFile> {
665
+ let TestFile {
666
+ file: _file,
667
+ path,
668
+ dir: _dir,
669
+ } = TestFile::new(&[0u8; 8]);
670
+
671
+ let path_str = path.display().to_string();
672
+ let rpath = RString::new(&path_str);
673
+
674
+ eval!("FastMmapedFileRs.new(path)", path = rpath).unwrap()
675
+ }
676
+
677
+ /// Add three entries to the mmap. Expected length is 56, 3x 16-byte
678
+ /// entries with 8-byte header.
679
+ fn populate_entries(rb_self: &Obj<MmapedFile>) -> RHash {
680
+ let positions = RHash::from_value(eval("{}").unwrap()).unwrap();
681
+
682
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("a"), 0.0).unwrap();
683
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("b"), 1.0).unwrap();
684
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("c"), 2.0).unwrap();
685
+
686
+ positions
687
+ }
688
+
689
+ #[test]
690
+ fn test_new() {
691
+ let _cleanup = unsafe { magnus::embed::init() };
692
+ let ruby = magnus::Ruby::get().unwrap();
693
+ crate::init(&ruby).unwrap();
694
+
695
+ let TestFile {
696
+ file,
697
+ path,
698
+ dir: _dir,
699
+ } = TestFile::new(&[0u8; 8]);
700
+
701
+ let path_str = path.display().to_string();
702
+ let rpath = RString::new(&path_str);
703
+
704
+ // Object created successfully
705
+ let result: std::result::Result<Obj<MmapedFile>, Error> =
706
+ eval!("FastMmapedFileRs.new(path)", path = rpath);
707
+ assert!(result.is_ok());
708
+
709
+ // Weak map added
710
+ let obj = result.unwrap();
711
+ let weak_tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
712
+ assert_eq!("ObjectSpace::WeakMap", weak_tracker.class().inspect());
713
+
714
+ // File expanded to page size
715
+ let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as u64;
716
+ let stat = file.metadata().unwrap();
717
+ assert_eq!(page_size, stat.len());
718
+
719
+ // Used set to header size
720
+ assert_eq!(
721
+ HEADER_SIZE as u64,
722
+ obj.load_used().unwrap().to_u64().unwrap()
723
+ );
724
+ }
725
+
726
+ #[test]
727
+ fn test_slice() {
728
+ let _cleanup = unsafe { magnus::embed::init() };
729
+ let ruby = magnus::Ruby::get().unwrap();
730
+ crate::init(&ruby).unwrap();
731
+
732
+ let obj = create_obj();
733
+ let _ = populate_entries(&obj);
734
+
735
+ // Validate header updated with new length
736
+ let header_range = Range::new(0, HEADER_SIZE, true).unwrap().as_value();
737
+ let header_slice = MmapedFile::slice(obj, &[header_range]).unwrap();
738
+ assert_eq!([56, 0, 0, 0, 0, 0, 0, 0], unsafe {
739
+ header_slice.as_slice()
740
+ });
741
+
742
+ let value_range = Range::new(HEADER_SIZE, 24, true).unwrap().as_value();
743
+ let value_slice = MmapedFile::slice(obj, &[value_range]).unwrap();
744
+
745
+ // Validate string length
746
+ assert_eq!(1u32.to_ne_bytes(), unsafe { &value_slice.as_slice()[0..4] });
747
+
748
+ // Validate string and padding
749
+ assert_eq!("a ", unsafe {
750
+ String::from_utf8_lossy(&value_slice.as_slice()[4..8])
751
+ });
752
+
753
+ // Validate value
754
+ assert_eq!(0.0f64.to_ne_bytes(), unsafe {
755
+ &value_slice.as_slice()[8..16]
756
+ });
757
+ }
758
+
759
+ #[test]
760
+ fn test_slice_resize() {
761
+ let _cleanup = unsafe { magnus::embed::init() };
762
+ let ruby = magnus::Ruby::get().unwrap();
763
+ crate::init(&ruby).unwrap();
764
+
765
+ fn assert_internals(
766
+ obj: Obj<MmapedFile>,
767
+ parent_id: c_ulong,
768
+ child_id: c_ulong,
769
+ unshared_id: c_ulong,
770
+ ) {
771
+ let rs_self = &*obj;
772
+ let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
773
+
774
+ let mmap_ptr = rs_self.as_mut_ptr();
775
+ let mmap_len = rs_self.capacity();
776
+
777
+ let mut parent_checked = false;
778
+ let mut child_checked = false;
779
+
780
+ for val in tracker.enumeratorize("each_value", ()) {
781
+ let rb_string = val.unwrap();
782
+ let str = RString::from_value(rb_string).unwrap();
783
+
784
+ unsafe {
785
+ let raw_str = MmapedFile::rb_string_internal(str);
786
+ if str.as_raw() == child_id {
787
+ assert_eq!(parent_id, raw_str.as_ref().as_.heap.aux.shared);
788
+
789
+ let child_offset = mmap_len as isize - str.len() as isize;
790
+ assert_eq!(mmap_ptr.offset(child_offset), raw_str.as_ref().as_.heap.ptr);
791
+
792
+ child_checked = true;
793
+ } else if str.as_raw() == parent_id {
794
+ assert_eq!(parent_id, str.as_raw());
795
+
796
+ assert_eq!(mmap_ptr, raw_str.as_ref().as_.heap.ptr);
797
+ assert_eq!(mmap_len as c_long, str.len() as c_long);
798
+ assert!(raw_str.as_ref().basic.flags & (STR_SHARED | STR_NOEMBED) > 0);
799
+ assert!(str.is_frozen());
800
+
801
+ parent_checked = true;
802
+ } else if str.as_raw() == unshared_id {
803
+ panic!("tracking unshared string");
804
+ } else {
805
+ panic!("unknown string");
806
+ }
807
+ }
808
+ }
809
+ assert!(parent_checked && child_checked);
810
+ }
811
+
812
+ let obj = create_obj();
813
+ let _ = populate_entries(&obj);
814
+
815
+ let rs_self = &*obj;
816
+
817
+ // Create a string containing the full mmap.
818
+ let parent_str = rs_self.str(obj).unwrap();
819
+ let parent_id = parent_str.as_raw();
820
+
821
+ // Ruby's shared strings are only created when they go to the end of
822
+ // the original string.
823
+ let len = rs_self.inner(|inner| Ok(inner.len())).unwrap();
824
+ let shareable_range = Range::new(1, len - 1, false).unwrap().as_value();
825
+
826
+ // This string should re-use the parent's buffer with an offset and have
827
+ // the parent's id in `as.heap.aux.shared`
828
+ let child_str = rs_self._slice(obj, parent_str, &[shareable_range]).unwrap();
829
+ let child_id = child_str.as_raw();
830
+
831
+ // A range that does not reach the end of the parent will not be shared.
832
+ assert!(len > 4);
833
+ let unshareable_range = Range::new(0, 4, false).unwrap().as_value();
834
+
835
+ // This string should NOT be tracked, it should own its own buffer.
836
+ let unshared_str = rs_self
837
+ ._slice(obj, parent_str, &[unshareable_range])
838
+ .unwrap();
839
+ let unshared_id = unshared_str.as_raw();
840
+ assert!(!MmapedFile::rb_string_is_shared(unshared_str));
841
+
842
+ assert_internals(obj, parent_id, child_id, unshared_id);
843
+
844
+ let orig_ptr = rs_self.as_mut_ptr();
845
+ // Expand a bunch to ensure we remap
846
+ for _ in 0..16 {
847
+ rs_self.expand_to_fit(obj, rs_self.capacity() * 2).unwrap();
848
+ }
849
+ let new_ptr = rs_self.as_mut_ptr();
850
+ assert!(orig_ptr != new_ptr);
851
+
852
+ // If we haven't updated the pointer to the newly remapped file this will segfault.
853
+ let _: Value = eval!("puts parent", parent = parent_str).unwrap();
854
+ let _: Value = eval!("puts child", child = child_str).unwrap();
855
+ let _: Value = eval!("puts unshared", unshared = unshared_str).unwrap();
856
+
857
+ // Confirm that tracked strings are still valid.
858
+ assert_internals(obj, parent_id, child_id, unshared_id);
859
+ }
860
+
861
+ #[test]
862
+ fn test_dont_fill_mmap() {
863
+ let _cleanup = unsafe { magnus::embed::init() };
864
+ let ruby = magnus::Ruby::get().unwrap();
865
+ crate::init(&ruby).unwrap();
866
+
867
+ let obj = create_obj();
868
+ let positions = populate_entries(&obj);
869
+
870
+ let rs_self = &*obj;
871
+
872
+ rs_self.expand_to_fit(obj, 1024).unwrap();
873
+
874
+ let current_used = rs_self.inner(|inner| inner.load_used()).unwrap() as usize;
875
+ let current_cap = rs_self.inner(|inner| Ok(inner.len())).unwrap();
876
+
877
+ // Create a new entry that exactly fills the capacity of the mmap.
878
+ let val_len =
879
+ current_cap - current_used - HEADER_SIZE - size_of::<f64>() - size_of::<u32>();
880
+ assert_eq!(
881
+ current_cap,
882
+ RawEntry::calc_total_len(val_len).unwrap() + current_used
883
+ );
884
+
885
+ let str = String::from_utf8(vec![b'A'; val_len]).unwrap();
886
+ MmapedFile::upsert_entry(obj, positions, RString::new(&str), 1.0).unwrap();
887
+
888
+ // Validate that we have expanded the mmap, ensuring a trailing NUL.
889
+ assert!(rs_self.capacity() > current_cap);
890
+ }
891
+ }