prometheus-client-mmap 0.21.0-x86_64-linux-musl → 1.2.1-x86_64-linux-musl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/.tool-versions +1 -0
  3. data/README.md +32 -17
  4. data/ext/fast_mmaped_file_rs/Cargo.toml +14 -9
  5. data/ext/fast_mmaped_file_rs/build.rs +5 -0
  6. data/ext/fast_mmaped_file_rs/extconf.rb +1 -3
  7. data/ext/fast_mmaped_file_rs/src/error.rs +2 -2
  8. data/ext/fast_mmaped_file_rs/src/file_entry.rs +222 -17
  9. data/ext/fast_mmaped_file_rs/src/file_info.rs +56 -6
  10. data/ext/fast_mmaped_file_rs/src/lib.rs +0 -1
  11. data/ext/fast_mmaped_file_rs/src/map.rs +12 -12
  12. data/ext/fast_mmaped_file_rs/src/mmap/inner.rs +704 -0
  13. data/ext/fast_mmaped_file_rs/src/mmap.rs +765 -25
  14. data/ext/fast_mmaped_file_rs/src/raw_entry.rs +1 -1
  15. data/ext/fast_mmaped_file_rs/src/testhelper.rs +1 -1
  16. data/lib/3.1/fast_mmaped_file_rs.so +0 -0
  17. data/lib/3.2/fast_mmaped_file_rs.so +0 -0
  18. data/lib/3.3/fast_mmaped_file_rs.so +0 -0
  19. data/lib/3.4/fast_mmaped_file_rs.so +0 -0
  20. data/lib/prometheus/client/formats/text.rb +1 -34
  21. data/lib/prometheus/client/helper/mmaped_file.rb +3 -3
  22. data/lib/prometheus/client/label_set_validator.rb +1 -2
  23. data/lib/prometheus/client/support/puma.rb +44 -0
  24. data/lib/prometheus/client/version.rb +1 -1
  25. metadata +67 -61
  26. data/ext/fast_mmaped_file/extconf.rb +0 -30
  27. data/ext/fast_mmaped_file/fast_mmaped_file.c +0 -122
  28. data/ext/fast_mmaped_file/file_format.c +0 -5
  29. data/ext/fast_mmaped_file/file_format.h +0 -11
  30. data/ext/fast_mmaped_file/file_parsing.c +0 -195
  31. data/ext/fast_mmaped_file/file_parsing.h +0 -27
  32. data/ext/fast_mmaped_file/file_reading.c +0 -102
  33. data/ext/fast_mmaped_file/file_reading.h +0 -30
  34. data/ext/fast_mmaped_file/globals.h +0 -14
  35. data/ext/fast_mmaped_file/mmap.c +0 -427
  36. data/ext/fast_mmaped_file/mmap.h +0 -61
  37. data/ext/fast_mmaped_file/rendering.c +0 -199
  38. data/ext/fast_mmaped_file/rendering.h +0 -8
  39. data/ext/fast_mmaped_file/utils.c +0 -56
  40. data/ext/fast_mmaped_file/utils.h +0 -22
  41. data/ext/fast_mmaped_file/value_access.c +0 -242
  42. data/ext/fast_mmaped_file/value_access.h +0 -15
  43. data/ext/fast_mmaped_file_rs/.cargo/config.toml +0 -23
  44. data/ext/fast_mmaped_file_rs/Cargo.lock +0 -790
  45. data/ext/fast_mmaped_file_rs/src/parser.rs +0 -346
  46. data/lib/2.7/fast_mmaped_file.so +0 -0
  47. data/lib/2.7/fast_mmaped_file_rs.so +0 -0
  48. data/lib/3.0/fast_mmaped_file.so +0 -0
  49. data/lib/3.0/fast_mmaped_file_rs.so +0 -0
  50. data/lib/3.1/fast_mmaped_file.so +0 -0
  51. data/lib/3.2/fast_mmaped_file.so +0 -0
  52. data/vendor/c/hashmap/.gitignore +0 -52
  53. data/vendor/c/hashmap/LICENSE +0 -21
  54. data/vendor/c/hashmap/README.md +0 -90
  55. data/vendor/c/hashmap/_config.yml +0 -1
  56. data/vendor/c/hashmap/src/hashmap.c +0 -692
  57. data/vendor/c/hashmap/src/hashmap.h +0 -267
  58. data/vendor/c/hashmap/test/Makefile +0 -22
  59. data/vendor/c/hashmap/test/hashmap_test.c +0 -608
  60. data/vendor/c/jsmn/.travis.yml +0 -4
  61. data/vendor/c/jsmn/LICENSE +0 -20
  62. data/vendor/c/jsmn/Makefile +0 -41
  63. data/vendor/c/jsmn/README.md +0 -168
  64. data/vendor/c/jsmn/example/jsondump.c +0 -126
  65. data/vendor/c/jsmn/example/simple.c +0 -76
  66. data/vendor/c/jsmn/jsmn.c +0 -314
  67. data/vendor/c/jsmn/jsmn.h +0 -76
  68. data/vendor/c/jsmn/library.json +0 -16
  69. data/vendor/c/jsmn/test/test.h +0 -27
  70. data/vendor/c/jsmn/test/tests.c +0 -407
  71. data/vendor/c/jsmn/test/testutil.h +0 -94
@@ -1,9 +1,41 @@
1
+ use magnus::exception::*;
2
+ use magnus::prelude::*;
3
+ use magnus::rb_sys::{AsRawValue, FromRawValue};
1
4
  use magnus::typed_data::Obj;
2
5
  use magnus::value::Fixnum;
3
- use magnus::{Integer, RArray, RClass, RHash, RString, Value};
6
+ use magnus::{eval, scan_args, Error, Integer, RArray, RClass, RHash, RString, Value};
7
+ use nix::libc::{c_char, c_long, c_ulong};
8
+ use rb_sys::rb_str_new_static;
9
+ use std::fs::File;
10
+ use std::io::{prelude::*, SeekFrom};
11
+ use std::mem;
12
+ use std::path::Path;
13
+ use std::ptr::NonNull;
14
+ use std::sync::RwLock;
4
15
 
16
+ use crate::err;
17
+ use crate::error::MmapError;
5
18
  use crate::file_entry::FileEntry;
6
19
  use crate::map::EntryMap;
20
+ use crate::raw_entry::RawEntry;
21
+ use crate::util::{self, CheckedOps};
22
+ use crate::Result;
23
+ use crate::HEADER_SIZE;
24
+ use inner::InnerMmap;
25
+
26
+ mod inner;
27
+
28
+ #[cfg(ruby_gte_3_4)]
29
+ /// The Ruby `STR_SHARED` flag, aka `FL_USER0`.
30
+ /// This was changed from `FL_USER2` in https://github.com/ruby/ruby/commit/6deeec5d459ecff5ec4628523b14ac7379fd942e.
31
+ const STR_SHARED: c_ulong = 1 << (12);
32
+
33
+ #[cfg(ruby_lte_3_3)]
34
+ /// The Ruby `STR_SHARED` flag, aka `FL_USER2`.
35
+ const STR_SHARED: c_ulong = 1 << (14);
36
+
37
+ /// The Ruby `STR_NOEMBED` flag, aka `FL_USER1`.
38
+ const STR_NOEMBED: c_ulong = 1 << (13);
7
39
 
8
40
  /// A Rust struct wrapped in a Ruby object, providing access to a memory-mapped
9
41
  /// file used to store, update, and read out Prometheus metrics.
@@ -35,9 +67,21 @@ use crate::map::EntryMap;
35
67
  /// | K2 Value |
36
68
  /// +-+-+-+-+-+-+-+
37
69
  /// ```
70
+ //
71
+ // The API imposed by `magnus` requires all methods to use shared borrows.
72
+ // This means we can't store any mutable state in the top-level struct,
73
+ // and must store the interior data behind a `RwLock`, which adds run-time
74
+ // checks that mutable operations have no concurrent reads or writes.
75
+ //
76
+ // We are further limited by the need to support subclassing in Ruby, which
77
+ // requires us to define an allocation function for the class, the
78
+ // `magnus::class::define_alloc_func()` function. This needs to support the
79
+ // `Default` trait, so a `File` cannot directly be held by the object being
80
+ // constructed. Having the `RwLock` hold an `Option` of the interior object
81
+ // resolves this.
38
82
  #[derive(Debug, Default)]
39
83
  #[magnus::wrap(class = "FastMmapedFileRs", free_immediately, size)]
40
- pub struct MmapedFile;
84
+ pub struct MmapedFile(RwLock<Option<InnerMmap>>);
41
85
 
42
86
  impl MmapedFile {
43
87
  /// call-seq:
@@ -50,14 +94,43 @@ impl MmapedFile {
50
94
  ///
51
95
  /// Creates a mapping that's shared with all other processes
52
96
  /// mapping the same area of the file.
53
- pub fn new(_klass: RClass, _args: &[Value]) -> magnus::error::Result<Obj<Self>> {
54
- Ok(Obj::wrap(Self))
97
+ pub fn new(klass: RClass, args: &[Value]) -> magnus::error::Result<Obj<Self>> {
98
+ let args = scan_args::scan_args::<(RString,), (), (), (), (), ()>(args)?;
99
+ let path = args.required.0;
100
+
101
+ let lock = MmapedFile(RwLock::new(None));
102
+ let obj = Obj::wrap_as(lock, klass);
103
+
104
+ let _: Value = obj.funcall("initialize", (path,))?;
105
+
106
+ Ok(obj)
55
107
  }
56
108
 
57
109
  /// Initialize a new `FastMmapedFileRs` object. This must be defined in
58
110
  /// order for inheritance to work.
59
- pub fn initialize(_rb_self: Obj<Self>, _fname: String) -> magnus::error::Result<()> {
60
- unimplemented!();
111
+ pub fn initialize(rb_self: Obj<Self>, fname: String) -> magnus::error::Result<()> {
112
+ let file = File::options()
113
+ .read(true)
114
+ .write(true)
115
+ .open(&fname)
116
+ .map_err(|_| err!(arg_error(), "Can't open {}", fname))?;
117
+
118
+ let inner = InnerMmap::new(fname.into(), file)?;
119
+ rb_self.insert_inner(inner)?;
120
+
121
+ let weak_klass = RClass::from_value(eval("ObjectSpace::WeakMap")?)
122
+ .ok_or_else(|| err!(no_method_error(), "unable to create WeakMap"))?;
123
+ let weak_obj_tracker = weak_klass.new_instance(())?;
124
+
125
+ // We will need to iterate over strings backed by the mmapped file, but
126
+ // don't want to prevent the GC from reaping them when the Ruby code
127
+ // has finished with them. `ObjectSpace::WeakMap` allows us to track
128
+ // them without extending their lifetime.
129
+ //
130
+ // https://ruby-doc.org/core-3.0.0/ObjectSpace/WeakMap.html
131
+ rb_self.ivar_set("@weak_obj_tracker", weak_obj_tracker)?;
132
+
133
+ Ok(())
61
134
  }
62
135
 
63
136
  /// Read the list of files provided from Ruby and convert them to a Prometheus
@@ -89,8 +162,38 @@ impl MmapedFile {
89
162
  /// self[start, length]
90
163
  ///
91
164
  /// return a substring of <em>length</em> characters from <em>start</em>
92
- pub fn slice(_rb_self: Obj<Self>, _args: &[Value]) -> magnus::error::Result<RString> {
93
- unimplemented!();
165
+ pub fn slice(rb_self: Obj<Self>, args: &[Value]) -> magnus::error::Result<RString> {
166
+ // The C implementation would trigger a GC cycle via `rb_gc_force_recycle`
167
+ // if the `MM_PROTECT` flag is set, but in practice this is never used.
168
+ // We omit this logic, particularly because `rb_gc_force_recycle` is a
169
+ // no-op as of Ruby 3.1.
170
+ let rs_self = &*rb_self;
171
+
172
+ let str = rs_self.str(rb_self)?;
173
+ rs_self._slice(rb_self, str, args)
174
+ }
175
+
176
+ fn _slice(
177
+ &self,
178
+ rb_self: Obj<Self>,
179
+ str: RString,
180
+ args: &[Value],
181
+ ) -> magnus::error::Result<RString> {
182
+ let substr: RString = str.funcall("[]", args)?;
183
+
184
+ // Track shared child strings which use the same backing storage.
185
+ if Self::rb_string_is_shared(substr) {
186
+ (*rb_self).track_rstring(rb_self, substr)?;
187
+ }
188
+
189
+ // The C implementation does this, perhaps to validate that the len we
190
+ // provided is actually being used.
191
+ (*rb_self).inner_mut(|inner| {
192
+ inner.set_len(str.len());
193
+ Ok(())
194
+ })?;
195
+
196
+ Ok(substr)
94
197
  }
95
198
 
96
199
  /// Document-method: msync
@@ -100,8 +203,23 @@ impl MmapedFile {
100
203
  /// call-seq: msync
101
204
  ///
102
205
  /// flush the file
103
- pub fn sync(&self, _args: &[Value]) -> magnus::error::Result<()> {
104
- unimplemented!();
206
+ pub fn sync(&self, args: &[Value]) -> magnus::error::Result<()> {
207
+ use nix::sys::mman::MsFlags;
208
+
209
+ let mut ms_async = false;
210
+ let args = scan_args::scan_args::<(), (Option<i32>,), (), (), (), ()>(args)?;
211
+
212
+ if let Some(flag) = args.optional.0 {
213
+ let flag = MsFlags::from_bits(flag).unwrap_or(MsFlags::empty());
214
+ ms_async = flag.contains(MsFlags::MS_ASYNC);
215
+ }
216
+
217
+ // The `memmap2` crate does not support the `MS_INVALIDATE` flag. We ignore that
218
+ // flag if passed in, checking only for `MS_ASYNC`. In practice no arguments are ever
219
+ // passed to this function, but we do this to maintain compatibility with the
220
+ // C implementation.
221
+ self.inner_mut(|inner| inner.flush(ms_async))
222
+ .map_err(|e| e.into())
105
223
  }
106
224
 
107
225
  /// Document-method: munmap
@@ -110,42 +228,664 @@ impl MmapedFile {
110
228
  /// call-seq: munmap
111
229
  ///
112
230
  /// terminate the association
113
- pub fn munmap(_rb_self: Obj<Self>) -> magnus::error::Result<()> {
114
- unimplemented!();
231
+ pub fn munmap(rb_self: Obj<Self>) -> magnus::error::Result<()> {
232
+ let rs_self = &*rb_self;
233
+
234
+ rs_self.inner_mut(|inner| {
235
+ // We are about to release the backing mmap for Ruby's String
236
+ // objects. If Ruby attempts to read from them the program will
237
+ // segfault. We update the length of all Strings to zero so Ruby
238
+ // does not attempt to access the now invalid address between now
239
+ // and when GC eventually reaps the objects.
240
+ //
241
+ // See the following for more detail:
242
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/39
243
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/41
244
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/merge_requests/80
245
+ inner.set_len(0);
246
+ Ok(())
247
+ })?;
248
+
249
+ // Update each String object to be zero-length.
250
+ let cap = util::cast_chk::<_, c_long>(rs_self.capacity(), "capacity")?;
251
+ rs_self.update_weak_map(rb_self, rs_self.as_mut_ptr(), cap)?;
252
+
253
+ // Remove the `InnerMmap` from the `RwLock`. This will drop
254
+ // it at the end of this function, unmapping and closing the file.
255
+ let _ = rs_self.take_inner()?;
256
+ Ok(())
115
257
  }
116
258
 
117
259
  /// Fetch the `used` header from the `.db` file, the length
118
260
  /// in bytes of the data written to the file.
119
261
  pub fn load_used(&self) -> magnus::error::Result<Integer> {
120
- unimplemented!();
262
+ let used = self.inner(|inner| inner.load_used())?;
263
+
264
+ Ok(Integer::from_u64(used as u64))
121
265
  }
122
266
 
123
267
  /// Update the `used` header for the `.db` file, the length
124
268
  /// in bytes of the data written to the file.
125
- pub fn save_used(_rb_self: Obj<Self>, _used: Fixnum) -> magnus::error::Result<Fixnum> {
126
- unimplemented!();
269
+ pub fn save_used(rb_self: Obj<Self>, used: Fixnum) -> magnus::error::Result<Fixnum> {
270
+ let rs_self = &*rb_self;
271
+ let used_uint = used.to_u32()?;
272
+
273
+ // If the underlying mmap is smaller than the header, then resize to fit.
274
+ // The file has already been expanded to page size when first opened, so
275
+ // even if the map is less than HEADER_SIZE, we're not at risk of a
276
+ // SIGBUS.
277
+ if rs_self.capacity() < HEADER_SIZE {
278
+ rs_self.expand_to_fit(rb_self, HEADER_SIZE)?;
279
+ }
280
+
281
+ rs_self.inner_mut(|inner| inner.save_used(used_uint))?;
282
+
283
+ Ok(used)
127
284
  }
128
285
 
129
286
  /// Fetch the value associated with a key from the mmap.
130
287
  /// If no entry is present, initialize with the default
131
288
  /// value provided.
132
289
  pub fn fetch_entry(
133
- _rb_self: Obj<Self>,
134
- _positions: RHash,
135
- _key: RString,
136
- _default_value: f64,
290
+ rb_self: Obj<Self>,
291
+ positions: RHash,
292
+ key: RString,
293
+ default_value: f64,
137
294
  ) -> magnus::error::Result<f64> {
138
- unimplemented!();
295
+ let rs_self = &*rb_self;
296
+ let position: Option<Fixnum> = positions.lookup(key)?;
297
+
298
+ if let Some(pos) = position {
299
+ let pos = pos.to_usize()?;
300
+ return rs_self
301
+ .inner(|inner| inner.load_value(pos))
302
+ .map_err(|e| e.into());
303
+ }
304
+
305
+ rs_self.check_expand(rb_self, key.len())?;
306
+
307
+ let value_offset: usize = rs_self.inner_mut(|inner| {
308
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
309
+ unsafe { inner.initialize_entry(key.as_slice(), default_value) }
310
+ })?;
311
+
312
+ // CAST: no-op on 64-bit, widening on 32-bit.
313
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
314
+
315
+ rs_self.load_value(value_offset)
139
316
  }
140
317
 
141
318
  /// Update the value of an existing entry, if present. Otherwise create a new entry
142
319
  /// for the key.
143
320
  pub fn upsert_entry(
144
- _rb_self: Obj<Self>,
145
- _positions: RHash,
146
- _key: RString,
147
- _value: f64,
321
+ rb_self: Obj<Self>,
322
+ positions: RHash,
323
+ key: RString,
324
+ value: f64,
148
325
  ) -> magnus::error::Result<f64> {
149
- unimplemented!();
326
+ let rs_self = &*rb_self;
327
+ let position: Option<Fixnum> = positions.lookup(key)?;
328
+
329
+ if let Some(pos) = position {
330
+ let pos = pos.to_usize()?;
331
+ return rs_self
332
+ .inner_mut(|inner| {
333
+ inner.save_value(pos, value)?;
334
+
335
+ // TODO just return `value` here instead of loading it?
336
+ // This is how the C implementation did it, but I don't
337
+ // see what the extra load gains us.
338
+ inner.load_value(pos)
339
+ })
340
+ .map_err(|e| e.into());
341
+ }
342
+
343
+ rs_self.check_expand(rb_self, key.len())?;
344
+
345
+ let value_offset: usize = rs_self.inner_mut(|inner| {
346
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
347
+ unsafe { inner.initialize_entry(key.as_slice(), value) }
348
+ })?;
349
+
350
+ // CAST: no-op on 64-bit, widening on 32-bit.
351
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
352
+
353
+ rs_self.load_value(value_offset)
354
+ }
355
+
356
+ /// Creates a Ruby String containing the section of the mmapped file that
357
+ /// has been written to.
358
+ fn str(&self, rb_self: Obj<Self>) -> magnus::error::Result<RString> {
359
+ let val_id = (*rb_self).inner(|inner| {
360
+ let ptr = inner.as_ptr();
361
+ let len = inner.len();
362
+
363
+ // SAFETY: This is safe so long as the data provided to Ruby meets its
364
+ // requirements. When unmapping the file this will no longer be the
365
+ // case, see the comment on `munmap` for how we handle this.
366
+ Ok(unsafe { rb_str_new_static(ptr as _, len as _) })
367
+ })?;
368
+
369
+ // SAFETY: We know that rb_str_new_static returns a VALUE.
370
+ let val = unsafe { Value::from_raw(val_id) };
371
+
372
+ // UNWRAP: We created this value as a string above.
373
+ let str = RString::from_value(val).unwrap();
374
+
375
+ // Freeze the root string so it can't be mutated out from under any
376
+ // substrings created. This object is never exposed to callers.
377
+ str.freeze();
378
+
379
+ // Track the RString in our `WeakMap` so we can update its address if
380
+ // we re-mmap the backing file.
381
+ (*rb_self).track_rstring(rb_self, str)?;
382
+
383
+ Ok(str)
384
+ }
385
+
386
+ /// If we reallocate, any live Ruby strings provided by the `str()` method
387
+ /// will be invalidated. We need to iterate over them using the `WeakMap` and update their
388
+ /// heap pointers to the newly allocated memory region.
389
+ fn update_weak_map(
390
+ &self,
391
+ rb_self: Obj<Self>,
392
+ old_ptr: *const c_char,
393
+ old_cap: c_long,
394
+ ) -> magnus::error::Result<()> {
395
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
396
+
397
+ let new_len = self.inner(|inner| util::cast_chk::<_, c_long>(inner.len(), "mmap len"))?;
398
+
399
+ // Iterate over the values of the `WeakMap`.
400
+ for val in tracker.enumeratorize("each_value", ()) {
401
+ let rb_string = val?;
402
+ let str = RString::from_value(rb_string)
403
+ .ok_or_else(|| err!(arg_error(), "weakmap value was not a string"))?;
404
+
405
+ // SAFETY: We're messing with Ruby's internals here, YOLO.
406
+ unsafe {
407
+ // Convert the magnus wrapper type to a raw string exposed by `rb_sys`,
408
+ // which provides access to its internals.
409
+ let mut raw_str = Self::rb_string_internal(str);
410
+
411
+ // Shared strings have their own `ptr` and `len` values, but `aux`
412
+ // is the id of the parent string so the GC can track this
413
+ // dependency. The `ptr` will always be an offset from the base
414
+ // address of the mmap, and `len` will be the length of the mmap
415
+ // less the offset from the base.
416
+ if Self::rb_string_is_shared(str) && new_len > 0 {
417
+ // Calculate how far into the original mmap the shared string
418
+ // started and update to the equivalent address in the new
419
+ // one.
420
+ let substr_ptr = raw_str.as_ref().as_.heap.ptr;
421
+ let offset = substr_ptr.offset_from(old_ptr);
422
+
423
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr().offset(offset);
424
+
425
+ let current_len = str.len() as c_long;
426
+ let new_shared_len = old_cap + current_len;
427
+
428
+ self.update_rstring_len(raw_str, new_shared_len);
429
+ continue;
430
+ }
431
+
432
+ // Update the string to point to the new mmapped file.
433
+ // We're matching the behavior of Ruby's `str_new_static` function.
434
+ // See https://github.com/ruby/ruby/blob/e51014f9c05aa65cbf203442d37fef7c12390015/string.c#L1030-L1053
435
+ //
436
+ // We deliberately do _NOT_ increment the `capa` field of the
437
+ // string to match the new `len`. We were initially doing this,
438
+ // but consistently triggered GCs in the middle of updating the
439
+ // string pointers, causing a segfault.
440
+ //
441
+ // See https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/45
442
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr();
443
+ self.update_rstring_len(raw_str, new_len);
444
+ }
445
+ }
446
+
447
+ Ok(())
448
+ }
449
+
450
+ /// Check that the mmap is large enough to contain the value to be added,
451
+ /// and expand it to fit if necessary.
452
+ fn check_expand(&self, rb_self: Obj<Self>, key_len: usize) -> magnus::error::Result<()> {
453
+ // CAST: no-op on 32-bit, widening on 64-bit.
454
+ let used = self.inner(|inner| inner.load_used())? as usize;
455
+ let entry_len = RawEntry::calc_total_len(key_len)?;
456
+
457
+ // We need the mmapped region to contain at least one byte beyond the
458
+ // written data to create a NUL-terminated C string. Validate that the
459
+ // new length does not exactly match or exceed the length of the mmap.
460
+ while self.capacity() <= used.add_chk(entry_len)? {
461
+ self.expand_to_fit(rb_self, self.capacity().mul_chk(2)?)?;
462
+ }
463
+
464
+ Ok(())
465
+ }
466
+
467
+ /// Expand the underlying file until it is long enough to fit `target_cap`.
468
+ /// This will remove the existing mmap, expand the file, then update any
469
+ /// strings held by the `WeakMap` to point to the newly mmapped address.
470
+ fn expand_to_fit(&self, rb_self: Obj<Self>, target_cap: usize) -> magnus::error::Result<()> {
471
+ if target_cap < self.capacity() {
472
+ return Err(err!(arg_error(), "Can't reduce the size of mmap"));
473
+ }
474
+
475
+ let mut new_cap = self.capacity();
476
+ while new_cap < target_cap {
477
+ new_cap = new_cap.mul_chk(2)?;
478
+ }
479
+
480
+ if new_cap != self.capacity() {
481
+ let old_ptr = self.as_mut_ptr();
482
+ let old_cap = util::cast_chk::<_, c_long>(self.capacity(), "capacity")?;
483
+
484
+ // Drop the old mmap.
485
+ let (mut file, path) = self.take_inner()?.munmap();
486
+
487
+ self.expand_file(&mut file, &path, target_cap)?;
488
+
489
+ // Re-mmap the expanded file.
490
+ let new_inner = InnerMmap::reestablish(path, file, target_cap)?;
491
+
492
+ self.insert_inner(new_inner)?;
493
+
494
+ return self.update_weak_map(rb_self, old_ptr, old_cap);
495
+ }
496
+
497
+ Ok(())
498
+ }
499
+
500
+ /// Use lseek(2) to seek past the end of the file and write a NUL byte. This
501
+ /// creates a file hole that expands the size of the file without consuming
502
+ /// disk space until it is actually written to.
503
+ fn expand_file(&self, file: &mut File, path: &Path, len: usize) -> Result<()> {
504
+ if len == 0 {
505
+ return Err(MmapError::overflowed(0, -1, "adding"));
506
+ }
507
+
508
+ // CAST: no-op on 64-bit, widening on 32-bit.
509
+ let len = len as u64;
510
+
511
+ match file.seek(SeekFrom::Start(len - 1)) {
512
+ Ok(_) => {}
513
+ Err(_) => {
514
+ return Err(MmapError::with_errno(format!("Can't lseek {}", len - 1)));
515
+ }
516
+ }
517
+
518
+ match file.write(&[0x0]) {
519
+ Ok(1) => {}
520
+ _ => {
521
+ return Err(MmapError::with_errno(format!(
522
+ "Can't extend {}",
523
+ path.display()
524
+ )));
525
+ }
526
+ }
527
+
528
+ Ok(())
529
+ }
530
+
531
+ fn track_rstring(&self, rb_self: Obj<Self>, str: RString) -> magnus::error::Result<()> {
532
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
533
+
534
+ // Use the string's Id as the key in the `WeakMap`.
535
+ let key = str.as_raw();
536
+ let _: Value = tracker.funcall("[]=", (key, str))?;
537
+ Ok(())
538
+ }
539
+
540
+ /// The total capacity of the underlying mmap.
541
+ #[inline]
542
+ fn capacity(&self) -> usize {
543
+ // UNWRAP: This is actually infallible, but we need to
544
+ // wrap it in a `Result` for use with `inner()`.
545
+ self.inner(|inner| Ok(inner.capacity())).unwrap()
546
+ }
547
+
548
+ fn load_value(&self, position: usize) -> magnus::error::Result<f64> {
549
+ self.inner(|inner| inner.load_value(position))
550
+ .map_err(|e| e.into())
551
+ }
552
+
553
+ fn as_mut_ptr(&self) -> *mut c_char {
554
+ // UNWRAP: This is actually infallible, but we need to
555
+ // wrap it in a `Result` for use with `inner()`.
556
+ self.inner(|inner| Ok(inner.as_mut_ptr() as *mut c_char))
557
+ .unwrap()
558
+ }
559
+
560
+ /// Takes a closure with immutable access to InnerMmap. Will fail if the inner
561
+ /// object has a mutable borrow or has been dropped.
562
+ fn inner<F, T>(&self, func: F) -> Result<T>
563
+ where
564
+ F: FnOnce(&InnerMmap) -> Result<T>,
565
+ {
566
+ let inner_opt = self.0.try_read().map_err(|_| MmapError::ConcurrentAccess)?;
567
+
568
+ let inner = inner_opt.as_ref().ok_or(MmapError::UnmappedFile)?;
569
+
570
+ func(inner)
571
+ }
572
+
573
+ /// Takes a closure with mutable access to InnerMmap. Will fail if the inner
574
+ /// object has an existing mutable borrow, or has been dropped.
575
+ fn inner_mut<F, T>(&self, func: F) -> Result<T>
576
+ where
577
+ F: FnOnce(&mut InnerMmap) -> Result<T>,
578
+ {
579
+ let mut inner_opt = self
580
+ .0
581
+ .try_write()
582
+ .map_err(|_| MmapError::ConcurrentAccess)?;
583
+
584
+ let inner = inner_opt.as_mut().ok_or(MmapError::UnmappedFile)?;
585
+
586
+ func(inner)
587
+ }
588
+
589
+ /// Take ownership of the `InnerMmap` from the `RwLock`.
590
+ /// Will fail if a mutable borrow is already held or the inner
591
+ /// object has been dropped.
592
+ fn take_inner(&self) -> Result<InnerMmap> {
593
+ let mut inner_opt = self
594
+ .0
595
+ .try_write()
596
+ .map_err(|_| MmapError::ConcurrentAccess)?;
597
+ match (*inner_opt).take() {
598
+ Some(i) => Ok(i),
599
+ None => Err(MmapError::UnmappedFile),
600
+ }
601
+ }
602
+
603
+ /// Move `new_inner` into the `RwLock`.
604
+ /// Will return an error if a mutable borrow is already held.
605
+ fn insert_inner(&self, new_inner: InnerMmap) -> Result<()> {
606
+ let mut inner_opt = self
607
+ .0
608
+ .try_write()
609
+ .map_err(|_| MmapError::ConcurrentAccess)?;
610
+ (*inner_opt).replace(new_inner);
611
+
612
+ Ok(())
613
+ }
614
+
615
+ /// Check if an RString is shared. Shared strings use the same underlying
616
+ /// storage as their parent, taking an offset from the start. By default
617
+ /// they must run to the end of the parent string.
618
+ fn rb_string_is_shared(rb_str: RString) -> bool {
619
+ // SAFETY: We only hold a reference to the raw object for the duration
620
+ // of this function, and no Ruby code is called.
621
+ let flags = unsafe {
622
+ let raw_str = Self::rb_string_internal(rb_str);
623
+ raw_str.as_ref().basic.flags
624
+ };
625
+ let shared_flags = STR_SHARED | STR_NOEMBED;
626
+
627
+ flags & shared_flags == shared_flags
628
+ }
629
+
630
+ /// Convert `magnus::RString` into the raw binding used by `rb_sys::RString`.
631
+ /// We need this to manually change the pointer and length values for strings
632
+ /// when moving the mmap to a new file.
633
+ ///
634
+ /// SAFETY: Calling Ruby code while the returned object is held may result
635
+ /// in it being mutated or dropped.
636
+ unsafe fn rb_string_internal(rb_str: RString) -> NonNull<rb_sys::RString> {
637
+ mem::transmute::<RString, NonNull<rb_sys::RString>>(rb_str)
638
+ }
639
+
640
+ #[cfg(ruby_lte_3_2)]
641
+ unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
642
+ raw_str.as_mut().as_.heap.len = new_len;
643
+ }
644
+
645
+ #[cfg(ruby_gte_3_3)]
646
+ unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
647
+ raw_str.as_mut().len = new_len;
648
+ }
649
+ }
650
+
651
+ #[cfg(test)]
652
+ mod test {
653
+ use magnus::error::Error;
654
+ use magnus::eval;
655
+ use magnus::Range;
656
+ use nix::unistd::{sysconf, SysconfVar};
657
+ use std::mem::size_of;
658
+
659
+ use super::*;
660
+ use crate::raw_entry::RawEntry;
661
+ use crate::testhelper::TestFile;
662
+
663
+ /// Create a wrapped MmapedFile object.
664
+ fn create_obj() -> Obj<MmapedFile> {
665
+ let TestFile {
666
+ file: _file,
667
+ path,
668
+ dir: _dir,
669
+ } = TestFile::new(&[0u8; 8]);
670
+
671
+ let path_str = path.display().to_string();
672
+ let rpath = RString::new(&path_str);
673
+
674
+ eval!("FastMmapedFileRs.new(path)", path = rpath).unwrap()
675
+ }
676
+
677
+ /// Add three entries to the mmap. Expected length is 56, 3x 16-byte
678
+ /// entries with 8-byte header.
679
+ fn populate_entries(rb_self: &Obj<MmapedFile>) -> RHash {
680
+ let positions = RHash::from_value(eval("{}").unwrap()).unwrap();
681
+
682
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("a"), 0.0).unwrap();
683
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("b"), 1.0).unwrap();
684
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("c"), 2.0).unwrap();
685
+
686
+ positions
687
+ }
688
+
689
+ #[test]
690
+ fn test_new() {
691
+ let _cleanup = unsafe { magnus::embed::init() };
692
+ let ruby = magnus::Ruby::get().unwrap();
693
+ crate::init(&ruby).unwrap();
694
+
695
+ let TestFile {
696
+ file,
697
+ path,
698
+ dir: _dir,
699
+ } = TestFile::new(&[0u8; 8]);
700
+
701
+ let path_str = path.display().to_string();
702
+ let rpath = RString::new(&path_str);
703
+
704
+ // Object created successfully
705
+ let result: std::result::Result<Obj<MmapedFile>, Error> =
706
+ eval!("FastMmapedFileRs.new(path)", path = rpath);
707
+ assert!(result.is_ok());
708
+
709
+ // Weak map added
710
+ let obj = result.unwrap();
711
+ let weak_tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
712
+ assert_eq!("ObjectSpace::WeakMap", weak_tracker.class().inspect());
713
+
714
+ // File expanded to page size
715
+ let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as u64;
716
+ let stat = file.metadata().unwrap();
717
+ assert_eq!(page_size, stat.len());
718
+
719
+ // Used set to header size
720
+ assert_eq!(
721
+ HEADER_SIZE as u64,
722
+ obj.load_used().unwrap().to_u64().unwrap()
723
+ );
724
+ }
725
+
726
+ #[test]
727
+ fn test_slice() {
728
+ let _cleanup = unsafe { magnus::embed::init() };
729
+ let ruby = magnus::Ruby::get().unwrap();
730
+ crate::init(&ruby).unwrap();
731
+
732
+ let obj = create_obj();
733
+ let _ = populate_entries(&obj);
734
+
735
+ // Validate header updated with new length
736
+ let header_range = Range::new(0, HEADER_SIZE, true).unwrap().as_value();
737
+ let header_slice = MmapedFile::slice(obj, &[header_range]).unwrap();
738
+ assert_eq!([56, 0, 0, 0, 0, 0, 0, 0], unsafe {
739
+ header_slice.as_slice()
740
+ });
741
+
742
+ let value_range = Range::new(HEADER_SIZE, 24, true).unwrap().as_value();
743
+ let value_slice = MmapedFile::slice(obj, &[value_range]).unwrap();
744
+
745
+ // Validate string length
746
+ assert_eq!(1u32.to_ne_bytes(), unsafe { &value_slice.as_slice()[0..4] });
747
+
748
+ // Validate string and padding
749
+ assert_eq!("a ", unsafe {
750
+ String::from_utf8_lossy(&value_slice.as_slice()[4..8])
751
+ });
752
+
753
+ // Validate value
754
+ assert_eq!(0.0f64.to_ne_bytes(), unsafe {
755
+ &value_slice.as_slice()[8..16]
756
+ });
757
+ }
758
+
759
+ #[test]
760
+ fn test_slice_resize() {
761
+ let _cleanup = unsafe { magnus::embed::init() };
762
+ let ruby = magnus::Ruby::get().unwrap();
763
+ crate::init(&ruby).unwrap();
764
+
765
+ fn assert_internals(
766
+ obj: Obj<MmapedFile>,
767
+ parent_id: c_ulong,
768
+ child_id: c_ulong,
769
+ unshared_id: c_ulong,
770
+ ) {
771
+ let rs_self = &*obj;
772
+ let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
773
+
774
+ let mmap_ptr = rs_self.as_mut_ptr();
775
+ let mmap_len = rs_self.capacity();
776
+
777
+ let mut parent_checked = false;
778
+ let mut child_checked = false;
779
+
780
+ for val in tracker.enumeratorize("each_value", ()) {
781
+ let rb_string = val.unwrap();
782
+ let str = RString::from_value(rb_string).unwrap();
783
+
784
+ unsafe {
785
+ let raw_str = MmapedFile::rb_string_internal(str);
786
+ if str.as_raw() == child_id {
787
+ assert_eq!(parent_id, raw_str.as_ref().as_.heap.aux.shared);
788
+
789
+ let child_offset = mmap_len as isize - str.len() as isize;
790
+ assert_eq!(mmap_ptr.offset(child_offset), raw_str.as_ref().as_.heap.ptr);
791
+
792
+ child_checked = true;
793
+ } else if str.as_raw() == parent_id {
794
+ assert_eq!(parent_id, str.as_raw());
795
+
796
+ assert_eq!(mmap_ptr, raw_str.as_ref().as_.heap.ptr);
797
+ assert_eq!(mmap_len as c_long, str.len() as c_long);
798
+ assert!(raw_str.as_ref().basic.flags & (STR_SHARED | STR_NOEMBED) > 0);
799
+ assert!(str.is_frozen());
800
+
801
+ parent_checked = true;
802
+ } else if str.as_raw() == unshared_id {
803
+ panic!("tracking unshared string");
804
+ } else {
805
+ panic!("unknown string");
806
+ }
807
+ }
808
+ }
809
+ assert!(parent_checked && child_checked);
810
+ }
811
+
812
+ let obj = create_obj();
813
+ let _ = populate_entries(&obj);
814
+
815
+ let rs_self = &*obj;
816
+
817
+ // Create a string containing the full mmap.
818
+ let parent_str = rs_self.str(obj).unwrap();
819
+ let parent_id = parent_str.as_raw();
820
+
821
+ // Ruby's shared strings are only created when they go to the end of
822
+ // the original string.
823
+ let len = rs_self.inner(|inner| Ok(inner.len())).unwrap();
824
+ let shareable_range = Range::new(1, len - 1, false).unwrap().as_value();
825
+
826
+ // This string should re-use the parent's buffer with an offset and have
827
+ // the parent's id in `as.heap.aux.shared`
828
+ let child_str = rs_self._slice(obj, parent_str, &[shareable_range]).unwrap();
829
+ let child_id = child_str.as_raw();
830
+
831
+ // A range that does not reach the end of the parent will not be shared.
832
+ assert!(len > 4);
833
+ let unshareable_range = Range::new(0, 4, false).unwrap().as_value();
834
+
835
+ // This string should NOT be tracked, it should own its own buffer.
836
+ let unshared_str = rs_self
837
+ ._slice(obj, parent_str, &[unshareable_range])
838
+ .unwrap();
839
+ let unshared_id = unshared_str.as_raw();
840
+ assert!(!MmapedFile::rb_string_is_shared(unshared_str));
841
+
842
+ assert_internals(obj, parent_id, child_id, unshared_id);
843
+
844
+ let orig_ptr = rs_self.as_mut_ptr();
845
+ // Expand a bunch to ensure we remap
846
+ for _ in 0..16 {
847
+ rs_self.expand_to_fit(obj, rs_self.capacity() * 2).unwrap();
848
+ }
849
+ let new_ptr = rs_self.as_mut_ptr();
850
+ assert!(orig_ptr != new_ptr);
851
+
852
+ // If we haven't updated the pointer to the newly remapped file this will segfault.
853
+ let _: Value = eval!("puts parent", parent = parent_str).unwrap();
854
+ let _: Value = eval!("puts child", child = child_str).unwrap();
855
+ let _: Value = eval!("puts unshared", unshared = unshared_str).unwrap();
856
+
857
+ // Confirm that tracked strings are still valid.
858
+ assert_internals(obj, parent_id, child_id, unshared_id);
859
+ }
860
+
861
+ #[test]
862
+ fn test_dont_fill_mmap() {
863
+ let _cleanup = unsafe { magnus::embed::init() };
864
+ let ruby = magnus::Ruby::get().unwrap();
865
+ crate::init(&ruby).unwrap();
866
+
867
+ let obj = create_obj();
868
+ let positions = populate_entries(&obj);
869
+
870
+ let rs_self = &*obj;
871
+
872
+ rs_self.expand_to_fit(obj, 1024).unwrap();
873
+
874
+ let current_used = rs_self.inner(|inner| inner.load_used()).unwrap() as usize;
875
+ let current_cap = rs_self.inner(|inner| Ok(inner.len())).unwrap();
876
+
877
+ // Create a new entry that exactly fills the capacity of the mmap.
878
+ let val_len =
879
+ current_cap - current_used - HEADER_SIZE - size_of::<f64>() - size_of::<u32>();
880
+ assert_eq!(
881
+ current_cap,
882
+ RawEntry::calc_total_len(val_len).unwrap() + current_used
883
+ );
884
+
885
+ let str = String::from_utf8(vec![b'A'; val_len]).unwrap();
886
+ MmapedFile::upsert_entry(obj, positions, RString::new(&str), 1.0).unwrap();
887
+
888
+ // Validate that we have expanded the mmap, ensuring a trailing NUL.
889
+ assert!(rs_self.capacity() > current_cap);
150
890
  }
151
891
  }