prometheus-client-mmap 1.2.4-x86_64-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/.tool-versions +1 -0
  3. data/README.md +281 -0
  4. data/ext/fast_mmaped_file_rs/Cargo.toml +35 -0
  5. data/ext/fast_mmaped_file_rs/README.md +52 -0
  6. data/ext/fast_mmaped_file_rs/build.rs +5 -0
  7. data/ext/fast_mmaped_file_rs/extconf.rb +28 -0
  8. data/ext/fast_mmaped_file_rs/src/error.rs +174 -0
  9. data/ext/fast_mmaped_file_rs/src/file_entry.rs +784 -0
  10. data/ext/fast_mmaped_file_rs/src/file_info.rs +240 -0
  11. data/ext/fast_mmaped_file_rs/src/lib.rs +78 -0
  12. data/ext/fast_mmaped_file_rs/src/macros.rs +14 -0
  13. data/ext/fast_mmaped_file_rs/src/map.rs +492 -0
  14. data/ext/fast_mmaped_file_rs/src/mmap/inner.rs +704 -0
  15. data/ext/fast_mmaped_file_rs/src/mmap.rs +891 -0
  16. data/ext/fast_mmaped_file_rs/src/raw_entry.rs +473 -0
  17. data/ext/fast_mmaped_file_rs/src/testhelper.rs +222 -0
  18. data/ext/fast_mmaped_file_rs/src/util.rs +121 -0
  19. data/lib/3.1/fast_mmaped_file_rs.so +0 -0
  20. data/lib/3.2/fast_mmaped_file_rs.so +0 -0
  21. data/lib/3.3/fast_mmaped_file_rs.so +0 -0
  22. data/lib/3.4/fast_mmaped_file_rs.so +0 -0
  23. data/lib/prometheus/client/configuration.rb +23 -0
  24. data/lib/prometheus/client/counter.rb +27 -0
  25. data/lib/prometheus/client/formats/text.rb +85 -0
  26. data/lib/prometheus/client/gauge.rb +40 -0
  27. data/lib/prometheus/client/helper/entry_parser.rb +132 -0
  28. data/lib/prometheus/client/helper/file_locker.rb +50 -0
  29. data/lib/prometheus/client/helper/json_parser.rb +23 -0
  30. data/lib/prometheus/client/helper/metrics_processing.rb +45 -0
  31. data/lib/prometheus/client/helper/metrics_representation.rb +51 -0
  32. data/lib/prometheus/client/helper/mmaped_file.rb +64 -0
  33. data/lib/prometheus/client/helper/plain_file.rb +29 -0
  34. data/lib/prometheus/client/histogram.rb +80 -0
  35. data/lib/prometheus/client/label_set_validator.rb +85 -0
  36. data/lib/prometheus/client/metric.rb +80 -0
  37. data/lib/prometheus/client/mmaped_dict.rb +79 -0
  38. data/lib/prometheus/client/mmaped_value.rb +154 -0
  39. data/lib/prometheus/client/page_size.rb +17 -0
  40. data/lib/prometheus/client/push.rb +203 -0
  41. data/lib/prometheus/client/rack/collector.rb +88 -0
  42. data/lib/prometheus/client/rack/exporter.rb +96 -0
  43. data/lib/prometheus/client/registry.rb +65 -0
  44. data/lib/prometheus/client/simple_value.rb +31 -0
  45. data/lib/prometheus/client/summary.rb +69 -0
  46. data/lib/prometheus/client/support/puma.rb +44 -0
  47. data/lib/prometheus/client/support/unicorn.rb +35 -0
  48. data/lib/prometheus/client/uses_value_type.rb +20 -0
  49. data/lib/prometheus/client/version.rb +5 -0
  50. data/lib/prometheus/client.rb +58 -0
  51. data/lib/prometheus.rb +3 -0
  52. metadata +249 -0
@@ -0,0 +1,891 @@
1
+ use magnus::exception::*;
2
+ use magnus::prelude::*;
3
+ use magnus::rb_sys::{AsRawValue, FromRawValue};
4
+ use magnus::typed_data::Obj;
5
+ use magnus::value::Fixnum;
6
+ use magnus::{eval, scan_args, Error, Integer, RArray, RClass, RHash, RString, Value};
7
+ use nix::libc::{c_char, c_long, c_ulong};
8
+ use rb_sys::rb_str_new_static;
9
+ use std::fs::File;
10
+ use std::io::{prelude::*, SeekFrom};
11
+ use std::mem;
12
+ use std::path::Path;
13
+ use std::ptr::NonNull;
14
+ use std::sync::RwLock;
15
+
16
+ use crate::err;
17
+ use crate::error::MmapError;
18
+ use crate::file_entry::FileEntry;
19
+ use crate::map::EntryMap;
20
+ use crate::raw_entry::RawEntry;
21
+ use crate::util::{self, CheckedOps};
22
+ use crate::Result;
23
+ use crate::HEADER_SIZE;
24
+ use inner::InnerMmap;
25
+
26
+ mod inner;
27
+
28
+ #[cfg(ruby_gte_3_4)]
29
+ /// The Ruby `STR_SHARED` flag, aka `FL_USER0` (bit 12 of the object flags), used when the `ruby_gte_3_4` cfg is set.
30
+ /// This was changed from `FL_USER2` in https://github.com/ruby/ruby/commit/6deeec5d459ecff5ec4628523b14ac7379fd942e.
31
+ const STR_SHARED: c_ulong = 1 << (12);
32
+
33
+ #[cfg(ruby_lte_3_3)]
34
+ /// The Ruby `STR_SHARED` flag, aka `FL_USER2` (bit 14 of the object flags), used when the `ruby_lte_3_3` cfg is set.
35
+ const STR_SHARED: c_ulong = 1 << (14);
36
+
37
+ /// The Ruby `STR_NOEMBED` flag, aka `FL_USER1` (bit 13 of the object flags).
38
+ const STR_NOEMBED: c_ulong = 1 << (13);
39
+
40
+ /// A Rust struct wrapped in a Ruby object, providing access to a memory-mapped
41
+ /// file used to store, update, and read out Prometheus metrics.
42
+ ///
43
+ /// - File format:
44
+ ///   - Header:
45
+ ///     - 4 bytes: u32 - total size of metrics in file.
46
+ ///     - 4 bytes: NUL byte padding.
47
+ ///   - Repeating metrics entries:
48
+ ///     - 4 bytes: u32 - entry JSON string size.
49
+ ///     - `N` bytes: UTF-8 encoded JSON string used as entry key.
50
+ ///     - (8 - (4 + `N`) % 8) bytes: 1 to 8 padding space (0x20) bytes to
51
+ ///       reach 8-byte alignment.
52
+ ///     - 8 bytes: f64 - entry value.
53
+ ///
54
+ /// All numbers are saved in native-endian format.
55
+ ///
56
+ /// Generated via [luismartingarcia/protocol](https://github.com/luismartingarcia/protocol):
57
+ ///
58
+ ///
59
+ /// ```
60
+ /// protocol "Used:4,Pad:4,K1 Size:4,K1 Name:4,K1 Value:8,K2 Size:4,K2 Name:4,K2 Value:8"
61
+ ///
62
+ ///  0                   1                   2                   3
63
+ ///  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
64
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
65
+ /// | Used  |  Pad  |K1 Size|K1 Name|   K1 Value    |K2 Size|K2 Name|
66
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
67
+ /// | K2 Value |
68
+ /// +-+-+-+-+-+-+-+
69
+ /// ```
70
+ //
71
+ // The API imposed by `magnus` requires all methods to use shared borrows.
72
+ // This means we can't store any mutable state in the top-level struct,
73
+ // and must store the interior data behind a `RwLock`, which adds run-time
74
+ // checks that mutable operations have no concurrent reads or writes.
75
+ //
76
+ // We are further limited by the need to support subclassing in Ruby, which
77
+ // requires us to define an allocation function for the class via the
78
+ // `magnus::class::define_alloc_func()` function. This requires the type to
79
+ // implement the `Default` trait, so a `File` cannot be held directly by the
80
+ // object being constructed. Having the `RwLock` hold an `Option` of the
81
+ // interior object resolves this: the default value is simply `None`.
82
+ #[derive(Debug, Default)]
83
+ #[magnus::wrap(class = "FastMmapedFileRs", free_immediately, size)]
84
+ pub struct MmapedFile(RwLock<Option<InnerMmap>>);
85
+
86
+ impl MmapedFile {
87
+ /// call-seq:
88
+ /// new(file)
89
+ ///
90
+ /// create a new Mmap object
91
+ ///
92
+ /// * <em>file</em>
93
+ ///
94
+ ///
95
+ /// Creates a mapping that's shared with all other processes
96
+ /// mapping the same area of the file.
97
+ pub fn new(klass: RClass, args: &[Value]) -> magnus::error::Result<Obj<Self>> {
98
+ let args = scan_args::scan_args::<(RString,), (), (), (), (), ()>(args)?;
99
+ let path = args.required.0;
100
+
101
+ let lock = MmapedFile(RwLock::new(None));
102
+ let obj = Obj::wrap_as(lock, klass);
103
+
104
+ let _: Value = obj.funcall("initialize", (path,))?;
105
+
106
+ Ok(obj)
107
+ }
108
+
109
+ /// Initialize a new `FastMmapedFileRs` object. This must be defined in
110
+ /// order for inheritance to work.
111
+ pub fn initialize(rb_self: Obj<Self>, fname: String) -> magnus::error::Result<()> {
112
+ let file = File::options()
113
+ .read(true)
114
+ .write(true)
115
+ .open(&fname)
116
+ .map_err(|_| err!(arg_error(), "Can't open {}", fname))?;
117
+
118
+ let inner = InnerMmap::new(fname.into(), file)?;
119
+ rb_self.insert_inner(inner)?;
120
+
121
+ let weak_klass = RClass::from_value(eval("ObjectSpace::WeakMap")?)
122
+ .ok_or_else(|| err!(no_method_error(), "unable to create WeakMap"))?;
123
+ let weak_obj_tracker = weak_klass.new_instance(())?;
124
+
125
+ // We will need to iterate over strings backed by the mmapped file, but
126
+ // don't want to prevent the GC from reaping them when the Ruby code
127
+ // has finished with them. `ObjectSpace::WeakMap` allows us to track
128
+ // them without extending their lifetime.
129
+ //
130
+ // https://ruby-doc.org/core-3.0.0/ObjectSpace/WeakMap.html
131
+ rb_self.ivar_set("@weak_obj_tracker", weak_obj_tracker)?;
132
+
133
+ Ok(())
134
+ }
135
+
136
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
137
+ /// metrics String.
138
+ pub fn to_metrics(file_list: RArray) -> magnus::error::Result<String> {
139
+ let mut map = EntryMap::new();
140
+ map.aggregate_files(file_list)?;
141
+
142
+ let sorted = map.into_sorted()?;
143
+
144
+ FileEntry::entries_to_string(sorted).map_err(|e| e.into())
145
+ }
146
+
147
+ /// Document-method: []
148
+ /// Document-method: slice
149
+ ///
150
+ /// call-seq: [](args)
151
+ ///
152
+ /// Element reference - with the following syntax:
153
+ ///
154
+ /// self[nth]
155
+ ///
156
+ /// retrieve the <em>nth</em> character
157
+ ///
158
+ /// self[start..last]
159
+ ///
160
+ /// return a substring from <em>start</em> to <em>last</em>
161
+ ///
162
+ /// self[start, length]
163
+ ///
164
+ /// return a substring of <em>lenght</em> characters from <em>start</em>
165
+ pub fn slice(rb_self: Obj<Self>, args: &[Value]) -> magnus::error::Result<RString> {
166
+ // The C implementation would trigger a GC cycle via `rb_gc_force_recycle`
167
+ // if the `MM_PROTECT` flag is set, but in practice this is never used.
168
+ // We omit this logic, particularly because `rb_gc_force_recycle` is a
169
+ // no-op as of Ruby 3.1.
170
+ let rs_self = &*rb_self;
171
+
172
+ let str = rs_self.str(rb_self)?;
173
+ rs_self._slice(rb_self, str, args)
174
+ }
175
+
176
+ fn _slice(
177
+ &self,
178
+ rb_self: Obj<Self>,
179
+ str: RString,
180
+ args: &[Value],
181
+ ) -> magnus::error::Result<RString> {
182
+ let substr: RString = str.funcall("[]", args)?;
183
+
184
+ // Track shared child strings which use the same backing storage.
185
+ if Self::rb_string_is_shared(substr) {
186
+ (*rb_self).track_rstring(rb_self, substr)?;
187
+ }
188
+
189
+ // The C implementation does this, perhaps to validate that the len we
190
+ // provided is actually being used.
191
+ (*rb_self).inner_mut(|inner| {
192
+ inner.set_len(str.len());
193
+ Ok(())
194
+ })?;
195
+
196
+ Ok(substr)
197
+ }
198
+
199
+ /// Document-method: msync
200
+ /// Document-method: sync
201
+ /// Document-method: flush
202
+ ///
203
+ /// call-seq: msync
204
+ ///
205
+ /// flush the file
206
+ pub fn sync(&self, args: &[Value]) -> magnus::error::Result<()> {
207
+ use nix::sys::mman::MsFlags;
208
+
209
+ let mut ms_async = false;
210
+ let args = scan_args::scan_args::<(), (Option<i32>,), (), (), (), ()>(args)?;
211
+
212
+ if let Some(flag) = args.optional.0 {
213
+ let flag = MsFlags::from_bits(flag).unwrap_or(MsFlags::empty());
214
+ ms_async = flag.contains(MsFlags::MS_ASYNC);
215
+ }
216
+
217
+ // The `memmap2` crate does not support the `MS_INVALIDATE` flag. We ignore that
218
+ // flag if passed in, checking only for `MS_ASYNC`. In practice no arguments are ever
219
+ // passed to this function, but we do this to maintain compatibility with the
220
+ // C implementation.
221
+ self.inner_mut(|inner| inner.flush(ms_async))
222
+ .map_err(|e| e.into())
223
+ }
224
+
225
+ /// Document-method: munmap
226
+ /// Document-method: unmap
227
+ ///
228
+ /// call-seq: munmap
229
+ ///
230
+ /// terminate the association between this object and the mapped file: tracked Ruby strings are truncated to zero length, then the inner mmap is removed and dropped.
231
+ pub fn munmap(rb_self: Obj<Self>) -> magnus::error::Result<()> {
232
+ let rs_self = &*rb_self;
233
+
234
+ rs_self.inner_mut(|inner| {
235
+ // We are about to release the backing mmap for Ruby's String
236
+ // objects. If Ruby attempts to read from them the program will
237
+ // segfault. We update the length of all Strings to zero so Ruby
238
+ // does not attempt to access the now invalid address between now
239
+ // and when GC eventually reaps the objects.
240
+ //
241
+ // See the following for more detail:
242
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/39
243
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/41
244
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/merge_requests/80
245
+ inner.set_len(0);
246
+ Ok(())
247
+ })?;
248
+
249
+ // Update each tracked String object to be zero-length (the inner length was set to 0 above).
250
+ let cap = util::cast_chk::<_, c_long>(rs_self.capacity(), "capacity")?;
251
+ rs_self.update_weak_map(rb_self, rs_self.as_mut_ptr(), cap)?;
252
+
253
+ // Remove the `InnerMmap` from the `RwLock`. Binding it to `_` drops it
254
+ // immediately, which should unmap and close the file.
255
+ let _ = rs_self.take_inner()?;
256
+ Ok(())
257
+ }
258
+
259
+ /// Fetch the `used` header from the `.db` file, the length
260
+ /// in bytes of the data written to the file.
261
+ pub fn load_used(&self) -> magnus::error::Result<Integer> {
262
+ let used = self.inner(|inner| inner.load_used())?;
263
+
264
+ Ok(Integer::from_u64(used as u64))
265
+ }
266
+
267
+ /// Update the `used` header for the `.db` file, the length
268
+ /// in bytes of the data written to the file.
269
+ pub fn save_used(rb_self: Obj<Self>, used: Fixnum) -> magnus::error::Result<Fixnum> {
270
+ let rs_self = &*rb_self;
271
+ let used_uint = used.to_u32()?;
272
+
273
+ // If the underlying mmap is smaller than the header, then resize to fit.
274
+ // The file has already been expanded to page size when first opened, so
275
+ // even if the map is less than HEADER_SIZE, we're not at risk of a
276
+ // SIGBUS.
277
+ if rs_self.capacity() < HEADER_SIZE {
278
+ rs_self.expand_to_fit(rb_self, HEADER_SIZE)?;
279
+ }
280
+
281
+ rs_self.inner_mut(|inner| inner.save_used(used_uint))?;
282
+
283
+ Ok(used)
284
+ }
285
+
286
+ /// Fetch the value associated with a key from the mmap.
287
+ /// If no entry is present, initialize with the default
288
+ /// value provided.
289
+ pub fn fetch_entry(
290
+ rb_self: Obj<Self>,
291
+ positions: RHash,
292
+ key: RString,
293
+ default_value: f64,
294
+ ) -> magnus::error::Result<f64> {
295
+ let rs_self = &*rb_self;
296
+ let position: Option<Fixnum> = positions.lookup(key)?;
297
+
298
+ if let Some(pos) = position {
299
+ let pos = pos.to_usize()?;
300
+ return rs_self
301
+ .inner(|inner| inner.load_value(pos))
302
+ .map_err(|e| e.into());
303
+ }
304
+
305
+ rs_self.check_expand(rb_self, key.len())?;
306
+
307
+ let value_offset: usize = rs_self.inner_mut(|inner| {
308
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
309
+ unsafe { inner.initialize_entry(key.as_slice(), default_value) }
310
+ })?;
311
+
312
+ // CAST: no-op on 64-bit, widening on 32-bit.
313
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
314
+
315
+ rs_self.load_value(value_offset)
316
+ }
317
+
318
+ /// Update the value of an existing entry, if present. Otherwise create a new entry
319
+ /// for the key.
320
+ pub fn upsert_entry(
321
+ rb_self: Obj<Self>,
322
+ positions: RHash,
323
+ key: RString,
324
+ value: f64,
325
+ ) -> magnus::error::Result<f64> {
326
+ let rs_self = &*rb_self;
327
+ let position: Option<Fixnum> = positions.lookup(key)?;
328
+
329
+ if let Some(pos) = position {
330
+ let pos = pos.to_usize()?;
331
+ return rs_self
332
+ .inner_mut(|inner| {
333
+ inner.save_value(pos, value)?;
334
+
335
+ // TODO just return `value` here instead of loading it?
336
+ // This is how the C implementation did it, but I don't
337
+ // see what the extra load gains us.
338
+ inner.load_value(pos)
339
+ })
340
+ .map_err(|e| e.into());
341
+ }
342
+
343
+ rs_self.check_expand(rb_self, key.len())?;
344
+
345
+ let value_offset: usize = rs_self.inner_mut(|inner| {
346
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
347
+ unsafe { inner.initialize_entry(key.as_slice(), value) }
348
+ })?;
349
+
350
+ // CAST: no-op on 64-bit, widening on 32-bit.
351
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
352
+
353
+ rs_self.load_value(value_offset)
354
+ }
355
+
356
+ /// Creates a frozen Ruby String over the mmapped file, using the inner pointer and
357
+ /// length (no copy is made by `rb_str_new_static`), and registers it with the weak tracker.
358
+ fn str(&self, rb_self: Obj<Self>) -> magnus::error::Result<RString> {
359
+ let val_id = (*rb_self).inner(|inner| {
360
+ let ptr = inner.as_ptr();
361
+ let len = inner.len();
362
+
363
+ // SAFETY: This is safe so long as the data provided to Ruby meets its
364
+ // requirements. When unmapping the file this will no longer be the
365
+ // case, see the comment in `munmap` for how we handle this.
366
+ Ok(unsafe { rb_str_new_static(ptr as _, len as _) })
367
+ })?;
368
+
369
+ // SAFETY: We know that `rb_str_new_static` returns a VALUE.
370
+ let val = unsafe { Value::from_raw(val_id) };
371
+
372
+ // UNWRAP: We created this value as a string above.
373
+ let str = RString::from_value(val).unwrap();
374
+
375
+ // Freeze the root string so it can't be mutated out from under any
376
+ // substrings created. This object is never exposed to callers.
377
+ str.freeze();
378
+
379
+ // Track the RString in our `WeakMap` so we can update its address if
380
+ // we re-mmap the backing file.
381
+ (*rb_self).track_rstring(rb_self, str)?;
382
+
383
+ Ok(str)
384
+ }
385
+
386
+ /// If we reallocate, any live Ruby strings provided by the `str()` method
387
+ /// will be invalidated. We need to iterate over them via the `WeakMap` and update
388
+ /// their heap pointers and lengths to match the newly mapped memory region. `old_ptr` and `old_cap` describe the previous mapping.
389
+ fn update_weak_map(
390
+ &self,
391
+ rb_self: Obj<Self>,
392
+ old_ptr: *const c_char,
393
+ old_cap: c_long,
394
+ ) -> magnus::error::Result<()> {
395
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
396
+
397
+ let new_len = self.inner(|inner| util::cast_chk::<_, c_long>(inner.len(), "mmap len"))?;
398
+
399
+ // Iterate over the values of the `WeakMap`.
400
+ for val in tracker.enumeratorize("each_value", ()) {
401
+ let rb_string = val?;
402
+ let str = RString::from_value(rb_string)
403
+ .ok_or_else(|| err!(arg_error(), "weakmap value was not a string"))?;
404
+
405
+ // SAFETY: We write directly to Ruby's internal string representation; no Ruby code is called inside this block while `raw_str` is held.
406
+ unsafe {
407
+ // Convert the magnus wrapper type to a raw string exposed by `rb_sys`,
408
+ // which provides access to its internals.
409
+ let mut raw_str = Self::rb_string_internal(str);
410
+
411
+ // Shared strings have their own `ptr` and `len` values, but `aux`
412
+ // is the id of the parent string so the GC can track this
413
+ // dependency. The `ptr` will always be an offset from the base
414
+ // address of the mmap, and `len` will be the length of the mmap
415
+ // less the offset from the base. NOTE(review): `old_cap + current_len` below only equals that when the mapping exactly doubled - confirm for other growth factors.
416
+ if Self::rb_string_is_shared(str) && new_len > 0 {
417
+ // Calculate how far into the original mmap the shared string
418
+ // started and update to the equivalent address in the new
419
+ // one.
420
+ let substr_ptr = raw_str.as_ref().as_.heap.ptr;
421
+ let offset = substr_ptr.offset_from(old_ptr);
422
+
423
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr().offset(offset);
424
+
425
+ let current_len = str.len() as c_long;
426
+ let new_shared_len = old_cap + current_len;
427
+
428
+ self.update_rstring_len(raw_str, new_shared_len);
429
+ continue;
430
+ }
431
+
432
+ // Update the string to point to the new mmapped file.
433
+ // We're matching the behavior of Ruby's `str_new_static` function.
434
+ // See https://github.com/ruby/ruby/blob/e51014f9c05aa65cbf203442d37fef7c12390015/string.c#L1030-L1053
435
+ //
436
+ // We deliberately do _NOT_ increment the `capa` field of the
437
+ // string to match the new `len`. We were initially doing this,
438
+ // but consistently triggered GCs in the middle of updating the
439
+ // string pointers, causing a segfault.
440
+ //
441
+ // See https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/45
442
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr();
443
+ self.update_rstring_len(raw_str, new_len);
444
+ }
445
+ }
446
+
447
+ Ok(())
448
+ }
449
+
450
+ /// Check that the mmap is large enough to contain the value to be added,
451
+ /// and expand it to fit if necessary.
452
+ fn check_expand(&self, rb_self: Obj<Self>, key_len: usize) -> magnus::error::Result<()> {
453
+ // CAST: no-op on 32-bit, widening on 64-bit.
454
+ let used = self.inner(|inner| inner.load_used())? as usize;
455
+ let entry_len = RawEntry::calc_total_len(key_len)?;
456
+
457
+ // We need the mmapped region to contain at least one byte beyond the
458
+ // written data to create a NUL- terminated C string. Validate that
459
+ // new length does not exactly match or exceed the length of the mmap.
460
+ while self.capacity() <= used.add_chk(entry_len)? {
461
+ self.expand_to_fit(rb_self, self.capacity().mul_chk(2)?)?;
462
+ }
463
+
464
+ Ok(())
465
+ }
466
+
467
+ /// Expand the underlying file until it is long enough to fit `target_cap`.
468
+ /// This will remove the existing mmap, expand the file, then update any
469
+ /// strings held by the `WeakMap` to point to the newly mmapped address. Returns an `ArgumentError` if `target_cap` is below the current capacity.
470
+ fn expand_to_fit(&self, rb_self: Obj<Self>, target_cap: usize) -> magnus::error::Result<()> {
471
+ if target_cap < self.capacity() {
472
+ return Err(err!(arg_error(), "Can't reduce the size of mmap"));
473
+ }
474
+
475
+ let mut new_cap = self.capacity();
476
+ while new_cap < target_cap {
477
+ new_cap = new_cap.mul_chk(2)?;
478
+ }
479
+
480
+ if new_cap != self.capacity() {
481
+ let old_ptr = self.as_mut_ptr();
482
+ let old_cap = util::cast_chk::<_, c_long>(self.capacity(), "capacity")?;
483
+
484
+ // Drop the old mmap, keeping the open file and its path.
485
+ let (mut file, path) = self.take_inner()?.munmap();
486
+
487
+ self.expand_file(&mut file, &path, target_cap)?;
488
+
489
+ // Re-mmap the expanded file. NOTE(review): the file and mapping are sized to `target_cap`; the doubled `new_cap` is only used to decide whether to remap - confirm this is intended.
490
+ let new_inner = InnerMmap::reestablish(path, file, target_cap)?;
491
+
492
+ self.insert_inner(new_inner)?;
493
+
494
+ return self.update_weak_map(rb_self, old_ptr, old_cap);
495
+ }
496
+
497
+ Ok(())
498
+ }
499
+
500
+ /// Use lseek(2) to seek past the end of the file and write a NUL byte. This
501
+ /// creates a file hole that expands the size of the file without consuming
502
+ /// disk space until it is actually written to.
503
+ fn expand_file(&self, file: &mut File, path: &Path, len: usize) -> Result<()> {
504
+ if len == 0 {
505
+ return Err(MmapError::overflowed(0, -1, "adding"));
506
+ }
507
+
508
+ // CAST: no-op on 64-bit, widening on 32-bit.
509
+ let len = len as u64;
510
+
511
+ match file.seek(SeekFrom::Start(len - 1)) {
512
+ Ok(_) => {}
513
+ Err(_) => {
514
+ return Err(MmapError::with_errno(format!("Can't lseek {}", len - 1)));
515
+ }
516
+ }
517
+
518
+ match file.write(&[0x0]) {
519
+ Ok(1) => {}
520
+ _ => {
521
+ return Err(MmapError::with_errno(format!(
522
+ "Can't extend {}",
523
+ path.display()
524
+ )));
525
+ }
526
+ }
527
+
528
+ Ok(())
529
+ }
530
+
531
+ fn track_rstring(&self, rb_self: Obj<Self>, str: RString) -> magnus::error::Result<()> {
532
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
533
+
534
+ // Use the string's Id as the key in the `WeakMap`.
535
+ let key = str.as_raw();
536
+ let _: Value = tracker.funcall("[]=", (key, str))?;
537
+ Ok(())
538
+ }
539
+
540
+ /// The total capacity of the underlying mmap.
541
+ #[inline]
542
+ fn capacity(&self) -> usize {
543
+ // UNWRAP: This is actually infallible, but we need to
544
+ // wrap it in a `Result` for use with `inner()`.
545
+ self.inner(|inner| Ok(inner.capacity())).unwrap()
546
+ }
547
+
548
+ fn load_value(&self, position: usize) -> magnus::error::Result<f64> {
549
+ self.inner(|inner| inner.load_value(position))
550
+ .map_err(|e| e.into())
551
+ }
552
+
553
+ fn as_mut_ptr(&self) -> *mut c_char {
554
+ // UNWRAP: This is actually infallible, but we need to
555
+ // wrap it in a `Result` for use with `inner()`.
556
+ self.inner(|inner| Ok(inner.as_mut_ptr() as *mut c_char))
557
+ .unwrap()
558
+ }
559
+
560
+ /// Takes a closure with immutable access to InnerMmap. Will fail if the inner
561
+ /// object has a mutable borrow or has been dropped.
562
+ fn inner<F, T>(&self, func: F) -> Result<T>
563
+ where
564
+ F: FnOnce(&InnerMmap) -> Result<T>,
565
+ {
566
+ let inner_opt = self.0.try_read().map_err(|_| MmapError::ConcurrentAccess)?;
567
+
568
+ let inner = inner_opt.as_ref().ok_or(MmapError::UnmappedFile)?;
569
+
570
+ func(inner)
571
+ }
572
+
573
+ /// Takes a closure with mutable access to InnerMmap. Will fail if the inner
574
+ /// object has an existing mutable borrow, or has been dropped.
575
+ fn inner_mut<F, T>(&self, func: F) -> Result<T>
576
+ where
577
+ F: FnOnce(&mut InnerMmap) -> Result<T>,
578
+ {
579
+ let mut inner_opt = self
580
+ .0
581
+ .try_write()
582
+ .map_err(|_| MmapError::ConcurrentAccess)?;
583
+
584
+ let inner = inner_opt.as_mut().ok_or(MmapError::UnmappedFile)?;
585
+
586
+ func(inner)
587
+ }
588
+
589
+ /// Take ownership of the `InnerMmap` from the `RwLock`.
590
+ /// Will fail if a mutable borrow is already held or the inner
591
+ /// object has been dropped.
592
+ fn take_inner(&self) -> Result<InnerMmap> {
593
+ let mut inner_opt = self
594
+ .0
595
+ .try_write()
596
+ .map_err(|_| MmapError::ConcurrentAccess)?;
597
+ match (*inner_opt).take() {
598
+ Some(i) => Ok(i),
599
+ None => Err(MmapError::UnmappedFile),
600
+ }
601
+ }
602
+
603
+ /// Move `new_inner` into the `RwLock`.
604
+ /// Will return an error if a mutable borrow is already held.
605
+ fn insert_inner(&self, new_inner: InnerMmap) -> Result<()> {
606
+ let mut inner_opt = self
607
+ .0
608
+ .try_write()
609
+ .map_err(|_| MmapError::ConcurrentAccess)?;
610
+ (*inner_opt).replace(new_inner);
611
+
612
+ Ok(())
613
+ }
614
+
615
+ /// Check if an RString is shared. Shared string use the same underlying
616
+ /// storage as their parent, taking an offset from the start. By default
617
+ /// they must run to the end of the parent string.
618
+ fn rb_string_is_shared(rb_str: RString) -> bool {
619
+ // SAFETY: We only hold a reference to the raw object for the duration
620
+ // of this function, and no Ruby code is called.
621
+ let flags = unsafe {
622
+ let raw_str = Self::rb_string_internal(rb_str);
623
+ raw_str.as_ref().basic.flags
624
+ };
625
+ let shared_flags = STR_SHARED | STR_NOEMBED;
626
+
627
+ flags & shared_flags == shared_flags
628
+ }
629
+
630
+ /// Reinterpret a `magnus::RString` as a pointer to the raw `rb_sys::RString` struct.
631
+ /// We need this to manually change the pointer and length values for strings
632
+ /// when the backing file is re-mapped at a new address.
633
+ ///
634
+ /// SAFETY: Calling Ruby code while the returned pointer is held may result
635
+ /// in the object being mutated or dropped.
636
+ unsafe fn rb_string_internal(rb_str: RString) -> NonNull<rb_sys::RString> {
637
+ mem::transmute::<RString, NonNull<rb_sys::RString>>(rb_str)
638
+ }
639
+
/// Overwrite the length of a raw Ruby string. This variant is compiled
/// under the `ruby_lte_3_2` cfg and writes the `as_.heap.len` field.
///
/// # Safety
/// `raw_str` must point to a valid `rb_sys::RString`, and no Ruby code may
/// run while the pointer is in use.
#[cfg(ruby_lte_3_2)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    let rstring = raw_str.as_mut();
    rstring.as_.heap.len = new_len;
}

/// Overwrite the length of a raw Ruby string. This variant is compiled
/// under the `ruby_gte_3_3` cfg and writes the top-level `len` field.
///
/// # Safety
/// `raw_str` must point to a valid `rb_sys::RString`, and no Ruby code may
/// run while the pointer is in use.
#[cfg(ruby_gte_3_3)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    let rstring = raw_str.as_mut();
    rstring.len = new_len;
}
649
+ }
650
+
651
+ #[cfg(test)]
652
+ mod test {
653
+ use magnus::error::Error;
654
+ use magnus::eval;
655
+ use magnus::Range;
656
+ use nix::unistd::{sysconf, SysconfVar};
657
+ use std::mem::size_of;
658
+
659
+ use super::*;
660
+ use crate::raw_entry::RawEntry;
661
+ use crate::testhelper::TestFile;
662
+
663
+ /// Create a wrapped MmapedFile object.
664
+ fn create_obj() -> Obj<MmapedFile> {
665
+ let TestFile {
666
+ file: _file,
667
+ path,
668
+ dir: _dir,
669
+ } = TestFile::new(&[0u8; 8]);
670
+
671
+ let path_str = path.display().to_string();
672
+ let rpath = RString::new(&path_str);
673
+
674
+ eval!("FastMmapedFileRs.new(path)", path = rpath).unwrap()
675
+ }
676
+
677
+ /// Add three entries to the mmap. Expected length is 56, 3x 16-byte
678
+ /// entries with 8-byte header.
679
+ fn populate_entries(rb_self: &Obj<MmapedFile>) -> RHash {
680
+ let positions = RHash::from_value(eval("{}").unwrap()).unwrap();
681
+
682
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("a"), 0.0).unwrap();
683
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("b"), 1.0).unwrap();
684
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("c"), 2.0).unwrap();
685
+
686
+ positions
687
+ }
688
+
/// Constructing the object through Ruby succeeds, attaches the weak
/// tracker, grows the file to one page, and sets `used` to the header size.
#[test]
fn test_new() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let TestFile {
        file,
        path,
        dir: _dir,
    } = TestFile::new(&[0u8; 8]);

    let rpath = RString::new(&path.display().to_string());

    // Object created successfully
    let created: std::result::Result<Obj<MmapedFile>, Error> =
        eval!("FastMmapedFileRs.new(path)", path = rpath);
    assert!(created.is_ok());
    let obj = created.unwrap();

    // Weak map added
    let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
    assert_eq!("ObjectSpace::WeakMap", tracker.class().inspect());

    // File expanded to page size
    let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as u64;
    assert_eq!(page_size, file.metadata().unwrap().len());

    // Used set to header size
    let used = obj.load_used().unwrap().to_u64().unwrap();
    assert_eq!(HEADER_SIZE as u64, used);
}
725
+
/// Slicing the mmap exposes the header and the first entry exactly as laid
/// out on disk: 4-byte key length, key, space padding to the 8-byte
/// boundary, then the 8-byte value.
#[test]
fn test_slice() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let _ = populate_entries(&obj);

    // Validate header updated with new length
    let header_range = Range::new(0, HEADER_SIZE, true).unwrap().as_value();
    let header_slice = MmapedFile::slice(obj, &[header_range]).unwrap();
    assert_eq!([56, 0, 0, 0, 0, 0, 0, 0], unsafe {
        header_slice.as_slice()
    });

    let value_range = Range::new(HEADER_SIZE, 24, true).unwrap().as_value();
    let value_slice = MmapedFile::slice(obj, &[value_range]).unwrap();

    // Validate string length
    assert_eq!(1u32.to_ne_bytes(), unsafe { &value_slice.as_slice()[0..4] });

    // Validate string and padding. Bytes 4..8 hold the one-byte key "a"
    // followed by three 0x20 padding bytes (4-byte length + 1-byte key is
    // padded up to 8), so the expected literal must be four bytes long.
    assert_eq!("a   ", unsafe {
        String::from_utf8_lossy(&value_slice.as_slice()[4..8])
    });

    // Validate value
    assert_eq!(0.0f64.to_ne_bytes(), unsafe {
        &value_slice.as_slice()[8..16]
    });
}
758
+
759
+ #[test]
760
+ fn test_slice_resize() {
761
+ let _cleanup = unsafe { magnus::embed::init() };
762
+ let ruby = magnus::Ruby::get().unwrap();
763
+ crate::init(&ruby).unwrap();
764
+
765
+ fn assert_internals(
766
+ obj: Obj<MmapedFile>,
767
+ parent_id: c_ulong,
768
+ child_id: c_ulong,
769
+ unshared_id: c_ulong,
770
+ ) {
771
+ let rs_self = &*obj;
772
+ let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
773
+
774
+ let mmap_ptr = rs_self.as_mut_ptr();
775
+ let mmap_len = rs_self.capacity();
776
+
777
+ let mut parent_checked = false;
778
+ let mut child_checked = false;
779
+
780
+ for val in tracker.enumeratorize("each_value", ()) {
781
+ let rb_string = val.unwrap();
782
+ let str = RString::from_value(rb_string).unwrap();
783
+
784
+ unsafe {
785
+ let raw_str = MmapedFile::rb_string_internal(str);
786
+ if str.as_raw() == child_id {
787
+ assert_eq!(parent_id, raw_str.as_ref().as_.heap.aux.shared);
788
+
789
+ let child_offset = mmap_len as isize - str.len() as isize;
790
+ assert_eq!(mmap_ptr.offset(child_offset), raw_str.as_ref().as_.heap.ptr);
791
+
792
+ child_checked = true;
793
+ } else if str.as_raw() == parent_id {
794
+ assert_eq!(parent_id, str.as_raw());
795
+
796
+ assert_eq!(mmap_ptr, raw_str.as_ref().as_.heap.ptr);
797
+ assert_eq!(mmap_len as c_long, str.len() as c_long);
798
+ assert!(raw_str.as_ref().basic.flags & (STR_SHARED | STR_NOEMBED) > 0);
799
+ assert!(str.is_frozen());
800
+
801
+ parent_checked = true;
802
+ } else if str.as_raw() == unshared_id {
803
+ panic!("tracking unshared string");
804
+ } else {
805
+ panic!("unknown string");
806
+ }
807
+ }
808
+ }
809
+ assert!(parent_checked && child_checked);
810
+ }
811
+
812
+ let obj = create_obj();
813
+ let _ = populate_entries(&obj);
814
+
815
+ let rs_self = &*obj;
816
+
817
+ // Create a string containing the full mmap.
818
+ let parent_str = rs_self.str(obj).unwrap();
819
+ let parent_id = parent_str.as_raw();
820
+
821
+ // Ruby's shared strings are only created when they run to the end of
822
+ // the original string.
823
+ let len = rs_self.inner(|inner| Ok(inner.len())).unwrap();
824
+ let shareable_range = Range::new(1, len - 1, false).unwrap().as_value();
825
+
826
+ // This string should re-use the parent's buffer with an offset and have
827
+ // the parent's id in `as.heap.aux.shared`.
828
+ let child_str = rs_self._slice(obj, parent_str, &[shareable_range]).unwrap();
829
+ let child_id = child_str.as_raw();
830
+
831
+ // A range that does not reach the end of the parent will not be shared.
832
+ assert!(len > 4);
833
+ let unshareable_range = Range::new(0, 4, false).unwrap().as_value();
834
+
835
+ // This string should NOT be tracked; it should own its own buffer.
836
+ let unshared_str = rs_self
837
+ ._slice(obj, parent_str, &[unshareable_range])
838
+ .unwrap();
839
+ let unshared_id = unshared_str.as_raw();
840
+ assert!(!MmapedFile::rb_string_is_shared(unshared_str));
841
+
842
+ assert_internals(obj, parent_id, child_id, unshared_id);
843
+
844
+ let orig_ptr = rs_self.as_mut_ptr();
845
+ // Expand repeatedly (doubling each time) to ensure we remap.
846
+ for _ in 0..16 {
847
+ rs_self.expand_to_fit(obj, rs_self.capacity() * 2).unwrap();
848
+ }
849
+ let new_ptr = rs_self.as_mut_ptr();
850
+ assert!(orig_ptr != new_ptr);
851
+
852
+ // If we haven't updated the pointer to the newly remapped file this will segfault.
853
+ let _: Value = eval!("puts parent", parent = parent_str).unwrap();
854
+ let _: Value = eval!("puts child", child = child_str).unwrap();
855
+ let _: Value = eval!("puts unshared", unshared = unshared_str).unwrap();
856
+
857
+ // Confirm that tracked strings are still valid.
858
+ assert_internals(obj, parent_id, child_id, unshared_id);
859
+ }
860
+
/// Adding an entry that would exactly fill the mmap must trigger an
/// expansion, so a trailing NUL byte is always available.
#[test]
fn test_dont_fill_mmap() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let positions = populate_entries(&obj);
    let mmap = &*obj;

    mmap.expand_to_fit(obj, 1024).unwrap();

    let used_before = mmap.inner(|inner| inner.load_used()).unwrap() as usize;
    let cap_before = mmap.inner(|inner| Ok(inner.len())).unwrap();

    // Create a new entry that exactly fills the capacity of the mmap.
    let overhead = HEADER_SIZE + size_of::<f64>() + size_of::<u32>();
    let key_len = cap_before - used_before - overhead;
    assert_eq!(
        cap_before,
        RawEntry::calc_total_len(key_len).unwrap() + used_before
    );

    let key = "A".repeat(key_len);
    MmapedFile::upsert_entry(obj, positions, RString::new(&key), 1.0).unwrap();

    // Validate that we have expanded the mmap, ensuring a trailing NUL.
    assert!(mmap.capacity() > cap_before);
}
891
+ }