vinted-prometheus-client-mmap 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +2 -0
  3. data/ext/fast_mmaped_file_rs/Cargo.toml +40 -0
  4. data/ext/fast_mmaped_file_rs/README.md +52 -0
  5. data/ext/fast_mmaped_file_rs/build.rs +7 -0
  6. data/ext/fast_mmaped_file_rs/extconf.rb +28 -0
  7. data/ext/fast_mmaped_file_rs/src/error.rs +174 -0
  8. data/ext/fast_mmaped_file_rs/src/exemplars.rs +25 -0
  9. data/ext/fast_mmaped_file_rs/src/file_entry.rs +1190 -0
  10. data/ext/fast_mmaped_file_rs/src/file_info.rs +240 -0
  11. data/ext/fast_mmaped_file_rs/src/lib.rs +87 -0
  12. data/ext/fast_mmaped_file_rs/src/macros.rs +14 -0
  13. data/ext/fast_mmaped_file_rs/src/map.rs +492 -0
  14. data/ext/fast_mmaped_file_rs/src/metrics.proto +153 -0
  15. data/ext/fast_mmaped_file_rs/src/mmap/inner.rs +704 -0
  16. data/ext/fast_mmaped_file_rs/src/mmap.rs +896 -0
  17. data/ext/fast_mmaped_file_rs/src/raw_entry.rs +473 -0
  18. data/ext/fast_mmaped_file_rs/src/testhelper.rs +222 -0
  19. data/ext/fast_mmaped_file_rs/src/util.rs +121 -0
  20. data/lib/.DS_Store +0 -0
  21. data/lib/prometheus/.DS_Store +0 -0
  22. data/lib/prometheus/client/configuration.rb +23 -0
  23. data/lib/prometheus/client/counter.rb +27 -0
  24. data/lib/prometheus/client/formats/protobuf.rb +92 -0
  25. data/lib/prometheus/client/formats/text.rb +85 -0
  26. data/lib/prometheus/client/gauge.rb +40 -0
  27. data/lib/prometheus/client/helper/entry_parser.rb +132 -0
  28. data/lib/prometheus/client/helper/file_locker.rb +50 -0
  29. data/lib/prometheus/client/helper/json_parser.rb +23 -0
  30. data/lib/prometheus/client/helper/metrics_processing.rb +45 -0
  31. data/lib/prometheus/client/helper/metrics_representation.rb +51 -0
  32. data/lib/prometheus/client/helper/mmaped_file.rb +64 -0
  33. data/lib/prometheus/client/helper/plain_file.rb +29 -0
  34. data/lib/prometheus/client/histogram.rb +80 -0
  35. data/lib/prometheus/client/label_set_validator.rb +85 -0
  36. data/lib/prometheus/client/metric.rb +80 -0
  37. data/lib/prometheus/client/mmaped_dict.rb +79 -0
  38. data/lib/prometheus/client/mmaped_value.rb +158 -0
  39. data/lib/prometheus/client/page_size.rb +17 -0
  40. data/lib/prometheus/client/push.rb +203 -0
  41. data/lib/prometheus/client/rack/collector.rb +88 -0
  42. data/lib/prometheus/client/rack/exporter.rb +102 -0
  43. data/lib/prometheus/client/registry.rb +65 -0
  44. data/lib/prometheus/client/simple_value.rb +31 -0
  45. data/lib/prometheus/client/summary.rb +69 -0
  46. data/lib/prometheus/client/support/puma.rb +44 -0
  47. data/lib/prometheus/client/support/unicorn.rb +35 -0
  48. data/lib/prometheus/client/uses_value_type.rb +20 -0
  49. data/lib/prometheus/client/version.rb +5 -0
  50. data/lib/prometheus/client.rb +58 -0
  51. data/lib/prometheus.rb +3 -0
  52. metadata +203 -0
@@ -0,0 +1,896 @@
1
+ use magnus::exception::*;
2
+ use magnus::prelude::*;
3
+ use magnus::rb_sys::{AsRawValue, FromRawValue};
4
+ use magnus::typed_data::Obj;
5
+ use magnus::value::Fixnum;
6
+ use magnus::{eval, scan_args, Error, Integer, RArray, RClass, RHash, RString, Value};
7
+ use nix::libc::{c_char, c_long, c_ulong};
8
+ use rb_sys::rb_str_new_static;
9
+ use std::fs::File;
10
+ use std::io::{prelude::*, SeekFrom};
11
+ use std::mem;
12
+ use std::path::Path;
13
+ use std::ptr::NonNull;
14
+ use std::sync::RwLock;
15
+
16
+ use crate::err;
17
+ use crate::error::MmapError;
18
+ use crate::file_entry::FileEntry;
19
+ use crate::map::EntryMap;
20
+ use crate::raw_entry::RawEntry;
21
+ use crate::util::{self, CheckedOps};
22
+ use crate::Result;
23
+ use crate::HEADER_SIZE;
24
+ use inner::InnerMmap;
25
+
26
+ mod inner;
27
+
28
/// Bit mask of Ruby's `STR_NOEMBED` string flag, aka `FL_USER1` (bit 13).
const STR_NOEMBED: c_ulong = 0x2000;
/// Bit mask of Ruby's `STR_SHARED` string flag, aka `FL_USER2` (bit 14).
const STR_SHARED: c_ulong = 0x4000;
32
+
33
+ /// A Rust struct wrapped in a Ruby object, providing access to a memory-mapped
34
+ /// file used to store, update, and read out Prometheus metrics.
35
+ ///
36
+ /// - File format:
37
+ ///   - Header:
38
+ ///     - 4 bytes: u32 - total size of metrics in file.
39
+ ///     - 4 bytes: NUL byte padding.
40
+ ///   - Repeating metrics entries:
41
+ ///     - 4 bytes: u32 - entry JSON string size.
42
+ ///     - `N` bytes: UTF-8 encoded JSON string used as entry key.
43
+ ///     - (8 - (4 + `N`) % 8) bytes: 1 to 8 padding space (0x20) bytes to
44
+ ///       reach 8-byte alignment.
45
+ ///     - 8 bytes: f64 - entry value.
46
+ ///
47
+ /// All numbers are saved in native-endian format.
48
+ ///
49
+ /// Generated via [luismartingarcia/protocol](https://github.com/luismartingarcia/protocol):
50
+ ///
51
+ ///
52
+ /// ```text
53
+ /// protocol "Used:4,Pad:4,K1 Size:4,K1 Name:4,K1 Value:8,K2 Size:4,K2 Name:4,K2 Value:8"
54
+ ///
55
+ ///  0                   1                   2                   3
56
+ ///  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
57
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
58
+ /// |  Used |  Pad  |K1 Size|K1 Name|    K1 Value   |K2 Size|K2 Name|
59
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
60
+ /// |    K2 Value   |
61
+ /// +-+-+-+-+-+-+-+-+
62
+ /// ```
63
+ //
64
+ // The API imposed by `magnus` requires all methods to use shared borrows.
65
+ // This means we can't store any mutable state in the top-level struct,
66
+ // and must store the interior data behind a `RwLock`, which adds run-time
67
+ // checks that mutable operations have no concurrent reads or writes.
68
+ //
69
+ // We are further limited by the need to support subclassing in Ruby, which
70
+ // requires us to define an allocation function for the class, the
71
+ // `magnus::class::define_alloc_func()` function. This needs to support the
72
+ // `Default` trait, so a `File` cannot directly be held by the object being
73
+ // constructed. Having the `RwLock` hold an `Option` of the interior object
74
+ // resolves this.
75
+ #[derive(Debug, Default)]
76
+ #[magnus::wrap(class = "FastMmapedFileRs", free_immediately, size)]
77
+ pub struct MmapedFile(RwLock<Option<InnerMmap>>);
78
+
79
+ impl MmapedFile {
80
+ /// call-seq:
81
+ /// new(file)
82
+ ///
83
+ /// create a new Mmap object
84
+ ///
85
+ /// * <em>file</em>
86
+ ///
87
+ ///
88
+ /// Creates a mapping that's shared with all other processes
89
+ /// mapping the same area of the file.
90
+ pub fn new(klass: RClass, args: &[Value]) -> magnus::error::Result<Obj<Self>> {
91
+ let args = scan_args::scan_args::<(RString,), (), (), (), (), ()>(args)?;
92
+ let path = args.required.0;
93
+
94
+ let lock = MmapedFile(RwLock::new(None));
95
+ let obj = Obj::wrap_as(lock, klass);
96
+
97
+ let _: Value = obj.funcall("initialize", (path,))?;
98
+
99
+ Ok(obj)
100
+ }
101
+
102
+ /// Initialize a new `FastMmapedFileRs` object. This must be defined in
103
+ /// order for inheritance to work.
104
+ pub fn initialize(rb_self: Obj<Self>, fname: String) -> magnus::error::Result<()> {
105
+ let file = File::options()
106
+ .read(true)
107
+ .write(true)
108
+ .open(&fname)
109
+ .map_err(|_| err!(arg_error(), "Can't open {}", fname))?;
110
+
111
+ let inner = InnerMmap::new(fname.into(), file)?;
112
+ rb_self.insert_inner(inner)?;
113
+
114
+ let weak_klass = RClass::from_value(eval("ObjectSpace::WeakMap")?)
115
+ .ok_or_else(|| err!(no_method_error(), "unable to create WeakMap"))?;
116
+ let weak_obj_tracker = weak_klass.new_instance(())?;
117
+
118
+ // We will need to iterate over strings backed by the mmapped file, but
119
+ // don't want to prevent the GC from reaping them when the Ruby code
120
+ // has finished with them. `ObjectSpace::WeakMap` allows us to track
121
+ // them without extending their lifetime.
122
+ //
123
+ // https://ruby-doc.org/core-3.0.0/ObjectSpace/WeakMap.html
124
+ rb_self.ivar_set("@weak_obj_tracker", weak_obj_tracker)?;
125
+
126
+ Ok(())
127
+ }
128
+
129
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
130
+ /// metrics String.
131
+ pub fn to_metrics(file_list: RArray) -> magnus::error::Result<String> {
132
+ let mut map = EntryMap::new();
133
+ map.aggregate_files(file_list)?;
134
+
135
+ let sorted = map.into_sorted()?;
136
+
137
+ FileEntry::entries_to_string(sorted).map_err(|e| e.into())
138
+ }
139
+
140
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
141
+ /// metrics String.
142
+ pub fn to_protobuf(file_list: RArray) -> magnus::error::Result<String> {
143
+ let mut map = EntryMap::new();
144
+ map.aggregate_files(file_list)?;
145
+
146
+ let sorted = map.into_sorted()?;
147
+
148
+ FileEntry::entries_to_protobuf(sorted).map_err(|e| e.into())
149
+ }
150
+
151
+
152
+ /// Document-method: []
153
+ /// Document-method: slice
154
+ ///
155
+ /// call-seq: [](args)
156
+ ///
157
+ /// Element reference - with the following syntax:
158
+ ///
159
+ /// self[nth]
160
+ ///
161
+ /// retrieve the <em>nth</em> character
162
+ ///
163
+ /// self[start..last]
164
+ ///
165
+ /// return a substring from <em>start</em> to <em>last</em>
166
+ ///
167
+ /// self[start, length]
168
+ ///
169
+ /// return a substring of <em>lenght</em> characters from <em>start</em>
170
+ pub fn slice(rb_self: Obj<Self>, args: &[Value]) -> magnus::error::Result<RString> {
171
+ // The C implementation would trigger a GC cycle via `rb_gc_force_recycle`
172
+ // if the `MM_PROTECT` flag is set, but in practice this is never used.
173
+ // We omit this logic, particularly because `rb_gc_force_recycle` is a
174
+ // no-op as of Ruby 3.1.
175
+ let rs_self = &*rb_self;
176
+
177
+ let str = rs_self.str(rb_self)?;
178
+ rs_self._slice(rb_self, str, args)
179
+ }
180
+
181
+ fn _slice(
182
+ &self,
183
+ rb_self: Obj<Self>,
184
+ str: RString,
185
+ args: &[Value],
186
+ ) -> magnus::error::Result<RString> {
187
+ let substr: RString = str.funcall("[]", args)?;
188
+
189
+ // Track shared child strings which use the same backing storage.
190
+ if Self::rb_string_is_shared(substr) {
191
+ (*rb_self).track_rstring(rb_self, substr)?;
192
+ }
193
+
194
+ // The C implementation does this, perhaps to validate that the len we
195
+ // provided is actually being used.
196
+ (*rb_self).inner_mut(|inner| {
197
+ inner.set_len(str.len());
198
+ Ok(())
199
+ })?;
200
+
201
+ Ok(substr)
202
+ }
203
+
204
+ /// Document-method: msync
205
+ /// Document-method: sync
206
+ /// Document-method: flush
207
+ ///
208
+ /// call-seq: msync
209
+ ///
210
+ /// flush the file
211
+ pub fn sync(&self, args: &[Value]) -> magnus::error::Result<()> {
212
+ use nix::sys::mman::MsFlags;
213
+
214
+ let mut ms_async = false;
215
+ let args = scan_args::scan_args::<(), (Option<i32>,), (), (), (), ()>(args)?;
216
+
217
+ if let Some(flag) = args.optional.0 {
218
+ let flag = MsFlags::from_bits(flag).unwrap_or(MsFlags::empty());
219
+ ms_async = flag.contains(MsFlags::MS_ASYNC);
220
+ }
221
+
222
+ // The `memmap2` crate does not support the `MS_INVALIDATE` flag. We ignore that
223
+ // flag if passed in, checking only for `MS_ASYNC`. In practice no arguments are ever
224
+ // passed to this function, but we do this to maintain compatibility with the
225
+ // C implementation.
226
+ self.inner_mut(|inner| inner.flush(ms_async))
227
+ .map_err(|e| e.into())
228
+ }
229
+
230
+ /// Document-method: munmap
231
+ /// Document-method: unmap
232
+ ///
233
+ /// call-seq: munmap
234
+ ///
235
+ /// terminate the association
236
+ pub fn munmap(rb_self: Obj<Self>) -> magnus::error::Result<()> {
237
+ let rs_self = &*rb_self;
238
+
239
+ rs_self.inner_mut(|inner| {
240
+ // We are about to release the backing mmap for Ruby's String
241
+ // objects. If Ruby attempts to read from them the program will
242
+ // segfault. We update the length of all Strings to zero so Ruby
243
+ // does not attempt to access the now invalid address between now
244
+ // and when GC eventually reaps the objects.
245
+ //
246
+ // See the following for more detail:
247
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/39
248
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/41
249
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/merge_requests/80
250
+ inner.set_len(0);
251
+ Ok(())
252
+ })?;
253
+
254
+ // Update each String object to be zero-length.
255
+ let cap = util::cast_chk::<_, c_long>(rs_self.capacity(), "capacity")?;
256
+ rs_self.update_weak_map(rb_self, rs_self.as_mut_ptr(), cap)?;
257
+
258
+ // Remove the `InnerMmap` from the `RwLock`. This will drop
259
+ // end of this function, unmapping and closing the file.
260
+ let _ = rs_self.take_inner()?;
261
+ Ok(())
262
+ }
263
+
264
+ /// Fetch the `used` header from the `.db` file, the length
265
+ /// in bytes of the data written to the file.
266
+ pub fn load_used(&self) -> magnus::error::Result<Integer> {
267
+ let used = self.inner(|inner| inner.load_used())?;
268
+
269
+ Ok(Integer::from_u64(used as u64))
270
+ }
271
+
272
+ /// Update the `used` header for the `.db` file, the length
273
+ /// in bytes of the data written to the file.
274
+ pub fn save_used(rb_self: Obj<Self>, used: Fixnum) -> magnus::error::Result<Fixnum> {
275
+ let rs_self = &*rb_self;
276
+ let used_uint = used.to_u32()?;
277
+
278
+ // If the underlying mmap is smaller than the header, then resize to fit.
279
+ // The file has already been expanded to page size when first opened, so
280
+ // even if the map is less than HEADER_SIZE, we're not at risk of a
281
+ // SIGBUS.
282
+ if rs_self.capacity() < HEADER_SIZE {
283
+ rs_self.expand_to_fit(rb_self, HEADER_SIZE)?;
284
+ }
285
+
286
+ rs_self.inner_mut(|inner| inner.save_used(used_uint))?;
287
+
288
+ Ok(used)
289
+ }
290
+
291
+ /// Fetch the value associated with a key from the mmap.
292
+ /// If no entry is present, initialize with the default
293
+ /// value provided.
294
+ pub fn fetch_entry(
295
+ rb_self: Obj<Self>,
296
+ positions: RHash,
297
+ key: RString,
298
+ default_value: f64,
299
+ ) -> magnus::error::Result<f64> {
300
+ let rs_self = &*rb_self;
301
+ let position: Option<Fixnum> = positions.lookup(key)?;
302
+
303
+ if let Some(pos) = position {
304
+ let pos = pos.to_usize()?;
305
+ return rs_self
306
+ .inner(|inner| inner.load_value(pos))
307
+ .map_err(|e| e.into());
308
+ }
309
+
310
+ rs_self.check_expand(rb_self, key.len())?;
311
+
312
+ let value_offset: usize = rs_self.inner_mut(|inner| {
313
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
314
+ unsafe { inner.initialize_entry(key.as_slice(), default_value) }
315
+ })?;
316
+
317
+ // CAST: no-op on 64-bit, widening on 32-bit.
318
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
319
+
320
+ rs_self.load_value(value_offset)
321
+ }
322
+
323
+ /// Update the value of an existing entry, if present. Otherwise create a new entry
324
+ /// for the key.
325
+ pub fn upsert_entry(
326
+ rb_self: Obj<Self>,
327
+ positions: RHash,
328
+ key: RString,
329
+ value: f64,
330
+ ) -> magnus::error::Result<f64> {
331
+ let rs_self = &*rb_self;
332
+ let position: Option<Fixnum> = positions.lookup(key)?;
333
+
334
+ if let Some(pos) = position {
335
+ let pos = pos.to_usize()?;
336
+ return rs_self
337
+ .inner_mut(|inner| {
338
+ inner.save_value(pos, value)?;
339
+
340
+ // TODO just return `value` here instead of loading it?
341
+ // This is how the C implementation did it, but I don't
342
+ // see what the extra load gains us.
343
+ inner.load_value(pos)
344
+ })
345
+ .map_err(|e| e.into());
346
+ }
347
+
348
+ rs_self.check_expand(rb_self, key.len())?;
349
+
350
+ let value_offset: usize = rs_self.inner_mut(|inner| {
351
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
352
+ unsafe { inner.initialize_entry(key.as_slice(), value) }
353
+ })?;
354
+
355
+ // CAST: no-op on 64-bit, widening on 32-bit.
356
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
357
+
358
+ rs_self.load_value(value_offset)
359
+ }
360
+
361
+ /// Creates a Ruby String containing the section of the mmapped file that
362
+ /// has been written to.
363
+ fn str(&self, rb_self: Obj<Self>) -> magnus::error::Result<RString> {
364
+ let val_id = (*rb_self).inner(|inner| {
365
+ let ptr = inner.as_ptr();
366
+ let len = inner.len();
367
+
368
+ // SAFETY: This is safe so long as the data provided to Ruby meets its
369
+ // requirements. When unmapping the file this will no longer be the
370
+ // case, see the comment on `munmap` for how we handle this.
371
+ Ok(unsafe { rb_str_new_static(ptr as _, len as _) })
372
+ })?;
373
+
374
+ // SAFETY: We know that rb_str_new_static returns a VALUE.
375
+ let val = unsafe { Value::from_raw(val_id) };
376
+
377
+ // UNWRAP: We created this value as a string above.
378
+ let str = RString::from_value(val).unwrap();
379
+
380
+ // Freeze the root string so it can't be mutated out from under any
381
+ // substrings created. This object is never exposed to callers.
382
+ str.freeze();
383
+
384
+ // Track the RString in our `WeakMap` so we can update its address if
385
+ // we re-mmap the backing file.
386
+ (*rb_self).track_rstring(rb_self, str)?;
387
+
388
+ Ok(str)
389
+ }
390
+
391
+ /// If we reallocate, any live Ruby strings provided by the `str()` method
392
+ /// will be invalidated. We need to iterate over them using the `WeakMap` and update their
393
+ /// heap pointers to the newly allocated memory region. `old_ptr` and `old_cap` are the base address and capacity of the mapping the tracked strings currently point into.
394
+ fn update_weak_map(
395
+ &self,
396
+ rb_self: Obj<Self>,
397
+ old_ptr: *const c_char,
398
+ old_cap: c_long,
399
+ ) -> magnus::error::Result<()> {
400
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
401
+ // Length that non-shared (root) strings are updated to; fails if it does not fit in a `c_long`.
402
+ let new_len = self.inner(|inner| util::cast_chk::<_, c_long>(inner.len(), "mmap len"))?;
403
+
404
+ // Iterate over the values of the `WeakMap`.
405
+ for val in tracker.enumeratorize("each_value", ()) {
406
+ let rb_string = val?;
407
+ let str = RString::from_value(rb_string)
408
+ .ok_or_else(|| err!(arg_error(), "weakmap value was not a string"))?;
409
+
410
+ // SAFETY: We write directly to the internal fields of Ruby's `RString`. Only raw field accesses and our own helpers run inside this block while `raw_str` is held.
411
+ unsafe {
412
+ // Convert the magnus wrapper type to a raw string exposed by `rb_sys`,
413
+ // which provides access to its internals.
414
+ let mut raw_str = Self::rb_string_internal(str);
415
+
416
+ // Shared strings have their own `ptr` and `len` values, but `aux`
417
+ // is the id of the parent string so the GC can track this
418
+ // dependency. The `ptr` will always be an offset from the base
419
+ // address of the mmap, and `len` will be the length of the mmap
420
+ // less the offset from the base.
421
+ if Self::rb_string_is_shared(str) && new_len > 0 {
422
+ // Calculate how far into the original mmap the shared string
423
+ // started and update to the equivalent address in the new
424
+ // one.
425
+ let substr_ptr = raw_str.as_ref().as_.heap.ptr;
426
+ let offset = substr_ptr.offset_from(old_ptr);
427
+
428
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr().offset(offset);
429
+
430
+ let current_len = str.len() as c_long;
431
+ let new_shared_len = old_cap + current_len; // NOTE(review): this adds `old_cap` to the current length, which does not obviously equal "mmap length less offset" as described above — confirm intended.
432
+
433
+ self.update_rstring_len(raw_str, new_shared_len);
434
+ continue;
435
+ }
436
+
437
+ // Update the string to point to the new mmapped file.
438
+ // We're matching the behavior of Ruby's `str_new_static` function.
439
+ // See https://github.com/ruby/ruby/blob/e51014f9c05aa65cbf203442d37fef7c12390015/string.c#L1030-L1053
440
+ //
441
+ // We deliberately do _NOT_ increment the `capa` field of the
442
+ // string to match the new `len`. We were initially doing this,
443
+ // but consistently triggered GCs in the middle of updating the
444
+ // string pointers, causing a segfault.
445
+ //
446
+ // See https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/45
447
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr();
448
+ self.update_rstring_len(raw_str, new_len);
449
+ }
450
+ }
451
+
452
+ Ok(())
453
+ }
454
+
455
+ /// Check that the mmap is large enough to contain the value to be added,
456
+ /// and expand it to fit if necessary.
457
+ fn check_expand(&self, rb_self: Obj<Self>, key_len: usize) -> magnus::error::Result<()> {
458
+ // CAST: no-op on 32-bit, widening on 64-bit.
459
+ let used = self.inner(|inner| inner.load_used())? as usize;
460
+ let entry_len = RawEntry::calc_total_len(key_len)?;
461
+
462
+ // We need the mmapped region to contain at least one byte beyond the
463
+ // written data to create a NUL- terminated C string. Validate that
464
+ // new length does not exactly match or exceed the length of the mmap.
465
+ while self.capacity() <= used.add_chk(entry_len)? {
466
+ self.expand_to_fit(rb_self, self.capacity().mul_chk(2)?)?;
467
+ }
468
+
469
+ Ok(())
470
+ }
471
+
472
+ /// Expand the underlying file until it is long enough to fit `target_cap`.
473
+ /// This will remove the existing mmap, expand the file, then update any
474
+ /// strings held by the `WeakMap` to point to the newly mmapped address.
475
+ fn expand_to_fit(&self, rb_self: Obj<Self>, target_cap: usize) -> magnus::error::Result<()> {
476
+ if target_cap < self.capacity() {
477
+ return Err(err!(arg_error(), "Can't reduce the size of mmap"));
478
+ }
479
+
480
+ let mut new_cap = self.capacity();
481
+ while new_cap < target_cap {
482
+ new_cap = new_cap.mul_chk(2)?;
483
+ }
484
+
485
+ if new_cap != self.capacity() {
486
+ let old_ptr = self.as_mut_ptr();
487
+ let old_cap = util::cast_chk::<_, c_long>(self.capacity(), "capacity")?;
488
+
489
+ // Drop the old mmap.
490
+ let (mut file, path) = self.take_inner()?.munmap();
491
+
492
+ self.expand_file(&mut file, &path, target_cap)?;
493
+
494
+ // Re-mmap the expanded file.
495
+ let new_inner = InnerMmap::reestablish(path, file, target_cap)?;
496
+
497
+ self.insert_inner(new_inner)?;
498
+
499
+ return self.update_weak_map(rb_self, old_ptr, old_cap);
500
+ }
501
+
502
+ Ok(())
503
+ }
504
+
505
+ /// Use lseek(2) to seek past the end of the file and write a NUL byte. This
506
+ /// creates a file hole that expands the size of the file without consuming
507
+ /// disk space until it is actually written to.
508
+ fn expand_file(&self, file: &mut File, path: &Path, len: usize) -> Result<()> {
509
+ if len == 0 {
510
+ return Err(MmapError::overflowed(0, -1, "adding"));
511
+ }
512
+
513
+ // CAST: no-op on 64-bit, widening on 32-bit.
514
+ let len = len as u64;
515
+
516
+ match file.seek(SeekFrom::Start(len - 1)) {
517
+ Ok(_) => {}
518
+ Err(_) => {
519
+ return Err(MmapError::with_errno(format!("Can't lseek {}", len - 1)));
520
+ }
521
+ }
522
+
523
+ match file.write(&[0x0]) {
524
+ Ok(1) => {}
525
+ _ => {
526
+ return Err(MmapError::with_errno(format!(
527
+ "Can't extend {}",
528
+ path.display()
529
+ )));
530
+ }
531
+ }
532
+
533
+ Ok(())
534
+ }
535
+
536
+ fn track_rstring(&self, rb_self: Obj<Self>, str: RString) -> magnus::error::Result<()> {
537
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
538
+
539
+ // Use the string's Id as the key in the `WeakMap`.
540
+ let key = str.as_raw();
541
+ let _: Value = tracker.funcall("[]=", (key, str))?;
542
+ Ok(())
543
+ }
544
+
545
+ /// The total capacity of the underlying mmap.
546
+ #[inline]
547
+ fn capacity(&self) -> usize {
548
+ // UNWRAP: This is actually infallible, but we need to
549
+ // wrap it in a `Result` for use with `inner()`.
550
+ self.inner(|inner| Ok(inner.capacity())).unwrap()
551
+ }
552
+
553
+ fn load_value(&self, position: usize) -> magnus::error::Result<f64> {
554
+ self.inner(|inner| inner.load_value(position))
555
+ .map_err(|e| e.into())
556
+ }
557
+
558
+ fn as_mut_ptr(&self) -> *mut c_char {
559
+ // UNWRAP: This is actually infallible, but we need to
560
+ // wrap it in a `Result` for use with `inner()`.
561
+ self.inner(|inner| Ok(inner.as_mut_ptr() as *mut c_char))
562
+ .unwrap()
563
+ }
564
+
565
+ /// Takes a closure with immutable access to InnerMmap. Will fail if the inner
566
+ /// object has a mutable borrow or has been dropped.
567
+ fn inner<F, T>(&self, func: F) -> Result<T>
568
+ where
569
+ F: FnOnce(&InnerMmap) -> Result<T>,
570
+ {
571
+ let inner_opt = self.0.try_read().map_err(|_| MmapError::ConcurrentAccess)?;
572
+
573
+ let inner = inner_opt.as_ref().ok_or(MmapError::UnmappedFile)?;
574
+
575
+ func(inner)
576
+ }
577
+
578
+ /// Takes a closure with mutable access to InnerMmap. Will fail if the inner
579
+ /// object has an existing mutable borrow, or has been dropped.
580
+ fn inner_mut<F, T>(&self, func: F) -> Result<T>
581
+ where
582
+ F: FnOnce(&mut InnerMmap) -> Result<T>,
583
+ {
584
+ let mut inner_opt = self
585
+ .0
586
+ .try_write()
587
+ .map_err(|_| MmapError::ConcurrentAccess)?;
588
+
589
+ let inner = inner_opt.as_mut().ok_or(MmapError::UnmappedFile)?;
590
+
591
+ func(inner)
592
+ }
593
+
594
+ /// Take ownership of the `InnerMmap` from the `RwLock`.
595
+ /// Will fail if a mutable borrow is already held or the inner
596
+ /// object has been dropped.
597
+ fn take_inner(&self) -> Result<InnerMmap> {
598
+ let mut inner_opt = self
599
+ .0
600
+ .try_write()
601
+ .map_err(|_| MmapError::ConcurrentAccess)?;
602
+ match (*inner_opt).take() {
603
+ Some(i) => Ok(i),
604
+ None => Err(MmapError::UnmappedFile),
605
+ }
606
+ }
607
+
608
+ /// Move `new_inner` into the `RwLock`.
609
+ /// Will return an error if a mutable borrow is already held.
610
+ fn insert_inner(&self, new_inner: InnerMmap) -> Result<()> {
611
+ let mut inner_opt = self
612
+ .0
613
+ .try_write()
614
+ .map_err(|_| MmapError::ConcurrentAccess)?;
615
+ (*inner_opt).replace(new_inner);
616
+
617
+ Ok(())
618
+ }
619
+
620
+ /// Check if an RString is shared. Shared string use the same underlying
621
+ /// storage as their parent, taking an offset from the start. By default
622
+ /// they must run to the end of the parent string.
623
+ fn rb_string_is_shared(rb_str: RString) -> bool {
624
+ // SAFETY: We only hold a reference to the raw object for the duration
625
+ // of this function, and no Ruby code is called.
626
+ let flags = unsafe {
627
+ let raw_str = Self::rb_string_internal(rb_str);
628
+ raw_str.as_ref().basic.flags
629
+ };
630
+ let shared_flags = STR_SHARED | STR_NOEMBED;
631
+
632
+ flags & shared_flags == shared_flags
633
+ }
634
+
635
+ /// Convert `magnus::RString` into the raw binding used by `rb_sys::RString`.
636
+ /// We need this to manually change the pointer and length values for strings
637
+ /// when moving the mmap to a new file.
638
+ ///
639
+ /// SAFETY: Calling Ruby code while the returned object is held may result
640
+ /// in it being mutated or dropped.
641
+ unsafe fn rb_string_internal(rb_str: RString) -> NonNull<rb_sys::RString> {
642
+ mem::transmute::<RString, NonNull<rb_sys::RString>>(rb_str)
643
+ }
644
+
645
/// Overwrite the length of a raw Ruby string. On Ruby <= 3.2 the length is
/// the `as.heap.len` field.
///
/// # Safety
///
/// `raw_str` must point to a live `rb_sys::RString`.
#[cfg(ruby_lte_3_2)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    let rstring = raw_str.as_mut();
    rstring.as_.heap.len = new_len;
}

/// Overwrite the length of a raw Ruby string. On Ruby >= 3.3 the length is
/// the top-level `len` field.
///
/// # Safety
///
/// `raw_str` must point to a live `rb_sys::RString`.
#[cfg(ruby_gte_3_3)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    let rstring = raw_str.as_mut();
    rstring.len = new_len;
}
654
+ }
655
+
656
+ #[cfg(test)]
657
+ mod test {
658
+ use magnus::error::Error;
659
+ use magnus::eval;
660
+ use magnus::Range;
661
+ use nix::unistd::{sysconf, SysconfVar};
662
+ use std::mem::size_of;
663
+
664
+ use super::*;
665
+ use crate::raw_entry::RawEntry;
666
+ use crate::testhelper::TestFile;
667
+
668
+ /// Create a wrapped MmapedFile object.
669
+ fn create_obj() -> Obj<MmapedFile> {
670
+ let TestFile {
671
+ file: _file,
672
+ path,
673
+ dir: _dir,
674
+ } = TestFile::new(&[0u8; 8]);
675
+
676
+ let path_str = path.display().to_string();
677
+ let rpath = RString::new(&path_str);
678
+
679
+ eval!("FastMmapedFileRs.new(path)", path = rpath).unwrap()
680
+ }
681
+
682
+ /// Add three entries to the mmap. Expected length is 56, 3x 16-byte
683
+ /// entries with 8-byte header.
684
+ fn populate_entries(rb_self: &Obj<MmapedFile>) -> RHash {
685
+ let positions = RHash::from_value(eval("{}").unwrap()).unwrap();
686
+
687
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("a"), 0.0).unwrap();
688
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("b"), 1.0).unwrap();
689
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("c"), 2.0).unwrap();
690
+
691
+ positions
692
+ }
693
+
694
/// Constructing through Ruby succeeds, attaches the weak map, expands the
/// file to one page, and sets `used` to the header size.
#[test]
fn test_new() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let TestFile {
        file,
        path,
        dir: _dir,
    } = TestFile::new(&[0u8; 8]);

    let rpath = RString::new(&path.display().to_string());

    // Object created successfully
    let created: std::result::Result<Obj<MmapedFile>, Error> =
        eval!("FastMmapedFileRs.new(path)", path = rpath);
    assert!(created.is_ok());
    let obj = created.unwrap();

    // Weak map added
    let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
    assert_eq!("ObjectSpace::WeakMap", tracker.class().inspect());

    // File expanded to page size
    let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as u64;
    let file_len = file.metadata().unwrap().len();
    assert_eq!(page_size, file_len);

    // Used set to header size
    let used = obj.load_used().unwrap().to_u64().unwrap();
    assert_eq!(HEADER_SIZE as u64, used);
}
730
+
731
/// Slicing the mmap returns the header and the first entry with the
/// documented layout: u32 key length, key, space padding to 8-byte
/// alignment, then the f64 value.
#[test]
fn test_slice() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let _ = populate_entries(&obj);

    // Validate header updated with new length. The `used` field is a
    // native-endian u32 followed by NUL padding, so build the expectation
    // with `to_ne_bytes` instead of assuming little-endian.
    let header_range = Range::new(0, HEADER_SIZE, true).unwrap().as_value();
    let header_slice = MmapedFile::slice(obj, &[header_range]).unwrap();
    let mut expected_header = [0u8; HEADER_SIZE];
    expected_header[..4].copy_from_slice(&56u32.to_ne_bytes());
    assert_eq!(expected_header, unsafe { header_slice.as_slice() });

    let value_range = Range::new(HEADER_SIZE, 24, true).unwrap().as_value();
    let value_slice = MmapedFile::slice(obj, &[value_range]).unwrap();

    // Validate string length
    assert_eq!(1u32.to_ne_bytes(), unsafe { &value_slice.as_slice()[0..4] });

    // Validate string and padding: bytes 4..8 hold the one-byte key "a"
    // followed by three 0x20 padding bytes.
    assert_eq!("a   ", unsafe {
        String::from_utf8_lossy(&value_slice.as_slice()[4..8])
    });

    // Validate value
    assert_eq!(0.0f64.to_ne_bytes(), unsafe {
        &value_slice.as_slice()[8..16]
    });
}
763
+
764
/// Tracked parent and shared child strings must stay valid, and keep
/// pointing into the mmap, after the mapping has been expanded.
#[test]
fn test_slice_resize() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    /// Walk the weak map and check the raw internals of every tracked
    /// string. Only the parent and the shared child may be present.
    fn assert_internals(
        obj: Obj<MmapedFile>,
        parent_id: c_ulong,
        child_id: c_ulong,
        unshared_id: c_ulong,
    ) {
        let mmap = &*obj;
        let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();

        let mmap_ptr = mmap.as_mut_ptr();
        let mmap_len = mmap.capacity();

        let mut saw_parent = false;
        let mut saw_child = false;

        for entry in tracker.enumeratorize("each_value", ()) {
            let str = RString::from_value(entry.unwrap()).unwrap();

            unsafe {
                let raw_str = MmapedFile::rb_string_internal(str);
                let id = str.as_raw();

                if id == child_id {
                    // The child records the parent's id as its shared root.
                    assert_eq!(parent_id, raw_str.as_ref().as_.heap.aux.shared);

                    let child_offset = mmap_len as isize - str.len() as isize;
                    assert_eq!(mmap_ptr.offset(child_offset), raw_str.as_ref().as_.heap.ptr);

                    saw_child = true;
                } else if id == parent_id {
                    assert_eq!(parent_id, str.as_raw());

                    assert_eq!(mmap_ptr, raw_str.as_ref().as_.heap.ptr);
                    assert_eq!(mmap_len as c_long, str.len() as c_long);
                    assert!(raw_str.as_ref().basic.flags & (STR_SHARED | STR_NOEMBED) > 0);
                    assert!(str.is_frozen());

                    saw_parent = true;
                } else if id == unshared_id {
                    panic!("tracking unshared string");
                } else {
                    panic!("unknown string");
                }
            }
        }
        assert!(saw_parent && saw_child);
    }

    let obj = create_obj();
    let _ = populate_entries(&obj);

    let mmap = &*obj;

    // Create a string containing the full mmap.
    let parent_str = mmap.str(obj).unwrap();
    let parent_id = parent_str.as_raw();

    // Ruby's shared strings are only created when they go to the end of
    // original string.
    let len = mmap.inner(|inner| Ok(inner.len())).unwrap();
    let shareable_range = Range::new(1, len - 1, false).unwrap().as_value();

    // This string should re-use the parent's buffer with an offset and have
    // the parent's id in `as.heap.aux.shared`
    let child_str = mmap._slice(obj, parent_str, &[shareable_range]).unwrap();
    let child_id = child_str.as_raw();

    // A range that does not reach the end of the parent will not be shared.
    assert!(len > 4);
    let unshareable_range = Range::new(0, 4, false).unwrap().as_value();

    // This string should NOT be tracked, it should own its own buffer.
    let unshared_str = mmap
        ._slice(obj, parent_str, &[unshareable_range])
        .unwrap();
    let unshared_id = unshared_str.as_raw();
    assert!(!MmapedFile::rb_string_is_shared(unshared_str));

    assert_internals(obj, parent_id, child_id, unshared_id);

    let ptr_before = mmap.as_mut_ptr();
    // Expand a bunch to ensure we remap
    for _ in 0..16 {
        let doubled = mmap.capacity() * 2;
        mmap.expand_to_fit(obj, doubled).unwrap();
    }
    let ptr_after = mmap.as_mut_ptr();
    assert!(ptr_before != ptr_after);

    // If we haven't updated the pointer to the newly remapped file this will segfault.
    let _: Value = eval!("puts parent", parent = parent_str).unwrap();
    let _: Value = eval!("puts child", child = child_str).unwrap();
    let _: Value = eval!("puts unshared", unshared = unshared_str).unwrap();

    // Confirm that tracked strings are still valid.
    assert_internals(obj, parent_id, child_id, unshared_id);
}
865
+
866
/// Writing an entry that would exactly fill the mmap must trigger an
/// expansion, so that a trailing NUL byte always remains.
#[test]
fn test_dont_fill_mmap() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let positions = populate_entries(&obj);

    let mmap = &*obj;

    mmap.expand_to_fit(obj, 1024).unwrap();

    let current_used = mmap.inner(|inner| inner.load_used()).unwrap() as usize;
    let current_cap = mmap.inner(|inner| Ok(inner.len())).unwrap();

    // Create a new entry that exactly fills the capacity of the mmap.
    let fixed_overhead = HEADER_SIZE + size_of::<f64>() + size_of::<u32>();
    let val_len = current_cap - current_used - fixed_overhead;
    assert_eq!(
        current_cap,
        RawEntry::calc_total_len(val_len).unwrap() + current_used
    );

    let key = "A".repeat(val_len);
    MmapedFile::upsert_entry(obj, positions, RString::new(&key), 1.0).unwrap();

    // Validate that we have expanded the mmap, ensuring a trailing NUL.
    assert!(mmap.capacity() > current_cap);
}
896
+ }