vinted-prometheus-client-mmap 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +2 -0
  3. data/ext/fast_mmaped_file_rs/Cargo.toml +40 -0
  4. data/ext/fast_mmaped_file_rs/README.md +52 -0
  5. data/ext/fast_mmaped_file_rs/build.rs +7 -0
  6. data/ext/fast_mmaped_file_rs/extconf.rb +28 -0
  7. data/ext/fast_mmaped_file_rs/src/error.rs +174 -0
  8. data/ext/fast_mmaped_file_rs/src/exemplars.rs +25 -0
  9. data/ext/fast_mmaped_file_rs/src/file_entry.rs +1190 -0
  10. data/ext/fast_mmaped_file_rs/src/file_info.rs +240 -0
  11. data/ext/fast_mmaped_file_rs/src/lib.rs +87 -0
  12. data/ext/fast_mmaped_file_rs/src/macros.rs +14 -0
  13. data/ext/fast_mmaped_file_rs/src/map.rs +492 -0
  14. data/ext/fast_mmaped_file_rs/src/metrics.proto +153 -0
  15. data/ext/fast_mmaped_file_rs/src/mmap/inner.rs +704 -0
  16. data/ext/fast_mmaped_file_rs/src/mmap.rs +896 -0
  17. data/ext/fast_mmaped_file_rs/src/raw_entry.rs +473 -0
  18. data/ext/fast_mmaped_file_rs/src/testhelper.rs +222 -0
  19. data/ext/fast_mmaped_file_rs/src/util.rs +121 -0
  20. data/lib/.DS_Store +0 -0
  21. data/lib/prometheus/.DS_Store +0 -0
  22. data/lib/prometheus/client/configuration.rb +23 -0
  23. data/lib/prometheus/client/counter.rb +27 -0
  24. data/lib/prometheus/client/formats/protobuf.rb +92 -0
  25. data/lib/prometheus/client/formats/text.rb +85 -0
  26. data/lib/prometheus/client/gauge.rb +40 -0
  27. data/lib/prometheus/client/helper/entry_parser.rb +132 -0
  28. data/lib/prometheus/client/helper/file_locker.rb +50 -0
  29. data/lib/prometheus/client/helper/json_parser.rb +23 -0
  30. data/lib/prometheus/client/helper/metrics_processing.rb +45 -0
  31. data/lib/prometheus/client/helper/metrics_representation.rb +51 -0
  32. data/lib/prometheus/client/helper/mmaped_file.rb +64 -0
  33. data/lib/prometheus/client/helper/plain_file.rb +29 -0
  34. data/lib/prometheus/client/histogram.rb +80 -0
  35. data/lib/prometheus/client/label_set_validator.rb +85 -0
  36. data/lib/prometheus/client/metric.rb +80 -0
  37. data/lib/prometheus/client/mmaped_dict.rb +79 -0
  38. data/lib/prometheus/client/mmaped_value.rb +158 -0
  39. data/lib/prometheus/client/page_size.rb +17 -0
  40. data/lib/prometheus/client/push.rb +203 -0
  41. data/lib/prometheus/client/rack/collector.rb +88 -0
  42. data/lib/prometheus/client/rack/exporter.rb +102 -0
  43. data/lib/prometheus/client/registry.rb +65 -0
  44. data/lib/prometheus/client/simple_value.rb +31 -0
  45. data/lib/prometheus/client/summary.rb +69 -0
  46. data/lib/prometheus/client/support/puma.rb +44 -0
  47. data/lib/prometheus/client/support/unicorn.rb +35 -0
  48. data/lib/prometheus/client/uses_value_type.rb +20 -0
  49. data/lib/prometheus/client/version.rb +5 -0
  50. data/lib/prometheus/client.rb +58 -0
  51. data/lib/prometheus.rb +3 -0
  52. metadata +203 -0
@@ -0,0 +1,896 @@
1
+ use magnus::exception::*;
2
+ use magnus::prelude::*;
3
+ use magnus::rb_sys::{AsRawValue, FromRawValue};
4
+ use magnus::typed_data::Obj;
5
+ use magnus::value::Fixnum;
6
+ use magnus::{eval, scan_args, Error, Integer, RArray, RClass, RHash, RString, Value};
7
+ use nix::libc::{c_char, c_long, c_ulong};
8
+ use rb_sys::rb_str_new_static;
9
+ use std::fs::File;
10
+ use std::io::{prelude::*, SeekFrom};
11
+ use std::mem;
12
+ use std::path::Path;
13
+ use std::ptr::NonNull;
14
+ use std::sync::RwLock;
15
+
16
+ use crate::err;
17
+ use crate::error::MmapError;
18
+ use crate::file_entry::FileEntry;
19
+ use crate::map::EntryMap;
20
+ use crate::raw_entry::RawEntry;
21
+ use crate::util::{self, CheckedOps};
22
+ use crate::Result;
23
+ use crate::HEADER_SIZE;
24
+ use inner::InnerMmap;
25
+
26
+ mod inner;
27
+
28
+ /// The Ruby `STR_NOEMBED` flag, aka `FL_USER1`.
29
+ const STR_NOEMBED: c_ulong = 1 << (13);
30
+ /// The Ruby `STR_SHARED` flag, aka `FL_USER2`.
31
+ const STR_SHARED: c_ulong = 1 << (14);
32
+
33
+ /// A Rust struct wrapped in a Ruby object, providing access to a memory-mapped
34
+ /// file used to store, update, and read out Prometheus metrics.
35
+ ///
36
+ /// - File format:
37
+ /// - Header:
38
+ /// - 4 bytes: u32 - total size of metrics in file.
39
+ /// - 4 bytes: NUL byte padding.
40
+ /// - Repeating metrics entries:
41
+ /// - 4 bytes: u32 - entry JSON string size.
42
+ /// - `N` bytes: UTF-8 encoded JSON string used as entry key.
43
+ /// - (8 - (4 + `N`) % 8) bytes: 1 to 8 padding space (0x20) bytes to
44
+ /// reach 8-byte alignment.
45
+ /// - 8 bytes: f64 - entry value.
46
+ ///
47
+ /// All numbers are saved in native-endian format.
48
+ ///
49
+ /// Generated via [luismartingarcia/protocol](https://github.com/luismartingarcia/protocol):
50
+ ///
51
+ ///
52
+ /// ```
53
+ /// protocol "Used:4,Pad:4,K1 Size:4,K1 Name:4,K1 Value:8,K2 Size:4,K2 Name:4,K2 Value:8"
54
+ ///
55
+ /// 0 1 2 3
56
+ /// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
57
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
58
+ /// | Used | Pad |K1 Size|K1 Name| K1 Value |K2 Size|K2 Name|
59
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
60
+ /// | K2 Value |
61
+ /// +-+-+-+-+-+-+-+
62
+ /// ```
63
+ //
64
+ // The API imposed by `magnus` requires all methods to use shared borrows.
65
+ // This means we can't store any mutable state in the top-level struct,
66
+ // and must store the interior data behind a `RwLock`, which adds run-time
67
+ // checks that mutable operations have no concurrent read or writes.
68
+ //
69
+ // We are further limited by the need to support subclassing in Ruby, which
70
+ // requires us to define an allocation function for the class, the
71
+ // `magnus::class::define_alloc_func()` function. This needs to support the
72
+ // `Default` trait, so a `File` cannot be held directly by the object being
73
+ // constructed. Having the `RwLock` hold an `Option` of the interior object
74
+ // resolves this.
75
+ #[derive(Debug, Default)]
76
+ #[magnus::wrap(class = "FastMmapedFileRs", free_immediately, size)]
77
+ pub struct MmapedFile(RwLock<Option<InnerMmap>>);
78
+
79
+ impl MmapedFile {
80
+ /// call-seq:
81
+ /// new(file)
82
+ ///
83
+ /// create a new Mmap object
84
+ ///
85
+ /// * <em>file</em>
86
+ ///
87
+ ///
88
+ /// Creates a mapping that's shared with all other processes
89
+ /// mapping the same area of the file.
90
+ pub fn new(klass: RClass, args: &[Value]) -> magnus::error::Result<Obj<Self>> {
91
+ let args = scan_args::scan_args::<(RString,), (), (), (), (), ()>(args)?;
92
+ let path = args.required.0;
93
+
94
+ let lock = MmapedFile(RwLock::new(None));
95
+ let obj = Obj::wrap_as(lock, klass);
96
+
97
+ let _: Value = obj.funcall("initialize", (path,))?;
98
+
99
+ Ok(obj)
100
+ }
101
+
102
+ /// Initialize a new `FastMmapedFileRs` object. This must be defined in
103
+ /// order for inheritance to work.
104
+ pub fn initialize(rb_self: Obj<Self>, fname: String) -> magnus::error::Result<()> {
105
+ let file = File::options()
106
+ .read(true)
107
+ .write(true)
108
+ .open(&fname)
109
+ .map_err(|_| err!(arg_error(), "Can't open {}", fname))?;
110
+
111
+ let inner = InnerMmap::new(fname.into(), file)?;
112
+ rb_self.insert_inner(inner)?;
113
+
114
+ let weak_klass = RClass::from_value(eval("ObjectSpace::WeakMap")?)
115
+ .ok_or_else(|| err!(no_method_error(), "unable to create WeakMap"))?;
116
+ let weak_obj_tracker = weak_klass.new_instance(())?;
117
+
118
+ // We will need to iterate over strings backed by the mmapped file, but
119
+ // don't want to prevent the GC from reaping them when the Ruby code
120
+ // has finished with them. `ObjectSpace::WeakMap` allows us to track
121
+ // them without extending their lifetime.
122
+ //
123
+ // https://ruby-doc.org/core-3.0.0/ObjectSpace/WeakMap.html
124
+ rb_self.ivar_set("@weak_obj_tracker", weak_obj_tracker)?;
125
+
126
+ Ok(())
127
+ }
128
+
129
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
130
+ /// metrics String.
131
+ pub fn to_metrics(file_list: RArray) -> magnus::error::Result<String> {
132
+ let mut map = EntryMap::new();
133
+ map.aggregate_files(file_list)?;
134
+
135
+ let sorted = map.into_sorted()?;
136
+
137
+ FileEntry::entries_to_string(sorted).map_err(|e| e.into())
138
+ }
139
+
140
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
141
+ /// metrics String.
142
+ pub fn to_protobuf(file_list: RArray) -> magnus::error::Result<String> {
143
+ let mut map = EntryMap::new();
144
+ map.aggregate_files(file_list)?;
145
+
146
+ let sorted = map.into_sorted()?;
147
+
148
+ FileEntry::entries_to_protobuf(sorted).map_err(|e| e.into())
149
+ }
150
+
151
+
152
+ /// Document-method: []
153
+ /// Document-method: slice
154
+ ///
155
+ /// call-seq: [](args)
156
+ ///
157
+ /// Element reference - with the following syntax:
158
+ ///
159
+ /// self[nth]
160
+ ///
161
+ /// retrieve the <em>nth</em> character
162
+ ///
163
+ /// self[start..last]
164
+ ///
165
+ /// return a substring from <em>start</em> to <em>last</em>
166
+ ///
167
+ /// self[start, length]
168
+ ///
169
+ /// return a substring of <em>lenght</em> characters from <em>start</em>
170
+ pub fn slice(rb_self: Obj<Self>, args: &[Value]) -> magnus::error::Result<RString> {
171
+ // The C implementation would trigger a GC cycle via `rb_gc_force_recycle`
172
+ // if the `MM_PROTECT` flag is set, but in practice this is never used.
173
+ // We omit this logic, particularly because `rb_gc_force_recycle` is a
174
+ // no-op as of Ruby 3.1.
175
+ let rs_self = &*rb_self;
176
+
177
+ let str = rs_self.str(rb_self)?;
178
+ rs_self._slice(rb_self, str, args)
179
+ }
180
+
181
+ fn _slice(
182
+ &self,
183
+ rb_self: Obj<Self>,
184
+ str: RString,
185
+ args: &[Value],
186
+ ) -> magnus::error::Result<RString> {
187
+ let substr: RString = str.funcall("[]", args)?;
188
+
189
+ // Track shared child strings which use the same backing storage.
190
+ if Self::rb_string_is_shared(substr) {
191
+ (*rb_self).track_rstring(rb_self, substr)?;
192
+ }
193
+
194
+ // The C implementation does this, perhaps to validate that the len we
195
+ // provided is actually being used.
196
+ (*rb_self).inner_mut(|inner| {
197
+ inner.set_len(str.len());
198
+ Ok(())
199
+ })?;
200
+
201
+ Ok(substr)
202
+ }
203
+
204
+ /// Document-method: msync
205
+ /// Document-method: sync
206
+ /// Document-method: flush
207
+ ///
208
+ /// call-seq: msync
209
+ ///
210
+ /// flush the file
211
+ pub fn sync(&self, args: &[Value]) -> magnus::error::Result<()> {
212
+ use nix::sys::mman::MsFlags;
213
+
214
+ let mut ms_async = false;
215
+ let args = scan_args::scan_args::<(), (Option<i32>,), (), (), (), ()>(args)?;
216
+
217
+ if let Some(flag) = args.optional.0 {
218
+ let flag = MsFlags::from_bits(flag).unwrap_or(MsFlags::empty());
219
+ ms_async = flag.contains(MsFlags::MS_ASYNC);
220
+ }
221
+
222
+ // The `memmap2` crate does not support the `MS_INVALIDATE` flag. We ignore that
223
+ // flag if passed in, checking only for `MS_ASYNC`. In practice no arguments are ever
224
+ // passed to this function, but we do this to maintain compatibility with the
225
+ // C implementation.
226
+ self.inner_mut(|inner| inner.flush(ms_async))
227
+ .map_err(|e| e.into())
228
+ }
229
+
230
+ /// Document-method: munmap
231
+ /// Document-method: unmap
232
+ ///
233
+ /// call-seq: munmap
234
+ ///
235
+ /// terminate the association
236
+ pub fn munmap(rb_self: Obj<Self>) -> magnus::error::Result<()> {
237
+ let rs_self = &*rb_self;
238
+
239
+ rs_self.inner_mut(|inner| {
240
+ // We are about to release the backing mmap for Ruby's String
241
+ // objects. If Ruby attempts to read from them the program will
242
+ // segfault. We update the length of all Strings to zero so Ruby
243
+ // does not attempt to access the now invalid address between now
244
+ // and when GC eventually reaps the objects.
245
+ //
246
+ // See the following for more detail:
247
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/39
248
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/41
249
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/merge_requests/80
250
+ inner.set_len(0);
251
+ Ok(())
252
+ })?;
253
+
254
+ // Update each String object to be zero-length.
255
+ let cap = util::cast_chk::<_, c_long>(rs_self.capacity(), "capacity")?;
256
+ rs_self.update_weak_map(rb_self, rs_self.as_mut_ptr(), cap)?;
257
+
258
+ // Remove the `InnerMmap` from the `RwLock`. This will drop
259
+ // end of this function, unmapping and closing the file.
260
+ let _ = rs_self.take_inner()?;
261
+ Ok(())
262
+ }
263
+
264
+ /// Fetch the `used` header from the `.db` file, the length
265
+ /// in bytes of the data written to the file.
266
+ pub fn load_used(&self) -> magnus::error::Result<Integer> {
267
+ let used = self.inner(|inner| inner.load_used())?;
268
+
269
+ Ok(Integer::from_u64(used as u64))
270
+ }
271
+
272
+ /// Update the `used` header for the `.db` file, the length
273
+ /// in bytes of the data written to the file.
274
+ pub fn save_used(rb_self: Obj<Self>, used: Fixnum) -> magnus::error::Result<Fixnum> {
275
+ let rs_self = &*rb_self;
276
+ let used_uint = used.to_u32()?;
277
+
278
+ // If the underlying mmap is smaller than the header, then resize to fit.
279
+ // The file has already been expanded to page size when first opened, so
280
+ // even if the map is less than HEADER_SIZE, we're not at risk of a
281
+ // SIGBUS.
282
+ if rs_self.capacity() < HEADER_SIZE {
283
+ rs_self.expand_to_fit(rb_self, HEADER_SIZE)?;
284
+ }
285
+
286
+ rs_self.inner_mut(|inner| inner.save_used(used_uint))?;
287
+
288
+ Ok(used)
289
+ }
290
+
291
+ /// Fetch the value associated with a key from the mmap.
292
+ /// If no entry is present, initialize with the default
293
+ /// value provided.
294
+ pub fn fetch_entry(
295
+ rb_self: Obj<Self>,
296
+ positions: RHash,
297
+ key: RString,
298
+ default_value: f64,
299
+ ) -> magnus::error::Result<f64> {
300
+ let rs_self = &*rb_self;
301
+ let position: Option<Fixnum> = positions.lookup(key)?;
302
+
303
+ if let Some(pos) = position {
304
+ let pos = pos.to_usize()?;
305
+ return rs_self
306
+ .inner(|inner| inner.load_value(pos))
307
+ .map_err(|e| e.into());
308
+ }
309
+
310
+ rs_self.check_expand(rb_self, key.len())?;
311
+
312
+ let value_offset: usize = rs_self.inner_mut(|inner| {
313
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
314
+ unsafe { inner.initialize_entry(key.as_slice(), default_value) }
315
+ })?;
316
+
317
+ // CAST: no-op on 64-bit, widening on 32-bit.
318
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
319
+
320
+ rs_self.load_value(value_offset)
321
+ }
322
+
323
+ /// Update the value of an existing entry, if present. Otherwise create a new entry
324
+ /// for the key.
325
+ pub fn upsert_entry(
326
+ rb_self: Obj<Self>,
327
+ positions: RHash,
328
+ key: RString,
329
+ value: f64,
330
+ ) -> magnus::error::Result<f64> {
331
+ let rs_self = &*rb_self;
332
+ let position: Option<Fixnum> = positions.lookup(key)?;
333
+
334
+ if let Some(pos) = position {
335
+ let pos = pos.to_usize()?;
336
+ return rs_self
337
+ .inner_mut(|inner| {
338
+ inner.save_value(pos, value)?;
339
+
340
+ // TODO just return `value` here instead of loading it?
341
+ // This is how the C implementation did it, but I don't
342
+ // see what the extra load gains us.
343
+ inner.load_value(pos)
344
+ })
345
+ .map_err(|e| e.into());
346
+ }
347
+
348
+ rs_self.check_expand(rb_self, key.len())?;
349
+
350
+ let value_offset: usize = rs_self.inner_mut(|inner| {
351
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
352
+ unsafe { inner.initialize_entry(key.as_slice(), value) }
353
+ })?;
354
+
355
+ // CAST: no-op on 64-bit, widening on 32-bit.
356
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
357
+
358
+ rs_self.load_value(value_offset)
359
+ }
360
+
361
+ /// Creates a Ruby String containing the section of the mmapped file that
362
+ /// has been written to.
363
+ fn str(&self, rb_self: Obj<Self>) -> magnus::error::Result<RString> {
364
+ let val_id = (*rb_self).inner(|inner| {
365
+ let ptr = inner.as_ptr();
366
+ let len = inner.len();
367
+
368
+ // SAFETY: This is safe so long as the data provided to Ruby meets its
369
+ // requirements. When unmapping the file this will no longer be the
370
+ // case, see the comment on `munmap` for how we handle this.
371
+ Ok(unsafe { rb_str_new_static(ptr as _, len as _) })
372
+ })?;
373
+
374
+ // SAFETY: We know that rb_str_new_static returns a VALUE.
375
+ let val = unsafe { Value::from_raw(val_id) };
376
+
377
+ // UNWRAP: We created this value as a string above.
378
+ let str = RString::from_value(val).unwrap();
379
+
380
+ // Freeze the root string so it can't be mutated out from under any
381
+ // substrings created. This object is never exposed to callers.
382
+ str.freeze();
383
+
384
+ // Track the RString in our `WeakMap` so we can update its address if
385
+ // we re-mmap the backing file.
386
+ (*rb_self).track_rstring(rb_self, str)?;
387
+
388
+ Ok(str)
389
+ }
390
+
391
+ /// If we reallocate, any live Ruby strings provided by the `str()` method
392
+ /// will be invalidated. We need to iterate over them using and update their
393
+ /// heap pointers to the newly allocated memory region.
394
+ fn update_weak_map(
395
+ &self,
396
+ rb_self: Obj<Self>,
397
+ old_ptr: *const c_char,
398
+ old_cap: c_long,
399
+ ) -> magnus::error::Result<()> {
400
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
401
+
402
+ let new_len = self.inner(|inner| util::cast_chk::<_, c_long>(inner.len(), "mmap len"))?;
403
+
404
+ // Iterate over the values of the `WeakMap`.
405
+ for val in tracker.enumeratorize("each_value", ()) {
406
+ let rb_string = val?;
407
+ let str = RString::from_value(rb_string)
408
+ .ok_or_else(|| err!(arg_error(), "weakmap value was not a string"))?;
409
+
410
+ // SAFETY: We're messing with Ruby's internals here, YOLO.
411
+ unsafe {
412
+ // Convert the magnus wrapper type to a raw string exposed by `rb_sys`,
413
+ // which provides access to its internals.
414
+ let mut raw_str = Self::rb_string_internal(str);
415
+
416
+ // Shared string have their own `ptr` and `len` values, but `aux`
417
+ // is the id of the parent string so the GC can track this
418
+ // dependency. The `ptr` will always be an offset from the base
419
+ // address of the mmap, and `len` will be the length of the mmap
420
+ // less the offset from the base.
421
+ if Self::rb_string_is_shared(str) && new_len > 0 {
422
+ // Calculate how far into the original mmap the shared string
423
+ // started and update to the equivalent address in the new
424
+ // one.
425
+ let substr_ptr = raw_str.as_ref().as_.heap.ptr;
426
+ let offset = substr_ptr.offset_from(old_ptr);
427
+
428
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr().offset(offset);
429
+
430
+ let current_len = str.len() as c_long;
431
+ let new_shared_len = old_cap + current_len;
432
+
433
+ self.update_rstring_len(raw_str, new_shared_len);
434
+ continue;
435
+ }
436
+
437
+ // Update the string to point to the new mmapped file.
438
+ // We're matching the behavior of Ruby's `str_new_static` function.
439
+ // See https://github.com/ruby/ruby/blob/e51014f9c05aa65cbf203442d37fef7c12390015/string.c#L1030-L1053
440
+ //
441
+ // We deliberately do _NOT_ increment the `capa` field of the
442
+ // string to match the new `len`. We were initially doing this,
443
+ // but consistently triggered GCs in the middle of updating the
444
+ // string pointers, causing a segfault.
445
+ //
446
+ // See https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/45
447
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr();
448
+ self.update_rstring_len(raw_str, new_len);
449
+ }
450
+ }
451
+
452
+ Ok(())
453
+ }
454
+
455
+ /// Check that the mmap is large enough to contain the value to be added,
456
+ /// and expand it to fit if necessary.
457
+ fn check_expand(&self, rb_self: Obj<Self>, key_len: usize) -> magnus::error::Result<()> {
458
+ // CAST: no-op on 32-bit, widening on 64-bit.
459
+ let used = self.inner(|inner| inner.load_used())? as usize;
460
+ let entry_len = RawEntry::calc_total_len(key_len)?;
461
+
462
+ // We need the mmapped region to contain at least one byte beyond the
463
+ // written data to create a NUL- terminated C string. Validate that
464
+ // new length does not exactly match or exceed the length of the mmap.
465
+ while self.capacity() <= used.add_chk(entry_len)? {
466
+ self.expand_to_fit(rb_self, self.capacity().mul_chk(2)?)?;
467
+ }
468
+
469
+ Ok(())
470
+ }
471
+
472
+ /// Expand the underlying file until it is long enough to fit `target_cap`.
473
+ /// This will remove the existing mmap, expand the file, then update any
474
+ /// strings held by the `WeakMap` to point to the newly mmapped address.
475
+ fn expand_to_fit(&self, rb_self: Obj<Self>, target_cap: usize) -> magnus::error::Result<()> {
476
+ if target_cap < self.capacity() {
477
+ return Err(err!(arg_error(), "Can't reduce the size of mmap"));
478
+ }
479
+
480
+ let mut new_cap = self.capacity();
481
+ while new_cap < target_cap {
482
+ new_cap = new_cap.mul_chk(2)?;
483
+ }
484
+
485
+ if new_cap != self.capacity() {
486
+ let old_ptr = self.as_mut_ptr();
487
+ let old_cap = util::cast_chk::<_, c_long>(self.capacity(), "capacity")?;
488
+
489
+ // Drop the old mmap.
490
+ let (mut file, path) = self.take_inner()?.munmap();
491
+
492
+ self.expand_file(&mut file, &path, target_cap)?;
493
+
494
+ // Re-mmap the expanded file.
495
+ let new_inner = InnerMmap::reestablish(path, file, target_cap)?;
496
+
497
+ self.insert_inner(new_inner)?;
498
+
499
+ return self.update_weak_map(rb_self, old_ptr, old_cap);
500
+ }
501
+
502
+ Ok(())
503
+ }
504
+
505
+ /// Use lseek(2) to seek past the end of the file and write a NUL byte. This
506
+ /// creates a file hole that expands the size of the file without consuming
507
+ /// disk space until it is actually written to.
508
+ fn expand_file(&self, file: &mut File, path: &Path, len: usize) -> Result<()> {
509
+ if len == 0 {
510
+ return Err(MmapError::overflowed(0, -1, "adding"));
511
+ }
512
+
513
+ // CAST: no-op on 64-bit, widening on 32-bit.
514
+ let len = len as u64;
515
+
516
+ match file.seek(SeekFrom::Start(len - 1)) {
517
+ Ok(_) => {}
518
+ Err(_) => {
519
+ return Err(MmapError::with_errno(format!("Can't lseek {}", len - 1)));
520
+ }
521
+ }
522
+
523
+ match file.write(&[0x0]) {
524
+ Ok(1) => {}
525
+ _ => {
526
+ return Err(MmapError::with_errno(format!(
527
+ "Can't extend {}",
528
+ path.display()
529
+ )));
530
+ }
531
+ }
532
+
533
+ Ok(())
534
+ }
535
+
536
+ fn track_rstring(&self, rb_self: Obj<Self>, str: RString) -> magnus::error::Result<()> {
537
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
538
+
539
+ // Use the string's Id as the key in the `WeakMap`.
540
+ let key = str.as_raw();
541
+ let _: Value = tracker.funcall("[]=", (key, str))?;
542
+ Ok(())
543
+ }
544
+
545
+ /// The total capacity of the underlying mmap.
546
+ #[inline]
547
+ fn capacity(&self) -> usize {
548
+ // UNWRAP: This is actually infallible, but we need to
549
+ // wrap it in a `Result` for use with `inner()`.
550
+ self.inner(|inner| Ok(inner.capacity())).unwrap()
551
+ }
552
+
553
+ fn load_value(&self, position: usize) -> magnus::error::Result<f64> {
554
+ self.inner(|inner| inner.load_value(position))
555
+ .map_err(|e| e.into())
556
+ }
557
+
558
+ fn as_mut_ptr(&self) -> *mut c_char {
559
+ // UNWRAP: This is actually infallible, but we need to
560
+ // wrap it in a `Result` for use with `inner()`.
561
+ self.inner(|inner| Ok(inner.as_mut_ptr() as *mut c_char))
562
+ .unwrap()
563
+ }
564
+
565
+ /// Takes a closure with immutable access to InnerMmap. Will fail if the inner
566
+ /// object has a mutable borrow or has been dropped.
567
+ fn inner<F, T>(&self, func: F) -> Result<T>
568
+ where
569
+ F: FnOnce(&InnerMmap) -> Result<T>,
570
+ {
571
+ let inner_opt = self.0.try_read().map_err(|_| MmapError::ConcurrentAccess)?;
572
+
573
+ let inner = inner_opt.as_ref().ok_or(MmapError::UnmappedFile)?;
574
+
575
+ func(inner)
576
+ }
577
+
578
+ /// Takes a closure with mutable access to InnerMmap. Will fail if the inner
579
+ /// object has an existing mutable borrow, or has been dropped.
580
+ fn inner_mut<F, T>(&self, func: F) -> Result<T>
581
+ where
582
+ F: FnOnce(&mut InnerMmap) -> Result<T>,
583
+ {
584
+ let mut inner_opt = self
585
+ .0
586
+ .try_write()
587
+ .map_err(|_| MmapError::ConcurrentAccess)?;
588
+
589
+ let inner = inner_opt.as_mut().ok_or(MmapError::UnmappedFile)?;
590
+
591
+ func(inner)
592
+ }
593
+
594
+ /// Take ownership of the `InnerMmap` from the `RwLock`.
595
+ /// Will fail if a mutable borrow is already held or the inner
596
+ /// object has been dropped.
597
+ fn take_inner(&self) -> Result<InnerMmap> {
598
+ let mut inner_opt = self
599
+ .0
600
+ .try_write()
601
+ .map_err(|_| MmapError::ConcurrentAccess)?;
602
+ match (*inner_opt).take() {
603
+ Some(i) => Ok(i),
604
+ None => Err(MmapError::UnmappedFile),
605
+ }
606
+ }
607
+
608
+ /// Move `new_inner` into the `RwLock`.
609
+ /// Will return an error if a mutable borrow is already held.
610
+ fn insert_inner(&self, new_inner: InnerMmap) -> Result<()> {
611
+ let mut inner_opt = self
612
+ .0
613
+ .try_write()
614
+ .map_err(|_| MmapError::ConcurrentAccess)?;
615
+ (*inner_opt).replace(new_inner);
616
+
617
+ Ok(())
618
+ }
619
+
620
+ /// Check if an RString is shared. Shared string use the same underlying
621
+ /// storage as their parent, taking an offset from the start. By default
622
+ /// they must run to the end of the parent string.
623
+ fn rb_string_is_shared(rb_str: RString) -> bool {
624
+ // SAFETY: We only hold a reference to the raw object for the duration
625
+ // of this function, and no Ruby code is called.
626
+ let flags = unsafe {
627
+ let raw_str = Self::rb_string_internal(rb_str);
628
+ raw_str.as_ref().basic.flags
629
+ };
630
+ let shared_flags = STR_SHARED | STR_NOEMBED;
631
+
632
+ flags & shared_flags == shared_flags
633
+ }
634
+
635
+ /// Convert `magnus::RString` into the raw binding used by `rb_sys::RString`.
636
+ /// We need this to manually change the pointer and length values for strings
637
+ /// when moving the mmap to a new file.
638
+ ///
639
+ /// SAFETY: Calling Ruby code while the returned object is held may result
640
+ /// in it being mutated or dropped.
641
+ unsafe fn rb_string_internal(rb_str: RString) -> NonNull<rb_sys::RString> {
642
+ mem::transmute::<RString, NonNull<rb_sys::RString>>(rb_str)
643
+ }
644
+
645
/// Overwrite the length stored in a raw Ruby string (builds with
/// `ruby_lte_3_2`, where the length is written to `as_.heap.len`).
///
/// # Safety
///
/// `raw_str` must point to a live Ruby string struct.
#[cfg(ruby_lte_3_2)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    let target = raw_str.as_mut();
    target.as_.heap.len = new_len;
}

/// Overwrite the length stored in a raw Ruby string (builds with
/// `ruby_gte_3_3`, where the length is written to the top-level `len`).
///
/// # Safety
///
/// `raw_str` must point to a live Ruby string struct.
#[cfg(ruby_gte_3_3)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    let target = raw_str.as_mut();
    target.len = new_len;
}
654
+ }
655
+
656
+ #[cfg(test)]
657
+ mod test {
658
+ use magnus::error::Error;
659
+ use magnus::eval;
660
+ use magnus::Range;
661
+ use nix::unistd::{sysconf, SysconfVar};
662
+ use std::mem::size_of;
663
+
664
+ use super::*;
665
+ use crate::raw_entry::RawEntry;
666
+ use crate::testhelper::TestFile;
667
+
668
+ /// Create a wrapped MmapedFile object.
669
+ fn create_obj() -> Obj<MmapedFile> {
670
+ let TestFile {
671
+ file: _file,
672
+ path,
673
+ dir: _dir,
674
+ } = TestFile::new(&[0u8; 8]);
675
+
676
+ let path_str = path.display().to_string();
677
+ let rpath = RString::new(&path_str);
678
+
679
+ eval!("FastMmapedFileRs.new(path)", path = rpath).unwrap()
680
+ }
681
+
682
+ /// Add three entries to the mmap. Expected length is 56, 3x 16-byte
683
+ /// entries with 8-byte header.
684
+ fn populate_entries(rb_self: &Obj<MmapedFile>) -> RHash {
685
+ let positions = RHash::from_value(eval("{}").unwrap()).unwrap();
686
+
687
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("a"), 0.0).unwrap();
688
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("b"), 1.0).unwrap();
689
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("c"), 2.0).unwrap();
690
+
691
+ positions
692
+ }
693
+
694
#[test]
fn test_new() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let TestFile {
        file,
        path,
        dir: _dir,
    } = TestFile::new(&[0u8; 8]);

    let rpath = RString::new(&path.display().to_string());

    // Construction through Ruby must succeed.
    let created: std::result::Result<Obj<MmapedFile>, Error> =
        eval!("FastMmapedFileRs.new(path)", path = rpath);
    assert!(created.is_ok());
    let obj = created.unwrap();

    // The tracker ivar must be an `ObjectSpace::WeakMap`.
    let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
    assert_eq!("ObjectSpace::WeakMap", tracker.class().inspect());

    // The backing file must have been grown to one page.
    let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as u64;
    let file_len = file.metadata().unwrap().len();
    assert_eq!(page_size, file_len);

    // The `used` header must equal the header size.
    let used = obj.load_used().unwrap().to_u64().unwrap();
    assert_eq!(HEADER_SIZE as u64, used);
}
730
+
731
#[test]
fn test_slice() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let _ = populate_entries(&obj);

    // The header must report the new used length (56 bytes).
    let header_range = Range::new(0, HEADER_SIZE, true).unwrap().as_value();
    let header = MmapedFile::slice(obj, &[header_range]).unwrap();
    let header_bytes = unsafe { header.as_slice() };
    assert_eq!([56, 0, 0, 0, 0, 0, 0, 0], header_bytes);

    // Slice out the first entry, which follows the header.
    let entry_range = Range::new(HEADER_SIZE, 24, true).unwrap().as_value();
    let entry = MmapedFile::slice(obj, &[entry_range]).unwrap();

    // Key length prefix.
    assert_eq!(1u32.to_ne_bytes(), unsafe { &entry.as_slice()[0..4] });

    // Key followed by its padding.
    assert_eq!("a ", unsafe {
        String::from_utf8_lossy(&entry.as_slice()[4..8])
    });

    // Stored value.
    assert_eq!(0.0f64.to_ne_bytes(), unsafe { &entry.as_slice()[8..16] });
}
763
+
764
#[test]
fn test_slice_resize() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    /// Walk the weak map and verify that the parent and child strings point
    /// into the current mapping, and that nothing else is being tracked.
    fn assert_internals(
        obj: Obj<MmapedFile>,
        parent_id: c_ulong,
        child_id: c_ulong,
        unshared_id: c_ulong,
    ) {
        let mapping = &*obj;
        let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();

        let base = mapping.as_mut_ptr();
        let cap = mapping.capacity();

        let mut saw_parent = false;
        let mut saw_child = false;

        for tracked in tracker.enumeratorize("each_value", ()) {
            let tracked = RString::from_value(tracked.unwrap()).unwrap();

            unsafe {
                let raw = MmapedFile::rb_string_internal(tracked);
                let id = tracked.as_raw();

                if id == child_id {
                    // The child records its parent and sits at an offset
                    // such that it ends where the mapping ends.
                    assert_eq!(parent_id, raw.as_ref().as_.heap.aux.shared);

                    let child_offset = cap as isize - tracked.len() as isize;
                    assert_eq!(base.offset(child_offset), raw.as_ref().as_.heap.ptr);

                    saw_child = true;
                } else if id == parent_id {
                    assert_eq!(parent_id, tracked.as_raw());

                    // The parent spans the whole mapping and stays frozen.
                    assert_eq!(base, raw.as_ref().as_.heap.ptr);
                    assert_eq!(cap as c_long, tracked.len() as c_long);
                    assert!(raw.as_ref().basic.flags & (STR_SHARED | STR_NOEMBED) > 0);
                    assert!(tracked.is_frozen());

                    saw_parent = true;
                } else if id == unshared_id {
                    panic!("tracking unshared string");
                } else {
                    panic!("unknown string");
                }
            }
        }

        assert!(saw_parent && saw_child);
    }

    let obj = create_obj();
    let _ = populate_entries(&obj);

    let mapping = &*obj;

    // Create a string containing the full mmap.
    let parent_str = mapping.str(obj).unwrap();
    let parent_id = parent_str.as_raw();

    // Ruby's shared strings are only created when they go to the end of
    // original string.
    let len = mapping.inner(|inner| Ok(inner.len())).unwrap();
    let shareable_range = Range::new(1, len - 1, false).unwrap().as_value();

    // This string should re-use the parent's buffer with an offset and have
    // the parent's id in `as.heap.aux.shared`
    let child_str = mapping._slice(obj, parent_str, &[shareable_range]).unwrap();
    let child_id = child_str.as_raw();

    // A range that does not reach the end of the parent will not be shared.
    assert!(len > 4);
    let unshareable_range = Range::new(0, 4, false).unwrap().as_value();

    // This string should NOT be tracked, it should own its own buffer.
    let unshared_str = mapping
        ._slice(obj, parent_str, &[unshareable_range])
        .unwrap();
    let unshared_id = unshared_str.as_raw();
    assert!(!MmapedFile::rb_string_is_shared(unshared_str));

    assert_internals(obj, parent_id, child_id, unshared_id);

    // Expand a bunch to ensure we remap
    let ptr_before = mapping.as_mut_ptr();
    for _ in 0..16 {
        let doubled = mapping.capacity() * 2;
        mapping.expand_to_fit(obj, doubled).unwrap();
    }
    let ptr_after = mapping.as_mut_ptr();
    assert!(ptr_before != ptr_after);

    // If we haven't updated the pointer to the newly remapped file this will segfault.
    let _: Value = eval!("puts parent", parent = parent_str).unwrap();
    let _: Value = eval!("puts child", child = child_str).unwrap();
    let _: Value = eval!("puts unshared", unshared = unshared_str).unwrap();

    // Confirm that tracked strings are still valid.
    assert_internals(obj, parent_id, child_id, unshared_id);
}
865
+
866
#[test]
fn test_dont_fill_mmap() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let positions = populate_entries(&obj);

    let mapping = &*obj;

    mapping.expand_to_fit(obj, 1024).unwrap();

    let used_before = mapping.inner(|inner| inner.load_used()).unwrap() as usize;
    let cap_before = mapping.inner(|inner| Ok(inner.len())).unwrap();

    // Create a new entry that exactly fills the capacity of the mmap.
    let key_len = cap_before - used_before - HEADER_SIZE - size_of::<f64>() - size_of::<u32>();
    let filled_len = RawEntry::calc_total_len(key_len).unwrap() + used_before;
    assert_eq!(cap_before, filled_len);

    let key = String::from_utf8(vec![b'A'; key_len]).unwrap();
    MmapedFile::upsert_entry(obj, positions, RString::new(&key), 1.0).unwrap();

    // Validate that we have expanded the mmap, ensuring a trailing NUL.
    assert!(mapping.capacity() > cap_before);
}
896
+ }