vinted-prometheus-client-mmap 1.5.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +5 -0
  3. data/ext/fast_mmaped_file_rs/Cargo.toml +40 -0
  4. data/ext/fast_mmaped_file_rs/README.md +52 -0
  5. data/ext/fast_mmaped_file_rs/build.rs +7 -0
  6. data/ext/fast_mmaped_file_rs/extconf.rb +28 -0
  7. data/ext/fast_mmaped_file_rs/src/error.rs +174 -0
  8. data/ext/fast_mmaped_file_rs/src/exemplars.rs +25 -0
  9. data/ext/fast_mmaped_file_rs/src/file_entry.rs +1252 -0
  10. data/ext/fast_mmaped_file_rs/src/file_info.rs +240 -0
  11. data/ext/fast_mmaped_file_rs/src/lib.rs +89 -0
  12. data/ext/fast_mmaped_file_rs/src/macros.rs +14 -0
  13. data/ext/fast_mmaped_file_rs/src/map.rs +519 -0
  14. data/ext/fast_mmaped_file_rs/src/metrics.proto +153 -0
  15. data/ext/fast_mmaped_file_rs/src/mmap/inner.rs +775 -0
  16. data/ext/fast_mmaped_file_rs/src/mmap.rs +977 -0
  17. data/ext/fast_mmaped_file_rs/src/raw_entry.rs +547 -0
  18. data/ext/fast_mmaped_file_rs/src/testhelper.rs +222 -0
  19. data/ext/fast_mmaped_file_rs/src/util.rs +140 -0
  20. data/lib/.DS_Store +0 -0
  21. data/lib/2.7/fast_mmaped_file_rs.so +0 -0
  22. data/lib/3.0/fast_mmaped_file_rs.so +0 -0
  23. data/lib/3.1/fast_mmaped_file_rs.so +0 -0
  24. data/lib/3.2/fast_mmaped_file_rs.so +0 -0
  25. data/lib/3.3/fast_mmaped_file_rs.so +0 -0
  26. data/lib/prometheus/.DS_Store +0 -0
  27. data/lib/prometheus/client/configuration.rb +24 -0
  28. data/lib/prometheus/client/counter.rb +27 -0
  29. data/lib/prometheus/client/formats/protobuf.rb +93 -0
  30. data/lib/prometheus/client/formats/text.rb +85 -0
  31. data/lib/prometheus/client/gauge.rb +40 -0
  32. data/lib/prometheus/client/helper/entry_parser.rb +132 -0
  33. data/lib/prometheus/client/helper/file_locker.rb +50 -0
  34. data/lib/prometheus/client/helper/json_parser.rb +23 -0
  35. data/lib/prometheus/client/helper/metrics_processing.rb +45 -0
  36. data/lib/prometheus/client/helper/metrics_representation.rb +51 -0
  37. data/lib/prometheus/client/helper/mmaped_file.rb +64 -0
  38. data/lib/prometheus/client/helper/plain_file.rb +29 -0
  39. data/lib/prometheus/client/histogram.rb +80 -0
  40. data/lib/prometheus/client/label_set_validator.rb +85 -0
  41. data/lib/prometheus/client/metric.rb +80 -0
  42. data/lib/prometheus/client/mmaped_dict.rb +83 -0
  43. data/lib/prometheus/client/mmaped_value.rb +164 -0
  44. data/lib/prometheus/client/page_size.rb +17 -0
  45. data/lib/prometheus/client/push.rb +203 -0
  46. data/lib/prometheus/client/rack/collector.rb +88 -0
  47. data/lib/prometheus/client/rack/exporter.rb +102 -0
  48. data/lib/prometheus/client/registry.rb +65 -0
  49. data/lib/prometheus/client/simple_value.rb +31 -0
  50. data/lib/prometheus/client/summary.rb +69 -0
  51. data/lib/prometheus/client/support/puma.rb +44 -0
  52. data/lib/prometheus/client/support/unicorn.rb +35 -0
  53. data/lib/prometheus/client/uses_value_type.rb +20 -0
  54. data/lib/prometheus/client/version.rb +5 -0
  55. data/lib/prometheus/client.rb +58 -0
  56. data/lib/prometheus.rb +3 -0
  57. metadata +210 -0
@@ -0,0 +1,977 @@
1
+ use magnus::exception::*;
2
+ use magnus::prelude::*;
3
+ use magnus::rb_sys::{AsRawValue, FromRawValue};
4
+ use magnus::typed_data::Obj;
5
+ use magnus::value::Fixnum;
6
+ use magnus::{eval, scan_args, Error, Integer, RArray, RClass, RHash, RString, Value};
7
+ use nix::libc::{c_char, c_long, c_ulong};
8
+ use rb_sys::rb_str_new_static;
9
+ use std::fs::File;
10
+ use std::io::{prelude::*, SeekFrom};
11
+ use std::mem;
12
+ use std::path::Path;
13
+ use std::ptr::NonNull;
14
+ use std::sync::RwLock;
15
+
16
+ use crate::err;
17
+ use crate::error::MmapError;
18
+ use crate::exemplars::Exemplar;
19
+ use crate::file_entry::FileEntry;
20
+ use crate::map::EntryMap;
21
+ use crate::raw_entry::RawEntry;
22
+ use crate::util::{self, CheckedOps};
23
+ use crate::Result;
24
+ use crate::HEADER_SIZE;
25
+ use inner::InnerMmap;
26
+
27
+ mod inner;
28
+
29
+ /// The Ruby `STR_NOEMBED` flag, aka `FL_USER1`.
30
+ const STR_NOEMBED: c_ulong = 1 << (13);
31
+ /// The Ruby `STR_SHARED` flag, aka `FL_USER2`.
32
+ const STR_SHARED: c_ulong = 1 << (14);
33
+
34
+ /// A Rust struct wrapped in a Ruby object, providing access to a memory-mapped
35
+ /// file used to store, update, and read out Prometheus metrics.
36
+ ///
37
+ /// - File format:
38
+ /// - Header:
39
+ /// - 4 bytes: u32 - total size of metrics in file.
40
+ /// - 4 bytes: NUL byte padding.
41
+ /// - Repeating metrics entries:
42
+ /// - 4 bytes: u32 - entry JSON string size.
43
+ /// - `N` bytes: UTF-8 encoded JSON string used as entry key.
44
+ /// - (8 - (4 + `N`) % 8) bytes: 1 to 8 padding space (0x20) bytes to
45
+ /// reach 8-byte alignment.
46
+ /// - 8 bytes: f64 - entry value.
47
+ ///
48
+ /// All numbers are saved in native-endian format.
49
+ ///
50
+ /// Generated via [luismartingarcia/protocol](https://github.com/luismartingarcia/protocol):
51
+ ///
52
+ ///
53
+ /// ```
54
+ /// protocol "Used:4,Pad:4,K1 Size:4,K1 Name:4,K1 Value:8,K2 Size:4,K2 Name:4,K2 Value:8"
55
+ ///
56
+ /// 0 1 2 3
57
+ /// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
58
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
59
+ /// | Used | Pad |K1 Size|K1 Name| K1 Value |K2 Size|K2 Name|
60
+ /// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
61
+ /// | K2 Value |
62
+ /// +-+-+-+-+-+-+-+
63
+ /// ```
64
+ //
65
+ // The API imposed by `magnus` requires all methods to use shared borrows.
66
+ // This means we can't store any mutable state in the top-level struct,
67
+ // and must store the interior data behind a `RwLock`, which adds run-time
68
+ // checks that mutable operations have no concurrent read or writes.
69
+ //
70
+ // We are further limited by the need to support subclassing in Ruby, which
71
+ // requires us to define an allocation function for the class, the
72
+ // `magnus::class::define_alloc_func()` function. This needs to support the
73
+ // `Default` trait, so a `File` cannot be directly held by the object being
74
+ // constructed. Having the `RwLock` hold an `Option` of the interior object
75
+ // resolves this.
76
+ #[derive(Debug, Default)]
77
+ #[magnus::wrap(class = "FastMmapedFileRs", free_immediately, size)]
78
+ pub struct MmapedFile(RwLock<Option<InnerMmap>>);
79
+
80
+ use std::time::{SystemTime, UNIX_EPOCH};
81
+
82
+ impl MmapedFile {
83
+ /// call-seq:
84
+ /// new(file)
85
+ ///
86
+ /// create a new Mmap object
87
+ ///
88
+ /// * <em>file</em>
89
+ ///
90
+ ///
91
+ /// Creates a mapping that's shared with all other processes
92
+ /// mapping the same area of the file.
93
+ pub fn new(klass: RClass, args: &[Value]) -> magnus::error::Result<Obj<Self>> {
94
+ let args = scan_args::scan_args::<(RString,), (), (), (), (), ()>(args)?;
95
+ let path = args.required.0;
96
+
97
+ let lock = MmapedFile(RwLock::new(None));
98
+ let obj = Obj::wrap_as(lock, klass);
99
+
100
+ let _: Value = obj.funcall("initialize", (path,))?;
101
+
102
+ Ok(obj)
103
+ }
104
+
105
+ /// Initialize a new `FastMmapedFileRs` object. This must be defined in
106
+ /// order for inheritance to work.
107
+ pub fn initialize(rb_self: Obj<Self>, fname: String) -> magnus::error::Result<()> {
108
+ let file = File::options()
109
+ .read(true)
110
+ .write(true)
111
+ .open(&fname)
112
+ .map_err(|_| err!(arg_error(), "Can't open {}", fname))?;
113
+
114
+ let inner = InnerMmap::new(fname.into(), file)?;
115
+ rb_self.insert_inner(inner)?;
116
+
117
+ let weak_klass = RClass::from_value(eval("ObjectSpace::WeakMap")?)
118
+ .ok_or_else(|| err!(no_method_error(), "unable to create WeakMap"))?;
119
+ let weak_obj_tracker = weak_klass.new_instance(())?;
120
+
121
+ // We will need to iterate over strings backed by the mmapped file, but
122
+ // don't want to prevent the GC from reaping them when the Ruby code
123
+ // has finished with them. `ObjectSpace::WeakMap` allows us to track
124
+ // them without extending their lifetime.
125
+ //
126
+ // https://ruby-doc.org/core-3.0.0/ObjectSpace/WeakMap.html
127
+ rb_self.ivar_set("@weak_obj_tracker", weak_obj_tracker)?;
128
+
129
+ Ok(())
130
+ }
131
+
132
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
133
+ /// metrics String.
134
+ pub fn to_metrics(file_list: RArray) -> magnus::error::Result<String> {
135
+ let mut map = EntryMap::new();
136
+ map.aggregate_files(file_list)?;
137
+
138
+ let sorted = map.into_sorted()?;
139
+
140
+ FileEntry::entries_to_string(sorted).map_err(|e| e.into())
141
+ }
142
+
143
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
144
+ /// metrics String.
145
+ pub fn to_protobuf(file_list: RArray) -> magnus::error::Result<String> {
146
+ let mut map = EntryMap::new();
147
+ map.aggregate_files(file_list)?;
148
+
149
+ let sorted = map.into_sorted()?;
150
+
151
+ FileEntry::entries_to_protobuf(sorted).map_err(|e| e.into())
152
+ }
153
+
154
+
155
+ /// Document-method: []
156
+ /// Document-method: slice
157
+ ///
158
+ /// call-seq: [](args)
159
+ ///
160
+ /// Element reference - with the following syntax:
161
+ ///
162
+ /// self[nth]
163
+ ///
164
+ /// retrieve the <em>nth</em> character
165
+ ///
166
+ /// self[start..last]
167
+ ///
168
+ /// return a substring from <em>start</em> to <em>last</em>
169
+ ///
170
+ /// self[start, length]
171
+ ///
172
+ /// return a substring of <em>lenght</em> characters from <em>start</em>
173
+ pub fn slice(rb_self: Obj<Self>, args: &[Value]) -> magnus::error::Result<RString> {
174
+ // The C implementation would trigger a GC cycle via `rb_gc_force_recycle`
175
+ // if the `MM_PROTECT` flag is set, but in practice this is never used.
176
+ // We omit this logic, particularly because `rb_gc_force_recycle` is a
177
+ // no-op as of Ruby 3.1.
178
+ let rs_self = &*rb_self;
179
+
180
+ let str = rs_self.str(rb_self)?;
181
+ rs_self._slice(rb_self, str, args)
182
+ }
183
+
184
+ fn _slice(
185
+ &self,
186
+ rb_self: Obj<Self>,
187
+ str: RString,
188
+ args: &[Value],
189
+ ) -> magnus::error::Result<RString> {
190
+ let substr: RString = str.funcall("[]", args)?;
191
+
192
+ // Track shared child strings which use the same backing storage.
193
+ if Self::rb_string_is_shared(substr) {
194
+ (*rb_self).track_rstring(rb_self, substr)?;
195
+ }
196
+
197
+ // The C implementation does this, perhaps to validate that the len we
198
+ // provided is actually being used.
199
+ (*rb_self).inner_mut(|inner| {
200
+ inner.set_len(str.len());
201
+ Ok(())
202
+ })?;
203
+
204
+ Ok(substr)
205
+ }
206
+
207
+ /// Document-method: msync
208
+ /// Document-method: sync
209
+ /// Document-method: flush
210
+ ///
211
+ /// call-seq: msync
212
+ ///
213
+ /// flush the file
214
+ pub fn sync(&self, args: &[Value]) -> magnus::error::Result<()> {
215
+ use nix::sys::mman::MsFlags;
216
+
217
+ let mut ms_async = false;
218
+ let args = scan_args::scan_args::<(), (Option<i32>,), (), (), (), ()>(args)?;
219
+
220
+ if let Some(flag) = args.optional.0 {
221
+ let flag = MsFlags::from_bits(flag).unwrap_or(MsFlags::empty());
222
+ ms_async = flag.contains(MsFlags::MS_ASYNC);
223
+ }
224
+
225
+ // The `memmap2` crate does not support the `MS_INVALIDATE` flag. We ignore that
226
+ // flag if passed in, checking only for `MS_ASYNC`. In practice no arguments are ever
227
+ // passed to this function, but we do this to maintain compatibility with the
228
+ // C implementation.
229
+ self.inner_mut(|inner| inner.flush(ms_async))
230
+ .map_err(|e| e.into())
231
+ }
232
+
233
/// Document-method: munmap
/// Document-method: unmap
///
/// call-seq: munmap
///
/// Terminate the association between this object and the mapped file.
///
/// The steps run in a fixed order: the inner length is set to zero, every
/// tracked Ruby String is updated through `update_weak_map`, and only then
/// is the `InnerMmap` taken out of the lock.
///
/// # Errors
///
/// Fails if the inner mmap is unavailable, or if updating the tracked
/// strings fails.
pub fn munmap(rb_self: Obj<Self>) -> magnus::error::Result<()> {
    let rs_self = &*rb_self;

    rs_self.inner_mut(|inner| {
        // We are about to release the backing mmap for Ruby's String
        // objects. If Ruby attempts to read from them the program will
        // segfault. We update the length of all Strings to zero so Ruby
        // does not attempt to access the now invalid address between now
        // and when GC eventually reaps the objects.
        //
        // See the following for more detail:
        // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/39
        // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/41
        // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/merge_requests/80
        inner.set_len(0);
        Ok(())
    })?;

    // Update each String object to be zero-length. This must happen while
    // the `InnerMmap` is still in place, because `capacity()` and
    // `as_mut_ptr()` both read from it.
    let cap = util::cast_chk::<_, c_long>(rs_self.capacity(), "capacity")?;
    rs_self.update_weak_map(rb_self, rs_self.as_mut_ptr(), cap)?;

    // Remove the `InnerMmap` from the `RwLock`. The taken value is dropped
    // immediately, unmapping and closing the file.
    let _ = rs_self.take_inner()?;
    Ok(())
}
266
+
267
+ /// Fetch the `used` header from the `.db` file, the length
268
+ /// in bytes of the data written to the file.
269
+ pub fn load_used(&self) -> magnus::error::Result<Integer> {
270
+ let used = self.inner(|inner| inner.load_used())?;
271
+
272
+ Ok(Integer::from_u64(used as u64))
273
+ }
274
+
275
+ /// Update the `used` header for the `.db` file, the length
276
+ /// in bytes of the data written to the file.
277
+ pub fn save_used(rb_self: Obj<Self>, used: Fixnum) -> magnus::error::Result<Fixnum> {
278
+ let rs_self = &*rb_self;
279
+ let used_uint = used.to_u32()?;
280
+
281
+ // If the underlying mmap is smaller than the header, then resize to fit.
282
+ // The file has already been expanded to page size when first opened, so
283
+ // even if the map is less than HEADER_SIZE, we're not at risk of a
284
+ // SIGBUS.
285
+ if rs_self.capacity() < HEADER_SIZE {
286
+ rs_self.expand_to_fit(rb_self, HEADER_SIZE)?;
287
+ }
288
+
289
+ rs_self.inner_mut(|inner| inner.save_used(used_uint))?;
290
+
291
+ Ok(used)
292
+ }
293
+
294
+ /// Fetch the value associated with a key from the mmap.
295
+ /// If no entry is present, initialize with the default
296
+ /// value provided.
297
+ pub fn fetch_entry(
298
+ rb_self: Obj<Self>,
299
+ positions: RHash,
300
+ key: RString,
301
+ default_value: f64,
302
+ ) -> magnus::error::Result<f64> {
303
+ let rs_self = &*rb_self;
304
+ let position: Option<Fixnum> = positions.lookup(key)?;
305
+
306
+ if let Some(pos) = position {
307
+ let pos = pos.to_usize()?;
308
+ return rs_self
309
+ .inner(|inner| inner.load_value(pos))
310
+ .map_err(|e| e.into());
311
+ }
312
+
313
+ rs_self.check_expand(rb_self, key.len())?;
314
+
315
+ let value_offset: usize = rs_self.inner_mut(|inner| {
316
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
317
+ unsafe { inner.initialize_entry(key.as_slice(), default_value) }
318
+ })?;
319
+
320
+ // CAST: no-op on 64-bit, widening on 32-bit.
321
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
322
+
323
+ rs_self.load_value(value_offset)
324
+ }
325
+
326
+ pub fn upsert_exemplar(
327
+ rb_self: Obj<Self>,
328
+ positions: RHash,
329
+ key: RString,
330
+ value: f64,
331
+ exemplar_name: RString,
332
+ exemplar_value: RString,
333
+ ) -> magnus::error::Result<f64> {
334
+ let rs_self = &*rb_self;
335
+ let position: Option<Fixnum> = positions.lookup(key)?;
336
+
337
+ let start = SystemTime::now();
338
+ let since_the_epoch = start
339
+ .duration_since(UNIX_EPOCH)
340
+ .expect("Time went backwards");
341
+
342
+ let ex: Exemplar = Exemplar {
343
+ label_name: unsafe { exemplar_name.as_str().unwrap().into() },
344
+ label_value: unsafe { exemplar_value.as_str().unwrap().into() },
345
+ value: value,
346
+ timestamp: since_the_epoch.as_nanos(),
347
+ };
348
+
349
+ if let Some(pos) = position {
350
+ let pos = pos.to_usize()?;
351
+ return rs_self
352
+ .inner_mut(|inner| {
353
+ inner.save_exemplar(pos, ex)?;
354
+
355
+ // TODO just return `value` here instead of loading it?
356
+ // This is how the C implementation did it, but I don't
357
+ // see what the extra load gains us.
358
+ let ex = inner.load_exemplar(pos);
359
+
360
+ Ok(ex.unwrap().value)
361
+ })
362
+ .map_err(|e| e.into());
363
+ }
364
+
365
+
366
+ rs_self.check_expand_exemplar(rb_self, key.len())?;
367
+
368
+ let value_offset: usize = rs_self.inner_mut(|inner| {
369
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
370
+ unsafe { inner.initialize_entry_exemplar(key.as_slice(), ex) }
371
+ })?;
372
+
373
+ // CAST: no-op on 64-bit, widening on 32-bit.
374
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
375
+
376
+ let ex = rs_self.load_exemplar(value_offset);
377
+
378
+ Ok(ex.unwrap().value)
379
+ }
380
+
381
+ /// Update the value of an existing entry, if present. Otherwise create a new entry
382
+ /// for the key.
383
+ pub fn upsert_entry(
384
+ rb_self: Obj<Self>,
385
+ positions: RHash,
386
+ key: RString,
387
+ value: f64,
388
+ ) -> magnus::error::Result<f64> {
389
+ let rs_self = &*rb_self;
390
+ let position: Option<Fixnum> = positions.lookup(key)?;
391
+
392
+ if let Some(pos) = position {
393
+ let pos = pos.to_usize()?;
394
+ return rs_self
395
+ .inner_mut(|inner| {
396
+ inner.save_value(pos, value)?;
397
+
398
+ // TODO just return `value` here instead of loading it?
399
+ // This is how the C implementation did it, but I don't
400
+ // see what the extra load gains us.
401
+ inner.load_value(pos)
402
+ })
403
+ .map_err(|e| e.into());
404
+ }
405
+
406
+ rs_self.check_expand(rb_self, key.len())?;
407
+
408
+ let value_offset: usize = rs_self.inner_mut(|inner| {
409
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
410
+ unsafe { inner.initialize_entry(key.as_slice(), value) }
411
+ })?;
412
+
413
+ // CAST: no-op on 64-bit, widening on 32-bit.
414
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
415
+
416
+ rs_self.load_value(value_offset)
417
+ }
418
+
419
/// Creates a Ruby String containing the section of the mmapped file that
/// has been written to.
///
/// The string is built with `rb_str_new_static` from the mmap's pointer and
/// length, then frozen and registered in the `@weak_obj_tracker` map before
/// being returned.
///
/// # Errors
///
/// Fails if the inner mmap is unavailable or the string cannot be tracked.
fn str(&self, rb_self: Obj<Self>) -> magnus::error::Result<RString> {
    let val_id = (*rb_self).inner(|inner| {
        let ptr = inner.as_ptr();
        let len = inner.len();

        // SAFETY: This is safe so long as the data provided to Ruby meets its
        // requirements. When unmapping the file this will no longer be the
        // case, see the comment on `munmap` for how we handle this.
        Ok(unsafe { rb_str_new_static(ptr as _, len as _) })
    })?;

    // SAFETY: We know that rb_str_new_static returns a VALUE.
    let val = unsafe { Value::from_raw(val_id) };

    // UNWRAP: We created this value as a string above.
    let str = RString::from_value(val).unwrap();

    // Freeze the root string so it can't be mutated out from under any
    // substrings created. This object is never exposed to callers.
    str.freeze();

    // Track the RString in our `WeakMap` so we can update its address if
    // we re-mmap the backing file.
    (*rb_self).track_rstring(rb_self, str)?;

    Ok(str)
}
448
+
449
/// If we reallocate, any live Ruby strings provided by the `str()` method
/// will be invalidated. We need to iterate over them and update their
/// heap pointers to the newly allocated memory region.
///
/// * `old_ptr` - base address of the mapping the tracked strings currently
///   point into.
/// * `old_cap` - capacity of that previous mapping.
///
/// Shared strings are rebased to the same offset in the new mapping; all
/// other strings are pointed at the new base with the new mmap length. When
/// the new length is zero, shared strings take the second path as well.
///
/// # Errors
///
/// Fails if the tracker ivar cannot be read, the inner mmap is unavailable,
/// iteration fails, or a tracked value is not a String.
fn update_weak_map(
    &self,
    rb_self: Obj<Self>,
    old_ptr: *const c_char,
    old_cap: c_long,
) -> magnus::error::Result<()> {
    let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;

    let new_len = self.inner(|inner| util::cast_chk::<_, c_long>(inner.len(), "mmap len"))?;

    // Iterate over the values of the `WeakMap`.
    for val in tracker.enumeratorize("each_value", ()) {
        let rb_string = val?;
        let str = RString::from_value(rb_string)
            .ok_or_else(|| err!(arg_error(), "weakmap value was not a string"))?;

        // SAFETY: We're messing with Ruby's internals here, YOLO.
        unsafe {
            // Convert the magnus wrapper type to a raw string exposed by `rb_sys`,
            // which provides access to its internals.
            let mut raw_str = Self::rb_string_internal(str);

            // Shared string have their own `ptr` and `len` values, but `aux`
            // is the id of the parent string so the GC can track this
            // dependency. The `ptr` will always be an offset from the base
            // address of the mmap, and `len` will be the length of the mmap
            // less the offset from the base.
            if Self::rb_string_is_shared(str) && new_len > 0 {
                // Calculate how far into the original mmap the shared string
                // started and update to the equivalent address in the new
                // one.
                let substr_ptr = raw_str.as_ref().as_.heap.ptr;
                let offset = substr_ptr.offset_from(old_ptr);

                raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr().offset(offset);

                // NOTE(review): the new length is the current length plus
                // `old_cap`, which presumably assumes the mapping grew by
                // exactly `old_cap` (i.e. doubled) — TODO confirm for callers
                // that expand by a different amount.
                let current_len = str.len() as c_long;
                let new_shared_len = old_cap + current_len;

                self.update_rstring_len(raw_str, new_shared_len);
                continue;
            }

            // Update the string to point to the new mmapped file.
            // We're matching the behavior of Ruby's `str_new_static` function.
            // See https://github.com/ruby/ruby/blob/e51014f9c05aa65cbf203442d37fef7c12390015/string.c#L1030-L1053
            //
            // We deliberately do _NOT_ increment the `capa` field of the
            // string to match the new `len`. We were initially doing this,
            // but consistently triggered GCs in the middle of updating the
            // string pointers, causing a segfault.
            //
            // See https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/45
            raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr();
            self.update_rstring_len(raw_str, new_len);
        }
    }

    Ok(())
}
512
+
513
+ /// Check that the mmap is large enough to contain the value to be added,
514
+ /// and expand it to fit if necessary.
515
+ fn check_expand(&self, rb_self: Obj<Self>, key_len: usize) -> magnus::error::Result<()> {
516
+ // CAST: no-op on 32-bit, widening on 64-bit.
517
+ let used = self.inner(|inner| inner.load_used())? as usize;
518
+ let entry_len = RawEntry::calc_total_len(key_len)?;
519
+
520
+ // We need the mmapped region to contain at least one byte beyond the
521
+ // written data to create a NUL- terminated C string. Validate that
522
+ // new length does not exactly match or exceed the length of the mmap.
523
+ while self.capacity() <= used.add_chk(entry_len)? {
524
+ self.expand_to_fit(rb_self, self.capacity().mul_chk(2)?)?;
525
+ }
526
+
527
+ Ok(())
528
+ }
529
+
530
+ /// Check that the mmap is large enough to contain the value to be added,
531
+ /// and expand it to fit if necessary.
532
+ fn check_expand_exemplar(&self, rb_self: Obj<Self>, key_len: usize) -> magnus::error::Result<()> {
533
+ // CAST: no-op on 32-bit, widening on 64-bit.
534
+ let used = self.inner(|inner| inner.load_used())? as usize;
535
+ let entry_len = RawEntry::calc_total_len_exemplar(key_len)?;
536
+
537
+ // We need the mmapped region to contain at least one byte beyond the
538
+ // written data to create a NUL- terminated C string. Validate that
539
+ // new length does not exactly match or exceed the length of the mmap.
540
+ while self.capacity() <= used.add_chk(entry_len)? {
541
+ self.expand_to_fit(rb_self, self.capacity().mul_chk(2)?)?;
542
+ }
543
+
544
+ Ok(())
545
+ }
546
+
547
/// Expand the underlying file until it is long enough to fit `target_cap`.
/// This will remove the existing mmap, expand the file, then update any
/// strings held by the `WeakMap` to point to the newly mmapped address.
///
/// # Errors
///
/// Returns an `ArgumentError` if `target_cap` is smaller than the current
/// capacity, and forwards overflow, I/O, mmap and string-update failures.
fn expand_to_fit(&self, rb_self: Obj<Self>, target_cap: usize) -> magnus::error::Result<()> {
    if target_cap < self.capacity() {
        return Err(err!(arg_error(), "Can't reduce the size of mmap"));
    }

    // Double the current capacity until it covers `target_cap`. The result
    // is only used below to decide whether any growth is needed.
    let mut new_cap = self.capacity();
    while new_cap < target_cap {
        new_cap = new_cap.mul_chk(2)?;
    }

    if new_cap != self.capacity() {
        // Capture the old base pointer and capacity before the mapping is
        // torn down; `update_weak_map` needs them to rebase tracked strings.
        let old_ptr = self.as_mut_ptr();
        let old_cap = util::cast_chk::<_, c_long>(self.capacity(), "capacity")?;

        // Drop the old mmap.
        let (mut file, path) = self.take_inner()?.munmap();

        // The file is grown to `target_cap`, not to `new_cap`.
        self.expand_file(&mut file, &path, target_cap)?;

        // Re-mmap the expanded file.
        let new_inner = InnerMmap::reestablish(path, file, target_cap)?;

        self.insert_inner(new_inner)?;

        return self.update_weak_map(rb_self, old_ptr, old_cap);
    }

    Ok(())
}
579
+
580
+ /// Use lseek(2) to seek past the end of the file and write a NUL byte. This
581
+ /// creates a file hole that expands the size of the file without consuming
582
+ /// disk space until it is actually written to.
583
+ fn expand_file(&self, file: &mut File, path: &Path, len: usize) -> Result<()> {
584
+ if len == 0 {
585
+ return Err(MmapError::overflowed(0, -1, "adding"));
586
+ }
587
+
588
+ // CAST: no-op on 64-bit, widening on 32-bit.
589
+ let len = len as u64;
590
+
591
+ match file.seek(SeekFrom::Start(len - 1)) {
592
+ Ok(_) => {}
593
+ Err(_) => {
594
+ return Err(MmapError::with_errno(format!("Can't lseek {}", len - 1)));
595
+ }
596
+ }
597
+
598
+ match file.write(&[0x0]) {
599
+ Ok(1) => {}
600
+ _ => {
601
+ return Err(MmapError::with_errno(format!(
602
+ "Can't extend {}",
603
+ path.display()
604
+ )));
605
+ }
606
+ }
607
+
608
+ Ok(())
609
+ }
610
+
611
+ fn track_rstring(&self, rb_self: Obj<Self>, str: RString) -> magnus::error::Result<()> {
612
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
613
+
614
+ // Use the string's Id as the key in the `WeakMap`.
615
+ let key = str.as_raw();
616
+ let _: Value = tracker.funcall("[]=", (key, str))?;
617
+ Ok(())
618
+ }
619
+
620
+ /// The total capacity of the underlying mmap.
621
+ #[inline]
622
+ fn capacity(&self) -> usize {
623
+ // UNWRAP: This is actually infallible, but we need to
624
+ // wrap it in a `Result` for use with `inner()`.
625
+ self.inner(|inner| Ok(inner.capacity())).unwrap()
626
+ }
627
+
628
+ fn load_value(&self, position: usize) -> magnus::error::Result<f64> {
629
+ self.inner(|inner| inner.load_value(position))
630
+ .map_err(|e| e.into())
631
+ }
632
+
633
+ fn load_exemplar<'a, 'b>(&'a self, position: usize) -> magnus::error::Result<Exemplar> {
634
+ self.inner_mut(|inner| inner.load_exemplar(position))
635
+ .map_err(|e| e.into())
636
+ }
637
+
638
+ fn as_mut_ptr(&self) -> *mut c_char {
639
+ // UNWRAP: This is actually infallible, but we need to
640
+ // wrap it in a `Result` for use with `inner()`.
641
+ self.inner(|inner| Ok(inner.as_mut_ptr() as *mut c_char))
642
+ .unwrap()
643
+ }
644
+
645
+ /// Takes a closure with immutable access to InnerMmap. Will fail if the inner
646
+ /// object has a mutable borrow or has been dropped.
647
+ fn inner<F, T>(&self, func: F) -> Result<T>
648
+ where
649
+ F: FnOnce(&InnerMmap) -> Result<T>,
650
+ {
651
+ let inner_opt = self.0.try_read().map_err(|_| MmapError::ConcurrentAccess)?;
652
+
653
+ let inner = inner_opt.as_ref().ok_or(MmapError::UnmappedFile)?;
654
+
655
+ func(inner)
656
+ }
657
+
658
+ /// Takes a closure with mutable access to InnerMmap. Will fail if the inner
659
+ /// object has an existing mutable borrow, or has been dropped.
660
+ fn inner_mut<F, T>(&self, func: F) -> Result<T>
661
+ where
662
+ F: FnOnce(&mut InnerMmap) -> Result<T>,
663
+ {
664
+ let mut inner_opt = self
665
+ .0
666
+ .try_write()
667
+ .map_err(|_| MmapError::ConcurrentAccess)?;
668
+
669
+ let inner = inner_opt.as_mut().ok_or(MmapError::UnmappedFile)?;
670
+
671
+ func(inner)
672
+ }
673
+
674
+ /// Take ownership of the `InnerMmap` from the `RwLock`.
675
+ /// Will fail if a mutable borrow is already held or the inner
676
+ /// object has been dropped.
677
+ fn take_inner(&self) -> Result<InnerMmap> {
678
+ let mut inner_opt = self
679
+ .0
680
+ .try_write()
681
+ .map_err(|_| MmapError::ConcurrentAccess)?;
682
+ match (*inner_opt).take() {
683
+ Some(i) => Ok(i),
684
+ None => Err(MmapError::UnmappedFile),
685
+ }
686
+ }
687
+
688
+ /// Move `new_inner` into the `RwLock`.
689
+ /// Will return an error if a mutable borrow is already held.
690
+ fn insert_inner(&self, new_inner: InnerMmap) -> Result<()> {
691
+ let mut inner_opt = self
692
+ .0
693
+ .try_write()
694
+ .map_err(|_| MmapError::ConcurrentAccess)?;
695
+ (*inner_opt).replace(new_inner);
696
+
697
+ Ok(())
698
+ }
699
+
700
+ /// Check if an RString is shared. Shared string use the same underlying
701
+ /// storage as their parent, taking an offset from the start. By default
702
+ /// they must run to the end of the parent string.
703
+ fn rb_string_is_shared(rb_str: RString) -> bool {
704
+ // SAFETY: We only hold a reference to the raw object for the duration
705
+ // of this function, and no Ruby code is called.
706
+ let flags = unsafe {
707
+ let raw_str = Self::rb_string_internal(rb_str);
708
+ raw_str.as_ref().basic.flags
709
+ };
710
+ let shared_flags = STR_SHARED | STR_NOEMBED;
711
+
712
+ flags & shared_flags == shared_flags
713
+ }
714
+
715
/// Convert `magnus::RString` into the raw binding used by `rb_sys::RString`.
/// We need this to manually change the pointer and length values for strings
/// when moving the mmap to a new file.
///
/// The conversion is a plain `transmute`, so it relies on `magnus::RString`
/// having the same representation as `NonNull<rb_sys::RString>`.
///
/// # Safety
///
/// Calling Ruby code while the returned object is held may result
/// in it being mutated or dropped.
unsafe fn rb_string_internal(rb_str: RString) -> NonNull<rb_sys::RString> {
    mem::transmute::<RString, NonNull<rb_sys::RString>>(rb_str)
}
724
+
725
/// Overwrite the length of a raw Ruby string (build for `ruby_lte_3_2`),
/// where the length is stored in the `as_.heap.len` field.
///
/// # Safety
///
/// `raw_str` must point to a live Ruby string that uses heap storage, and
/// no Ruby code may run while it is being modified.
#[cfg(ruby_lte_3_2)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    raw_str.as_mut().as_.heap.len = new_len;
}
729
+
730
/// Overwrite the length of a raw Ruby string (build for `ruby_gte_3_3`),
/// where the length is stored in the top-level `len` field.
///
/// # Safety
///
/// `raw_str` must point to a live Ruby string, and no Ruby code may run
/// while it is being modified.
#[cfg(ruby_gte_3_3)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    raw_str.as_mut().len = new_len;
}
734
+ }
735
+
736
+ #[cfg(test)]
737
+ mod test {
738
+ use super::*;
739
+ use core::panic;
740
+ use magnus::error::Error;
741
+ use magnus::eval;
742
+ use magnus::Range;
743
+ use nix::unistd::{sysconf, SysconfVar};
744
+ use std::mem::size_of;
745
+
746
+ use crate::raw_entry::RawEntry;
747
+ use crate::testhelper::TestFile;
748
+
749
+ /// Create a wrapped MmapedFile object.
750
+ fn create_obj() -> Obj<MmapedFile> {
751
+ let TestFile {
752
+ file: _file,
753
+ path,
754
+ dir: _dir,
755
+ } = TestFile::new(&[0u8; 8]);
756
+
757
+ let path_str = path.display().to_string();
758
+ let rpath = RString::new(&path_str);
759
+
760
+ eval!("FastMmapedFileRs.new(path)", path = rpath).unwrap()
761
+ }
762
+
763
+ /// Add three entries to the mmap. Expected length is 56, 3x 16-byte
764
+ /// entries with 8-byte header.
765
+ fn populate_entries(rb_self: &Obj<MmapedFile>) -> RHash {
766
+ let positions = RHash::from_value(eval("{}").unwrap()).unwrap();
767
+
768
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("a"), 0.0).unwrap();
769
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("b"), 1.0).unwrap();
770
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("c"), 2.0).unwrap();
771
+
772
+ positions
773
+ }
774
+
775
/// Constructing a `FastMmapedFileRs` should succeed, attach a weak-map
/// tracker, grow the backing file to one page and record the header size as
/// the used length.
#[test]
fn test_new() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let TestFile {
        file: backing_file,
        path: file_path,
        dir: _dir,
    } = TestFile::new(&[0u8; 8]);

    let rpath = RString::new(&file_path.display().to_string());

    // Construction through Ruby must succeed.
    let created: std::result::Result<Obj<MmapedFile>, Error> =
        eval!("FastMmapedFileRs.new(path)", path = rpath);
    assert!(created.is_ok());
    let obj = created.unwrap();

    // The object carries a weak map for tracking strings.
    let weak_tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
    assert_eq!("ObjectSpace::WeakMap", weak_tracker.class().inspect());

    // The backing file has been grown to exactly one page.
    let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as u64;
    assert_eq!(page_size, backing_file.metadata().unwrap().len());

    // The used counter equals the header size.
    let used = obj.load_used().unwrap().to_u64().unwrap();
    assert_eq!(HEADER_SIZE as u64, used);
}
811
+
812
/// After three entries are written, `MmapedFile::slice` should return the
/// expected raw bytes for the header and for the first entry.
#[test]
fn test_slice() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let _ = populate_entries(&obj);

    // Validate header updated with new length
    let header_range = Range::new(0, HEADER_SIZE, true).unwrap().as_value();
    let header_slice = MmapedFile::slice(obj, &[header_range]).unwrap();
    assert_eq!([56, 0, 0, 0, 0, 0, 0, 0], unsafe {
        header_slice.as_slice()
    });

    // The first entry occupies the 16 bytes right after the header.
    let value_range = Range::new(HEADER_SIZE, 24, true).unwrap().as_value();
    let value_slice = MmapedFile::slice(obj, &[value_range]).unwrap();

    // Validate string length
    assert_eq!(1u32.to_ne_bytes(), unsafe { &value_slice.as_slice()[0..4] });

    // Validate string and padding. Bytes 4..8 hold the one-byte key followed
    // by three padding bytes, so the expected literal must be four characters
    // long; a shorter literal can never equal the 4-byte slice.
    // NOTE(review): assumes the padding byte is an ASCII space — confirm
    // against the entry writer in `raw_entry`.
    assert_eq!("a   ", unsafe {
        String::from_utf8_lossy(&value_slice.as_slice()[4..8])
    });

    // Validate value
    assert_eq!(0.0f64.to_ne_bytes(), unsafe {
        &value_slice.as_slice()[8..16]
    });
}
844
+
845
/// Strings that share the mmap's buffer must be tracked and must keep
/// pointing into the mapping after it has been expanded and remapped.
#[test]
fn test_slice_resize() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    /// Walk every string held by `@weak_obj_tracker` and check that the
    /// parent and child strings point into the current mapping. Panics if the
    /// unshared string, or any unknown string, is being tracked.
    fn assert_internals(
        obj: Obj<MmapedFile>,
        parent_id: c_ulong,
        child_id: c_ulong,
        unshared_id: c_ulong,
    ) {
        let rs_self = &*obj;
        let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();

        let mmap_ptr = rs_self.as_mut_ptr();
        let mmap_len = rs_self.capacity();

        let mut saw_parent = false;
        let mut saw_child = false;

        for entry in tracker.enumeratorize("each_value", ()) {
            let tracked = RString::from_value(entry.unwrap()).unwrap();

            unsafe {
                let raw = MmapedFile::rb_string_internal(tracked);

                match tracked.as_raw() {
                    id if id == child_id => {
                        // The child records the parent as its shared root.
                        assert_eq!(parent_id, raw.as_ref().as_.heap.aux.shared);

                        // The child ends where the mapping ends, so its start is
                        // offset from the mapping start by the length difference.
                        let child_offset = mmap_len as isize - tracked.len() as isize;
                        assert_eq!(mmap_ptr.offset(child_offset), raw.as_ref().as_.heap.ptr);

                        saw_child = true;
                    }
                    id if id == parent_id => {
                        assert_eq!(parent_id, id);

                        // The parent spans the whole mapping.
                        assert_eq!(mmap_ptr, raw.as_ref().as_.heap.ptr);
                        assert_eq!(mmap_len as c_long, tracked.len() as c_long);
                        assert!(raw.as_ref().basic.flags & (STR_SHARED | STR_NOEMBED) > 0);
                        assert!(tracked.is_frozen());

                        saw_parent = true;
                    }
                    id if id == unshared_id => panic!("tracking unshared string"),
                    _ => panic!("unknown string"),
                }
            }
        }

        assert!(saw_parent && saw_child);
    }

    let obj = create_obj();
    let _positions = populate_entries(&obj);

    let rs_self = &*obj;

    // A string covering the entire mmap.
    let parent_str = rs_self.str(obj).unwrap();
    let parent_id = parent_str.as_raw();

    // Ruby only creates a shared string when the slice runs to the end of
    // the original string.
    let len = rs_self.inner(|inner| Ok(inner.len())).unwrap();
    let shareable_range = Range::new(1, len - 1, false).unwrap().as_value();

    // Expected to reuse the parent's buffer at an offset, with the parent's
    // id stored in `as.heap.aux.shared`.
    let child_str = rs_self._slice(obj, parent_str, &[shareable_range]).unwrap();
    let child_id = child_str.as_raw();

    // A slice that stops short of the parent's end is not shared.
    assert!(len > 4);
    let unshareable_range = Range::new(0, 4, false).unwrap().as_value();

    // Expected to own its buffer and therefore NOT be tracked.
    let unshared_str = rs_self
        ._slice(obj, parent_str, &[unshareable_range])
        .unwrap();
    let unshared_id = unshared_str.as_raw();
    assert!(!MmapedFile::rb_string_is_shared(unshared_str));

    assert_internals(obj, parent_id, child_id, unshared_id);

    // Double the capacity repeatedly so the mapping is forced to move.
    let ptr_before = rs_self.as_mut_ptr();
    for _ in 0..16 {
        rs_self.expand_to_fit(obj, rs_self.capacity() * 2).unwrap();
    }
    let ptr_after = rs_self.as_mut_ptr();
    assert_ne!(ptr_before, ptr_after);

    // Touching the strings would segfault if their pointers had not been
    // updated to the newly remapped file.
    let _: Value = eval!("puts parent", parent = parent_str).unwrap();
    let _: Value = eval!("puts child", child = child_str).unwrap();
    let _: Value = eval!("puts unshared", unshared = unshared_str).unwrap();

    // Tracked strings must still be valid after the remap.
    assert_internals(obj, parent_id, child_id, unshared_id);
}
946
+
947
/// Writing an entry sized to land exactly on the current capacity must make
/// the mmap grow instead of being filled to the last byte.
#[test]
fn test_dont_fill_mmap() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let positions = populate_entries(&obj);

    let rs_self = &*obj;

    rs_self.expand_to_fit(obj, 1024).unwrap();

    let used_before = rs_self.inner(|inner| inner.load_used()).unwrap() as usize;
    let cap_before = rs_self.inner(|inner| Ok(inner.len())).unwrap();

    // Size the key so that the new entry ends exactly at the current capacity.
    let overhead = HEADER_SIZE + size_of::<f64>() + size_of::<u32>();
    let val_len = cap_before - used_before - overhead;
    assert_eq!(
        cap_before,
        RawEntry::calc_total_len(val_len).unwrap() + used_before
    );

    let key = "A".repeat(val_len);
    MmapedFile::upsert_entry(obj, positions, RString::new(&key), 1.0).unwrap();

    // The mmap must have been expanded, ensuring a trailing NUL.
    assert!(rs_self.capacity() > cap_before);
}
977
+ }