vinted-prometheus-client-mmap 1.5.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +5 -0
  3. data/ext/fast_mmaped_file_rs/Cargo.toml +40 -0
  4. data/ext/fast_mmaped_file_rs/README.md +52 -0
  5. data/ext/fast_mmaped_file_rs/build.rs +7 -0
  6. data/ext/fast_mmaped_file_rs/extconf.rb +28 -0
  7. data/ext/fast_mmaped_file_rs/src/error.rs +174 -0
  8. data/ext/fast_mmaped_file_rs/src/exemplars.rs +25 -0
  9. data/ext/fast_mmaped_file_rs/src/file_entry.rs +1252 -0
  10. data/ext/fast_mmaped_file_rs/src/file_info.rs +240 -0
  11. data/ext/fast_mmaped_file_rs/src/lib.rs +89 -0
  12. data/ext/fast_mmaped_file_rs/src/macros.rs +14 -0
  13. data/ext/fast_mmaped_file_rs/src/map.rs +519 -0
  14. data/ext/fast_mmaped_file_rs/src/metrics.proto +153 -0
  15. data/ext/fast_mmaped_file_rs/src/mmap/inner.rs +775 -0
  16. data/ext/fast_mmaped_file_rs/src/mmap.rs +977 -0
  17. data/ext/fast_mmaped_file_rs/src/raw_entry.rs +547 -0
  18. data/ext/fast_mmaped_file_rs/src/testhelper.rs +222 -0
  19. data/ext/fast_mmaped_file_rs/src/util.rs +140 -0
  20. data/lib/.DS_Store +0 -0
  21. data/lib/2.7/fast_mmaped_file_rs.so +0 -0
  22. data/lib/3.0/fast_mmaped_file_rs.so +0 -0
  23. data/lib/3.1/fast_mmaped_file_rs.so +0 -0
  24. data/lib/3.2/fast_mmaped_file_rs.so +0 -0
  25. data/lib/3.3/fast_mmaped_file_rs.so +0 -0
  26. data/lib/prometheus/.DS_Store +0 -0
  27. data/lib/prometheus/client/configuration.rb +24 -0
  28. data/lib/prometheus/client/counter.rb +27 -0
  29. data/lib/prometheus/client/formats/protobuf.rb +93 -0
  30. data/lib/prometheus/client/formats/text.rb +85 -0
  31. data/lib/prometheus/client/gauge.rb +40 -0
  32. data/lib/prometheus/client/helper/entry_parser.rb +132 -0
  33. data/lib/prometheus/client/helper/file_locker.rb +50 -0
  34. data/lib/prometheus/client/helper/json_parser.rb +23 -0
  35. data/lib/prometheus/client/helper/metrics_processing.rb +45 -0
  36. data/lib/prometheus/client/helper/metrics_representation.rb +51 -0
  37. data/lib/prometheus/client/helper/mmaped_file.rb +64 -0
  38. data/lib/prometheus/client/helper/plain_file.rb +29 -0
  39. data/lib/prometheus/client/histogram.rb +80 -0
  40. data/lib/prometheus/client/label_set_validator.rb +85 -0
  41. data/lib/prometheus/client/metric.rb +80 -0
  42. data/lib/prometheus/client/mmaped_dict.rb +83 -0
  43. data/lib/prometheus/client/mmaped_value.rb +164 -0
  44. data/lib/prometheus/client/page_size.rb +17 -0
  45. data/lib/prometheus/client/push.rb +203 -0
  46. data/lib/prometheus/client/rack/collector.rb +88 -0
  47. data/lib/prometheus/client/rack/exporter.rb +102 -0
  48. data/lib/prometheus/client/registry.rb +65 -0
  49. data/lib/prometheus/client/simple_value.rb +31 -0
  50. data/lib/prometheus/client/summary.rb +69 -0
  51. data/lib/prometheus/client/support/puma.rb +44 -0
  52. data/lib/prometheus/client/support/unicorn.rb +35 -0
  53. data/lib/prometheus/client/uses_value_type.rb +20 -0
  54. data/lib/prometheus/client/version.rb +5 -0
  55. data/lib/prometheus/client.rb +58 -0
  56. data/lib/prometheus.rb +3 -0
  57. metadata +210 -0
@@ -0,0 +1,977 @@
1
+ use magnus::exception::*;
2
+ use magnus::prelude::*;
3
+ use magnus::rb_sys::{AsRawValue, FromRawValue};
4
+ use magnus::typed_data::Obj;
5
+ use magnus::value::Fixnum;
6
+ use magnus::{eval, scan_args, Error, Integer, RArray, RClass, RHash, RString, Value};
7
+ use nix::libc::{c_char, c_long, c_ulong};
8
+ use rb_sys::rb_str_new_static;
9
+ use std::fs::File;
10
+ use std::io::{prelude::*, SeekFrom};
11
+ use std::mem;
12
+ use std::path::Path;
13
+ use std::ptr::NonNull;
14
+ use std::sync::RwLock;
15
+
16
+ use crate::err;
17
+ use crate::error::MmapError;
18
+ use crate::exemplars::Exemplar;
19
+ use crate::file_entry::FileEntry;
20
+ use crate::map::EntryMap;
21
+ use crate::raw_entry::RawEntry;
22
+ use crate::util::{self, CheckedOps};
23
+ use crate::Result;
24
+ use crate::HEADER_SIZE;
25
+ use inner::InnerMmap;
26
+
27
+ mod inner;
28
+
29
/// The Ruby `STR_NOEMBED` flag, aka `FL_USER1`.
///
/// Tested together with [`STR_SHARED`] when deciding whether a Ruby string
/// is a shared string.
const STR_NOEMBED: c_ulong = 1 << (13);
/// The Ruby `STR_SHARED` flag, aka `FL_USER2`.
///
/// Tested together with [`STR_NOEMBED`] when deciding whether a Ruby string
/// is a shared string.
const STR_SHARED: c_ulong = 1 << (14);
33
+
34
/// A Rust struct wrapped in a Ruby object, providing access to a memory-mapped
/// file used to store, update, and read out Prometheus metrics.
///
/// - File format:
///   - Header:
///     - 4 bytes: u32 - total size of metrics in file.
///     - 4 bytes: NUL byte padding.
///   - Repeating metrics entries:
///     - 4 bytes: u32 - entry JSON string size.
///     - `N` bytes: UTF-8 encoded JSON string used as entry key.
///     - (8 - (4 + `N`) % 8) bytes: 1 to 8 padding space (0x20) bytes to
///       reach 8-byte alignment.
///     - 8 bytes: f64 - entry value.
///
/// All numbers are saved in native-endian format.
///
/// Generated via [luismartingarcia/protocol](https://github.com/luismartingarcia/protocol):
///
///
/// ```text
/// protocol "Used:4,Pad:4,K1 Size:4,K1 Name:4,K1 Value:8,K2 Size:4,K2 Name:4,K2 Value:8"
///
///  0                   1                   2                   3
///  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/// |  Used |  Pad  |K1 Size|K1 Name|    K1 Value   |K2 Size|K2 Name|
/// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/// |    K2 Value   |
/// +-+-+-+-+-+-+-+-+
/// ```
//
// The API imposed by `magnus` requires all methods to use shared borrows.
// This means we can't store any mutable state in the top-level struct,
// and must store the interior data behind a `RwLock`, which adds run-time
// checks that mutable operations have no concurrent reads or writes.
//
// We are further limited by the need to support subclassing in Ruby, which
// requires us to define an allocation function for the class via the
// `magnus::class::define_alloc_func()` function. This needs the type to
// support the `Default` trait, so a `File` cannot be held directly by the
// object being constructed. Having the `RwLock` hold an `Option` of the
// interior object resolves this.
#[derive(Debug, Default)]
#[magnus::wrap(class = "FastMmapedFileRs", free_immediately, size)]
pub struct MmapedFile(RwLock<Option<InnerMmap>>);
79
+
80
+ use std::time::{SystemTime, UNIX_EPOCH};
81
+
82
+ impl MmapedFile {
83
+ /// call-seq:
84
+ /// new(file)
85
+ ///
86
+ /// create a new Mmap object
87
+ ///
88
+ /// * <em>file</em>
89
+ ///
90
+ ///
91
+ /// Creates a mapping that's shared with all other processes
92
+ /// mapping the same area of the file.
93
+ pub fn new(klass: RClass, args: &[Value]) -> magnus::error::Result<Obj<Self>> {
94
+ let args = scan_args::scan_args::<(RString,), (), (), (), (), ()>(args)?;
95
+ let path = args.required.0;
96
+
97
+ let lock = MmapedFile(RwLock::new(None));
98
+ let obj = Obj::wrap_as(lock, klass);
99
+
100
+ let _: Value = obj.funcall("initialize", (path,))?;
101
+
102
+ Ok(obj)
103
+ }
104
+
105
+ /// Initialize a new `FastMmapedFileRs` object. This must be defined in
106
+ /// order for inheritance to work.
107
+ pub fn initialize(rb_self: Obj<Self>, fname: String) -> magnus::error::Result<()> {
108
+ let file = File::options()
109
+ .read(true)
110
+ .write(true)
111
+ .open(&fname)
112
+ .map_err(|_| err!(arg_error(), "Can't open {}", fname))?;
113
+
114
+ let inner = InnerMmap::new(fname.into(), file)?;
115
+ rb_self.insert_inner(inner)?;
116
+
117
+ let weak_klass = RClass::from_value(eval("ObjectSpace::WeakMap")?)
118
+ .ok_or_else(|| err!(no_method_error(), "unable to create WeakMap"))?;
119
+ let weak_obj_tracker = weak_klass.new_instance(())?;
120
+
121
+ // We will need to iterate over strings backed by the mmapped file, but
122
+ // don't want to prevent the GC from reaping them when the Ruby code
123
+ // has finished with them. `ObjectSpace::WeakMap` allows us to track
124
+ // them without extending their lifetime.
125
+ //
126
+ // https://ruby-doc.org/core-3.0.0/ObjectSpace/WeakMap.html
127
+ rb_self.ivar_set("@weak_obj_tracker", weak_obj_tracker)?;
128
+
129
+ Ok(())
130
+ }
131
+
132
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
133
+ /// metrics String.
134
+ pub fn to_metrics(file_list: RArray) -> magnus::error::Result<String> {
135
+ let mut map = EntryMap::new();
136
+ map.aggregate_files(file_list)?;
137
+
138
+ let sorted = map.into_sorted()?;
139
+
140
+ FileEntry::entries_to_string(sorted).map_err(|e| e.into())
141
+ }
142
+
143
+ /// Read the list of files provided from Ruby and convert them to a Prometheus
144
+ /// metrics String.
145
+ pub fn to_protobuf(file_list: RArray) -> magnus::error::Result<String> {
146
+ let mut map = EntryMap::new();
147
+ map.aggregate_files(file_list)?;
148
+
149
+ let sorted = map.into_sorted()?;
150
+
151
+ FileEntry::entries_to_protobuf(sorted).map_err(|e| e.into())
152
+ }
153
+
154
+
155
+ /// Document-method: []
156
+ /// Document-method: slice
157
+ ///
158
+ /// call-seq: [](args)
159
+ ///
160
+ /// Element reference - with the following syntax:
161
+ ///
162
+ /// self[nth]
163
+ ///
164
+ /// retrieve the <em>nth</em> character
165
+ ///
166
+ /// self[start..last]
167
+ ///
168
+ /// return a substring from <em>start</em> to <em>last</em>
169
+ ///
170
+ /// self[start, length]
171
+ ///
172
+ /// return a substring of <em>lenght</em> characters from <em>start</em>
173
+ pub fn slice(rb_self: Obj<Self>, args: &[Value]) -> magnus::error::Result<RString> {
174
+ // The C implementation would trigger a GC cycle via `rb_gc_force_recycle`
175
+ // if the `MM_PROTECT` flag is set, but in practice this is never used.
176
+ // We omit this logic, particularly because `rb_gc_force_recycle` is a
177
+ // no-op as of Ruby 3.1.
178
+ let rs_self = &*rb_self;
179
+
180
+ let str = rs_self.str(rb_self)?;
181
+ rs_self._slice(rb_self, str, args)
182
+ }
183
+
184
+ fn _slice(
185
+ &self,
186
+ rb_self: Obj<Self>,
187
+ str: RString,
188
+ args: &[Value],
189
+ ) -> magnus::error::Result<RString> {
190
+ let substr: RString = str.funcall("[]", args)?;
191
+
192
+ // Track shared child strings which use the same backing storage.
193
+ if Self::rb_string_is_shared(substr) {
194
+ (*rb_self).track_rstring(rb_self, substr)?;
195
+ }
196
+
197
+ // The C implementation does this, perhaps to validate that the len we
198
+ // provided is actually being used.
199
+ (*rb_self).inner_mut(|inner| {
200
+ inner.set_len(str.len());
201
+ Ok(())
202
+ })?;
203
+
204
+ Ok(substr)
205
+ }
206
+
207
+ /// Document-method: msync
208
+ /// Document-method: sync
209
+ /// Document-method: flush
210
+ ///
211
+ /// call-seq: msync
212
+ ///
213
+ /// flush the file
214
+ pub fn sync(&self, args: &[Value]) -> magnus::error::Result<()> {
215
+ use nix::sys::mman::MsFlags;
216
+
217
+ let mut ms_async = false;
218
+ let args = scan_args::scan_args::<(), (Option<i32>,), (), (), (), ()>(args)?;
219
+
220
+ if let Some(flag) = args.optional.0 {
221
+ let flag = MsFlags::from_bits(flag).unwrap_or(MsFlags::empty());
222
+ ms_async = flag.contains(MsFlags::MS_ASYNC);
223
+ }
224
+
225
+ // The `memmap2` crate does not support the `MS_INVALIDATE` flag. We ignore that
226
+ // flag if passed in, checking only for `MS_ASYNC`. In practice no arguments are ever
227
+ // passed to this function, but we do this to maintain compatibility with the
228
+ // C implementation.
229
+ self.inner_mut(|inner| inner.flush(ms_async))
230
+ .map_err(|e| e.into())
231
+ }
232
+
233
+ /// Document-method: munmap
234
+ /// Document-method: unmap
235
+ ///
236
+ /// call-seq: munmap
237
+ ///
238
+ /// terminate the association
239
+ pub fn munmap(rb_self: Obj<Self>) -> magnus::error::Result<()> {
240
+ let rs_self = &*rb_self;
241
+
242
+ rs_self.inner_mut(|inner| {
243
+ // We are about to release the backing mmap for Ruby's String
244
+ // objects. If Ruby attempts to read from them the program will
245
+ // segfault. We update the length of all Strings to zero so Ruby
246
+ // does not attempt to access the now invalid address between now
247
+ // and when GC eventually reaps the objects.
248
+ //
249
+ // See the following for more detail:
250
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/39
251
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/41
252
+ // https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/merge_requests/80
253
+ inner.set_len(0);
254
+ Ok(())
255
+ })?;
256
+
257
+ // Update each String object to be zero-length.
258
+ let cap = util::cast_chk::<_, c_long>(rs_self.capacity(), "capacity")?;
259
+ rs_self.update_weak_map(rb_self, rs_self.as_mut_ptr(), cap)?;
260
+
261
+ // Remove the `InnerMmap` from the `RwLock`. This will drop
262
+ // end of this function, unmapping and closing the file.
263
+ let _ = rs_self.take_inner()?;
264
+ Ok(())
265
+ }
266
+
267
+ /// Fetch the `used` header from the `.db` file, the length
268
+ /// in bytes of the data written to the file.
269
+ pub fn load_used(&self) -> magnus::error::Result<Integer> {
270
+ let used = self.inner(|inner| inner.load_used())?;
271
+
272
+ Ok(Integer::from_u64(used as u64))
273
+ }
274
+
275
+ /// Update the `used` header for the `.db` file, the length
276
+ /// in bytes of the data written to the file.
277
+ pub fn save_used(rb_self: Obj<Self>, used: Fixnum) -> magnus::error::Result<Fixnum> {
278
+ let rs_self = &*rb_self;
279
+ let used_uint = used.to_u32()?;
280
+
281
+ // If the underlying mmap is smaller than the header, then resize to fit.
282
+ // The file has already been expanded to page size when first opened, so
283
+ // even if the map is less than HEADER_SIZE, we're not at risk of a
284
+ // SIGBUS.
285
+ if rs_self.capacity() < HEADER_SIZE {
286
+ rs_self.expand_to_fit(rb_self, HEADER_SIZE)?;
287
+ }
288
+
289
+ rs_self.inner_mut(|inner| inner.save_used(used_uint))?;
290
+
291
+ Ok(used)
292
+ }
293
+
294
+ /// Fetch the value associated with a key from the mmap.
295
+ /// If no entry is present, initialize with the default
296
+ /// value provided.
297
+ pub fn fetch_entry(
298
+ rb_self: Obj<Self>,
299
+ positions: RHash,
300
+ key: RString,
301
+ default_value: f64,
302
+ ) -> magnus::error::Result<f64> {
303
+ let rs_self = &*rb_self;
304
+ let position: Option<Fixnum> = positions.lookup(key)?;
305
+
306
+ if let Some(pos) = position {
307
+ let pos = pos.to_usize()?;
308
+ return rs_self
309
+ .inner(|inner| inner.load_value(pos))
310
+ .map_err(|e| e.into());
311
+ }
312
+
313
+ rs_self.check_expand(rb_self, key.len())?;
314
+
315
+ let value_offset: usize = rs_self.inner_mut(|inner| {
316
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
317
+ unsafe { inner.initialize_entry(key.as_slice(), default_value) }
318
+ })?;
319
+
320
+ // CAST: no-op on 64-bit, widening on 32-bit.
321
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
322
+
323
+ rs_self.load_value(value_offset)
324
+ }
325
+
326
+ pub fn upsert_exemplar(
327
+ rb_self: Obj<Self>,
328
+ positions: RHash,
329
+ key: RString,
330
+ value: f64,
331
+ exemplar_name: RString,
332
+ exemplar_value: RString,
333
+ ) -> magnus::error::Result<f64> {
334
+ let rs_self = &*rb_self;
335
+ let position: Option<Fixnum> = positions.lookup(key)?;
336
+
337
+ let start = SystemTime::now();
338
+ let since_the_epoch = start
339
+ .duration_since(UNIX_EPOCH)
340
+ .expect("Time went backwards");
341
+
342
+ let ex: Exemplar = Exemplar {
343
+ label_name: unsafe { exemplar_name.as_str().unwrap().into() },
344
+ label_value: unsafe { exemplar_value.as_str().unwrap().into() },
345
+ value: value,
346
+ timestamp: since_the_epoch.as_nanos(),
347
+ };
348
+
349
+ if let Some(pos) = position {
350
+ let pos = pos.to_usize()?;
351
+ return rs_self
352
+ .inner_mut(|inner| {
353
+ inner.save_exemplar(pos, ex)?;
354
+
355
+ // TODO just return `value` here instead of loading it?
356
+ // This is how the C implementation did it, but I don't
357
+ // see what the extra load gains us.
358
+ let ex = inner.load_exemplar(pos);
359
+
360
+ Ok(ex.unwrap().value)
361
+ })
362
+ .map_err(|e| e.into());
363
+ }
364
+
365
+
366
+ rs_self.check_expand_exemplar(rb_self, key.len())?;
367
+
368
+ let value_offset: usize = rs_self.inner_mut(|inner| {
369
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
370
+ unsafe { inner.initialize_entry_exemplar(key.as_slice(), ex) }
371
+ })?;
372
+
373
+ // CAST: no-op on 64-bit, widening on 32-bit.
374
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
375
+
376
+ let ex = rs_self.load_exemplar(value_offset);
377
+
378
+ Ok(ex.unwrap().value)
379
+ }
380
+
381
+ /// Update the value of an existing entry, if present. Otherwise create a new entry
382
+ /// for the key.
383
+ pub fn upsert_entry(
384
+ rb_self: Obj<Self>,
385
+ positions: RHash,
386
+ key: RString,
387
+ value: f64,
388
+ ) -> magnus::error::Result<f64> {
389
+ let rs_self = &*rb_self;
390
+ let position: Option<Fixnum> = positions.lookup(key)?;
391
+
392
+ if let Some(pos) = position {
393
+ let pos = pos.to_usize()?;
394
+ return rs_self
395
+ .inner_mut(|inner| {
396
+ inner.save_value(pos, value)?;
397
+
398
+ // TODO just return `value` here instead of loading it?
399
+ // This is how the C implementation did it, but I don't
400
+ // see what the extra load gains us.
401
+ inner.load_value(pos)
402
+ })
403
+ .map_err(|e| e.into());
404
+ }
405
+
406
+ rs_self.check_expand(rb_self, key.len())?;
407
+
408
+ let value_offset: usize = rs_self.inner_mut(|inner| {
409
+ // SAFETY: We must not call any Ruby code for the lifetime of this borrow.
410
+ unsafe { inner.initialize_entry(key.as_slice(), value) }
411
+ })?;
412
+
413
+ // CAST: no-op on 64-bit, widening on 32-bit.
414
+ positions.aset(key, Integer::from_u64(value_offset as u64))?;
415
+
416
+ rs_self.load_value(value_offset)
417
+ }
418
+
419
+ /// Creates a Ruby String containing the section of the mmapped file that
420
+ /// has been written to.
421
+ fn str(&self, rb_self: Obj<Self>) -> magnus::error::Result<RString> {
422
+ let val_id = (*rb_self).inner(|inner| {
423
+ let ptr = inner.as_ptr();
424
+ let len = inner.len();
425
+
426
+ // SAFETY: This is safe so long as the data provided to Ruby meets its
427
+ // requirements. When unmapping the file this will no longer be the
428
+ // case, see the comment on `munmap` for how we handle this.
429
+ Ok(unsafe { rb_str_new_static(ptr as _, len as _) })
430
+ })?;
431
+
432
+ // SAFETY: We know that rb_str_new_static returns a VALUE.
433
+ let val = unsafe { Value::from_raw(val_id) };
434
+
435
+ // UNWRAP: We created this value as a string above.
436
+ let str = RString::from_value(val).unwrap();
437
+
438
+ // Freeze the root string so it can't be mutated out from under any
439
+ // substrings created. This object is never exposed to callers.
440
+ str.freeze();
441
+
442
+ // Track the RString in our `WeakMap` so we can update its address if
443
+ // we re-mmap the backing file.
444
+ (*rb_self).track_rstring(rb_self, str)?;
445
+
446
+ Ok(str)
447
+ }
448
+
449
+ /// If we reallocate, any live Ruby strings provided by the `str()` method
450
+ /// will be invalidated. We need to iterate over them using and update their
451
+ /// heap pointers to the newly allocated memory region.
452
+ fn update_weak_map(
453
+ &self,
454
+ rb_self: Obj<Self>,
455
+ old_ptr: *const c_char,
456
+ old_cap: c_long,
457
+ ) -> magnus::error::Result<()> {
458
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
459
+
460
+ let new_len = self.inner(|inner| util::cast_chk::<_, c_long>(inner.len(), "mmap len"))?;
461
+
462
+ // Iterate over the values of the `WeakMap`.
463
+ for val in tracker.enumeratorize("each_value", ()) {
464
+ let rb_string = val?;
465
+ let str = RString::from_value(rb_string)
466
+ .ok_or_else(|| err!(arg_error(), "weakmap value was not a string"))?;
467
+
468
+ // SAFETY: We're messing with Ruby's internals here, YOLO.
469
+ unsafe {
470
+ // Convert the magnus wrapper type to a raw string exposed by `rb_sys`,
471
+ // which provides access to its internals.
472
+ let mut raw_str = Self::rb_string_internal(str);
473
+
474
+ // Shared string have their own `ptr` and `len` values, but `aux`
475
+ // is the id of the parent string so the GC can track this
476
+ // dependency. The `ptr` will always be an offset from the base
477
+ // address of the mmap, and `len` will be the length of the mmap
478
+ // less the offset from the base.
479
+ if Self::rb_string_is_shared(str) && new_len > 0 {
480
+ // Calculate how far into the original mmap the shared string
481
+ // started and update to the equivalent address in the new
482
+ // one.
483
+ let substr_ptr = raw_str.as_ref().as_.heap.ptr;
484
+ let offset = substr_ptr.offset_from(old_ptr);
485
+
486
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr().offset(offset);
487
+
488
+ let current_len = str.len() as c_long;
489
+ let new_shared_len = old_cap + current_len;
490
+
491
+ self.update_rstring_len(raw_str, new_shared_len);
492
+ continue;
493
+ }
494
+
495
+ // Update the string to point to the new mmapped file.
496
+ // We're matching the behavior of Ruby's `str_new_static` function.
497
+ // See https://github.com/ruby/ruby/blob/e51014f9c05aa65cbf203442d37fef7c12390015/string.c#L1030-L1053
498
+ //
499
+ // We deliberately do _NOT_ increment the `capa` field of the
500
+ // string to match the new `len`. We were initially doing this,
501
+ // but consistently triggered GCs in the middle of updating the
502
+ // string pointers, causing a segfault.
503
+ //
504
+ // See https://gitlab.com/gitlab-org/ruby/gems/prometheus-client-mmap/-/issues/45
505
+ raw_str.as_mut().as_.heap.ptr = self.as_mut_ptr();
506
+ self.update_rstring_len(raw_str, new_len);
507
+ }
508
+ }
509
+
510
+ Ok(())
511
+ }
512
+
513
+ /// Check that the mmap is large enough to contain the value to be added,
514
+ /// and expand it to fit if necessary.
515
+ fn check_expand(&self, rb_self: Obj<Self>, key_len: usize) -> magnus::error::Result<()> {
516
+ // CAST: no-op on 32-bit, widening on 64-bit.
517
+ let used = self.inner(|inner| inner.load_used())? as usize;
518
+ let entry_len = RawEntry::calc_total_len(key_len)?;
519
+
520
+ // We need the mmapped region to contain at least one byte beyond the
521
+ // written data to create a NUL- terminated C string. Validate that
522
+ // new length does not exactly match or exceed the length of the mmap.
523
+ while self.capacity() <= used.add_chk(entry_len)? {
524
+ self.expand_to_fit(rb_self, self.capacity().mul_chk(2)?)?;
525
+ }
526
+
527
+ Ok(())
528
+ }
529
+
530
+ /// Check that the mmap is large enough to contain the value to be added,
531
+ /// and expand it to fit if necessary.
532
+ fn check_expand_exemplar(&self, rb_self: Obj<Self>, key_len: usize) -> magnus::error::Result<()> {
533
+ // CAST: no-op on 32-bit, widening on 64-bit.
534
+ let used = self.inner(|inner| inner.load_used())? as usize;
535
+ let entry_len = RawEntry::calc_total_len_exemplar(key_len)?;
536
+
537
+ // We need the mmapped region to contain at least one byte beyond the
538
+ // written data to create a NUL- terminated C string. Validate that
539
+ // new length does not exactly match or exceed the length of the mmap.
540
+ while self.capacity() <= used.add_chk(entry_len)? {
541
+ self.expand_to_fit(rb_self, self.capacity().mul_chk(2)?)?;
542
+ }
543
+
544
+ Ok(())
545
+ }
546
+
547
+ /// Expand the underlying file until it is long enough to fit `target_cap`.
548
+ /// This will remove the existing mmap, expand the file, then update any
549
+ /// strings held by the `WeakMap` to point to the newly mmapped address.
550
+ fn expand_to_fit(&self, rb_self: Obj<Self>, target_cap: usize) -> magnus::error::Result<()> {
551
+ if target_cap < self.capacity() {
552
+ return Err(err!(arg_error(), "Can't reduce the size of mmap"));
553
+ }
554
+
555
+ let mut new_cap = self.capacity();
556
+ while new_cap < target_cap {
557
+ new_cap = new_cap.mul_chk(2)?;
558
+ }
559
+
560
+ if new_cap != self.capacity() {
561
+ let old_ptr = self.as_mut_ptr();
562
+ let old_cap = util::cast_chk::<_, c_long>(self.capacity(), "capacity")?;
563
+
564
+ // Drop the old mmap.
565
+ let (mut file, path) = self.take_inner()?.munmap();
566
+
567
+ self.expand_file(&mut file, &path, target_cap)?;
568
+
569
+ // Re-mmap the expanded file.
570
+ let new_inner = InnerMmap::reestablish(path, file, target_cap)?;
571
+
572
+ self.insert_inner(new_inner)?;
573
+
574
+ return self.update_weak_map(rb_self, old_ptr, old_cap);
575
+ }
576
+
577
+ Ok(())
578
+ }
579
+
580
+ /// Use lseek(2) to seek past the end of the file and write a NUL byte. This
581
+ /// creates a file hole that expands the size of the file without consuming
582
+ /// disk space until it is actually written to.
583
+ fn expand_file(&self, file: &mut File, path: &Path, len: usize) -> Result<()> {
584
+ if len == 0 {
585
+ return Err(MmapError::overflowed(0, -1, "adding"));
586
+ }
587
+
588
+ // CAST: no-op on 64-bit, widening on 32-bit.
589
+ let len = len as u64;
590
+
591
+ match file.seek(SeekFrom::Start(len - 1)) {
592
+ Ok(_) => {}
593
+ Err(_) => {
594
+ return Err(MmapError::with_errno(format!("Can't lseek {}", len - 1)));
595
+ }
596
+ }
597
+
598
+ match file.write(&[0x0]) {
599
+ Ok(1) => {}
600
+ _ => {
601
+ return Err(MmapError::with_errno(format!(
602
+ "Can't extend {}",
603
+ path.display()
604
+ )));
605
+ }
606
+ }
607
+
608
+ Ok(())
609
+ }
610
+
611
+ fn track_rstring(&self, rb_self: Obj<Self>, str: RString) -> magnus::error::Result<()> {
612
+ let tracker: Value = rb_self.ivar_get("@weak_obj_tracker")?;
613
+
614
+ // Use the string's Id as the key in the `WeakMap`.
615
+ let key = str.as_raw();
616
+ let _: Value = tracker.funcall("[]=", (key, str))?;
617
+ Ok(())
618
+ }
619
+
620
+ /// The total capacity of the underlying mmap.
621
+ #[inline]
622
+ fn capacity(&self) -> usize {
623
+ // UNWRAP: This is actually infallible, but we need to
624
+ // wrap it in a `Result` for use with `inner()`.
625
+ self.inner(|inner| Ok(inner.capacity())).unwrap()
626
+ }
627
+
628
+ fn load_value(&self, position: usize) -> magnus::error::Result<f64> {
629
+ self.inner(|inner| inner.load_value(position))
630
+ .map_err(|e| e.into())
631
+ }
632
+
633
+ fn load_exemplar<'a, 'b>(&'a self, position: usize) -> magnus::error::Result<Exemplar> {
634
+ self.inner_mut(|inner| inner.load_exemplar(position))
635
+ .map_err(|e| e.into())
636
+ }
637
+
638
+ fn as_mut_ptr(&self) -> *mut c_char {
639
+ // UNWRAP: This is actually infallible, but we need to
640
+ // wrap it in a `Result` for use with `inner()`.
641
+ self.inner(|inner| Ok(inner.as_mut_ptr() as *mut c_char))
642
+ .unwrap()
643
+ }
644
+
645
+ /// Takes a closure with immutable access to InnerMmap. Will fail if the inner
646
+ /// object has a mutable borrow or has been dropped.
647
+ fn inner<F, T>(&self, func: F) -> Result<T>
648
+ where
649
+ F: FnOnce(&InnerMmap) -> Result<T>,
650
+ {
651
+ let inner_opt = self.0.try_read().map_err(|_| MmapError::ConcurrentAccess)?;
652
+
653
+ let inner = inner_opt.as_ref().ok_or(MmapError::UnmappedFile)?;
654
+
655
+ func(inner)
656
+ }
657
+
658
+ /// Takes a closure with mutable access to InnerMmap. Will fail if the inner
659
+ /// object has an existing mutable borrow, or has been dropped.
660
+ fn inner_mut<F, T>(&self, func: F) -> Result<T>
661
+ where
662
+ F: FnOnce(&mut InnerMmap) -> Result<T>,
663
+ {
664
+ let mut inner_opt = self
665
+ .0
666
+ .try_write()
667
+ .map_err(|_| MmapError::ConcurrentAccess)?;
668
+
669
+ let inner = inner_opt.as_mut().ok_or(MmapError::UnmappedFile)?;
670
+
671
+ func(inner)
672
+ }
673
+
674
+ /// Take ownership of the `InnerMmap` from the `RwLock`.
675
+ /// Will fail if a mutable borrow is already held or the inner
676
+ /// object has been dropped.
677
+ fn take_inner(&self) -> Result<InnerMmap> {
678
+ let mut inner_opt = self
679
+ .0
680
+ .try_write()
681
+ .map_err(|_| MmapError::ConcurrentAccess)?;
682
+ match (*inner_opt).take() {
683
+ Some(i) => Ok(i),
684
+ None => Err(MmapError::UnmappedFile),
685
+ }
686
+ }
687
+
688
+ /// Move `new_inner` into the `RwLock`.
689
+ /// Will return an error if a mutable borrow is already held.
690
+ fn insert_inner(&self, new_inner: InnerMmap) -> Result<()> {
691
+ let mut inner_opt = self
692
+ .0
693
+ .try_write()
694
+ .map_err(|_| MmapError::ConcurrentAccess)?;
695
+ (*inner_opt).replace(new_inner);
696
+
697
+ Ok(())
698
+ }
699
+
700
+ /// Check if an RString is shared. Shared string use the same underlying
701
+ /// storage as their parent, taking an offset from the start. By default
702
+ /// they must run to the end of the parent string.
703
+ fn rb_string_is_shared(rb_str: RString) -> bool {
704
+ // SAFETY: We only hold a reference to the raw object for the duration
705
+ // of this function, and no Ruby code is called.
706
+ let flags = unsafe {
707
+ let raw_str = Self::rb_string_internal(rb_str);
708
+ raw_str.as_ref().basic.flags
709
+ };
710
+ let shared_flags = STR_SHARED | STR_NOEMBED;
711
+
712
+ flags & shared_flags == shared_flags
713
+ }
714
+
715
/// Convert `magnus::RString` into the raw binding used by `rb_sys::RString`.
/// We need this to manually change the pointer and length values for strings
/// when moving the mmap to a new file.
///
/// The conversion is a plain `transmute`; no data is copied.
///
/// # Safety
///
/// Calling Ruby code while the returned object is held may result
/// in it being mutated or dropped.
unsafe fn rb_string_internal(rb_str: RString) -> NonNull<rb_sys::RString> {
    mem::transmute::<RString, NonNull<rb_sys::RString>>(rb_str)
}
724
+
725
/// Overwrite the length recorded in the raw Ruby string `raw_str` with
/// `new_len`.
///
/// Variant compiled under the `ruby_lte_3_2` cfg, where the length is
/// written to the `as_.heap.len` field.
///
/// # Safety
///
/// NOTE(review): presumably `raw_str` must point to a live Ruby string that
/// uses the heap representation, and `new_len` must not exceed the memory
/// behind its pointer - confirm against callers.
#[cfg(ruby_lte_3_2)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    raw_str.as_mut().as_.heap.len = new_len;
}
729
+
730
/// Overwrite the length recorded in the raw Ruby string `raw_str` with
/// `new_len`.
///
/// Variant compiled under the `ruby_gte_3_3` cfg, where the length is
/// written to the top-level `len` field.
///
/// # Safety
///
/// NOTE(review): presumably `raw_str` must point to a live Ruby string and
/// `new_len` must not exceed the memory behind its pointer - confirm
/// against callers.
#[cfg(ruby_gte_3_3)]
unsafe fn update_rstring_len(&self, mut raw_str: NonNull<rb_sys::RString>, new_len: c_long) {
    raw_str.as_mut().len = new_len;
}
734
+ }
735
+
736
+ #[cfg(test)]
737
+ mod test {
738
+ use super::*;
739
+ use core::panic;
740
+ use magnus::error::Error;
741
+ use magnus::eval;
742
+ use magnus::Range;
743
+ use nix::unistd::{sysconf, SysconfVar};
744
+ use std::mem::size_of;
745
+
746
+ use crate::raw_entry::RawEntry;
747
+ use crate::testhelper::TestFile;
748
+
749
+ /// Create a wrapped MmapedFile object.
750
+ fn create_obj() -> Obj<MmapedFile> {
751
+ let TestFile {
752
+ file: _file,
753
+ path,
754
+ dir: _dir,
755
+ } = TestFile::new(&[0u8; 8]);
756
+
757
+ let path_str = path.display().to_string();
758
+ let rpath = RString::new(&path_str);
759
+
760
+ eval!("FastMmapedFileRs.new(path)", path = rpath).unwrap()
761
+ }
762
+
763
+ /// Add three entries to the mmap. Expected length is 56, 3x 16-byte
764
+ /// entries with 8-byte header.
765
+ fn populate_entries(rb_self: &Obj<MmapedFile>) -> RHash {
766
+ let positions = RHash::from_value(eval("{}").unwrap()).unwrap();
767
+
768
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("a"), 0.0).unwrap();
769
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("b"), 1.0).unwrap();
770
+ MmapedFile::upsert_entry(*rb_self, positions, RString::new("c"), 2.0).unwrap();
771
+
772
+ positions
773
+ }
774
+
775
/// Constructing the object from Ruby succeeds, attaches the weak-map
/// tracker, grows the file to one page, and initialises the `used` header.
#[test]
fn test_new() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let TestFile {
        file,
        path,
        dir: _dir,
    } = TestFile::new(&[0u8; 8]);

    let rpath = RString::new(&path.display().to_string());

    // Object created successfully
    let created: std::result::Result<Obj<MmapedFile>, Error> =
        eval!("FastMmapedFileRs.new(path)", path = rpath);
    assert!(created.is_ok());
    let obj = created.unwrap();

    // Weak map added
    let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();
    assert_eq!("ObjectSpace::WeakMap", tracker.class().inspect());

    // File expanded to page size
    let page_size = sysconf(SysconfVar::PAGE_SIZE).unwrap().unwrap() as u64;
    let file_len = file.metadata().unwrap().len();
    assert_eq!(page_size, file_len);

    // Used set to header size
    let used = obj.load_used().unwrap().to_u64().unwrap();
    assert_eq!(HEADER_SIZE as u64, used);
}
811
+
812
/// Slicing the mmap must expose the header (holding the used length) and the
/// raw bytes of the first entry: key length, padded key, then the value.
#[test]
fn test_slice() {
    let _cleanup = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let _ = populate_entries(&obj);

    // Validate header updated with new length
    let header_range = Range::new(0, HEADER_SIZE, true).unwrap().as_value();
    let header_slice = MmapedFile::slice(obj, &[header_range]).unwrap();
    assert_eq!([56, 0, 0, 0, 0, 0, 0, 0], unsafe {
        header_slice.as_slice()
    });

    // First entry: bytes HEADER_SIZE..24 of the mmap (end-exclusive range).
    let value_range = Range::new(HEADER_SIZE, 24, true).unwrap().as_value();
    let value_slice = MmapedFile::slice(obj, &[value_range]).unwrap();

    // SAFETY: only plain assertions run while this borrow is alive; no Ruby
    // code is invoked that could modify or free the string.
    let entry = unsafe { value_slice.as_slice() };

    // Validate string length
    assert_eq!(1u32.to_ne_bytes(), &entry[0..4]);

    // Validate string and padding. Bytes 4..8 hold the 1-byte key "a" plus
    // three padding bytes, so the expected text must be four bytes long.
    // NOTE(review): padding is assumed to be ASCII spaces — confirm against
    // the entry encoder.
    assert_eq!("a   ", String::from_utf8_lossy(&entry[4..8]));

    // Validate value
    assert_eq!(0.0f64.to_ne_bytes(), &entry[8..16]);
}
844
+
845
/// Strings that share the mmap's buffer must stay valid, and keep pointing
/// into the mmap, after the mmap has been expanded and remapped.
#[test]
fn test_slice_resize() {
    let _vm = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    /// Walk every value held by `@weak_obj_tracker` and verify it.
    ///
    /// * The child must name the parent in `as.heap.aux.shared` and point at
    ///   the tail of the mmap.
    /// * The parent must point at the start of the mmap, span its full
    ///   capacity, carry a shared/no-embed flag and be frozen.
    /// * Any other tracked string (including the unshared one) is a failure.
    ///
    /// Both the parent and the child must have been seen by the end.
    fn assert_internals(
        obj: Obj<MmapedFile>,
        parent_id: c_ulong,
        child_id: c_ulong,
        unshared_id: c_ulong,
    ) {
        let rs_self = &*obj;
        let tracker: Value = obj.ivar_get("@weak_obj_tracker").unwrap();

        let mmap_ptr = rs_self.as_mut_ptr();
        let mmap_len = rs_self.capacity();

        let mut parent_checked = false;
        let mut child_checked = false;

        for tracked in tracker.enumeratorize("each_value", ()) {
            let str = RString::from_value(tracked.unwrap()).unwrap();

            unsafe {
                let raw_str = MmapedFile::rb_string_internal(str);
                let id = str.as_raw();

                if id == child_id {
                    assert_eq!(parent_id, raw_str.as_ref().as_.heap.aux.shared);

                    // The child's buffer is the last `str.len()` bytes of the mmap.
                    let child_offset = mmap_len as isize - str.len() as isize;
                    assert_eq!(mmap_ptr.offset(child_offset), raw_str.as_ref().as_.heap.ptr);

                    child_checked = true;
                    continue;
                }

                if id == parent_id {
                    assert_eq!(parent_id, str.as_raw());

                    assert_eq!(mmap_ptr, raw_str.as_ref().as_.heap.ptr);
                    assert_eq!(mmap_len as c_long, str.len() as c_long);
                    assert!(raw_str.as_ref().basic.flags & (STR_SHARED | STR_NOEMBED) > 0);
                    assert!(str.is_frozen());

                    parent_checked = true;
                    continue;
                }

                if id == unshared_id {
                    panic!("tracking unshared string");
                }
                panic!("unknown string");
            }
        }

        assert!(parent_checked && child_checked);
    }

    let obj = create_obj();
    let _ = populate_entries(&obj);

    let rs_self = &*obj;

    // A string covering the whole mmap.
    let parent_str = rs_self.str(obj).unwrap();
    let parent_id = parent_str.as_raw();

    // Ruby only creates a shared string when the slice runs to the end of
    // the original string, so build an inclusive range ending at `len - 1`.
    let len = rs_self.inner(|inner| Ok(inner.len())).unwrap();
    let shareable_range = Range::new(1, len - 1, false).unwrap().as_value();

    // Expected to reuse the parent's buffer at an offset, with the parent's
    // id stored in `as.heap.aux.shared`.
    let child_str = rs_self._slice(obj, parent_str, &[shareable_range]).unwrap();
    let child_id = child_str.as_raw();

    // A slice that stops short of the parent's end will not be shared.
    assert!(len > 4);
    let unshareable_range = Range::new(0, 4, false).unwrap().as_value();

    // This one must own its buffer and must NOT be tracked.
    let unshared_str = rs_self
        ._slice(obj, parent_str, &[unshareable_range])
        .unwrap();
    let unshared_id = unshared_str.as_raw();
    assert!(!MmapedFile::rb_string_is_shared(unshared_str));

    assert_internals(obj, parent_id, child_id, unshared_id);

    let orig_ptr = rs_self.as_mut_ptr();
    // Double the capacity repeatedly so the mapping is forced to move.
    (0..16).for_each(|_| {
        rs_self.expand_to_fit(obj, rs_self.capacity() * 2).unwrap();
    });
    let new_ptr = rs_self.as_mut_ptr();
    assert!(orig_ptr != new_ptr);

    // Reading the strings would segfault if their pointers had not been
    // updated to the remapped file.
    let _: Value = eval!("puts parent", parent = parent_str).unwrap();
    let _: Value = eval!("puts child", child = child_str).unwrap();
    let _: Value = eval!("puts unshared", unshared = unshared_str).unwrap();

    // The tracked strings must still satisfy the same invariants.
    assert_internals(obj, parent_id, child_id, unshared_id);
}
946
+
947
/// Upserting an entry sized to exactly fill the remaining capacity must
/// cause the mmap to grow rather than be filled completely.
#[test]
fn test_dont_fill_mmap() {
    let _vm = unsafe { magnus::embed::init() };
    let ruby = magnus::Ruby::get().unwrap();
    crate::init(&ruby).unwrap();

    let obj = create_obj();
    let positions = populate_entries(&obj);

    let rs_self = &*obj;

    rs_self.expand_to_fit(obj, 1024).unwrap();

    let current_used = rs_self.inner(|inner| inner.load_used()).unwrap() as usize;
    let current_cap = rs_self.inner(|inner| Ok(inner.len())).unwrap();

    // Pick a key length whose entry lands exactly on the current capacity.
    let fixed_overhead = HEADER_SIZE + size_of::<f64>() + size_of::<u32>();
    let val_len = current_cap - current_used - fixed_overhead;
    let filled_len = RawEntry::calc_total_len(val_len).unwrap() + current_used;
    assert_eq!(current_cap, filled_len);

    let key = "A".repeat(val_len);
    MmapedFile::upsert_entry(obj, positions, RString::new(&key), 1.0).unwrap();

    // The mmap must have been expanded, which leaves room for a trailing NUL.
    assert!(rs_self.capacity() > current_cap);
}
977
+ }