bootsnap 1.6.0 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,15 +14,21 @@
14
14
  #include "bootsnap.h"
15
15
  #include "ruby.h"
16
16
  #include <stdint.h>
17
+ #include <stdbool.h>
17
18
  #include <sys/types.h>
18
19
  #include <errno.h>
19
20
  #include <fcntl.h>
21
+ #include <unistd.h>
20
22
  #include <sys/stat.h>
21
- #ifndef _WIN32
22
- #include <sys/utsname.h>
23
+
24
+ #ifdef __APPLE__
25
+ // The symbol is present, however not in the headers
26
+ // See: https://github.com/Shopify/bootsnap/issues/470
27
+ extern int fdatasync(int);
23
28
  #endif
24
- #ifdef __GLIBC__
25
- #include <gnu/libc-version.h>
29
+
30
+ #ifndef O_NOATIME
31
+ #define O_NOATIME 0
26
32
  #endif
27
33
 
28
34
  /* 1000 is an arbitrary limit; FNV64 plus some slashes brings the cap down to
@@ -34,6 +40,10 @@
34
40
 
35
41
  #define MAX_CREATE_TEMPFILE_ATTEMPT 3
36
42
 
43
+ #ifndef RB_UNLIKELY
44
+ #define RB_UNLIKELY(x) (x)
45
+ #endif
46
+
37
47
  /*
38
48
  * An instance of this key is written as the first 64 bytes of each cache file.
39
49
  * The mtime and size members track whether the file contents have changed, and
@@ -55,8 +65,10 @@ struct bs_cache_key {
55
65
  uint32_t ruby_revision;
56
66
  uint64_t size;
57
67
  uint64_t mtime;
58
- uint64_t data_size; /* not used for equality */
59
- uint8_t pad[24];
68
+ uint64_t data_size; //
69
+ uint64_t digest;
70
+ uint8_t digest_set;
71
+ uint8_t pad[15];
60
72
  } __attribute__((packed));
61
73
 
62
74
  /*
@@ -70,7 +82,7 @@ struct bs_cache_key {
70
82
  STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE);
71
83
 
72
84
  /* Effectively a schema version. Bumping invalidates all previous caches */
73
- static const uint32_t current_version = 3;
85
+ static const uint32_t current_version = 5;
74
86
 
75
87
  /* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a
76
88
  * new OS ABI, etc. */
@@ -86,22 +98,38 @@ static mode_t current_umask;
86
98
  static VALUE rb_mBootsnap;
87
99
  static VALUE rb_mBootsnap_CompileCache;
88
100
  static VALUE rb_mBootsnap_CompileCache_Native;
89
- static VALUE rb_eBootsnap_CompileCache_Uncompilable;
90
- static ID uncompilable;
101
+ static VALUE rb_cBootsnap_CompileCache_UNCOMPILABLE;
102
+ static ID instrumentation_method;
103
+ static VALUE sym_hit, sym_miss, sym_stale, sym_revalidated;
104
+ static bool instrumentation_enabled = false;
105
+ static bool readonly = false;
106
+ static bool revalidation = false;
107
+ static bool perm_issue = false;
91
108
 
92
109
  /* Functions exposed as module functions on Bootsnap::CompileCache::Native */
110
+ static VALUE bs_instrumentation_enabled_set(VALUE self, VALUE enabled);
111
+ static VALUE bs_readonly_set(VALUE self, VALUE enabled);
112
+ static VALUE bs_revalidation_set(VALUE self, VALUE enabled);
93
113
  static VALUE bs_compile_option_crc32_set(VALUE self, VALUE crc32_v);
94
114
  static VALUE bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler, VALUE args);
95
115
  static VALUE bs_rb_precompile(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler);
96
116
 
97
117
  /* Helpers */
98
- static uint64_t fnv1a_64(const char *str);
99
- static void bs_cache_path(const char * cachedir, const char * path, char (* cache_path)[MAX_CACHEPATH_SIZE]);
118
+ enum cache_status {
119
+ miss,
120
+ hit,
121
+ stale,
122
+ };
123
+ static void bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_CACHEPATH_SIZE]);
100
124
  static int bs_read_key(int fd, struct bs_cache_key * key);
101
- static int cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2);
125
+ static enum cache_status cache_key_equal_fast_path(struct bs_cache_key * k1, struct bs_cache_key * k2);
126
+ static int cache_key_equal_slow_path(struct bs_cache_key * current_key, struct bs_cache_key * cached_key, const VALUE input_data);
127
+ static int update_cache_key(struct bs_cache_key *current_key, struct bs_cache_key *old_key, int cache_fd, const char ** errno_provenance);
128
+
129
+ static void bs_cache_key_digest(struct bs_cache_key * key, const VALUE input_data);
102
130
  static VALUE bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args);
103
131
  static VALUE bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler);
104
- static int open_current_file(char * path, struct bs_cache_key * key, const char ** errno_provenance);
132
+ static int open_current_file(const char * path, struct bs_cache_key * key, const char ** errno_provenance);
105
133
  static int fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * output_data, int * exception_tag, const char ** errno_provenance);
106
134
  static uint32_t get_ruby_revision(void);
107
135
  static uint32_t get_ruby_platform(void);
@@ -111,10 +139,8 @@ static uint32_t get_ruby_platform(void);
111
139
  * exception.
112
140
  */
113
141
  static int bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * output_data);
114
- static VALUE prot_storage_to_output(VALUE arg);
115
142
  static VALUE prot_input_to_output(VALUE arg);
116
143
  static void bs_input_to_output(VALUE handler, VALUE args, VALUE input_data, VALUE * output_data, int * exception_tag);
117
- static VALUE prot_input_to_storage(VALUE arg);
118
144
  static int bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data);
119
145
  struct s2o_data;
120
146
  struct i2o_data;
@@ -129,6 +155,12 @@ bs_rb_coverage_running(VALUE self)
129
155
  return RTEST(cov) ? Qtrue : Qfalse;
130
156
  }
131
157
 
158
+ static VALUE
159
+ bs_rb_get_path(VALUE self, VALUE fname)
160
+ {
161
+ return rb_get_path(fname);
162
+ }
163
+
132
164
  /*
133
165
  * Ruby C extensions are initialized by calling Init_<extname>.
134
166
  *
@@ -140,15 +172,27 @@ void
140
172
  Init_bootsnap(void)
141
173
  {
142
174
  rb_mBootsnap = rb_define_module("Bootsnap");
175
+
176
+ rb_define_singleton_method(rb_mBootsnap, "rb_get_path", bs_rb_get_path, 1);
177
+
143
178
  rb_mBootsnap_CompileCache = rb_define_module_under(rb_mBootsnap, "CompileCache");
144
179
  rb_mBootsnap_CompileCache_Native = rb_define_module_under(rb_mBootsnap_CompileCache, "Native");
145
- rb_eBootsnap_CompileCache_Uncompilable = rb_define_class_under(rb_mBootsnap_CompileCache, "Uncompilable", rb_eStandardError);
180
+ rb_cBootsnap_CompileCache_UNCOMPILABLE = rb_const_get(rb_mBootsnap_CompileCache, rb_intern("UNCOMPILABLE"));
181
+ rb_global_variable(&rb_cBootsnap_CompileCache_UNCOMPILABLE);
146
182
 
147
183
  current_ruby_revision = get_ruby_revision();
148
184
  current_ruby_platform = get_ruby_platform();
149
185
 
150
- uncompilable = rb_intern("__bootsnap_uncompilable__");
186
+ instrumentation_method = rb_intern("_instrument");
151
187
 
188
+ sym_hit = ID2SYM(rb_intern("hit"));
189
+ sym_miss = ID2SYM(rb_intern("miss"));
190
+ sym_stale = ID2SYM(rb_intern("stale"));
191
+ sym_revalidated = ID2SYM(rb_intern("revalidated"));
192
+
193
+ rb_define_module_function(rb_mBootsnap, "instrumentation_enabled=", bs_instrumentation_enabled_set, 1);
194
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "readonly=", bs_readonly_set, 1);
195
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "revalidation=", bs_revalidation_set, 1);
152
196
  rb_define_module_function(rb_mBootsnap_CompileCache_Native, "coverage_running?", bs_rb_coverage_running, 0);
153
197
  rb_define_module_function(rb_mBootsnap_CompileCache_Native, "fetch", bs_rb_fetch, 4);
154
198
  rb_define_module_function(rb_mBootsnap_CompileCache_Native, "precompile", bs_rb_precompile, 3);
@@ -158,6 +202,35 @@ Init_bootsnap(void)
158
202
  umask(current_umask);
159
203
  }
160
204
 
205
+ static VALUE
206
+ bs_instrumentation_enabled_set(VALUE self, VALUE enabled)
207
+ {
208
+ instrumentation_enabled = RTEST(enabled);
209
+ return enabled;
210
+ }
211
+
212
+ static inline void
213
+ bs_instrumentation(VALUE event, VALUE path)
214
+ {
215
+ if (RB_UNLIKELY(instrumentation_enabled)) {
216
+ rb_funcall(rb_mBootsnap, instrumentation_method, 2, event, path);
217
+ }
218
+ }
219
+
220
+ static VALUE
221
+ bs_readonly_set(VALUE self, VALUE enabled)
222
+ {
223
+ readonly = RTEST(enabled);
224
+ return enabled;
225
+ }
226
+
227
+ static VALUE
228
+ bs_revalidation_set(VALUE self, VALUE enabled)
229
+ {
230
+ revalidation = RTEST(enabled);
231
+ return enabled;
232
+ }
233
+
161
234
  /*
162
235
  * Bootsnap's ruby code registers a hook that notifies us via this function
163
236
  * when compile_option changes. These changes invalidate all existing caches.
@@ -175,22 +248,13 @@ bs_compile_option_crc32_set(VALUE self, VALUE crc32_v)
175
248
  return Qnil;
176
249
  }
177
250
 
178
- /*
179
- * We use FNV1a-64 to derive cache paths. The choice is somewhat arbitrary but
180
- * it has several nice properties:
181
- *
182
- * - Tiny implementation
183
- * - No external dependency
184
- * - Solid performance
185
- * - Solid randomness
186
- * - 32 bits doesn't feel collision-resistant enough; 64 is nice.
187
- */
188
251
  static uint64_t
189
- fnv1a_64_iter(uint64_t h, const char *str)
252
+ fnv1a_64_iter(uint64_t h, const VALUE str)
190
253
  {
191
- unsigned char *s = (unsigned char *)str;
254
+ unsigned char *s = (unsigned char *)RSTRING_PTR(str);
255
+ unsigned char *str_end = (unsigned char *)RSTRING_PTR(str) + RSTRING_LEN(str);
192
256
 
193
- while (*s) {
257
+ while (s < str_end) {
194
258
  h ^= (uint64_t)*s++;
195
259
  h += (h << 1) + (h << 4) + (h << 5) + (h << 7) + (h << 8) + (h << 40);
196
260
  }
@@ -199,7 +263,7 @@ fnv1a_64_iter(uint64_t h, const char *str)
199
263
  }
200
264
 
201
265
  static uint64_t
202
- fnv1a_64(const char *str)
266
+ fnv1a_64(const VALUE str)
203
267
  {
204
268
  uint64_t h = (uint64_t)0xcbf29ce484222325ULL;
205
269
  return fnv1a_64_iter(h, str);
@@ -220,7 +284,7 @@ get_ruby_revision(void)
220
284
  } else {
221
285
  uint64_t hash;
222
286
 
223
- hash = fnv1a_64(StringValueCStr(ruby_revision));
287
+ hash = fnv1a_64(ruby_revision);
224
288
  return (uint32_t)(hash >> 32);
225
289
  }
226
290
  }
@@ -228,10 +292,6 @@ get_ruby_revision(void)
228
292
  /*
229
293
  * When ruby's version doesn't change, but it's recompiled on a different OS
230
294
  * (or OS version), we need to invalidate the cache.
231
- *
232
- * We actually factor in some extra information here, to be extra confident
233
- * that we don't try to re-use caches that will not be compatible, by factoring
234
- * in utsname.version.
235
295
  */
236
296
  static uint32_t
237
297
  get_ruby_platform(void)
@@ -240,23 +300,8 @@ get_ruby_platform(void)
240
300
  VALUE ruby_platform;
241
301
 
242
302
  ruby_platform = rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM"));
243
- hash = fnv1a_64(RSTRING_PTR(ruby_platform));
244
-
245
- #ifdef _WIN32
246
- return (uint32_t)(hash >> 32) ^ (uint32_t)GetVersion();
247
- #elif defined(__GLIBC__)
248
- hash = fnv1a_64_iter(hash, gnu_get_libc_version());
249
- return (uint32_t)(hash >> 32);
250
- #else
251
- struct utsname utsname;
252
-
253
- /* Not worth crashing if this fails; lose extra cache invalidation potential */
254
- if (uname(&utsname) >= 0) {
255
- hash = fnv1a_64_iter(hash, utsname.version);
256
- }
257
-
303
+ hash = fnv1a_64(ruby_platform);
258
304
  return (uint32_t)(hash >> 32);
259
- #endif
260
305
  }
261
306
 
262
307
  /*
@@ -267,13 +312,13 @@ get_ruby_platform(void)
267
312
  * The path will look something like: <cachedir>/12/34567890abcdef
268
313
  */
269
314
  static void
270
- bs_cache_path(const char * cachedir, const char * path, char (* cache_path)[MAX_CACHEPATH_SIZE])
315
+ bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_CACHEPATH_SIZE])
271
316
  {
272
317
  uint64_t hash = fnv1a_64(path);
273
318
  uint8_t first_byte = (hash >> (64 - 8));
274
319
  uint64_t remainder = hash & 0x00ffffffffffffff;
275
320
 
276
- sprintf(*cache_path, "%s/%02x/%014llx", cachedir, first_byte, remainder);
321
+ sprintf(*cache_path, "%s/%02"PRIx8"/%014"PRIx64, cachedir, first_byte, remainder);
277
322
  }
278
323
 
279
324
  /*
@@ -284,17 +329,59 @@ bs_cache_path(const char * cachedir, const char * path, char (* cache_path)[MAX_
284
329
  * The data_size member is not compared, as it serves more of a "header"
285
330
  * function.
286
331
  */
287
- static int
288
- cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2)
332
+ static enum cache_status cache_key_equal_fast_path(struct bs_cache_key *k1,
333
+ struct bs_cache_key *k2) {
334
+ if (k1->version == k2->version &&
335
+ k1->ruby_platform == k2->ruby_platform &&
336
+ k1->compile_option == k2->compile_option &&
337
+ k1->ruby_revision == k2->ruby_revision && k1->size == k2->size) {
338
+ if (k1->mtime == k2->mtime) {
339
+ return hit;
340
+ }
341
+ if (revalidation) {
342
+ return stale;
343
+ }
344
+ }
345
+ return miss;
346
+ }
347
+
348
+ static int cache_key_equal_slow_path(struct bs_cache_key *current_key,
349
+ struct bs_cache_key *cached_key,
350
+ const VALUE input_data)
289
351
  {
290
- return (
291
- k1->version == k2->version &&
292
- k1->ruby_platform == k2->ruby_platform &&
293
- k1->compile_option == k2->compile_option &&
294
- k1->ruby_revision == k2->ruby_revision &&
295
- k1->size == k2->size &&
296
- k1->mtime == k2->mtime
297
- );
352
+ bs_cache_key_digest(current_key, input_data);
353
+ return current_key->digest == cached_key->digest;
354
+ }
355
+
356
+ static int update_cache_key(struct bs_cache_key *current_key, struct bs_cache_key *old_key, int cache_fd, const char ** errno_provenance)
357
+ {
358
+ old_key->mtime = current_key->mtime;
359
+ lseek(cache_fd, 0, SEEK_SET);
360
+ ssize_t nwrite = write(cache_fd, old_key, KEY_SIZE);
361
+ if (nwrite < 0) {
362
+ *errno_provenance = "update_cache_key:write";
363
+ return -1;
364
+ }
365
+
366
+ #ifdef HAVE_FDATASYNC
367
+ if (fdatasync(cache_fd) < 0) {
368
+ *errno_provenance = "update_cache_key:fdatasync";
369
+ return -1;
370
+ }
371
+ #endif
372
+
373
+ return 0;
374
+ }
375
+
376
+ /*
377
+ * Fills the cache key digest.
378
+ */
379
+ static void bs_cache_key_digest(struct bs_cache_key *key,
380
+ const VALUE input_data) {
381
+ if (key->digest_set)
382
+ return;
383
+ key->digest = fnv1a_64(input_data);
384
+ key->digest_set = 1;
298
385
  }
299
386
 
300
387
  /*
@@ -319,7 +406,7 @@ bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler, VALUE arg
319
406
  char cache_path[MAX_CACHEPATH_SIZE];
320
407
 
321
408
  /* generate cache path to cache_path */
322
- bs_cache_path(cachedir, path, &cache_path);
409
+ bs_cache_path(cachedir, path_v, &cache_path);
323
410
 
324
411
  return bs_fetch(path, path_v, cache_path, handler, args);
325
412
  }
@@ -346,21 +433,38 @@ bs_rb_precompile(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler)
346
433
  char cache_path[MAX_CACHEPATH_SIZE];
347
434
 
348
435
  /* generate cache path to cache_path */
349
- bs_cache_path(cachedir, path, &cache_path);
436
+ bs_cache_path(cachedir, path_v, &cache_path);
350
437
 
351
438
  return bs_precompile(path, path_v, cache_path, handler);
352
439
  }
440
+
441
+ static int bs_open_noatime(const char *path, int flags) {
442
+ int fd = 1;
443
+ if (!perm_issue) {
444
+ fd = open(path, flags | O_NOATIME);
445
+ if (fd < 0 && errno == EPERM) {
446
+ errno = 0;
447
+ perm_issue = true;
448
+ }
449
+ }
450
+
451
+ if (perm_issue) {
452
+ fd = open(path, flags);
453
+ }
454
+ return fd;
455
+ }
456
+
353
457
  /*
354
458
  * Open the file we want to load/cache and generate a cache key for it if it
355
459
  * was loaded.
356
460
  */
357
461
  static int
358
- open_current_file(char * path, struct bs_cache_key * key, const char ** errno_provenance)
462
+ open_current_file(const char * path, struct bs_cache_key * key, const char ** errno_provenance)
359
463
  {
360
464
  struct stat statbuf;
361
465
  int fd;
362
466
 
363
- fd = open(path, O_RDONLY);
467
+ fd = bs_open_noatime(path, O_RDONLY);
364
468
  if (fd < 0) {
365
469
  *errno_provenance = "bs_fetch:open_current_file:open";
366
470
  return fd;
@@ -371,7 +475,9 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr
371
475
 
372
476
  if (fstat(fd, &statbuf) < 0) {
373
477
  *errno_provenance = "bs_fetch:open_current_file:fstat";
478
+ int previous_errno = errno;
374
479
  close(fd);
480
+ errno = previous_errno;
375
481
  return -1;
376
482
  }
377
483
 
@@ -381,12 +487,15 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr
381
487
  key->ruby_revision = current_ruby_revision;
382
488
  key->size = (uint64_t)statbuf.st_size;
383
489
  key->mtime = (uint64_t)statbuf.st_mtime;
490
+ key->digest_set = false;
384
491
 
385
492
  return fd;
386
493
  }
387
494
 
388
495
  #define ERROR_WITH_ERRNO -1
389
- #define CACHE_MISSING_OR_INVALID -2
496
+ #define CACHE_MISS -2
497
+ #define CACHE_STALE -3
498
+ #define CACHE_UNCOMPILABLE -4
390
499
 
391
500
  /*
392
501
  * Read the cache key from the given fd, which must have position 0 (e.g.
@@ -394,15 +503,16 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr
394
503
  *
395
504
  * Possible return values:
396
505
  * - 0 (OK, key was loaded)
397
- * - CACHE_MISSING_OR_INVALID (-2)
398
506
  * - ERROR_WITH_ERRNO (-1, errno is set)
507
+ * - CACHE_MISS (-2)
508
+ * - CACHE_STALE (-3)
399
509
  */
400
510
  static int
401
511
  bs_read_key(int fd, struct bs_cache_key * key)
402
512
  {
403
513
  ssize_t nread = read(fd, key, KEY_SIZE);
404
514
  if (nread < 0) return ERROR_WITH_ERRNO;
405
- if (nread < KEY_SIZE) return CACHE_MISSING_OR_INVALID;
515
+ if (nread < KEY_SIZE) return CACHE_STALE;
406
516
  return 0;
407
517
  }
408
518
 
@@ -412,7 +522,8 @@ bs_read_key(int fd, struct bs_cache_key * key)
412
522
  *
413
523
  * Possible return values:
414
524
  * - 0 (OK, key was loaded)
415
- * - CACHE_MISSING_OR_INVALID (-2)
525
+ * - CACHE_MISS (-2)
526
+ * - CACHE_STALE (-3)
416
527
  * - ERROR_WITH_ERRNO (-1, errno is set)
417
528
  */
418
529
  static int
@@ -420,11 +531,15 @@ open_cache_file(const char * path, struct bs_cache_key * key, const char ** errn
420
531
  {
421
532
  int fd, res;
422
533
 
423
- fd = open(path, O_RDONLY);
534
+ if (readonly || !revalidation) {
535
+ fd = bs_open_noatime(path, O_RDONLY);
536
+ } else {
537
+ fd = bs_open_noatime(path, O_RDWR);
538
+ }
539
+
424
540
  if (fd < 0) {
425
541
  *errno_provenance = "bs_fetch:open_cache_file:open";
426
- if (errno == ENOENT) return CACHE_MISSING_OR_INVALID;
427
- return ERROR_WITH_ERRNO;
542
+ return CACHE_MISS;
428
543
  }
429
544
  #ifdef _WIN32
430
545
  setmode(fd, O_BINARY);
@@ -458,7 +573,6 @@ open_cache_file(const char * path, struct bs_cache_key * key, const char ** errn
458
573
  static int
459
574
  fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * output_data, int * exception_tag, const char ** errno_provenance)
460
575
  {
461
- char * data = NULL;
462
576
  ssize_t nread;
463
577
  int ret;
464
578
 
@@ -467,27 +581,30 @@ fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE *
467
581
  if (data_size > 100000000000) {
468
582
  *errno_provenance = "bs_fetch:fetch_cached_data:datasize";
469
583
  errno = EINVAL; /* because wtf? */
470
- ret = -1;
584
+ ret = ERROR_WITH_ERRNO;
471
585
  goto done;
472
586
  }
473
- data = ALLOC_N(char, data_size);
474
- nread = read(fd, data, data_size);
587
+ storage_data = rb_str_buf_new(data_size);
588
+ nread = read(fd, RSTRING_PTR(storage_data), data_size);
475
589
  if (nread < 0) {
476
590
  *errno_provenance = "bs_fetch:fetch_cached_data:read";
477
- ret = -1;
591
+ ret = ERROR_WITH_ERRNO;
478
592
  goto done;
479
593
  }
480
594
  if (nread != data_size) {
481
- ret = CACHE_MISSING_OR_INVALID;
595
+ ret = CACHE_STALE;
482
596
  goto done;
483
597
  }
484
598
 
485
- storage_data = rb_str_new(data, data_size);
599
+ rb_str_set_len(storage_data, nread);
486
600
 
487
601
  *exception_tag = bs_storage_to_output(handler, args, storage_data, output_data);
602
+ if (*output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
603
+ ret = CACHE_UNCOMPILABLE;
604
+ goto done;
605
+ }
488
606
  ret = 0;
489
607
  done:
490
- if (data != NULL) xfree(data);
491
608
  return ret;
492
609
  }
493
610
 
@@ -594,17 +711,22 @@ atomic_write_cache_file(char * path, struct bs_cache_key * key, VALUE data, cons
594
711
 
595
712
 
596
713
  /* Read contents from an fd, whose contents are asserted to be +size+ bytes
597
- * long, into a buffer */
598
- static ssize_t
599
- bs_read_contents(int fd, size_t size, char ** contents, const char ** errno_provenance)
714
+ * long, returning a Ruby string on success and Qfalse on failure */
715
+ static VALUE
716
+ bs_read_contents(int fd, size_t size, const char ** errno_provenance)
600
717
  {
718
+ VALUE contents;
601
719
  ssize_t nread;
602
- *contents = ALLOC_N(char, size);
603
- nread = read(fd, *contents, size);
720
+ contents = rb_str_buf_new(size);
721
+ nread = read(fd, RSTRING_PTR(contents), size);
722
+
604
723
  if (nread < 0) {
605
724
  *errno_provenance = "bs_fetch:bs_read_contents:read";
725
+ return Qfalse;
726
+ } else {
727
+ rb_str_set_len(contents, nread);
728
+ return contents;
606
729
  }
607
- return nread;
608
730
  }
609
731
 
610
732
  /*
@@ -655,31 +777,68 @@ static VALUE
655
777
  bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args)
656
778
  {
657
779
  struct bs_cache_key cached_key, current_key;
658
- char * contents = NULL;
659
780
  int cache_fd = -1, current_fd = -1;
660
781
  int res, valid_cache = 0, exception_tag = 0;
661
782
  const char * errno_provenance = NULL;
662
783
 
663
- VALUE input_data; /* data read from source file, e.g. YAML or ruby source */
784
+ VALUE status = Qfalse;
785
+ VALUE input_data = Qfalse; /* data read from source file, e.g. YAML or ruby source */
664
786
  VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
665
787
  VALUE output_data; /* return data, e.g. ruby hash or loaded iseq */
666
788
 
667
789
  VALUE exception; /* ruby exception object to raise instead of returning */
790
+ VALUE exception_message; /* ruby exception string to use instead of errno_provenance */
668
791
 
669
792
  /* Open the source file and generate a cache key for it */
670
793
  current_fd = open_current_file(path, &current_key, &errno_provenance);
671
- if (current_fd < 0) goto fail_errno;
794
+ if (current_fd < 0) {
795
+ exception_message = path_v;
796
+ goto fail_errno;
797
+ }
672
798
 
673
799
  /* Open the cache key if it exists, and read its cache key in */
674
800
  cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance);
675
- if (cache_fd == CACHE_MISSING_OR_INVALID) {
801
+ if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE) {
676
802
  /* This is ok: valid_cache remains false, we re-populate it. */
803
+ bs_instrumentation(cache_fd == CACHE_MISS ? sym_miss : sym_stale, path_v);
677
804
  } else if (cache_fd < 0) {
805
+ exception_message = rb_str_new_cstr(cache_path);
678
806
  goto fail_errno;
679
807
  } else {
680
808
  /* True if the cache existed and no invalidating changes have occurred since
681
809
  * it was generated. */
682
- valid_cache = cache_key_equal(&current_key, &cached_key);
810
+
811
+ switch(cache_key_equal_fast_path(&current_key, &cached_key)) {
812
+ case hit:
813
+ status = sym_hit;
814
+ valid_cache = true;
815
+ break;
816
+ case miss:
817
+ valid_cache = false;
818
+ break;
819
+ case stale:
820
+ valid_cache = false;
821
+ if ((input_data = bs_read_contents(current_fd, current_key.size,
822
+ &errno_provenance)) == Qfalse) {
823
+ exception_message = path_v;
824
+ goto fail_errno;
825
+ }
826
+ valid_cache = cache_key_equal_slow_path(&current_key, &cached_key, input_data);
827
+ if (valid_cache) {
828
+ if (!readonly) {
829
+ if (update_cache_key(&current_key, &cached_key, cache_fd, &errno_provenance)) {
830
+ exception_message = path_v;
831
+ goto fail_errno;
832
+ }
833
+ }
834
+ status = sym_revalidated;
835
+ }
836
+ break;
837
+ };
838
+
839
+ if (!valid_cache) {
840
+ status = sym_stale;
841
+ }
683
842
  }
684
843
 
685
844
  if (valid_cache) {
@@ -688,25 +847,40 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
688
847
  cache_fd, (ssize_t)cached_key.data_size, handler, args,
689
848
  &output_data, &exception_tag, &errno_provenance
690
849
  );
691
- if (exception_tag != 0) goto raise;
692
- else if (res == CACHE_MISSING_OR_INVALID) valid_cache = 0;
693
- else if (res == ERROR_WITH_ERRNO) goto fail_errno;
694
- else if (!NIL_P(output_data)) goto succeed; /* fast-path, goal */
850
+ if (exception_tag != 0) goto raise;
851
+ else if (res == CACHE_UNCOMPILABLE) {
852
+ /* If fetch_cached_data returned `Uncompilable` we fallback to `input_to_output`
853
+ This happens if we have say, an unsafe YAML cache, but try to load it in safe mode */
854
+ if (input_data == Qfalse && (input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
855
+ exception_message = path_v;
856
+ goto fail_errno;
857
+ }
858
+ bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
859
+ if (exception_tag != 0) goto raise;
860
+ goto succeed;
861
+ } else if (res == CACHE_MISS || res == CACHE_STALE) valid_cache = 0;
862
+ else if (res == ERROR_WITH_ERRNO){
863
+ exception_message = rb_str_new_cstr(cache_path);
864
+ goto fail_errno;
865
+ }
866
+ else if (!NIL_P(output_data)) goto succeed; /* fast-path, goal */
695
867
  }
696
868
  close(cache_fd);
697
869
  cache_fd = -1;
698
870
  /* Cache is stale, invalid, or missing. Regenerate and write it out. */
699
871
 
700
872
  /* Read the contents of the source file into a buffer */
701
- if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail_errno;
702
- input_data = rb_str_new(contents, current_key.size);
873
+ if (input_data == Qfalse && (input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
874
+ exception_message = path_v;
875
+ goto fail_errno;
876
+ }
703
877
 
704
878
  /* Try to compile the input_data using input_to_storage(input_data) */
705
879
  exception_tag = bs_input_to_storage(handler, args, input_data, path_v, &storage_data);
706
880
  if (exception_tag != 0) goto raise;
707
881
  /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
708
882
  * to cache anything; just return input_to_output(input_data) */
709
- if (storage_data == uncompilable) {
883
+ if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
710
884
  bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
711
885
  if (exception_tag != 0) goto raise;
712
886
  goto succeed;
@@ -714,20 +888,32 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
714
888
  /* If storage_data isn't a string, we can't cache it */
715
889
  if (!RB_TYPE_P(storage_data, T_STRING)) goto invalid_type_storage_data;
716
890
 
717
- /* Write the cache key and storage_data to the cache directory */
718
- res = atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
719
- if (res < 0) goto fail_errno;
891
+ /* Attempt to write the cache key and storage_data to the cache directory.
892
+ * We do however ignore any failures to persist the cache, as it's better
893
+ * to move along, than to interrupt the process.
894
+ */
895
+ bs_cache_key_digest(&current_key, input_data);
896
+ atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
720
897
 
721
898
  /* Having written the cache, now convert storage_data to output_data */
722
899
  exception_tag = bs_storage_to_output(handler, args, storage_data, &output_data);
723
900
  if (exception_tag != 0) goto raise;
724
901
 
725
- /* If output_data is nil, delete the cache entry and generate the output
726
- * using input_to_output */
727
- if (NIL_P(output_data)) {
902
+ if (output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
903
+ /* If storage_to_output returned `Uncompilable` we fallback to `input_to_output` */
904
+ bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
905
+ if (exception_tag != 0) goto raise;
906
+ } else if (NIL_P(output_data)) {
907
+ /* If output_data is nil, delete the cache entry and generate the output
908
+ * using input_to_output */
728
909
  if (unlink(cache_path) < 0) {
729
- errno_provenance = "bs_fetch:unlink";
730
- goto fail_errno;
910
+ /* If the cache was already deleted, it might be that another process did it before us.
911
+ * No point raising an error */
912
+ if (errno != ENOENT) {
913
+ errno_provenance = "bs_fetch:unlink";
914
+ exception_message = rb_str_new_cstr(cache_path);
915
+ goto fail_errno;
916
+ }
731
917
  }
732
918
  bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
733
919
  if (exception_tag != 0) goto raise;
@@ -736,7 +922,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
736
922
  goto succeed; /* output_data is now the correct return. */
737
923
 
738
924
  #define CLEANUP \
739
- if (contents != NULL) xfree(contents); \
925
+ if (status != Qfalse) bs_instrumentation(status, path_v); \
740
926
  if (current_fd >= 0) close(current_fd); \
741
927
  if (cache_fd >= 0) close(cache_fd);
742
928
 
@@ -745,7 +931,13 @@ succeed:
745
931
  return output_data;
746
932
  fail_errno:
747
933
  CLEANUP;
748
- exception = rb_syserr_new(errno, errno_provenance);
934
+ if (errno_provenance) {
935
+ exception_message = rb_str_concat(
936
+ rb_str_new_cstr(errno_provenance),
937
+ rb_str_concat(rb_str_new_cstr(": "), exception_message)
938
+ );
939
+ }
940
+ exception = rb_syserr_new_str(errno, exception_message);
749
941
  rb_exc_raise(exception);
750
942
  __builtin_unreachable();
751
943
  raise:
@@ -763,13 +955,16 @@ invalid_type_storage_data:
763
955
  static VALUE
764
956
  bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
765
957
  {
958
+ if (readonly) {
959
+ return Qfalse;
960
+ }
961
+
766
962
  struct bs_cache_key cached_key, current_key;
767
- char * contents = NULL;
768
963
  int cache_fd = -1, current_fd = -1;
769
964
  int res, valid_cache = 0, exception_tag = 0;
770
965
  const char * errno_provenance = NULL;
771
966
 
772
- VALUE input_data; /* data read from source file, e.g. YAML or ruby source */
967
+ VALUE input_data = Qfalse; /* data read from source file, e.g. YAML or ruby source */
773
968
  VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
774
969
 
775
970
  /* Open the source file and generate a cache key for it */
@@ -778,14 +973,33 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
778
973
 
779
974
  /* Open the cache key if it exists, and read its cache key in */
780
975
  cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance);
781
- if (cache_fd == CACHE_MISSING_OR_INVALID) {
976
+ if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE) {
782
977
  /* This is ok: valid_cache remains false, we re-populate it. */
783
978
  } else if (cache_fd < 0) {
784
979
  goto fail;
785
980
  } else {
786
981
  /* True if the cache existed and no invalidating changes have occurred since
787
982
  * it was generated. */
788
- valid_cache = cache_key_equal(&current_key, &cached_key);
983
+ switch(cache_key_equal_fast_path(&current_key, &cached_key)) {
984
+ case hit:
985
+ valid_cache = true;
986
+ break;
987
+ case miss:
988
+ valid_cache = false;
989
+ break;
990
+ case stale:
991
+ valid_cache = false;
992
+ if ((input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
993
+ goto fail;
994
+ }
995
+ valid_cache = cache_key_equal_slow_path(&current_key, &cached_key, input_data);
996
+ if (valid_cache) {
997
+ if (update_cache_key(&current_key, &cached_key, cache_fd, &errno_provenance)) {
998
+ goto fail;
999
+ }
1000
+ }
1001
+ break;
1002
+ };
789
1003
  }
790
1004
 
791
1005
  if (valid_cache) {
@@ -797,8 +1011,7 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
797
1011
  /* Cache is stale, invalid, or missing. Regenerate and write it out. */
798
1012
 
799
1013
  /* Read the contents of the source file into a buffer */
800
- if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail;
801
- input_data = rb_str_new(contents, current_key.size);
1014
+ if ((input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) goto fail;
802
1015
 
803
1016
  /* Try to compile the input_data using input_to_storage(input_data) */
804
1017
  exception_tag = bs_input_to_storage(handler, Qnil, input_data, path_v, &storage_data);
@@ -806,20 +1019,20 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
806
1019
 
807
1020
  /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
808
1021
  * to cache anything; just return false */
809
- if (storage_data == uncompilable) {
1022
+ if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
810
1023
  goto fail;
811
1024
  }
812
1025
  /* If storage_data isn't a string, we can't cache it */
813
1026
  if (!RB_TYPE_P(storage_data, T_STRING)) goto fail;
814
1027
 
815
1028
  /* Write the cache key and storage_data to the cache directory */
1029
+ bs_cache_key_digest(&current_key, input_data);
816
1030
  res = atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
817
1031
  if (res < 0) goto fail;
818
1032
 
819
1033
  goto succeed;
820
1034
 
821
1035
  #define CLEANUP \
822
- if (contents != NULL) xfree(contents); \
823
1036
  if (current_fd >= 0) close(current_fd); \
824
1037
  if (cache_fd >= 0) close(cache_fd);
825
1038
 
@@ -869,7 +1082,7 @@ struct i2s_data {
869
1082
  };
870
1083
 
871
1084
  static VALUE
872
- prot_storage_to_output(VALUE arg)
1085
+ try_storage_to_output(VALUE arg)
873
1086
  {
874
1087
  struct s2o_data * data = (struct s2o_data *)arg;
875
1088
  return rb_funcall(data->handler, rb_intern("storage_to_output"), 2, data->storage_data, data->args);
@@ -884,7 +1097,7 @@ bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * outp
884
1097
  .args = args,
885
1098
  .storage_data = storage_data,
886
1099
  };
887
- *output_data = rb_protect(prot_storage_to_output, (VALUE)&s2o_data, &state);
1100
+ *output_data = rb_protect(try_storage_to_output, (VALUE)&s2o_data, &state);
888
1101
  return state;
889
1102
  }
890
1103
 
@@ -913,31 +1126,20 @@ try_input_to_storage(VALUE arg)
913
1126
  return rb_funcall(data->handler, rb_intern("input_to_storage"), 2, data->input_data, data->pathval);
914
1127
  }
915
1128
 
916
- static VALUE
917
- rescue_input_to_storage(VALUE arg, VALUE e)
918
- {
919
- return uncompilable;
920
- }
921
-
922
- static VALUE
923
- prot_input_to_storage(VALUE arg)
924
- {
925
- struct i2s_data * data = (struct i2s_data *)arg;
926
- return rb_rescue2(
927
- try_input_to_storage, (VALUE)data,
928
- rescue_input_to_storage, Qnil,
929
- rb_eBootsnap_CompileCache_Uncompilable, 0);
930
- }
931
-
932
1129
  static int
933
1130
  bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data)
934
1131
  {
935
- int state;
936
- struct i2s_data i2s_data = {
937
- .handler = handler,
938
- .input_data = input_data,
939
- .pathval = pathval,
940
- };
941
- *storage_data = rb_protect(prot_input_to_storage, (VALUE)&i2s_data, &state);
942
- return state;
1132
+ if (readonly) {
1133
+ *storage_data = rb_cBootsnap_CompileCache_UNCOMPILABLE;
1134
+ return 0;
1135
+ } else {
1136
+ int state;
1137
+ struct i2s_data i2s_data = {
1138
+ .handler = handler,
1139
+ .input_data = input_data,
1140
+ .pathval = pathval,
1141
+ };
1142
+ *storage_data = rb_protect(try_input_to_storage, (VALUE)&i2s_data, &state);
1143
+ return state;
1144
+ }
943
1145
  }