bootsnap 1.6.0 → 1.18.3

@@ -14,15 +14,21 @@
  #include "bootsnap.h"
  #include "ruby.h"
  #include <stdint.h>
+ #include <stdbool.h>
  #include <sys/types.h>
  #include <errno.h>
  #include <fcntl.h>
+ #include <unistd.h>
  #include <sys/stat.h>
- #ifndef _WIN32
- #include <sys/utsname.h>
+
+ #ifdef __APPLE__
+ // The symbol is present, however not in the headers
+ // See: https://github.com/Shopify/bootsnap/issues/470
+ extern int fdatasync(int);
  #endif
- #ifdef __GLIBC__
- #include <gnu/libc-version.h>
+
+ #ifndef O_NOATIME
+ #define O_NOATIME 0
  #endif

  /* 1000 is an arbitrary limit; FNV64 plus some slashes brings the cap down to
@@ -34,6 +40,10 @@

  #define MAX_CREATE_TEMPFILE_ATTEMPT 3

+ #ifndef RB_UNLIKELY
+ #define RB_UNLIKELY(x) (x)
+ #endif
+
  /*
  * An instance of this key is written as the first 64 bytes of each cache file.
  * The mtime and size members track whether the file contents have changed, and
@@ -55,8 +65,10 @@ struct bs_cache_key {
  uint32_t ruby_revision;
  uint64_t size;
  uint64_t mtime;
- uint64_t data_size; /* not used for equality */
- uint8_t pad[24];
+ uint64_t data_size; //
+ uint64_t digest;
+ uint8_t digest_set;
+ uint8_t pad[15];
  } __attribute__((packed));

  /*
@@ -70,7 +82,7 @@ struct bs_cache_key {
  STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE);

  /* Effectively a schema version. Bumping invalidates all previous caches */
- static const uint32_t current_version = 3;
+ static const uint32_t current_version = 5;

  /* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a
  * new OS ABI, etc. */
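
The reshaped key still has to satisfy the STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE) above: the 24 spare bytes of the old layout are re-split into an 8-byte digest, a 1-byte digest_set flag, and 15 bytes of padding. A minimal standalone sketch of the same size arithmetic (field names taken from this diff; KEY_SIZE assumed to be 64, as the header comment implies):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Mirror of the packed key in this diff, for size arithmetic only. */
    struct key {
        uint32_t version, ruby_platform, compile_option, ruby_revision; /* 16 bytes */
        uint64_t size, mtime, data_size;                                /* 24 bytes */
        uint64_t digest;                                                /*  8 bytes */
        uint8_t  digest_set;                                            /*  1 byte  */
        uint8_t  pad[15];                                               /* 15 bytes */
    } __attribute__((packed));

    int main(void) {
        static_assert(sizeof(struct key) == 64, "cache header must stay 64 bytes");
        printf("sizeof(struct key) = %zu\n", sizeof(struct key));
        return 0;
    }
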
@@ -86,22 +98,38 @@ static mode_t current_umask;
  static VALUE rb_mBootsnap;
  static VALUE rb_mBootsnap_CompileCache;
  static VALUE rb_mBootsnap_CompileCache_Native;
- static VALUE rb_eBootsnap_CompileCache_Uncompilable;
- static ID uncompilable;
+ static VALUE rb_cBootsnap_CompileCache_UNCOMPILABLE;
+ static ID instrumentation_method;
+ static VALUE sym_hit, sym_miss, sym_stale, sym_revalidated;
+ static bool instrumentation_enabled = false;
+ static bool readonly = false;
+ static bool revalidation = false;
+ static bool perm_issue = false;

  /* Functions exposed as module functions on Bootsnap::CompileCache::Native */
+ static VALUE bs_instrumentation_enabled_set(VALUE self, VALUE enabled);
+ static VALUE bs_readonly_set(VALUE self, VALUE enabled);
+ static VALUE bs_revalidation_set(VALUE self, VALUE enabled);
  static VALUE bs_compile_option_crc32_set(VALUE self, VALUE crc32_v);
  static VALUE bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler, VALUE args);
  static VALUE bs_rb_precompile(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler);

  /* Helpers */
- static uint64_t fnv1a_64(const char *str);
- static void bs_cache_path(const char * cachedir, const char * path, char (* cache_path)[MAX_CACHEPATH_SIZE]);
+ enum cache_status {
+ miss,
+ hit,
+ stale,
+ };
+ static void bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_CACHEPATH_SIZE]);
  static int bs_read_key(int fd, struct bs_cache_key * key);
- static int cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2);
+ static enum cache_status cache_key_equal_fast_path(struct bs_cache_key * k1, struct bs_cache_key * k2);
+ static int cache_key_equal_slow_path(struct bs_cache_key * current_key, struct bs_cache_key * cached_key, const VALUE input_data);
+ static int update_cache_key(struct bs_cache_key *current_key, struct bs_cache_key *old_key, int cache_fd, const char ** errno_provenance);
+
+ static void bs_cache_key_digest(struct bs_cache_key * key, const VALUE input_data);
  static VALUE bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args);
  static VALUE bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler);
- static int open_current_file(char * path, struct bs_cache_key * key, const char ** errno_provenance);
+ static int open_current_file(const char * path, struct bs_cache_key * key, const char ** errno_provenance);
  static int fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * output_data, int * exception_tag, const char ** errno_provenance);
  static uint32_t get_ruby_revision(void);
  static uint32_t get_ruby_platform(void);
@@ -111,10 +139,8 @@ static uint32_t get_ruby_platform(void);
  * exception.
  */
  static int bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * output_data);
- static VALUE prot_storage_to_output(VALUE arg);
  static VALUE prot_input_to_output(VALUE arg);
  static void bs_input_to_output(VALUE handler, VALUE args, VALUE input_data, VALUE * output_data, int * exception_tag);
- static VALUE prot_input_to_storage(VALUE arg);
  static int bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data);
  struct s2o_data;
  struct i2o_data;
@@ -129,6 +155,12 @@ bs_rb_coverage_running(VALUE self)
  return RTEST(cov) ? Qtrue : Qfalse;
  }

+ static VALUE
+ bs_rb_get_path(VALUE self, VALUE fname)
+ {
+ return rb_get_path(fname);
+ }
+
  /*
  * Ruby C extensions are initialized by calling Init_<extname>.
  *
@@ -140,15 +172,27 @@ void
  Init_bootsnap(void)
  {
  rb_mBootsnap = rb_define_module("Bootsnap");
+
+ rb_define_singleton_method(rb_mBootsnap, "rb_get_path", bs_rb_get_path, 1);
+
  rb_mBootsnap_CompileCache = rb_define_module_under(rb_mBootsnap, "CompileCache");
  rb_mBootsnap_CompileCache_Native = rb_define_module_under(rb_mBootsnap_CompileCache, "Native");
- rb_eBootsnap_CompileCache_Uncompilable = rb_define_class_under(rb_mBootsnap_CompileCache, "Uncompilable", rb_eStandardError);
+ rb_cBootsnap_CompileCache_UNCOMPILABLE = rb_const_get(rb_mBootsnap_CompileCache, rb_intern("UNCOMPILABLE"));
+ rb_global_variable(&rb_cBootsnap_CompileCache_UNCOMPILABLE);

  current_ruby_revision = get_ruby_revision();
  current_ruby_platform = get_ruby_platform();

- uncompilable = rb_intern("__bootsnap_uncompilable__");
+ instrumentation_method = rb_intern("_instrument");

+ sym_hit = ID2SYM(rb_intern("hit"));
+ sym_miss = ID2SYM(rb_intern("miss"));
+ sym_stale = ID2SYM(rb_intern("stale"));
+ sym_revalidated = ID2SYM(rb_intern("revalidated"));
+
+ rb_define_module_function(rb_mBootsnap, "instrumentation_enabled=", bs_instrumentation_enabled_set, 1);
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "readonly=", bs_readonly_set, 1);
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "revalidation=", bs_revalidation_set, 1);
  rb_define_module_function(rb_mBootsnap_CompileCache_Native, "coverage_running?", bs_rb_coverage_running, 0);
  rb_define_module_function(rb_mBootsnap_CompileCache_Native, "fetch", bs_rb_fetch, 4);
  rb_define_module_function(rb_mBootsnap_CompileCache_Native, "precompile", bs_rb_precompile, 3);
@@ -158,6 +202,35 @@ Init_bootsnap(void)
  umask(current_umask);
  }

+ static VALUE
+ bs_instrumentation_enabled_set(VALUE self, VALUE enabled)
+ {
+ instrumentation_enabled = RTEST(enabled);
+ return enabled;
+ }
+
+ static inline void
+ bs_instrumentation(VALUE event, VALUE path)
+ {
+ if (RB_UNLIKELY(instrumentation_enabled)) {
+ rb_funcall(rb_mBootsnap, instrumentation_method, 2, event, path);
+ }
+ }
+
+ static VALUE
+ bs_readonly_set(VALUE self, VALUE enabled)
+ {
+ readonly = RTEST(enabled);
+ return enabled;
+ }
+
+ static VALUE
+ bs_revalidation_set(VALUE self, VALUE enabled)
+ {
+ revalidation = RTEST(enabled);
+ return enabled;
+ }
+
  /*
  * Bootsnap's ruby code registers a hook that notifies us via this function
  * when compile_option changes. These changes invalidate all existing caches.
@@ -175,22 +248,13 @@ bs_compile_option_crc32_set(VALUE self, VALUE crc32_v)
  return Qnil;
  }

- /*
- * We use FNV1a-64 to derive cache paths. The choice is somewhat arbitrary but
- * it has several nice properties:
- *
- * - Tiny implementation
- * - No external dependency
- * - Solid performance
- * - Solid randomness
- * - 32 bits doesn't feel collision-resistant enough; 64 is nice.
- */
  static uint64_t
- fnv1a_64_iter(uint64_t h, const char *str)
+ fnv1a_64_iter(uint64_t h, const VALUE str)
  {
- unsigned char *s = (unsigned char *)str;
+ unsigned char *s = (unsigned char *)RSTRING_PTR(str);
+ unsigned char *str_end = (unsigned char *)RSTRING_PTR(str) + RSTRING_LEN(str);

- while (*s) {
+ while (s < str_end) {
  h ^= (uint64_t)*s++;
  h += (h << 1) + (h << 4) + (h << 5) + (h << 7) + (h << 8) + (h << 40);
  }
@@ -199,7 +263,7 @@ fnv1a_64_iter(uint64_t h, const char *str)
  }

  static uint64_t
- fnv1a_64(const char *str)
+ fnv1a_64(const VALUE str)
  {
  uint64_t h = (uint64_t)0xcbf29ce484222325ULL;
  return fnv1a_64_iter(h, str);
@@ -220,7 +284,7 @@ get_ruby_revision(void)
  } else {
  uint64_t hash;

- hash = fnv1a_64(StringValueCStr(ruby_revision));
+ hash = fnv1a_64(ruby_revision);
  return (uint32_t)(hash >> 32);
  }
  }
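
The removed comment block explained why FNV-1a-64 was chosen; the function now walks a Ruby string by pointer and length instead of relying on NUL termination. The shift-add line above is just multiplication by the 64-bit FNV prime 0x100000001b3 written out as shifts. A standalone sketch (not the gem's code) using the plain multiply form, printing the truncated 32-bit value that get_ruby_revision/get_ruby_platform keep:

    #include <inttypes.h>
    #include <stdio.h>
    #include <string.h>

    /* FNV-1a 64: same offset basis and prime as the shift-add form in the diff. */
    static uint64_t fnv1a_64_buf(const void *buf, size_t len) {
        const unsigned char *s = buf;
        uint64_t h = 0xcbf29ce484222325ULL;
        for (size_t i = 0; i < len; i++) {
            h ^= s[i];
            h *= 0x100000001b3ULL; /* == h + (h<<1)+(h<<4)+(h<<5)+(h<<7)+(h<<8)+(h<<40) */
        }
        return h;
    }

    int main(void) {
        const char *platform = "x86_64-darwin17"; /* example value from the comments above */
        uint64_t h = fnv1a_64_buf(platform, strlen(platform));
        printf("hash=%016" PRIx64 " top32=%08" PRIx32 "\n", h, (uint32_t)(h >> 32));
        return 0;
    }
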
@@ -228,10 +292,6 @@ get_ruby_revision(void)
  /*
  * When ruby's version doesn't change, but it's recompiled on a different OS
  * (or OS version), we need to invalidate the cache.
- *
- * We actually factor in some extra information here, to be extra confident
- * that we don't try to re-use caches that will not be compatible, by factoring
- * in utsname.version.
  */
  static uint32_t
  get_ruby_platform(void)
@@ -240,23 +300,8 @@ get_ruby_platform(void)
  VALUE ruby_platform;

  ruby_platform = rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM"));
- hash = fnv1a_64(RSTRING_PTR(ruby_platform));
-
- #ifdef _WIN32
- return (uint32_t)(hash >> 32) ^ (uint32_t)GetVersion();
- #elif defined(__GLIBC__)
- hash = fnv1a_64_iter(hash, gnu_get_libc_version());
- return (uint32_t)(hash >> 32);
- #else
- struct utsname utsname;
-
- /* Not worth crashing if this fails; lose extra cache invalidation potential */
- if (uname(&utsname) >= 0) {
- hash = fnv1a_64_iter(hash, utsname.version);
- }
-
+ hash = fnv1a_64(ruby_platform);
  return (uint32_t)(hash >> 32);
- #endif
  }

  /*
@@ -267,13 +312,13 @@ get_ruby_platform(void)
  * The path will look something like: <cachedir>/12/34567890abcdef
  */
  static void
- bs_cache_path(const char * cachedir, const char * path, char (* cache_path)[MAX_CACHEPATH_SIZE])
+ bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_CACHEPATH_SIZE])
  {
  uint64_t hash = fnv1a_64(path);
  uint8_t first_byte = (hash >> (64 - 8));
  uint64_t remainder = hash & 0x00ffffffffffffff;

- sprintf(*cache_path, "%s/%02x/%014llx", cachedir, first_byte, remainder);
+ sprintf(*cache_path, "%s/%02"PRIx8"/%014"PRIx64, cachedir, first_byte, remainder);
  }

  /*
@@ -284,17 +329,59 @@ bs_cache_path(const char * cachedir, const char * path, char (* cache_path)[MAX_
  * The data_size member is not compared, as it serves more of a "header"
  * function.
  */
- static int
- cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2)
+ static enum cache_status cache_key_equal_fast_path(struct bs_cache_key *k1,
+ struct bs_cache_key *k2) {
+ if (k1->version == k2->version &&
+ k1->ruby_platform == k2->ruby_platform &&
+ k1->compile_option == k2->compile_option &&
+ k1->ruby_revision == k2->ruby_revision && k1->size == k2->size) {
+ if (k1->mtime == k2->mtime) {
+ return hit;
+ }
+ if (revalidation) {
+ return stale;
+ }
+ }
+ return miss;
+ }
+
+ static int cache_key_equal_slow_path(struct bs_cache_key *current_key,
+ struct bs_cache_key *cached_key,
+ const VALUE input_data)
  {
- return (
- k1->version == k2->version &&
- k1->ruby_platform == k2->ruby_platform &&
- k1->compile_option == k2->compile_option &&
- k1->ruby_revision == k2->ruby_revision &&
- k1->size == k2->size &&
- k1->mtime == k2->mtime
- );
+ bs_cache_key_digest(current_key, input_data);
+ return current_key->digest == cached_key->digest;
+ }
+
+ static int update_cache_key(struct bs_cache_key *current_key, struct bs_cache_key *old_key, int cache_fd, const char ** errno_provenance)
+ {
+ old_key->mtime = current_key->mtime;
+ lseek(cache_fd, 0, SEEK_SET);
+ ssize_t nwrite = write(cache_fd, old_key, KEY_SIZE);
+ if (nwrite < 0) {
+ *errno_provenance = "update_cache_key:write";
+ return -1;
+ }
+
+ #ifdef HAVE_FDATASYNC
+ if (fdatasync(cache_fd) < 0) {
+ *errno_provenance = "update_cache_key:fdatasync";
+ return -1;
+ }
+ #endif
+
+ return 0;
+ }
+
+ /*
+ * Fills the cache key digest.
+ */
+ static void bs_cache_key_digest(struct bs_cache_key *key,
+ const VALUE input_data) {
+ if (key->digest_set)
+ return;
+ key->digest = fnv1a_64(input_data);
+ key->digest_set = 1;
  }
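
Taken together, these helpers implement the new revalidation scheme: the fast path compares the cheap metadata and treats an mtime-only difference as stale rather than a miss (when revalidation is enabled); the slow path hashes the current source and compares digests; update_cache_key then rewrites only the stored mtime. A simplified, self-contained sketch of that decision flow (toy types, no file I/O; not the gem's code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    enum cache_status { MISS, HIT, STALE };

    /* Simplified key: only the fields that matter for this decision. */
    struct key { uint64_t size, mtime, digest; bool digest_set; };

    static uint64_t fnv64(const char *s, size_t len) {
        uint64_t h = 0xcbf29ce484222325ULL;
        for (size_t i = 0; i < len; i++) { h ^= (unsigned char)s[i]; h *= 0x100000001b3ULL; }
        return h;
    }

    static enum cache_status fast_path(const struct key *cur, const struct key *old, bool revalidation) {
        if (cur->size != old->size) return MISS;    /* cheap metadata mismatch: regenerate */
        if (cur->mtime == old->mtime) return HIT;   /* identical mtime: trust the cache */
        return revalidation ? STALE : MISS;         /* mtime drifted: maybe the file was only touched */
    }

    /* STALE case: hash the source; if it matches the stored digest, keep the cache
     * and refresh only the stored mtime (bootsnap also rewrites the key on disk). */
    static bool slow_path(struct key *cur, struct key *old, const char *src) {
        cur->digest = fnv64(src, strlen(src));
        cur->digest_set = true;
        if (cur->digest != old->digest) return false;
        old->mtime = cur->mtime;
        return true;
    }

    int main(void) {
        const char *src = "puts 'hello'\n";
        struct key old = { strlen(src), 100, fnv64(src, strlen(src)), true };
        struct key cur = { strlen(src), 200, 0, false };   /* same contents, newer mtime */
        if (fast_path(&cur, &old, true) == STALE && slow_path(&cur, &old, src))
            puts("revalidated: cache kept, stored mtime refreshed");
        return 0;
    }
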
 
  /*
@@ -319,7 +406,7 @@ bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler, VALUE arg
  char cache_path[MAX_CACHEPATH_SIZE];

  /* generate cache path to cache_path */
- bs_cache_path(cachedir, path, &cache_path);
+ bs_cache_path(cachedir, path_v, &cache_path);

  return bs_fetch(path, path_v, cache_path, handler, args);
  }
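
bs_cache_path (earlier hunk) turns the 64-bit FNV hash of the source path into <cachedir>/<2 hex chars>/<14 hex chars>: the top byte fans entries out over 256 subdirectories and the remaining 56 bits name the file. A hedged standalone sketch of the same formatting; the hash value and the /tmp/bootsnap-cache directory are made up for illustration:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void) {
        uint64_t hash = 0x1234567890abcdefULL;              /* stands in for fnv1a_64(path) */
        uint8_t  first_byte = (uint8_t)(hash >> 56);        /* 2 hex chars -> subdirectory  */
        uint64_t remainder  = hash & 0x00ffffffffffffffULL; /* 14 hex chars -> file name    */
        char cache_path[256];
        snprintf(cache_path, sizeof(cache_path), "%s/%02" PRIx8 "/%014" PRIx64,
                 "/tmp/bootsnap-cache", first_byte, remainder);
        puts(cache_path); /* /tmp/bootsnap-cache/12/34567890abcdef */
        return 0;
    }
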
@@ -346,21 +433,38 @@ bs_rb_precompile(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler)
  char cache_path[MAX_CACHEPATH_SIZE];

  /* generate cache path to cache_path */
- bs_cache_path(cachedir, path, &cache_path);
+ bs_cache_path(cachedir, path_v, &cache_path);

  return bs_precompile(path, path_v, cache_path, handler);
  }
+
+ static int bs_open_noatime(const char *path, int flags) {
+ int fd = 1;
+ if (!perm_issue) {
+ fd = open(path, flags | O_NOATIME);
+ if (fd < 0 && errno == EPERM) {
+ errno = 0;
+ perm_issue = true;
+ }
+ }
+
+ if (perm_issue) {
+ fd = open(path, flags);
+ }
+ return fd;
+ }
+
  /*
  * Open the file we want to load/cache and generate a cache key for it if it
  * was loaded.
  */
  static int
- open_current_file(char * path, struct bs_cache_key * key, const char ** errno_provenance)
+ open_current_file(const char * path, struct bs_cache_key * key, const char ** errno_provenance)
  {
  struct stat statbuf;
  int fd;

- fd = open(path, O_RDONLY);
+ fd = bs_open_noatime(path, O_RDONLY);
  if (fd < 0) {
  *errno_provenance = "bs_fetch:open_current_file:open";
  return fd;
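
bs_open_noatime exists because O_NOATIME is Linux-specific and is rejected with EPERM unless the caller owns the file, so the first such failure is remembered and later opens skip the flag. A hedged standalone sketch of the same pattern, reusing the #ifndef O_NOATIME guard from the top of the file (the file path in main() is arbitrary):

    #include <errno.h>
    #include <fcntl.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    #ifndef O_NOATIME
    #define O_NOATIME 0   /* no-op on platforms that lack it (macOS, Windows) */
    #endif

    static bool noatime_denied = false;

    static int open_noatime(const char *path, int flags) {
        int fd = -1;
        if (!noatime_denied) {
            fd = open(path, flags | O_NOATIME);
            if (fd < 0 && errno == EPERM) {   /* not the file owner: remember and retry plainly */
                errno = 0;
                noatime_denied = true;
            }
        }
        if (noatime_denied) fd = open(path, flags);
        return fd;
    }

    int main(void) {
        int fd = open_noatime("/etc/hostname", O_RDONLY); /* any readable file works here */
        printf("fd=%d noatime_denied=%d\n", fd, noatime_denied);
        if (fd >= 0) close(fd);
        return 0;
    }
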
@@ -371,7 +475,9 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr

  if (fstat(fd, &statbuf) < 0) {
  *errno_provenance = "bs_fetch:open_current_file:fstat";
+ int previous_errno = errno;
  close(fd);
+ errno = previous_errno;
  return -1;
  }

@@ -381,12 +487,15 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr
  key->ruby_revision = current_ruby_revision;
  key->size = (uint64_t)statbuf.st_size;
  key->mtime = (uint64_t)statbuf.st_mtime;
+ key->digest_set = false;

  return fd;
  }

  #define ERROR_WITH_ERRNO -1
- #define CACHE_MISSING_OR_INVALID -2
+ #define CACHE_MISS -2
+ #define CACHE_STALE -3
+ #define CACHE_UNCOMPILABLE -4

  /*
  * Read the cache key from the given fd, which must have position 0 (e.g.
@@ -394,15 +503,16 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr
  *
  * Possible return values:
  * - 0 (OK, key was loaded)
- * - CACHE_MISSING_OR_INVALID (-2)
  * - ERROR_WITH_ERRNO (-1, errno is set)
+ * - CACHE_MISS (-2)
+ * - CACHE_STALE (-3)
  */
  static int
  bs_read_key(int fd, struct bs_cache_key * key)
  {
  ssize_t nread = read(fd, key, KEY_SIZE);
  if (nread < 0) return ERROR_WITH_ERRNO;
- if (nread < KEY_SIZE) return CACHE_MISSING_OR_INVALID;
+ if (nread < KEY_SIZE) return CACHE_STALE;
  return 0;
  }
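
The single CACHE_MISSING_OR_INVALID sentinel is now split so callers can react differently to a missing cache, a stale one, and an uncompilable one. A small illustrative mapping of the codes to the actions bs_fetch takes (constants copied from this diff; the action descriptions are a paraphrase, not the gem's code):

    #include <stdio.h>

    #define ERROR_WITH_ERRNO   -1
    #define CACHE_MISS         -2
    #define CACHE_STALE        -3
    #define CACHE_UNCOMPILABLE -4

    /* Illustrative only: turn a bootsnap-style status code into an action name. */
    static const char *describe(int status) {
        switch (status) {
        case 0:                  return "cache key loaded";
        case ERROR_WITH_ERRNO:   return "raise SystemCallError from errno";
        case CACHE_MISS:         return "no cache file: compile and write one";
        case CACHE_STALE:        return "cache invalid or outdated: regenerate";
        case CACHE_UNCOMPILABLE: return "fall back to input_to_output";
        default:                 return "unexpected status";
        }
    }

    int main(void) {
        for (int s = 0; s >= CACHE_UNCOMPILABLE; s--)
            printf("%2d -> %s\n", s, describe(s));
        return 0;
    }
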
 
@@ -412,7 +522,8 @@ bs_read_key(int fd, struct bs_cache_key * key)
  *
  * Possible return values:
  * - 0 (OK, key was loaded)
- * - CACHE_MISSING_OR_INVALID (-2)
+ * - CACHE_MISS (-2)
+ * - CACHE_STALE (-3)
  * - ERROR_WITH_ERRNO (-1, errno is set)
  */
  static int
@@ -420,11 +531,15 @@ open_cache_file(const char * path, struct bs_cache_key * key, const char ** errn
  {
  int fd, res;

- fd = open(path, O_RDONLY);
+ if (readonly || !revalidation) {
+ fd = bs_open_noatime(path, O_RDONLY);
+ } else {
+ fd = bs_open_noatime(path, O_RDWR);
+ }
+
  if (fd < 0) {
  *errno_provenance = "bs_fetch:open_cache_file:open";
- if (errno == ENOENT) return CACHE_MISSING_OR_INVALID;
- return ERROR_WITH_ERRNO;
+ return CACHE_MISS;
  }
  #ifdef _WIN32
  setmode(fd, O_BINARY);
@@ -458,7 +573,6 @@ open_cache_file(const char * path, struct bs_cache_key * key, const char ** errn
  static int
  fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * output_data, int * exception_tag, const char ** errno_provenance)
  {
- char * data = NULL;
  ssize_t nread;
  int ret;

@@ -467,27 +581,30 @@ fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE *
  if (data_size > 100000000000) {
  *errno_provenance = "bs_fetch:fetch_cached_data:datasize";
  errno = EINVAL; /* because wtf? */
- ret = -1;
+ ret = ERROR_WITH_ERRNO;
  goto done;
  }
- data = ALLOC_N(char, data_size);
- nread = read(fd, data, data_size);
+ storage_data = rb_str_buf_new(data_size);
+ nread = read(fd, RSTRING_PTR(storage_data), data_size);
  if (nread < 0) {
  *errno_provenance = "bs_fetch:fetch_cached_data:read";
- ret = -1;
+ ret = ERROR_WITH_ERRNO;
  goto done;
  }
  if (nread != data_size) {
- ret = CACHE_MISSING_OR_INVALID;
+ ret = CACHE_STALE;
  goto done;
  }

- storage_data = rb_str_new(data, data_size);
+ rb_str_set_len(storage_data, nread);

  *exception_tag = bs_storage_to_output(handler, args, storage_data, output_data);
+ if (*output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
+ ret = CACHE_UNCOMPILABLE;
+ goto done;
+ }
  ret = 0;
  done:
- if (data != NULL) xfree(data);
  return ret;
  }
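
fetch_cached_data (above) and bs_read_contents (below) now read(2) straight into a Ruby String allocated with rb_str_buf_new and then trim it with rb_str_set_len, instead of filling a malloc'd buffer and copying it into a String afterwards. A minimal sketch of that pattern in isolation (assumes <ruby.h>; error reporting trimmed):

    #include <ruby.h>
    #include <unistd.h>

    /* Read up to `size` bytes from fd directly into a Ruby String's buffer.
     * Returns the String, or Qfalse if read(2) failed. Sketch only. */
    static VALUE read_into_rstring(int fd, long size) {
        VALUE str = rb_str_buf_new(size);          /* capacity `size`, length 0 */
        ssize_t nread = read(fd, RSTRING_PTR(str), size);
        if (nread < 0) return Qfalse;
        rb_str_set_len(str, nread);                /* expose only what was actually read */
        return str;
    }
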
 
@@ -594,17 +711,22 @@ atomic_write_cache_file(char * path, struct bs_cache_key * key, VALUE data, cons


  /* Read contents from an fd, whose contents are asserted to be +size+ bytes
- * long, into a buffer */
- static ssize_t
- bs_read_contents(int fd, size_t size, char ** contents, const char ** errno_provenance)
+ * long, returning a Ruby string on success and Qfalse on failure */
+ static VALUE
+ bs_read_contents(int fd, size_t size, const char ** errno_provenance)
  {
+ VALUE contents;
  ssize_t nread;
- *contents = ALLOC_N(char, size);
- nread = read(fd, *contents, size);
+ contents = rb_str_buf_new(size);
+ nread = read(fd, RSTRING_PTR(contents), size);
+
  if (nread < 0) {
  *errno_provenance = "bs_fetch:bs_read_contents:read";
+ return Qfalse;
+ } else {
+ rb_str_set_len(contents, nread);
+ return contents;
  }
- return nread;
  }

  /*
@@ -655,31 +777,68 @@ static VALUE
  bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args)
  {
  struct bs_cache_key cached_key, current_key;
- char * contents = NULL;
  int cache_fd = -1, current_fd = -1;
  int res, valid_cache = 0, exception_tag = 0;
  const char * errno_provenance = NULL;

- VALUE input_data; /* data read from source file, e.g. YAML or ruby source */
+ VALUE status = Qfalse;
+ VALUE input_data = Qfalse; /* data read from source file, e.g. YAML or ruby source */
  VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
  VALUE output_data; /* return data, e.g. ruby hash or loaded iseq */

  VALUE exception; /* ruby exception object to raise instead of returning */
+ VALUE exception_message; /* ruby exception string to use instead of errno_provenance */

  /* Open the source file and generate a cache key for it */
  current_fd = open_current_file(path, &current_key, &errno_provenance);
- if (current_fd < 0) goto fail_errno;
+ if (current_fd < 0) {
+ exception_message = path_v;
+ goto fail_errno;
+ }

  /* Open the cache key if it exists, and read its cache key in */
  cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance);
- if (cache_fd == CACHE_MISSING_OR_INVALID) {
+ if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE) {
  /* This is ok: valid_cache remains false, we re-populate it. */
+ bs_instrumentation(cache_fd == CACHE_MISS ? sym_miss : sym_stale, path_v);
  } else if (cache_fd < 0) {
+ exception_message = rb_str_new_cstr(cache_path);
  goto fail_errno;
  } else {
  /* True if the cache existed and no invalidating changes have occurred since
  * it was generated. */
- valid_cache = cache_key_equal(&current_key, &cached_key);
+
+ switch(cache_key_equal_fast_path(&current_key, &cached_key)) {
+ case hit:
+ status = sym_hit;
+ valid_cache = true;
+ break;
+ case miss:
+ valid_cache = false;
+ break;
+ case stale:
+ valid_cache = false;
+ if ((input_data = bs_read_contents(current_fd, current_key.size,
+ &errno_provenance)) == Qfalse) {
+ exception_message = path_v;
+ goto fail_errno;
+ }
+ valid_cache = cache_key_equal_slow_path(&current_key, &cached_key, input_data);
+ if (valid_cache) {
+ if (!readonly) {
+ if (update_cache_key(&current_key, &cached_key, cache_fd, &errno_provenance)) {
+ exception_message = path_v;
+ goto fail_errno;
+ }
+ }
+ status = sym_revalidated;
+ }
+ break;
+ };
+
+ if (!valid_cache) {
+ status = sym_stale;
+ }
  }

  if (valid_cache) {
@@ -688,25 +847,40 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
  cache_fd, (ssize_t)cached_key.data_size, handler, args,
  &output_data, &exception_tag, &errno_provenance
  );
- if (exception_tag != 0) goto raise;
- else if (res == CACHE_MISSING_OR_INVALID) valid_cache = 0;
- else if (res == ERROR_WITH_ERRNO) goto fail_errno;
- else if (!NIL_P(output_data)) goto succeed; /* fast-path, goal */
+ if (exception_tag != 0) goto raise;
+ else if (res == CACHE_UNCOMPILABLE) {
+ /* If fetch_cached_data returned `Uncompilable` we fallback to `input_to_output`
+ This happens if we have say, an unsafe YAML cache, but try to load it in safe mode */
+ if (input_data == Qfalse && (input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
+ exception_message = path_v;
+ goto fail_errno;
+ }
+ bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
+ if (exception_tag != 0) goto raise;
+ goto succeed;
+ } else if (res == CACHE_MISS || res == CACHE_STALE) valid_cache = 0;
+ else if (res == ERROR_WITH_ERRNO){
+ exception_message = rb_str_new_cstr(cache_path);
+ goto fail_errno;
+ }
+ else if (!NIL_P(output_data)) goto succeed; /* fast-path, goal */
  }
  close(cache_fd);
  cache_fd = -1;
  /* Cache is stale, invalid, or missing. Regenerate and write it out. */

  /* Read the contents of the source file into a buffer */
- if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail_errno;
- input_data = rb_str_new(contents, current_key.size);
+ if (input_data == Qfalse && (input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
+ exception_message = path_v;
+ goto fail_errno;
+ }

  /* Try to compile the input_data using input_to_storage(input_data) */
  exception_tag = bs_input_to_storage(handler, args, input_data, path_v, &storage_data);
  if (exception_tag != 0) goto raise;
  /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
  * to cache anything; just return input_to_output(input_data) */
- if (storage_data == uncompilable) {
+ if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
  bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
  if (exception_tag != 0) goto raise;
  goto succeed;
@@ -714,20 +888,32 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
  /* If storage_data isn't a string, we can't cache it */
  if (!RB_TYPE_P(storage_data, T_STRING)) goto invalid_type_storage_data;

- /* Write the cache key and storage_data to the cache directory */
- res = atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
- if (res < 0) goto fail_errno;
+ /* Attempt to write the cache key and storage_data to the cache directory.
+ * We do however ignore any failures to persist the cache, as it's better
+ * to move along, than to interrupt the process.
+ */
+ bs_cache_key_digest(&current_key, input_data);
+ atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);

  /* Having written the cache, now convert storage_data to output_data */
  exception_tag = bs_storage_to_output(handler, args, storage_data, &output_data);
  if (exception_tag != 0) goto raise;

- /* If output_data is nil, delete the cache entry and generate the output
- * using input_to_output */
- if (NIL_P(output_data)) {
+ if (output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
+ /* If storage_to_output returned `Uncompilable` we fallback to `input_to_output` */
+ bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
+ if (exception_tag != 0) goto raise;
+ } else if (NIL_P(output_data)) {
+ /* If output_data is nil, delete the cache entry and generate the output
+ * using input_to_output */
  if (unlink(cache_path) < 0) {
- errno_provenance = "bs_fetch:unlink";
- goto fail_errno;
+ /* If the cache was already deleted, it might be that another process did it before us.
+ * No point raising an error */
+ if (errno != ENOENT) {
+ errno_provenance = "bs_fetch:unlink";
+ exception_message = rb_str_new_cstr(cache_path);
+ goto fail_errno;
+ }
  }
  bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
  if (exception_tag != 0) goto raise;
@@ -736,7 +922,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
  goto succeed; /* output_data is now the correct return. */

  #define CLEANUP \
- if (contents != NULL) xfree(contents); \
+ if (status != Qfalse) bs_instrumentation(status, path_v); \
  if (current_fd >= 0) close(current_fd); \
  if (cache_fd >= 0) close(cache_fd);

@@ -745,7 +931,13 @@ succeed:
  return output_data;
  fail_errno:
  CLEANUP;
- exception = rb_syserr_new(errno, errno_provenance);
+ if (errno_provenance) {
+ exception_message = rb_str_concat(
+ rb_str_new_cstr(errno_provenance),
+ rb_str_concat(rb_str_new_cstr(": "), exception_message)
+ );
+ }
+ exception = rb_syserr_new_str(errno, exception_message);
  rb_exc_raise(exception);
  __builtin_unreachable();
  raise:
@@ -763,13 +955,16 @@ invalid_type_storage_data:
  static VALUE
  bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
  {
+ if (readonly) {
+ return Qfalse;
+ }
+
  struct bs_cache_key cached_key, current_key;
- char * contents = NULL;
  int cache_fd = -1, current_fd = -1;
  int res, valid_cache = 0, exception_tag = 0;
  const char * errno_provenance = NULL;

- VALUE input_data; /* data read from source file, e.g. YAML or ruby source */
+ VALUE input_data = Qfalse; /* data read from source file, e.g. YAML or ruby source */
  VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */

  /* Open the source file and generate a cache key for it */
@@ -778,14 +973,33 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)

  /* Open the cache key if it exists, and read its cache key in */
  cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance);
- if (cache_fd == CACHE_MISSING_OR_INVALID) {
+ if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE) {
  /* This is ok: valid_cache remains false, we re-populate it. */
  } else if (cache_fd < 0) {
  goto fail;
  } else {
  /* True if the cache existed and no invalidating changes have occurred since
  * it was generated. */
- valid_cache = cache_key_equal(&current_key, &cached_key);
+ switch(cache_key_equal_fast_path(&current_key, &cached_key)) {
+ case hit:
+ valid_cache = true;
+ break;
+ case miss:
+ valid_cache = false;
+ break;
+ case stale:
+ valid_cache = false;
+ if ((input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
+ goto fail;
+ }
+ valid_cache = cache_key_equal_slow_path(&current_key, &cached_key, input_data);
+ if (valid_cache) {
+ if (update_cache_key(&current_key, &cached_key, cache_fd, &errno_provenance)) {
+ goto fail;
+ }
+ }
+ break;
+ };
  }

  if (valid_cache) {
@@ -797,8 +1011,7 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
  /* Cache is stale, invalid, or missing. Regenerate and write it out. */

  /* Read the contents of the source file into a buffer */
- if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail;
- input_data = rb_str_new(contents, current_key.size);
+ if ((input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) goto fail;

  /* Try to compile the input_data using input_to_storage(input_data) */
  exception_tag = bs_input_to_storage(handler, Qnil, input_data, path_v, &storage_data);
@@ -806,20 +1019,20 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)

  /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
  * to cache anything; just return false */
- if (storage_data == uncompilable) {
+ if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
  goto fail;
  }
  /* If storage_data isn't a string, we can't cache it */
  if (!RB_TYPE_P(storage_data, T_STRING)) goto fail;

  /* Write the cache key and storage_data to the cache directory */
+ bs_cache_key_digest(&current_key, input_data);
  res = atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
  if (res < 0) goto fail;

  goto succeed;

  #define CLEANUP \
- if (contents != NULL) xfree(contents); \
  if (current_fd >= 0) close(current_fd); \
  if (cache_fd >= 0) close(cache_fd);

@@ -869,7 +1082,7 @@ struct i2s_data {
  };

  static VALUE
- prot_storage_to_output(VALUE arg)
+ try_storage_to_output(VALUE arg)
  {
  struct s2o_data * data = (struct s2o_data *)arg;
  return rb_funcall(data->handler, rb_intern("storage_to_output"), 2, data->storage_data, data->args);
@@ -884,7 +1097,7 @@ bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * outp
  .args = args,
  .storage_data = storage_data,
  };
- *output_data = rb_protect(prot_storage_to_output, (VALUE)&s2o_data, &state);
+ *output_data = rb_protect(try_storage_to_output, (VALUE)&s2o_data, &state);
  return state;
  }

@@ -913,31 +1126,20 @@ try_input_to_storage(VALUE arg)
  return rb_funcall(data->handler, rb_intern("input_to_storage"), 2, data->input_data, data->pathval);
  }

- static VALUE
- rescue_input_to_storage(VALUE arg, VALUE e)
- {
- return uncompilable;
- }
-
- static VALUE
- prot_input_to_storage(VALUE arg)
- {
- struct i2s_data * data = (struct i2s_data *)arg;
- return rb_rescue2(
- try_input_to_storage, (VALUE)data,
- rescue_input_to_storage, Qnil,
- rb_eBootsnap_CompileCache_Uncompilable, 0);
- }
-
  static int
  bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data)
  {
- int state;
- struct i2s_data i2s_data = {
- .handler = handler,
- .input_data = input_data,
- .pathval = pathval,
- };
- *storage_data = rb_protect(prot_input_to_storage, (VALUE)&i2s_data, &state);
- return state;
+ if (readonly) {
+ *storage_data = rb_cBootsnap_CompileCache_UNCOMPILABLE;
+ return 0;
+ } else {
+ int state;
+ struct i2s_data i2s_data = {
+ .handler = handler,
+ .input_data = input_data,
+ .pathval = pathval,
+ };
+ *storage_data = rb_protect(try_input_to_storage, (VALUE)&i2s_data, &state);
+ return state;
+ }
  }
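
The prot_*/rescue_* pair is gone: uncompilable sources are now signalled by returning the UNCOMPILABLE sentinel rather than by raising, so a plain rb_protect around try_input_to_storage is enough. For reference, the general shape of that pattern, since rb_protect only passes a single VALUE through to the callback: a hedged sketch with a hypothetical callback and a generic `call` method, not bootsnap's handler API:

    #include <ruby.h>

    struct call_args { VALUE handler; VALUE payload; };

    /* Body executed under rb_protect: unpack the struct and make the risky call. */
    static VALUE try_call(VALUE arg) {
        struct call_args *a = (struct call_args *)arg;
        return rb_funcall(a->handler, rb_intern("call"), 1, a->payload);
    }

    /* Stores the result in *out; returns 0 on success, or the non-zero rb_protect
     * state if a Ruby exception was captured instead of unwinding through C. */
    static int protected_call(VALUE handler, VALUE payload, VALUE *out) {
        int state = 0;
        struct call_args a = { handler, payload };
        *out = rb_protect(try_call, (VALUE)&a, &state);
        return state; /* caller may re-raise later with rb_jump_tag(state) */
    }
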