ed-precompiled_bootsnap 1.18.6-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1135 @@
1
+ /*
2
+ * Suggested reading order:
3
+ * 1. Skim Init_bootsnap
4
+ * 2. Skim bs_fetch
5
+ * 3. The rest of everything
6
+ *
7
+ * Init_bootsnap sets up the ruby objects and binds bs_fetch to
8
+ * Bootsnap::CompileCache::Native.fetch.
9
+ *
10
+ * bs_fetch is the ultimate caller for just about every other function in
11
+ * here.
12
+ */
13
+
14
+ #include "bootsnap.h"
15
+ #include "ruby.h"
16
+ #include <stdint.h>
17
+ #include <stdbool.h>
18
+ #include <sys/types.h>
19
+ #include <errno.h>
20
+ #include <fcntl.h>
21
+ #include <unistd.h>
22
+ #include <sys/stat.h>
23
+
24
/*
 * On Apple platforms the fdatasync symbol is present, however not in the
 * headers, so it is declared by hand here.
 * See: https://github.com/rails/bootsnap/issues/470
 */
#ifdef __APPLE__
extern int fdatasync(int);
#endif

/* Where O_NOATIME is not defined, OR-ing it into open() flags is a no-op. */
#ifndef O_NOATIME
#  define O_NOATIME 0
#endif

/*
 * 1000 is an arbitrary limit for a full cache path; FNV64 plus some slashes
 * brings the cap down to 981 for the cache dir itself.
 */
#define MAX_CACHEPATH_SIZE 1000
#define MAX_CACHEDIR_SIZE 981

/* Size in bytes of the header (struct bs_cache_key) at the start of a cache file. */
#define KEY_SIZE 64

/* Number of times atomic_write_cache_file tries to create its temp file. */
#define MAX_CREATE_TEMPFILE_ATTEMPT 3

/* If the included headers provide no RB_UNLIKELY, evaluate the condition as-is. */
#ifndef RB_UNLIKELY
#  define RB_UNLIKELY(x) (x)
#endif
46
+
47
/*
 * Header written as the first 64 bytes (KEY_SIZE) of each cache file.
 *
 * - size and mtime track whether the source file's contents have changed.
 * - version, ruby_platform, compile_option and ruby_revision track changes to
 *   the environment that could invalidate compile results without the file
 *   contents having changed.
 * - data_size is not truly part of the "key": it is the number of bytes that
 *   follow the header in the cache file (the size of the cached artifact).
 * - digest is the FNV-1a hash of the source contents; digest_set records
 *   whether digest has been computed for this instance.
 * - pad fills the packed struct out to exactly 64 bytes.
 */
struct bs_cache_key {
    uint32_t version;
    uint32_t ruby_platform;
    uint32_t compile_option;
    uint32_t ruby_revision;
    uint64_t size;
    uint64_t mtime;
    uint64_t data_size;
    uint64_t digest;
    uint8_t  digest_set;
    uint8_t  pad[15];
} __attribute__((packed));
73
+
74
+ /*
75
+ * If the struct padding isn't correct to pad the key to 64 bytes, refuse to
76
+ * compile.
77
+ */
78
+ #define STATIC_ASSERT(X) STATIC_ASSERT2(X,__LINE__)
79
+ #define STATIC_ASSERT2(X,L) STATIC_ASSERT3(X,L)
80
+ #define STATIC_ASSERT3(X,L) STATIC_ASSERT_MSG(X,at_line_##L)
81
+ #define STATIC_ASSERT_MSG(COND,MSG) typedef char static_assertion_##MSG[(!!(COND))*2-1]
82
+ STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE);
83
+
84
+ /* Effectively a schema version. Bumping invalidates all previous caches */
85
+ static const uint32_t current_version = 6;
86
+
87
+ /* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a
88
+ * new OS ABI, etc. */
89
+ static uint32_t current_ruby_platform;
90
+ /* Invalidates cache when switching ruby versions */
91
+ static uint32_t current_ruby_revision;
92
+ /* Invalidates cache when RubyVM::InstructionSequence.compile_option changes */
93
+ static uint32_t current_compile_option_crc32 = 0;
94
+ /* Current umask */
95
+ static mode_t current_umask;
96
+
97
+ /* Bootsnap::CompileCache::{Native, Uncompilable} */
98
+ static VALUE rb_mBootsnap;
99
+ static VALUE rb_mBootsnap_CompileCache;
100
+ static VALUE rb_mBootsnap_CompileCache_Native;
101
+ static VALUE rb_cBootsnap_CompileCache_UNCOMPILABLE;
102
+ static ID instrumentation_method;
103
+ static VALUE sym_hit, sym_miss, sym_stale, sym_revalidated;
104
+ static bool instrumentation_enabled = false;
105
+ static bool readonly = false;
106
+ static bool revalidation = false;
107
+ static bool perm_issue = false;
108
+
109
+ /* Functions exposed as module functions on Bootsnap::CompileCache::Native */
110
+ static VALUE bs_instrumentation_enabled_set(VALUE self, VALUE enabled);
111
+ static VALUE bs_readonly_set(VALUE self, VALUE enabled);
112
+ static VALUE bs_revalidation_set(VALUE self, VALUE enabled);
113
+ static VALUE bs_compile_option_crc32_set(VALUE self, VALUE crc32_v);
114
+ static VALUE bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler, VALUE args);
115
+ static VALUE bs_rb_precompile(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler);
116
+
117
+ /* Helpers */
118
+ enum cache_status {
119
+ miss,
120
+ hit,
121
+ stale,
122
+ };
123
+ static void bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_CACHEPATH_SIZE]);
124
+ static int bs_read_key(int fd, struct bs_cache_key * key);
125
+ static enum cache_status cache_key_equal_fast_path(struct bs_cache_key * k1, struct bs_cache_key * k2);
126
+ static int cache_key_equal_slow_path(struct bs_cache_key * current_key, struct bs_cache_key * cached_key, const VALUE input_data);
127
+ static int update_cache_key(struct bs_cache_key *current_key, struct bs_cache_key *old_key, int cache_fd, const char ** errno_provenance);
128
+
129
+ static void bs_cache_key_digest(struct bs_cache_key * key, const VALUE input_data);
130
+ static VALUE bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args);
131
+ static VALUE bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler);
132
+ static int open_current_file(const char * path, struct bs_cache_key * key, const char ** errno_provenance);
133
+ static int fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * output_data, int * exception_tag, const char ** errno_provenance);
134
+ static uint32_t get_ruby_revision(void);
135
+ static uint32_t get_ruby_platform(void);
136
+
137
+ /*
138
+ * Helper functions to call ruby methods on handler object without crashing on
139
+ * exception.
140
+ */
141
+ static int bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * output_data);
142
+ static VALUE prot_input_to_output(VALUE arg);
143
+ static void bs_input_to_output(VALUE handler, VALUE args, VALUE input_data, VALUE * output_data, int * exception_tag);
144
+ static int bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data);
145
+ struct s2o_data;
146
+ struct i2o_data;
147
+ struct i2s_data;
148
+
149
+ static VALUE
150
+ bs_rb_get_path(VALUE self, VALUE fname)
151
+ {
152
+ return rb_get_path(fname);
153
+ }
154
+
155
+ /*
156
+ * Ruby C extensions are initialized by calling Init_<extname>.
157
+ *
158
+ * This sets up the module hierarchy and attaches functions as methods.
159
+ *
160
+ * We also populate some semi-static information about the current OS and so on.
161
+ */
162
+ void
163
+ Init_bootsnap(void)
164
+ {
165
+ rb_mBootsnap = rb_define_module("Bootsnap");
166
+
167
+ rb_define_singleton_method(rb_mBootsnap, "rb_get_path", bs_rb_get_path, 1);
168
+
169
+ rb_mBootsnap_CompileCache = rb_define_module_under(rb_mBootsnap, "CompileCache");
170
+ rb_mBootsnap_CompileCache_Native = rb_define_module_under(rb_mBootsnap_CompileCache, "Native");
171
+ rb_cBootsnap_CompileCache_UNCOMPILABLE = rb_const_get(rb_mBootsnap_CompileCache, rb_intern("UNCOMPILABLE"));
172
+ rb_global_variable(&rb_cBootsnap_CompileCache_UNCOMPILABLE);
173
+
174
+ current_ruby_revision = get_ruby_revision();
175
+ current_ruby_platform = get_ruby_platform();
176
+
177
+ instrumentation_method = rb_intern("_instrument");
178
+
179
+ sym_hit = ID2SYM(rb_intern("hit"));
180
+ sym_miss = ID2SYM(rb_intern("miss"));
181
+ sym_stale = ID2SYM(rb_intern("stale"));
182
+ sym_revalidated = ID2SYM(rb_intern("revalidated"));
183
+
184
+ rb_define_module_function(rb_mBootsnap, "instrumentation_enabled=", bs_instrumentation_enabled_set, 1);
185
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "readonly=", bs_readonly_set, 1);
186
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "revalidation=", bs_revalidation_set, 1);
187
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "fetch", bs_rb_fetch, 4);
188
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "precompile", bs_rb_precompile, 3);
189
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "compile_option_crc32=", bs_compile_option_crc32_set, 1);
190
+
191
+ current_umask = umask(0777);
192
+ umask(current_umask);
193
+ }
194
+
195
+ static VALUE
196
+ bs_instrumentation_enabled_set(VALUE self, VALUE enabled)
197
+ {
198
+ instrumentation_enabled = RTEST(enabled);
199
+ return enabled;
200
+ }
201
+
202
+ static inline void
203
+ bs_instrumentation(VALUE event, VALUE path)
204
+ {
205
+ if (RB_UNLIKELY(instrumentation_enabled)) {
206
+ rb_funcall(rb_mBootsnap, instrumentation_method, 2, event, path);
207
+ }
208
+ }
209
+
210
+ static VALUE
211
+ bs_readonly_set(VALUE self, VALUE enabled)
212
+ {
213
+ readonly = RTEST(enabled);
214
+ return enabled;
215
+ }
216
+
217
+ static VALUE
218
+ bs_revalidation_set(VALUE self, VALUE enabled)
219
+ {
220
+ revalidation = RTEST(enabled);
221
+ return enabled;
222
+ }
223
+
224
+ /*
225
+ * Bootsnap's ruby code registers a hook that notifies us via this function
226
+ * when compile_option changes. These changes invalidate all existing caches.
227
+ *
228
+ * Note that on 32-bit platforms, a CRC32 can't be represented in a Fixnum, but
229
+ * can be represented by a uint.
230
+ */
231
+ static VALUE
232
+ bs_compile_option_crc32_set(VALUE self, VALUE crc32_v)
233
+ {
234
+ if (!RB_TYPE_P(crc32_v, T_BIGNUM) && !RB_TYPE_P(crc32_v, T_FIXNUM)) {
235
+ Check_Type(crc32_v, T_FIXNUM);
236
+ }
237
+ current_compile_option_crc32 = NUM2UINT(crc32_v);
238
+ return Qnil;
239
+ }
240
+
241
+ static uint64_t
242
+ fnv1a_64_iter(uint64_t h, const VALUE str)
243
+ {
244
+ unsigned char *s = (unsigned char *)RSTRING_PTR(str);
245
+ unsigned char *str_end = (unsigned char *)RSTRING_PTR(str) + RSTRING_LEN(str);
246
+
247
+ while (s < str_end) {
248
+ h ^= (uint64_t)*s++;
249
+ h += (h << 1) + (h << 4) + (h << 5) + (h << 7) + (h << 8) + (h << 40);
250
+ }
251
+
252
+ return h;
253
+ }
254
+
255
+ static uint64_t
256
+ fnv1a_64(const VALUE str)
257
+ {
258
+ uint64_t h = (uint64_t)0xcbf29ce484222325ULL;
259
+ return fnv1a_64_iter(h, str);
260
+ }
261
+
262
+ /*
263
+ * Ruby's revision may be Integer or String. CRuby 2.7 or later uses
264
+ * Git commit ID as revision. It's String.
265
+ */
266
+ static uint32_t
267
+ get_ruby_revision(void)
268
+ {
269
+ VALUE ruby_revision;
270
+
271
+ ruby_revision = rb_const_get(rb_cObject, rb_intern("RUBY_REVISION"));
272
+ if (RB_TYPE_P(ruby_revision, RUBY_T_FIXNUM)) {
273
+ return FIX2INT(ruby_revision);
274
+ } else {
275
+ uint64_t hash;
276
+
277
+ hash = fnv1a_64(ruby_revision);
278
+ return (uint32_t)(hash >> 32);
279
+ }
280
+ }
281
+
282
+ /*
283
+ * When ruby's version doesn't change, but it's recompiled on a different OS
284
+ * (or OS version), we need to invalidate the cache.
285
+ */
286
+ static uint32_t
287
+ get_ruby_platform(void)
288
+ {
289
+ uint64_t hash;
290
+ VALUE ruby_platform;
291
+
292
+ ruby_platform = rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM"));
293
+ hash = fnv1a_64(ruby_platform);
294
+ return (uint32_t)(hash >> 32);
295
+ }
296
+
297
+ /*
298
+ * Given a cache root directory and the full path to a file being cached,
299
+ * generate a path under the cache directory at which the cached artifact will
300
+ * be stored.
301
+ *
302
+ * The path will look something like: <cachedir>/12/34567890abcdef
303
+ */
304
+ static void
305
+ bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_CACHEPATH_SIZE])
306
+ {
307
+ uint64_t hash = fnv1a_64(path);
308
+ uint8_t first_byte = (hash >> (64 - 8));
309
+ uint64_t remainder = hash & 0x00ffffffffffffff;
310
+
311
+ sprintf(*cache_path, "%s/%02"PRIx8"/%014"PRIx64, cachedir, first_byte, remainder);
312
+ }
313
+
314
+ /*
315
+ * Test whether a newly-generated cache key based on the file as it exists on
316
+ * disk matches the one that was generated when the file was cached (or really
317
+ * compare any two keys).
318
+ *
319
+ * The data_size member is not compared, as it serves more of a "header"
320
+ * function.
321
+ */
322
+ static enum cache_status cache_key_equal_fast_path(struct bs_cache_key *k1,
323
+ struct bs_cache_key *k2) {
324
+ if (k1->version == k2->version &&
325
+ k1->ruby_platform == k2->ruby_platform &&
326
+ k1->compile_option == k2->compile_option &&
327
+ k1->ruby_revision == k2->ruby_revision && k1->size == k2->size) {
328
+ if (k1->mtime == k2->mtime) {
329
+ return hit;
330
+ }
331
+ if (revalidation) {
332
+ return stale;
333
+ }
334
+ }
335
+ return miss;
336
+ }
337
+
338
+ static int cache_key_equal_slow_path(struct bs_cache_key *current_key,
339
+ struct bs_cache_key *cached_key,
340
+ const VALUE input_data)
341
+ {
342
+ bs_cache_key_digest(current_key, input_data);
343
+ return current_key->digest == cached_key->digest;
344
+ }
345
+
346
+ static int update_cache_key(struct bs_cache_key *current_key, struct bs_cache_key *old_key, int cache_fd, const char ** errno_provenance)
347
+ {
348
+ old_key->mtime = current_key->mtime;
349
+ lseek(cache_fd, 0, SEEK_SET);
350
+ ssize_t nwrite = write(cache_fd, old_key, KEY_SIZE);
351
+ if (nwrite < 0) {
352
+ *errno_provenance = "update_cache_key:write";
353
+ return -1;
354
+ }
355
+
356
+ #ifdef HAVE_FDATASYNC
357
+ if (fdatasync(cache_fd) < 0) {
358
+ *errno_provenance = "update_cache_key:fdatasync";
359
+ return -1;
360
+ }
361
+ #endif
362
+
363
+ return 0;
364
+ }
365
+
366
+ /*
367
+ * Fills the cache key digest.
368
+ */
369
+ static void bs_cache_key_digest(struct bs_cache_key *key,
370
+ const VALUE input_data) {
371
+ if (key->digest_set)
372
+ return;
373
+ key->digest = fnv1a_64(input_data);
374
+ key->digest_set = 1;
375
+ }
376
+
377
+ /*
378
+ * Entrypoint for Bootsnap::CompileCache::Native.fetch. The real work is done
379
+ * in bs_fetch; this function just performs some basic typechecks and
380
+ * conversions on the ruby VALUE arguments before passing them along.
381
+ */
382
+ static VALUE
383
+ bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler, VALUE args)
384
+ {
385
+ FilePathValue(path_v);
386
+
387
+ Check_Type(cachedir_v, T_STRING);
388
+ Check_Type(path_v, T_STRING);
389
+
390
+ if (RSTRING_LEN(cachedir_v) > MAX_CACHEDIR_SIZE) {
391
+ rb_raise(rb_eArgError, "cachedir too long");
392
+ }
393
+
394
+ char * cachedir = RSTRING_PTR(cachedir_v);
395
+ char * path = RSTRING_PTR(path_v);
396
+ char cache_path[MAX_CACHEPATH_SIZE];
397
+
398
+ /* generate cache path to cache_path */
399
+ bs_cache_path(cachedir, path_v, &cache_path);
400
+
401
+ return bs_fetch(path, path_v, cache_path, handler, args);
402
+ }
403
+
404
+ /*
405
+ * Entrypoint for Bootsnap::CompileCache::Native.precompile.
406
+ * Similar to fetch, but it only generate the cache if missing
407
+ * and doesn't return the content.
408
+ */
409
+ static VALUE
410
+ bs_rb_precompile(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler)
411
+ {
412
+ FilePathValue(path_v);
413
+
414
+ Check_Type(cachedir_v, T_STRING);
415
+ Check_Type(path_v, T_STRING);
416
+
417
+ if (RSTRING_LEN(cachedir_v) > MAX_CACHEDIR_SIZE) {
418
+ rb_raise(rb_eArgError, "cachedir too long");
419
+ }
420
+
421
+ char * cachedir = RSTRING_PTR(cachedir_v);
422
+ char * path = RSTRING_PTR(path_v);
423
+ char cache_path[MAX_CACHEPATH_SIZE];
424
+
425
+ /* generate cache path to cache_path */
426
+ bs_cache_path(cachedir, path_v, &cache_path);
427
+
428
+ return bs_precompile(path, path_v, cache_path, handler);
429
+ }
430
+
431
+ static int bs_open_noatime(const char *path, int flags) {
432
+ int fd = 1;
433
+ if (!perm_issue) {
434
+ fd = open(path, flags | O_NOATIME);
435
+ if (fd < 0 && errno == EPERM) {
436
+ errno = 0;
437
+ perm_issue = true;
438
+ }
439
+ }
440
+
441
+ if (perm_issue) {
442
+ fd = open(path, flags);
443
+ }
444
+ return fd;
445
+ }
446
+
447
+ /*
448
+ * Open the file we want to load/cache and generate a cache key for it if it
449
+ * was loaded.
450
+ */
451
+ static int
452
+ open_current_file(const char * path, struct bs_cache_key * key, const char ** errno_provenance)
453
+ {
454
+ struct stat statbuf;
455
+ int fd;
456
+
457
+ fd = bs_open_noatime(path, O_RDONLY);
458
+ if (fd < 0) {
459
+ *errno_provenance = "bs_fetch:open_current_file:open";
460
+ return fd;
461
+ }
462
+ #ifdef _WIN32
463
+ setmode(fd, O_BINARY);
464
+ #endif
465
+
466
+ if (fstat(fd, &statbuf) < 0) {
467
+ *errno_provenance = "bs_fetch:open_current_file:fstat";
468
+ int previous_errno = errno;
469
+ close(fd);
470
+ errno = previous_errno;
471
+ return -1;
472
+ }
473
+
474
+ key->version = current_version;
475
+ key->ruby_platform = current_ruby_platform;
476
+ key->compile_option = current_compile_option_crc32;
477
+ key->ruby_revision = current_ruby_revision;
478
+ key->size = (uint64_t)statbuf.st_size;
479
+ key->mtime = (uint64_t)statbuf.st_mtime;
480
+ key->digest_set = false;
481
+
482
+ return fd;
483
+ }
484
+
485
/*
 * Negative result codes shared by the cache helpers. They sit below zero so
 * they can be returned in place of a file descriptor or a 0 "OK" result.
 */
#define ERROR_WITH_ERRNO   (-1) /* a call failed; errno is set */
#define CACHE_MISS         (-2)
#define CACHE_STALE        (-3)
#define CACHE_UNCOMPILABLE (-4)
489
+
490
+ /*
491
+ * Read the cache key from the given fd, which must have position 0 (e.g.
492
+ * freshly opened file).
493
+ *
494
+ * Possible return values:
495
+ * - 0 (OK, key was loaded)
496
+ * - ERROR_WITH_ERRNO (-1, errno is set)
497
+ * - CACHE_MISS (-2)
498
+ * - CACHE_STALE (-3)
499
+ */
500
+ static int
501
+ bs_read_key(int fd, struct bs_cache_key * key)
502
+ {
503
+ ssize_t nread = read(fd, key, KEY_SIZE);
504
+ if (nread < 0) return ERROR_WITH_ERRNO;
505
+ if (nread < KEY_SIZE) return CACHE_STALE;
506
+ return 0;
507
+ }
508
+
509
+ /*
510
+ * Open the cache file at a given path, if it exists, and read its key into the
511
+ * struct.
512
+ *
513
+ * Possible return values:
514
+ * - 0 (OK, key was loaded)
515
+ * - CACHE_MISS (-2)
516
+ * - CACHE_STALE (-3)
517
+ * - ERROR_WITH_ERRNO (-1, errno is set)
518
+ */
519
+ static int
520
+ open_cache_file(const char * path, struct bs_cache_key * key, const char ** errno_provenance)
521
+ {
522
+ int fd, res;
523
+
524
+ if (readonly || !revalidation) {
525
+ fd = bs_open_noatime(path, O_RDONLY);
526
+ } else {
527
+ fd = bs_open_noatime(path, O_RDWR);
528
+ }
529
+
530
+ if (fd < 0) {
531
+ *errno_provenance = "bs_fetch:open_cache_file:open";
532
+ return CACHE_MISS;
533
+ }
534
+ #ifdef _WIN32
535
+ setmode(fd, O_BINARY);
536
+ #endif
537
+
538
+ res = bs_read_key(fd, key);
539
+ if (res < 0) {
540
+ *errno_provenance = "bs_fetch:open_cache_file:read";
541
+ close(fd);
542
+ return res;
543
+ }
544
+
545
+ return fd;
546
+ }
547
+
548
+ /*
549
+ * The cache file is laid out like:
550
+ * 0...64 : bs_cache_key
551
+ * 64..-1 : cached artifact
552
+ *
553
+ * This function takes a file descriptor whose position is pre-set to 64, and
554
+ * the data_size (corresponding to the remaining number of bytes) listed in the
555
+ * cache header.
556
+ *
557
+ * We load the text from this file into a buffer, and pass it to the ruby-land
558
+ * handler with exception handling via the exception_tag param.
559
+ *
560
+ * Data is returned via the output_data parameter, which, if there's no error
561
+ * or exception, will be the final data returnable to the user.
562
+ */
563
+ static int
564
+ fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * output_data, int * exception_tag, const char ** errno_provenance)
565
+ {
566
+ ssize_t nread;
567
+ int ret;
568
+
569
+ VALUE storage_data;
570
+
571
+ if (data_size > 100000000000) {
572
+ *errno_provenance = "bs_fetch:fetch_cached_data:datasize";
573
+ errno = EINVAL; /* because wtf? */
574
+ ret = ERROR_WITH_ERRNO;
575
+ goto done;
576
+ }
577
+ storage_data = rb_str_buf_new(data_size);
578
+ nread = read(fd, RSTRING_PTR(storage_data), data_size);
579
+ if (nread < 0) {
580
+ *errno_provenance = "bs_fetch:fetch_cached_data:read";
581
+ ret = ERROR_WITH_ERRNO;
582
+ goto done;
583
+ }
584
+ if (nread != data_size) {
585
+ ret = CACHE_STALE;
586
+ goto done;
587
+ }
588
+
589
+ rb_str_set_len(storage_data, nread);
590
+
591
+ *exception_tag = bs_storage_to_output(handler, args, storage_data, output_data);
592
+ if (*output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
593
+ ret = CACHE_UNCOMPILABLE;
594
+ goto done;
595
+ }
596
+ ret = 0;
597
+ done:
598
+ return ret;
599
+ }
600
+
601
/*
 * Like mkdir -p, this recursively creates directory parents of a file. e.g.
 * given /a/b/c, creates /a and /a/b. The final component is never created.
 *
 * file_path is modified temporarily (each '/' is replaced by NUL while the
 * prefix is passed to mkdir) but is always restored before returning.
 * A NULL or empty path has no parents, so it is accepted as a no-op instead
 * of being scanned from one byte past its terminator.
 *
 * Returns 0 on success (directories that already exist are fine), or -1 with
 * errno set by the failing mkdir.
 */
static int
mkpath(char * file_path, mode_t mode)
{
    /* It would likely be more efficient to count back until we
     * find a component that *does* exist, but this will only run
     * at most 256 times, so it seems not worthwhile to change. */
    char * p;

    if (file_path == NULL || file_path[0] == '\0') {
        return 0;
    }

    /* Start at +1 so a leading '/' is not treated as a component boundary. */
    for (p = strchr(file_path + 1, '/'); p != NULL; p = strchr(p + 1, '/')) {
        int rc;

        *p = '\0';
#ifdef _WIN32
        rc = mkdir(file_path);
#else
        rc = mkdir(file_path, mode);
#endif
        *p = '/'; /* a plain store: errno from mkdir is preserved */

        if (rc == -1 && errno != EEXIST) {
            return -1;
        }
    }

    return 0;
}
628
+
629
+ /*
630
+ * Write a cache header/key and a compiled artifact to a given cache path by
631
+ * writing to a tmpfile and then renaming the tmpfile over top of the final
632
+ * path.
633
+ */
634
+ static int
635
+ atomic_write_cache_file(char * path, struct bs_cache_key * key, VALUE data, const char ** errno_provenance)
636
+ {
637
+ char template[MAX_CACHEPATH_SIZE + 20];
638
+ char * tmp_path;
639
+ int fd, ret, attempt;
640
+ ssize_t nwrite;
641
+
642
+ for (attempt = 0; attempt < MAX_CREATE_TEMPFILE_ATTEMPT; ++attempt) {
643
+ tmp_path = strncpy(template, path, MAX_CACHEPATH_SIZE);
644
+ strcat(tmp_path, ".tmp.XXXXXX");
645
+
646
+ // mkstemp modifies the template to be the actual created path
647
+ fd = mkstemp(tmp_path);
648
+ if (fd > 0) break;
649
+
650
+ if (attempt == 0 && mkpath(tmp_path, 0775) < 0) {
651
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:mkpath";
652
+ return -1;
653
+ }
654
+ }
655
+ if (fd < 0) {
656
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:mkstemp";
657
+ return -1;
658
+ }
659
+
660
+ if (chmod(tmp_path, 0644) < 0) {
661
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:chmod";
662
+ return -1;
663
+ }
664
+
665
+ #ifdef _WIN32
666
+ setmode(fd, O_BINARY);
667
+ #endif
668
+
669
+ key->data_size = RSTRING_LEN(data);
670
+ nwrite = write(fd, key, KEY_SIZE);
671
+ if (nwrite < 0) {
672
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:write";
673
+ return -1;
674
+ }
675
+ if (nwrite != KEY_SIZE) {
676
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:keysize";
677
+ errno = EIO; /* Lies but whatever */
678
+ return -1;
679
+ }
680
+
681
+ nwrite = write(fd, RSTRING_PTR(data), RSTRING_LEN(data));
682
+ if (nwrite < 0) return -1;
683
+ if (nwrite != RSTRING_LEN(data)) {
684
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:writelength";
685
+ errno = EIO; /* Lies but whatever */
686
+ return -1;
687
+ }
688
+
689
+ close(fd);
690
+ ret = rename(tmp_path, path);
691
+ if (ret < 0) {
692
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:rename";
693
+ return -1;
694
+ }
695
+ ret = chmod(path, 0664 & ~current_umask);
696
+ if (ret < 0) {
697
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:chmod";
698
+ }
699
+ return ret;
700
+ }
701
+
702
+
703
+ /* Read contents from an fd, whose contents are asserted to be +size+ bytes
704
+ * long, returning a Ruby string on success and Qfalse on failure */
705
+ static VALUE
706
+ bs_read_contents(int fd, size_t size, const char ** errno_provenance)
707
+ {
708
+ VALUE contents;
709
+ ssize_t nread;
710
+ contents = rb_str_buf_new(size);
711
+ nread = read(fd, RSTRING_PTR(contents), size);
712
+
713
+ if (nread < 0) {
714
+ *errno_provenance = "bs_fetch:bs_read_contents:read";
715
+ return Qfalse;
716
+ } else {
717
+ rb_str_set_len(contents, nread);
718
+ return contents;
719
+ }
720
+ }
721
+
722
+ /*
723
+ * This is the meat of the extension. bs_fetch is
724
+ * Bootsnap::CompileCache::Native.fetch.
725
+ *
726
+ * There are three "formats" in use here:
727
+ * 1. "input" format, which is what we load from the source file;
728
+ * 2. "storage" format, which we write to the cache;
729
+ * 3. "output" format, which is what we return.
730
+ *
731
+ * E.g., For ISeq compilation:
732
+ * input: ruby source, as text
733
+ * storage: binary string (RubyVM::InstructionSequence#to_binary)
734
+ * output: Instance of RubyVM::InstructionSequence
735
+ *
736
+ * And for YAML:
737
+ * input: yaml as text
738
+ * storage: MessagePack or Marshal text
739
+ * output: ruby object, loaded from yaml/messagepack/marshal
740
+ *
741
+ * A handler<I,S,O> passed in must support three messages:
742
+ * * storage_to_output(S) -> O
743
+ * * input_to_output(I) -> O
744
+ * * input_to_storage(I) -> S
745
+ * (input_to_storage may raise Bootsnap::CompileCache::Uncompilable, which
746
+ * will prevent caching and cause output to be generated with
747
+ * input_to_output)
748
+ *
749
+ * The semantics of this function are basically:
750
+ *
751
+ * return storage_to_output(cache[path]) if cache[path]
752
+ * storage = input_to_storage(input)
753
+ * cache[path] = storage
754
+ * return storage_to_output(storage)
755
+ *
756
+ * Or expanded a bit:
757
+ *
758
+ * - Check if the cache file exists and is up to date.
759
+ * - If it is, load this data to storage_data.
760
+ * - return storage_to_output(storage_data)
761
+ * - Read the file to input_data
762
+ * - Generate storage_data using input_to_storage(input_data)
763
+ * - Write storage_data data, with a cache key, to the cache file.
764
+ * - Return storage_to_output(storage_data)
765
+ */
766
+ static VALUE
767
+ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args)
768
+ {
769
+ struct bs_cache_key cached_key, current_key;
770
+ int cache_fd = -1, current_fd = -1;
771
+ int res, valid_cache = 0, exception_tag = 0;
772
+ const char * errno_provenance = NULL;
773
+
774
+ VALUE status = Qfalse;
775
+ VALUE input_data = Qfalse; /* data read from source file, e.g. YAML or ruby source */
776
+ VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
777
+ VALUE output_data; /* return data, e.g. ruby hash or loaded iseq */
778
+
779
+ VALUE exception; /* ruby exception object to raise instead of returning */
780
+ VALUE exception_message; /* ruby exception string to use instead of errno_provenance */
781
+
782
+ /* Open the source file and generate a cache key for it */
783
+ current_fd = open_current_file(path, &current_key, &errno_provenance);
784
+ if (current_fd < 0) {
785
+ exception_message = path_v;
786
+ goto fail_errno;
787
+ }
788
+
789
+ /* Open the cache key if it exists, and read its cache key in */
790
+ cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance);
791
+ if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE) {
792
+ /* This is ok: valid_cache remains false, we re-populate it. */
793
+ bs_instrumentation(cache_fd == CACHE_MISS ? sym_miss : sym_stale, path_v);
794
+ } else if (cache_fd < 0) {
795
+ exception_message = rb_str_new_cstr(cache_path);
796
+ goto fail_errno;
797
+ } else {
798
+ /* True if the cache existed and no invalidating changes have occurred since
799
+ * it was generated. */
800
+
801
+ switch(cache_key_equal_fast_path(&current_key, &cached_key)) {
802
+ case hit:
803
+ status = sym_hit;
804
+ valid_cache = true;
805
+ break;
806
+ case miss:
807
+ valid_cache = false;
808
+ break;
809
+ case stale:
810
+ valid_cache = false;
811
+ if ((input_data = bs_read_contents(current_fd, current_key.size,
812
+ &errno_provenance)) == Qfalse) {
813
+ exception_message = path_v;
814
+ goto fail_errno;
815
+ }
816
+ valid_cache = cache_key_equal_slow_path(&current_key, &cached_key, input_data);
817
+ if (valid_cache) {
818
+ if (!readonly) {
819
+ if (update_cache_key(&current_key, &cached_key, cache_fd, &errno_provenance)) {
820
+ exception_message = path_v;
821
+ goto fail_errno;
822
+ }
823
+ }
824
+ status = sym_revalidated;
825
+ }
826
+ break;
827
+ };
828
+
829
+ if (!valid_cache) {
830
+ status = sym_stale;
831
+ }
832
+ }
833
+
834
+ if (valid_cache) {
835
+ /* Fetch the cache data and return it if we're able to load it successfully */
836
+ res = fetch_cached_data(
837
+ cache_fd, (ssize_t)cached_key.data_size, handler, args,
838
+ &output_data, &exception_tag, &errno_provenance
839
+ );
840
+ if (exception_tag != 0) goto raise;
841
+ else if (res == CACHE_UNCOMPILABLE) {
842
+ /* If fetch_cached_data returned `Uncompilable` we fallback to `input_to_output`
843
+ This happens if we have say, an unsafe YAML cache, but try to load it in safe mode */
844
+ if (input_data == Qfalse && (input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
845
+ exception_message = path_v;
846
+ goto fail_errno;
847
+ }
848
+ bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
849
+ if (exception_tag != 0) goto raise;
850
+ goto succeed;
851
+ } else if (res == CACHE_MISS || res == CACHE_STALE) valid_cache = 0;
852
+ else if (res == ERROR_WITH_ERRNO){
853
+ exception_message = rb_str_new_cstr(cache_path);
854
+ goto fail_errno;
855
+ }
856
+ else if (!NIL_P(output_data)) goto succeed; /* fast-path, goal */
857
+ }
858
+ close(cache_fd);
859
+ cache_fd = -1;
860
+ /* Cache is stale, invalid, or missing. Regenerate and write it out. */
861
+
862
+ /* Read the contents of the source file into a buffer */
863
+ if (input_data == Qfalse && (input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
864
+ exception_message = path_v;
865
+ goto fail_errno;
866
+ }
867
+
868
+ /* Try to compile the input_data using input_to_storage(input_data) */
869
+ exception_tag = bs_input_to_storage(handler, args, input_data, path_v, &storage_data);
870
+ if (exception_tag != 0) goto raise;
871
+ /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
872
+ * to cache anything; just return input_to_output(input_data) */
873
+ if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
874
+ bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
875
+ if (exception_tag != 0) goto raise;
876
+ goto succeed;
877
+ }
878
+ /* If storage_data isn't a string, we can't cache it */
879
+ if (!RB_TYPE_P(storage_data, T_STRING)) goto invalid_type_storage_data;
880
+
881
+ /* Attempt to write the cache key and storage_data to the cache directory.
882
+ * We do however ignore any failures to persist the cache, as it's better
883
+ * to move along, than to interrupt the process.
884
+ */
885
+ bs_cache_key_digest(&current_key, input_data);
886
+ atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
887
+
888
+ /* Having written the cache, now convert storage_data to output_data */
889
+ exception_tag = bs_storage_to_output(handler, args, storage_data, &output_data);
890
+ if (exception_tag != 0) goto raise;
891
+
892
+ if (output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
893
+ /* If storage_to_output returned `Uncompilable` we fallback to `input_to_output` */
894
+ bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
895
+ if (exception_tag != 0) goto raise;
896
+ } else if (NIL_P(output_data)) {
897
+ /* If output_data is nil, delete the cache entry and generate the output
898
+ * using input_to_output */
899
+ if (unlink(cache_path) < 0) {
900
+ /* If the cache was already deleted, it might be that another process did it before us.
901
+ * No point raising an error */
902
+ if (errno != ENOENT) {
903
+ errno_provenance = "bs_fetch:unlink";
904
+ exception_message = rb_str_new_cstr(cache_path);
905
+ goto fail_errno;
906
+ }
907
+ }
908
+ bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
909
+ if (exception_tag != 0) goto raise;
910
+ }
911
+
912
+ goto succeed; /* output_data is now the correct return. */
913
+
914
+ #define CLEANUP \
915
+ if (current_fd >= 0) close(current_fd); \
916
+ if (cache_fd >= 0) close(cache_fd); \
917
+ if (status != Qfalse) bs_instrumentation(status, path_v);
918
+
919
+ succeed:
920
+ CLEANUP;
921
+ return output_data;
922
+ fail_errno:
923
+ CLEANUP;
924
+ if (errno_provenance) {
925
+ exception_message = rb_str_concat(
926
+ rb_str_new_cstr(errno_provenance),
927
+ rb_str_concat(rb_str_new_cstr(": "), exception_message)
928
+ );
929
+ }
930
+ exception = rb_syserr_new_str(errno, exception_message);
931
+ rb_exc_raise(exception);
932
+ __builtin_unreachable();
933
+ raise:
934
+ CLEANUP;
935
+ rb_jump_tag(exception_tag);
936
+ __builtin_unreachable();
937
+ invalid_type_storage_data:
938
+ CLEANUP;
939
+ Check_Type(storage_data, T_STRING);
940
+ __builtin_unreachable();
941
+
942
+ #undef CLEANUP
943
+ }
944
+
945
+ static VALUE
946
+ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)
947
+ {
948
+ if (readonly) {
949
+ return Qfalse;
950
+ }
951
+
952
+ struct bs_cache_key cached_key, current_key;
953
+ int cache_fd = -1, current_fd = -1;
954
+ int res, valid_cache = 0, exception_tag = 0;
955
+ const char * errno_provenance = NULL;
956
+
957
+ VALUE input_data = Qfalse; /* data read from source file, e.g. YAML or ruby source */
958
+ VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
959
+
960
+ /* Open the source file and generate a cache key for it */
961
+ current_fd = open_current_file(path, &current_key, &errno_provenance);
962
+ if (current_fd < 0) goto fail;
963
+
964
+ /* Open the cache key if it exists, and read its cache key in */
965
+ cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance);
966
+ if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE) {
967
+ /* This is ok: valid_cache remains false, we re-populate it. */
968
+ } else if (cache_fd < 0) {
969
+ goto fail;
970
+ } else {
971
+ /* True if the cache existed and no invalidating changes have occurred since
972
+ * it was generated. */
973
+ switch(cache_key_equal_fast_path(&current_key, &cached_key)) {
974
+ case hit:
975
+ valid_cache = true;
976
+ break;
977
+ case miss:
978
+ valid_cache = false;
979
+ break;
980
+ case stale:
981
+ valid_cache = false;
982
+ if ((input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) {
983
+ goto fail;
984
+ }
985
+ valid_cache = cache_key_equal_slow_path(&current_key, &cached_key, input_data);
986
+ if (valid_cache) {
987
+ if (update_cache_key(&current_key, &cached_key, cache_fd, &errno_provenance)) {
988
+ goto fail;
989
+ }
990
+ }
991
+ break;
992
+ };
993
+ }
994
+
995
+ if (valid_cache) {
996
+ goto succeed;
997
+ }
998
+
999
+ close(cache_fd);
1000
+ cache_fd = -1;
1001
+ /* Cache is stale, invalid, or missing. Regenerate and write it out. */
1002
+
1003
+ /* Read the contents of the source file into a buffer */
1004
+ if ((input_data = bs_read_contents(current_fd, current_key.size, &errno_provenance)) == Qfalse) goto fail;
1005
+
1006
+ /* Try to compile the input_data using input_to_storage(input_data) */
1007
+ exception_tag = bs_input_to_storage(handler, Qnil, input_data, path_v, &storage_data);
1008
+ if (exception_tag != 0) goto fail;
1009
+
1010
+ /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
1011
+ * to cache anything; just return false */
1012
+ if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
1013
+ goto fail;
1014
+ }
1015
+ /* If storage_data isn't a string, we can't cache it */
1016
+ if (!RB_TYPE_P(storage_data, T_STRING)) goto fail;
1017
+
1018
+ /* Write the cache key and storage_data to the cache directory */
1019
+ bs_cache_key_digest(&current_key, input_data);
1020
+ res = atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
1021
+ if (res < 0) goto fail;
1022
+
1023
+ goto succeed;
1024
+
1025
+ #define CLEANUP \
1026
+ if (current_fd >= 0) close(current_fd); \
1027
+ if (cache_fd >= 0) close(cache_fd);
1028
+
1029
+ succeed:
1030
+ CLEANUP;
1031
+ return Qtrue;
1032
+ fail:
1033
+ CLEANUP;
1034
+ return Qfalse;
1035
+ #undef CLEANUP
1036
+ }
1037
+
1038
+
1039
+ /*****************************************************************************/
1040
+ /********************* Handler Wrappers **************************************/
1041
+ /*****************************************************************************
1042
+ * Everything after this point in the file is just wrappers to deal with ruby's
1043
+ * clunky method of handling exceptions from ruby methods invoked from C:
1044
+ *
1045
+ * In order to call a ruby method from C, while protecting against crashing in
1046
+ * the event of an exception, we must call the method with rb_protect().
1047
+ *
1048
+ * rb_protect takes a C function and precisely one argument; however, we want
1049
+ * to pass multiple arguments, so we must create structs to wrap them up.
1050
+ *
1051
+ * These functions return an exception_tag, which, if non-zero, indicates an
1052
+ * exception that should be jumped to with rb_jump_tag after cleaning up
1053
+ * allocated resources.
1054
+ */
1055
+
1056
+ struct s2o_data {
1057
+ VALUE handler;
1058
+ VALUE args;
1059
+ VALUE storage_data;
1060
+ };
1061
+
1062
+ struct i2o_data {
1063
+ VALUE handler;
1064
+ VALUE args;
1065
+ VALUE input_data;
1066
+ };
1067
+
1068
+ struct i2s_data {
1069
+ VALUE handler;
1070
+ VALUE input_data;
1071
+ VALUE pathval;
1072
+ };
1073
+
1074
+ static VALUE
1075
+ try_storage_to_output(VALUE arg)
1076
+ {
1077
+ struct s2o_data * data = (struct s2o_data *)arg;
1078
+ return rb_funcall(data->handler, rb_intern("storage_to_output"), 2, data->storage_data, data->args);
1079
+ }
1080
+
1081
+ static int
1082
+ bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * output_data)
1083
+ {
1084
+ int state;
1085
+ struct s2o_data s2o_data = {
1086
+ .handler = handler,
1087
+ .args = args,
1088
+ .storage_data = storage_data,
1089
+ };
1090
+ *output_data = rb_protect(try_storage_to_output, (VALUE)&s2o_data, &state);
1091
+ return state;
1092
+ }
1093
+
1094
+ static void
1095
+ bs_input_to_output(VALUE handler, VALUE args, VALUE input_data, VALUE * output_data, int * exception_tag)
1096
+ {
1097
+ struct i2o_data i2o_data = {
1098
+ .handler = handler,
1099
+ .args = args,
1100
+ .input_data = input_data,
1101
+ };
1102
+ *output_data = rb_protect(prot_input_to_output, (VALUE)&i2o_data, exception_tag);
1103
+ }
1104
+
1105
+ static VALUE
1106
+ prot_input_to_output(VALUE arg)
1107
+ {
1108
+ struct i2o_data * data = (struct i2o_data *)arg;
1109
+ return rb_funcall(data->handler, rb_intern("input_to_output"), 2, data->input_data, data->args);
1110
+ }
1111
+
1112
+ static VALUE
1113
+ try_input_to_storage(VALUE arg)
1114
+ {
1115
+ struct i2s_data * data = (struct i2s_data *)arg;
1116
+ return rb_funcall(data->handler, rb_intern("input_to_storage"), 2, data->input_data, data->pathval);
1117
+ }
1118
+
1119
+ static int
1120
+ bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data)
1121
+ {
1122
+ if (readonly) {
1123
+ *storage_data = rb_cBootsnap_CompileCache_UNCOMPILABLE;
1124
+ return 0;
1125
+ } else {
1126
+ int state;
1127
+ struct i2s_data i2s_data = {
1128
+ .handler = handler,
1129
+ .input_data = input_data,
1130
+ .pathval = pathval,
1131
+ };
1132
+ *storage_data = rb_protect(try_input_to_storage, (VALUE)&i2s_data, &state);
1133
+ return state;
1134
+ }
1135
+ }