bootsnap 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,125 +1,121 @@
1
+ /*
2
+ * Suggested reading order:
3
+ * 1. Skim Init_bootsnap
4
+ * 2. Skim bs_fetch
5
+ * 3. The rest of everything
6
+ *
7
+ * Init_bootsnap sets up the ruby objects and binds bs_rb_fetch (a thin
8
+ * wrapper around bs_fetch) to Bootsnap::CompileCache::Native.fetch.
9
+ *
10
+ * bs_fetch is the ultimate caller for just about every other function in
11
+ * here.
12
+ */
13
+
1
14
  #include "bootsnap.h"
15
+ #include "ruby.h"
16
+ #include <stdint.h>
2
17
  #include <sys/types.h>
3
- #include <sys/xattr.h>
4
- #include <sys/stat.h>
5
18
  #include <errno.h>
6
- #include <unistd.h>
7
19
  #include <fcntl.h>
8
- #include <stdbool.h>
9
- #include <utime.h>
10
-
11
- #ifdef __APPLE__
12
- // Used for the OS Directives to define the os_version constant
13
- #include <Availability.h>
14
- #define _ENOATTR ENOATTR
15
- #else
16
- #define _ENOATTR ENODATA
17
- #endif
18
-
19
- /*
20
- * TODO:
21
- * - test on linux or reject on non-darwin
22
- * - source files over 4GB will likely break things (meh)
23
- */
20
+ #include <sys/stat.h>
21
+ #include <sys/utsname.h>
24
22
 
25
- static VALUE rb_mBootsnap;
26
- static VALUE rb_mBootsnap_CompileCache;
27
- static VALUE rb_mBootsnap_CompileCache_Native;
28
- static VALUE rb_eBootsnap_CompileCache_Uncompilable;
29
- static uint32_t current_ruby_revision;
30
- static uint32_t current_compile_option_crc32 = 0;
31
- static ID uncompilable;
23
+ /* 1000 is an arbitrary limit; FNV64 plus some slashes brings the cap down to
24
+ * 981 for the cache dir */
25
+ #define MAX_CACHEPATH_SIZE 1000
26
+ #define MAX_CACHEDIR_SIZE 981
32
27
 
33
- struct stats {
34
- uint64_t hit;
35
- uint64_t unwritable;
36
- uint64_t uncompilable;
37
- uint64_t miss;
38
- uint64_t fail;
39
- uint64_t retry;
40
- };
41
- static struct stats stats = {
42
- .hit = 0,
43
- .unwritable = 0,
44
- .uncompilable = 0,
45
- .miss = 0,
46
- .fail = 0,
47
- .retry = 0,
48
- };
28
+ #define KEY_SIZE 64
49
29
 
50
- struct xattr_key {
51
- uint8_t version;
52
- uint8_t os_version;
30
+ /*
31
+ * An instance of this key is written as the first 64 bytes of each cache file.
32
+ * The mtime and size members track whether the file contents have changed, and
33
+ * the version, os_version, compile_option, and ruby_revision members track
34
+ * changes to the environment that could invalidate compile results without
35
+ * file contents having changed. The data_size member is not truly part of the
36
+ * "key". Really, this could be called a "header" with the first six members
37
+ * being an embedded "key" struct and an additional data_size member.
38
+ *
39
+ * The data_size indicates the remaining number of bytes in the cache file
40
+ * after the header (the size of the cached artifact).
41
+ *
42
+ * After data_size, the struct is padded to 64 bytes.
43
+ */
44
+ struct bs_cache_key {
45
+ uint32_t version;
46
+ uint32_t os_version;
53
47
  uint32_t compile_option;
54
- uint32_t data_size;
55
48
  uint32_t ruby_revision;
49
+ uint64_t size;
56
50
  uint64_t mtime;
51
+ uint64_t data_size; /* not used for equality */
52
+ uint8_t pad[24];
57
53
  } __attribute__((packed));
58
54
 
59
- struct i2o_data {
60
- VALUE handler;
61
- VALUE input_data;
62
- };
63
-
64
- struct i2s_data {
65
- VALUE handler;
66
- VALUE input_data;
67
- VALUE pathval;
68
- };
55
+ /*
56
+ * If the struct padding isn't correct to pad the key to 64 bytes, refuse to
57
+ * compile.
58
+ */
59
+ #define STATIC_ASSERT(X) STATIC_ASSERT2(X,__LINE__)
60
+ #define STATIC_ASSERT2(X,L) STATIC_ASSERT3(X,L)
61
+ #define STATIC_ASSERT3(X,L) STATIC_ASSERT_MSG(X,at_line_##L)
62
+ #define STATIC_ASSERT_MSG(COND,MSG) typedef char static_assertion_##MSG[(!!(COND))*2-1]
63
+ STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE);
64
+
65
+ /* Effectively a schema version. Bumping invalidates all previous caches */
66
+ static const uint32_t current_version = 2;
67
+
68
+ /* Derived from kernel or libc version; intended to roughly correspond to when
69
+ * ABIs have changed, requiring recompilation of native gems. */
70
+ static uint32_t current_os_version;
71
+ /* Invalidates cache when switching ruby versions */
72
+ static uint32_t current_ruby_revision;
73
+ /* Invalidates cache when RubyVM::InstructionSequence.compile_option changes */
74
+ static uint32_t current_compile_option_crc32 = 0;
69
75
 
70
- struct s2o_data {
71
- VALUE handler;
72
- VALUE storage_data;
73
- };
76
+ /* Bootsnap::CompileCache::{Native, Uncompilable} */
77
+ static VALUE rb_mBootsnap;
78
+ static VALUE rb_mBootsnap_CompileCache;
79
+ static VALUE rb_mBootsnap_CompileCache_Native;
80
+ static VALUE rb_eBootsnap_CompileCache_Uncompilable;
81
+ static ID uncompilable;
74
82
 
75
- static const uint8_t current_version = 11;
76
- static const char * xattr_key_name = "user.aotcc.key";
77
- static const char * xattr_data_name = "user.aotcc.value";
78
- static const size_t xattr_key_size = sizeof (struct xattr_key);
79
-
80
- #ifdef __MAC_10_15 // Mac OS 10.15 (future)
81
- static const int os_version = 15;
82
- #elif __MAC_10_14 // Mac OS 10.14 (future)
83
- static const int os_version = 14;
84
- #elif __MAC_10_13 // Mac OS 10.13 (future)
85
- static const int os_version = 13;
86
- #elif __MAC_10_12 // Mac OS X Sierra
87
- static const int os_version = 12;
88
- #elif __MAC_10_11 // Mac OS X El Capitan
89
- static const int os_version = 11;
90
- # else
91
- static const int os_version = 0;
92
- #endif
93
-
94
- #ifdef __APPLE__
95
- #define GETXATTR_TRAILER ,0,0
96
- #define SETXATTR_TRAILER ,0
97
- #define REMOVEXATTR_TRAILER ,0
98
- #else
99
- #define GETXATTR_TRAILER
100
- #define SETXATTR_TRAILER
101
- #define REMOVEXATTR_TRAILER
102
- #endif
103
-
104
- /* forward declarations */
105
- static int bs_fetch_data(int fd, size_t size, VALUE handler, VALUE * storage_data, int * exception_tag);
106
- static int bs_update_key(int fd, uint32_t data_size, uint64_t current_mtime);
107
- static int bs_open(const char * path, bool * writable);
108
- static int bs_get_cache(int fd, struct xattr_key * key);
109
- static size_t bs_read_contents(int fd, size_t size, char ** contents);
110
- static int bs_close_and_unclobber_times(int * fd, const char * path, time_t atime, time_t mtime);
111
- static VALUE bs_fetch(VALUE self, VALUE pathval, VALUE handler);
112
- static VALUE bs_compile_option_crc32_set(VALUE self, VALUE crc32val);
83
+ /* Functions exposed as module functions on Bootsnap::CompileCache::Native */
84
+ static VALUE bs_compile_option_crc32_set(VALUE self, VALUE crc32_v);
85
+ static VALUE bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler);
86
+
87
+ /* Helpers */
88
+ static uint64_t fnv1a_64(const char *str);
89
+ static void bs_cache_path(const char * cachedir, const char * path, char ** cache_path);
90
+ static int bs_read_key(int fd, struct bs_cache_key * key);
91
+ static int cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2);
92
+ static VALUE bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler);
93
+ static int open_current_file(char * path, struct bs_cache_key * key);
94
+ static int fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE * output_data, int * exception_tag);
113
95
  static VALUE prot_exception_for_errno(VALUE err);
96
+ static uint32_t get_os_version(void);
97
+
98
+ /*
99
+ * Helper functions to call ruby methods on handler object without crashing on
100
+ * exception.
101
+ */
102
+ static int bs_storage_to_output(VALUE handler, VALUE storage_data, VALUE * output_data);
103
+ static VALUE prot_storage_to_output(VALUE arg);
114
104
  static VALUE prot_input_to_output(VALUE arg);
115
105
  static void bs_input_to_output(VALUE handler, VALUE input_data, VALUE * output_data, int * exception_tag);
116
106
  static VALUE prot_input_to_storage(VALUE arg);
117
107
  static int bs_input_to_storage(VALUE handler, VALUE input_data, VALUE pathval, VALUE * storage_data);
118
- static VALUE prot_storage_to_output(VALUE arg);
119
- static int bs_storage_to_output(VALUE handler, VALUE storage_data, VALUE * output_data);
120
- static int logging_enabled();
121
- static VALUE bs_stats(VALUE self);
108
+ struct s2o_data;
109
+ struct i2o_data;
110
+ struct i2s_data;
122
111
 
112
+ /*
113
+ * Ruby C extensions are initialized by calling Init_<extname>.
114
+ *
115
+ * This sets up the module hierarchy and attaches functions as methods.
116
+ *
117
+ * We also populate some semi-static information about the current OS and so on.
118
+ */
123
119
  void
124
120
  Init_bootsnap(void)
125
121
  {
@@ -127,358 +123,540 @@ Init_bootsnap(void)
127
123
  rb_mBootsnap_CompileCache = rb_define_module_under(rb_mBootsnap, "CompileCache");
128
124
  rb_mBootsnap_CompileCache_Native = rb_define_module_under(rb_mBootsnap_CompileCache, "Native");
129
125
  rb_eBootsnap_CompileCache_Uncompilable = rb_define_class_under(rb_mBootsnap_CompileCache, "Uncompilable", rb_eStandardError);
126
+
130
127
  current_ruby_revision = FIX2INT(rb_const_get(rb_cObject, rb_intern("RUBY_REVISION")));
128
+ current_os_version = get_os_version();
131
129
 
132
130
  uncompilable = rb_intern("__bootsnap_uncompilable__");
133
131
 
134
- rb_define_module_function(rb_mBootsnap_CompileCache_Native, "fetch", bs_fetch, 2);
135
- rb_define_module_function(rb_mBootsnap_CompileCache_Native, "stats", bs_stats, 0);
132
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "fetch", bs_rb_fetch, 3);
136
133
  rb_define_module_function(rb_mBootsnap_CompileCache_Native, "compile_option_crc32=", bs_compile_option_crc32_set, 1);
137
134
  }
138
135
 
136
+ /*
137
+ * Bootsnap's ruby code registers a hook that notifies us via this function
138
+ * when compile_option changes. These changes invalidate all existing caches.
139
+ */
139
140
  static VALUE
140
- bs_stats(VALUE self)
141
+ bs_compile_option_crc32_set(VALUE self, VALUE crc32_v)
141
142
  {
142
- VALUE ret = rb_hash_new();
143
- rb_hash_aset(ret, ID2SYM(rb_intern("hit")), INT2NUM(stats.hit));
144
- rb_hash_aset(ret, ID2SYM(rb_intern("miss")), INT2NUM(stats.miss));
145
- rb_hash_aset(ret, ID2SYM(rb_intern("unwritable")), INT2NUM(stats.unwritable));
146
- rb_hash_aset(ret, ID2SYM(rb_intern("uncompilable")), INT2NUM(stats.uncompilable));
147
- rb_hash_aset(ret, ID2SYM(rb_intern("fail")), INT2NUM(stats.fail));
148
- rb_hash_aset(ret, ID2SYM(rb_intern("retry")), INT2NUM(stats.retry));
149
- return ret;
143
+ Check_Type(crc32_v, T_FIXNUM);
144
+ current_compile_option_crc32 = FIX2UINT(crc32_v);
145
+ return Qnil;
150
146
  }
151
147
 
152
- static VALUE
153
- bs_compile_option_crc32_set(VALUE self, VALUE crc32val)
148
+ /*
149
+ * We use FNV1a-64 to derive cache paths. The choice is somewhat arbitrary but
150
+ * it has several nice properties:
151
+ *
152
+ * - Tiny implementation
153
+ * - No external dependency
154
+ * - Solid performance
155
+ * - Solid randomness
156
+ * - 32 bits doesn't feel collision-resistant enough; 64 is nice.
157
+ */
158
+ static uint64_t
159
+ fnv1a_64(const char *str)
154
160
  {
155
- Check_Type(crc32val, T_FIXNUM);
156
- current_compile_option_crc32 = FIX2UINT(crc32val);
157
- return Qnil;
158
- }
161
+ unsigned char *s = (unsigned char *)str;
162
+ uint64_t h = (uint64_t)0xcbf29ce484222325ULL;
159
163
 
160
- #define CHECK_C(ret, func) \
161
- do { if ((int)(ret) == -1) FAIL((func), errno); } while(0);
164
+ while (*s) {
165
+ h ^= (uint64_t)*s++;
166
+ h += (h << 1) + (h << 4) + (h << 5) + (h << 7) + (h << 8) + (h << 40);
167
+ }
162
168
 
163
- #define FAIL(func, err) \
164
- do { \
165
- int state; \
166
- exception = rb_protect(prot_exception_for_errno, INT2FIX(err), &state); \
167
- if (state) exception = rb_eStandardError; \
168
- goto fail; \
169
- } while(0);
169
+ return h;
170
+ }
170
171
 
171
- #define CHECK_RB0() \
172
- do { if (exception_tag != 0) goto raise; } while (0);
172
+ /*
173
+ * The idea here is that we want a cache key member that changes when the OS
174
+ * changes in such a way as to make existing compiled ISeqs unloadable.
175
+ */
176
+ static uint32_t
177
+ get_os_version(void)
178
+ {
179
+ uint64_t hash;
180
+ struct utsname utsname;
173
181
 
174
- #define CHECK_RB(body) \
175
- do { (body); CHECK_RB0(); } while (0);
182
+ /* Not worth crashing if this fails; lose cache invalidation potential */
183
+ if (uname(&utsname) < 0) return 0;
176
184
 
177
- #define SUCCEED(final) \
178
- do { \
179
- output_data = final; \
180
- goto cleanup; \
181
- } while(0);
185
+ hash = fnv1a_64(utsname.version);
182
186
 
183
- static VALUE
184
- bs_fetch(VALUE self, VALUE pathval, VALUE handler)
187
+ return (uint32_t)(hash >> 32);
188
+ }
189
+
190
+ /*
191
+ * Given a cache root directory and the full path to a file being cached,
192
+ * generate a path under the cache directory at which the cached artifact will
193
+ * be stored.
194
+ *
195
+ * The path will look something like: <cachedir>/12/34567890abcdef
196
+ */
197
+ static void
198
+ bs_cache_path(const char * cachedir, const char * path, char ** cache_path)
185
199
  {
186
- const char * path;
200
+ uint64_t hash = fnv1a_64(path);
187
201
 
188
- VALUE exception;
189
- int exception_tag;
202
+ uint8_t first_byte = (hash >> (64 - 8));
203
+ uint64_t remainder = hash & 0x00ffffffffffffff;
190
204
 
191
- int fd, ret, retry;
192
- bool valid_cache;
193
- bool writable;
194
- uint32_t data_size;
195
- struct xattr_key cache_key;
196
- struct stat statbuf;
197
- char * contents;
205
+ sprintf(*cache_path, "%s/%02x/%014llx", cachedir, first_byte, remainder);
206
+ }
198
207
 
199
- VALUE input_data; /* data read from source file, e.g. YAML or ruby source */
200
- VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
201
- VALUE output_data; /* return data, e.g. ruby hash or loaded iseq */
208
+ /*
209
+ * Test whether a newly-generated cache key based on the file as it exists on
210
+ * disk matches the one that was generated when the file was cached (or really
211
+ * compare any two keys).
212
+ *
213
+ * The data_size member is not compared, as it serves more of a "header"
214
+ * function.
215
+ */
216
+ static int
217
+ cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2)
218
+ {
219
+ return (
220
+ k1->version == k2->version &&
221
+ k1->os_version == k2->os_version &&
222
+ k1->compile_option == k2->compile_option &&
223
+ k1->ruby_revision == k2->ruby_revision &&
224
+ k1->size == k2->size &&
225
+ k1->mtime == k2->mtime
226
+ );
227
+ }
202
228
 
203
- /* don't leak memory */
204
- #define return error!
205
- #define rb_raise error!
206
-
207
- retry = 0;
208
- begin:
209
- output_data = Qnil;
210
- contents = 0;
211
-
212
- /* Blow up if we can't turn our argument into a char* */
213
- Check_Type(pathval, T_STRING);
214
- path = RSTRING_PTR(pathval);
215
-
216
- /* open the file, get its mtime and read the cache key xattr */
217
- CHECK_C(fd = bs_open(path, &writable), "open");
218
- CHECK_C( fstat(fd, &statbuf), "fstat");
219
- CHECK_C(valid_cache = bs_get_cache(fd, &cache_key), "fgetxattr");
220
-
221
- /* `valid_cache` is true if the cache key isn't trivially invalid, e.g. built
222
- * with a different RUBY_REVISION */
223
- if (valid_cache && cache_key.mtime == (uint64_t)statbuf.st_mtime) {
224
- /* if the mtimes match, assume the cache is valid. fetch the cached data. */
225
- ret = bs_fetch_data(fd, (size_t)cache_key.data_size, handler, &output_data, &exception_tag);
226
- if (ret == -1 && errno == _ENOATTR) {
227
- /* the key was present, but the data was missing. remove the key, and
228
- * start over */
229
- CHECK_C(fremovexattr(fd, xattr_key_name REMOVEXATTR_TRAILER), "fremovexattr");
230
- goto retry;
231
- }
232
- CHECK_RB0();
233
- CHECK_C(ret, "fgetxattr/fetch-data");
234
- if (!NIL_P(output_data)) {
235
- stats.hit++;
236
- SUCCEED(output_data); /* this is the fast-path to shoot for */
237
- }
238
- valid_cache = false; /* invalid cache; we'll want to regenerate it */
239
- }
229
+ /*
230
+ * Entrypoint for Bootsnap::CompileCache::Native.fetch. The real work is done
231
+ * in bs_fetch; this function just performs some basic typechecks and
232
+ * conversions on the ruby VALUE arguments before passing them along.
233
+ */
234
+ static VALUE
235
+ bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler)
236
+ {
237
+ Check_Type(cachedir_v, T_STRING);
238
+ Check_Type(path_v, T_STRING);
240
239
 
241
- /* read the contents of the file and crc32 it to compare with the cache key */
242
- CHECK_C(bs_read_contents(fd, statbuf.st_size, &contents), "read") /* contents must be xfree'd */
240
+ if (RSTRING_LEN(cachedir_v) > MAX_CACHEDIR_SIZE) {
241
+ rb_raise(rb_eArgError, "cachedir too long");
242
+ }
243
243
 
244
- /* we need to pass this char* to ruby-land */
245
- input_data = rb_str_new_static(contents, statbuf.st_size);
244
+ char * cachedir = RSTRING_PTR(cachedir_v);
245
+ char * path = RSTRING_PTR(path_v);
246
+ char cache_path[MAX_CACHEPATH_SIZE];
246
247
 
247
- /* if we didn't have write permission to the file, bail now -- everything
248
- * that follows is about generating and writing the cache. Let's just convert
249
- * the input format to the output format and return */
250
- if (!writable) {
251
- stats.unwritable++;
252
- CHECK_RB(bs_input_to_output(handler, input_data, &output_data, &exception_tag));
253
- SUCCEED(output_data);
248
+ { /* generate cache path to cache_path */
249
+ char * tmp = (char *)&cache_path;
250
+ bs_cache_path(cachedir, path, &tmp);
254
251
  }
255
252
 
256
- /* Now, we know we have write permission, and can update the xattrs.
257
- * Additionally, we know the cache is currently missing or absent, and needs
258
- * to be updated. */
259
- stats.miss++;
253
+ return bs_fetch(path, path_v, cache_path, handler);
254
+ }
260
255
 
261
- /* First, convert the input format to the storage format by calling into the
262
- * handler. */
263
- CHECK_RB(exception_tag = bs_input_to_storage(handler, input_data, pathval, &storage_data));
264
- if (storage_data == uncompilable) {
265
- /* The handler can raise Bootsnap::CompileCache::Uncompilable. When it does this,
266
- * we just call the input_to_output handler method, bypassing the storage format. */
267
- CHECK_RB(bs_input_to_output(handler, input_data, &output_data, &exception_tag));
268
- stats.uncompilable++;
269
- SUCCEED(output_data);
270
- }
256
+ /*
257
+ * Open the file we want to load/cache and generate a cache key for it if it
258
+ * was loaded.
259
+ */
260
+ static int
261
+ open_current_file(char * path, struct bs_cache_key * key)
262
+ {
263
+ struct stat statbuf;
264
+ int fd;
271
265
 
272
- /* we can only really write strings to xattrs */
273
- if (!RB_TYPE_P(storage_data, T_STRING)) {
274
- goto invalid_type_storage_data;
275
- }
266
+ fd = open(path, O_RDONLY);
267
+ if (fd < 0) return fd;
276
268
 
277
- /* xattrs can't exceed 64MB */
278
- if (RB_TYPE_P(storage_data, T_STRING) && RSTRING_LEN(storage_data) > 64 * 1024 * 1024) {
279
- if (logging_enabled()) {
280
- fprintf(stderr, "[OPT_AOT_LOG] warning: compiled artifact is over 64MB, which is too large to store in an xattr.%s\n", path);
281
- }
282
- CHECK_RB(bs_input_to_output(handler, input_data, &output_data, &exception_tag));
283
- SUCCEED(output_data);
269
+ if (fstat(fd, &statbuf) < 0) {
270
+ close(fd);
271
+ return -1;
284
272
  }
285
273
 
286
- data_size = (uint32_t)RSTRING_LEN(storage_data);
287
-
288
- /* update the cache, but don't leave it in an invalid state even briefly: remove the key first. */
289
- fremovexattr(fd, xattr_key_name REMOVEXATTR_TRAILER);
290
- CHECK_C(fsetxattr(fd, xattr_data_name, RSTRING_PTR(storage_data), (size_t)data_size, 0 SETXATTR_TRAILER), "fsetxattr");
291
- CHECK_C(bs_update_key(fd, data_size, statbuf.st_mtime), "fsetxattr");
274
+ key->version = current_version;
275
+ key->os_version = current_os_version;
276
+ key->compile_option = current_compile_option_crc32;
277
+ key->ruby_revision = current_ruby_revision;
278
+ key->size = (uint64_t)statbuf.st_size;
279
+ key->mtime = (uint64_t)statbuf.st_mtime;
292
280
 
293
- /* updating xattrs bumps mtime, so we set them back after */
294
- CHECK_C(bs_close_and_unclobber_times(&fd, path, statbuf.st_atime, statbuf.st_mtime), "close/utime");
281
+ return fd;
282
+ }
295
283
 
296
- /* convert the data we just stored into the output format */
297
- CHECK_RB(exception_tag = bs_storage_to_output(handler, storage_data, &output_data));
284
+ #define ERROR_WITH_ERRNO -1
285
+ #define CACHE_MISSING_OR_INVALID -2
298
286
 
299
- /* if the storage data was broken, remove the cache and run input_to_output */
300
- if (output_data == Qnil) {
301
- /* deletion here is best effort; no need to fail if it does */
302
- fremovexattr(fd, xattr_key_name REMOVEXATTR_TRAILER);
303
- fremovexattr(fd, xattr_data_name REMOVEXATTR_TRAILER);
304
- CHECK_RB(bs_input_to_output(handler, input_data, &output_data, &exception_tag));
305
- }
287
+ /*
288
+ * Read the cache key from the given fd, which must have position 0 (e.g.
289
+ * freshly opened file).
290
+ *
291
+ * Possible return values:
292
+ * - 0 (OK, key was loaded)
293
+ * - CACHE_MISSING_OR_INVALID (-2)
294
+ * - ERROR_WITH_ERRNO (-1, errno is set)
295
+ */
296
+ static int
297
+ bs_read_key(int fd, struct bs_cache_key * key)
298
+ {
299
+ ssize_t nread = read(fd, key, KEY_SIZE);
300
+ if (nread < 0) return ERROR_WITH_ERRNO;
301
+ if (nread < KEY_SIZE) return CACHE_MISSING_OR_INVALID;
302
+ return 0;
303
+ }
306
304
 
307
- SUCCEED(output_data);
305
+ /*
306
+ * Open the cache file at a given path, if it exists, and read its key into the
307
+ * struct.
308
+ *
309
+ * Possible return values:
310
+ * - 0 (OK, key was loaded)
311
+ * - CACHE_MISSING_OR_INVALID (-2)
312
+ * - ERROR_WITH_ERRNO (-1, errno is set)
313
+ */
314
+ static int
315
+ open_cache_file(const char * path, struct bs_cache_key * key)
316
+ {
317
+ int fd, res;
308
318
 
309
- #undef return
310
- #undef rb_raise
311
- #define CLEANUP \
312
- if (contents != 0) xfree(contents); \
313
- if (fd > 0) close(fd);
319
+ fd = open(path, O_RDWR, 0644);
320
+ if (fd < 0) {
321
+ if (errno == ENOENT) return CACHE_MISSING_OR_INVALID;
322
+ return ERROR_WITH_ERRNO;
323
+ }
314
324
 
315
- __builtin_unreachable();
316
- cleanup:
317
- CLEANUP;
318
- return output_data;
319
- fail:
320
- CLEANUP;
321
- stats.fail++;
322
- rb_exc_raise(exception);
323
- __builtin_unreachable();
324
- invalid_type_storage_data:
325
- CLEANUP;
326
- stats.fail++;
327
- Check_Type(storage_data, T_STRING);
328
- __builtin_unreachable();
329
- retry:
330
- CLEANUP;
331
- stats.retry++;
332
- if (retry == 1) {
333
- rb_raise(rb_eRuntimeError, "internal error in bootsnap");
334
- __builtin_unreachable();
325
+ res = bs_read_key(fd, key);
326
+ if (res < 0) {
327
+ close(fd);
328
+ return res;
335
329
  }
336
- retry = 1;
337
- goto begin;
338
- raise:
339
- CLEANUP;
340
- stats.fail++;
341
- rb_jump_tag(exception_tag);
342
- __builtin_unreachable();
330
+
331
+ return fd;
343
332
  }
344
333
 
334
+ /*
335
+ * The cache file is laid out like:
336
+ * 0...64 : bs_cache_key
337
+ * 64..-1 : cached artifact
338
+ *
339
+ * This function takes a file descriptor whose position is pre-set to 64, and
340
+ * the data_size (corresponding to the remaining number of bytes) listed in the
341
+ * cache header.
342
+ *
343
+ * We load the text from this file into a buffer, and pass it to the ruby-land
344
+ * handler with exception handling via the exception_tag param.
345
+ *
346
+ * Data is returned via the output_data parameter, which, if there's no error
347
+ * or exception, will be the final data returnable to the user.
348
+ */
345
349
  static int
346
- bs_fetch_data(int fd, size_t size, VALUE handler, VALUE * output_data, int * exception_tag)
350
+ fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE * output_data, int * exception_tag)
347
351
  {
352
+ char * data = NULL;
353
+ ssize_t nread;
348
354
  int ret;
349
- ssize_t nbytes;
350
- void * xattr_data;
351
- VALUE storage_data;
352
355
 
353
- *output_data = Qnil;
354
- *exception_tag = 0;
356
+ VALUE storage_data;
355
357
 
356
- xattr_data = ALLOC_N(uint8_t, size);
357
- nbytes = fgetxattr(fd, xattr_data_name, xattr_data, size GETXATTR_TRAILER);
358
- if (nbytes == -1) {
358
+ if (data_size > 100000000000) {
359
+ errno = EINVAL; /* because wtf? */
359
360
  ret = -1;
360
361
  goto done;
361
362
  }
362
- if (nbytes != (ssize_t)size) {
363
- errno = EIO; /* lies but whatever */
363
+ data = ALLOC_N(char, data_size);
364
+ nread = read(fd, data, data_size);
365
+ if (nread < 0) {
364
366
  ret = -1;
365
367
  goto done;
366
368
  }
367
- storage_data = rb_str_new_static(xattr_data, nbytes);
368
- ret = bs_storage_to_output(handler, storage_data, output_data);
369
- if (ret != 0) {
370
- *exception_tag = ret;
371
- errno = 0;
369
+ if (nread != data_size) {
370
+ ret = CACHE_MISSING_OR_INVALID;
371
+ goto done;
372
372
  }
373
- done:
374
- xfree(xattr_data);
375
- return ret;
376
- }
377
373
 
378
- static int
379
- bs_update_key(int fd, uint32_t data_size, uint64_t current_mtime)
380
- {
381
- struct xattr_key xattr_key;
382
-
383
- xattr_key = (struct xattr_key){
384
- .version = current_version,
385
- .os_version = os_version,
386
- .data_size = data_size,
387
- .compile_option = current_compile_option_crc32,
388
- .ruby_revision = current_ruby_revision,
389
- .mtime = current_mtime,
390
- };
374
+ storage_data = rb_str_new_static(data, data_size);
391
375
 
392
- return fsetxattr(fd, xattr_key_name, &xattr_key, (size_t)xattr_key_size, 0 SETXATTR_TRAILER);
376
+ *exception_tag = bs_storage_to_output(handler, storage_data, output_data);
377
+ ret = 0;
378
+ done:
379
+ if (data != NULL) xfree(data);
380
+ return ret;
393
381
  }
394
382
 
395
383
  /*
396
- * Open the file O_RDWR if possible, or O_RDONLY if that throws EACCES.
397
- * Set +writable+ to indicate which mode was used.
384
+ * Like mkdir -p, this recursively creates directory parents of a file. e.g.
385
+ * given /a/b/c, creates /a and /a/b.
398
386
  */
399
387
  static int
400
- bs_open(const char * path, bool * writable)
388
+ mkpath(char * file_path, mode_t mode)
401
389
  {
402
- int fd;
403
-
404
- *writable = true;
405
- fd = open(path, O_RDWR);
406
- if (fd == -1 && errno == EACCES) {
407
- *writable = false;
408
- if (logging_enabled()) {
409
- fprintf(stderr, "[OPT_AOT_LOG] warning: unable to cache because no write permission to %s\n", path);
390
+ /* It would likely be more efficient to count back until we
391
+ * find a component that *does* exist, but this will only run
392
+ * at most 256 times, so it seems not worthwhile to change. */
393
+ char * p;
394
+ for (p = strchr(file_path + 1, '/'); p; p = strchr(p + 1, '/')) {
395
+ *p = '\0';
396
+ if (mkdir(file_path, mode) == -1) {
397
+ if (errno != EEXIST) {
398
+ *p = '/';
399
+ return -1;
400
+ }
410
401
  }
411
- fd = open(path, O_RDONLY);
402
+ *p = '/';
412
403
  }
413
- return fd;
404
+ return 0;
414
405
  }
415
406
 
416
407
  /*
417
- * Fetch the cache key from the relevant xattr into +key+.
418
- * Returns:
419
- * 0: invalid/no cache
420
- * 1: valid cache
421
- * -1: fgetxattr failed, errno is set
408
+ * Write a cache header/key and a compiled artifact to a given cache path by
409
+ * writing to a tmpfile and then renaming the tmpfile over top of the final
410
+ * path.
422
411
  */
423
412
  static int
424
- bs_get_cache(int fd, struct xattr_key * key)
413
+ atomic_write_cache_file(char * path, struct bs_cache_key * key, VALUE data)
425
414
  {
426
- ssize_t nbytes;
415
+ char template[MAX_CACHEPATH_SIZE + 20];
416
+ char * dest;
417
+ char * tmp_path;
418
+ int fd;
419
+ ssize_t nwrite;
420
+
421
+ dest = strncpy(template, path, MAX_CACHEPATH_SIZE);
422
+ strcat(dest, ".tmp.XXXXXX");
427
423
 
428
- nbytes = fgetxattr(fd, xattr_key_name, (void *)key, xattr_key_size GETXATTR_TRAILER);
429
- if (nbytes == -1 && errno != _ENOATTR) {
424
+ tmp_path = mktemp(template);
425
+ fd = open(tmp_path, O_WRONLY | O_CREAT, 0644);
426
+ if (fd < 0) {
427
+ if (mkpath(path, 0755) < 0) return -1;
428
+ fd = open(tmp_path, O_WRONLY | O_CREAT, 0644);
429
+ if (fd < 0) return -1;
430
+ }
431
+
432
+ key->data_size = RSTRING_LEN(data);
433
+ nwrite = write(fd, key, KEY_SIZE);
434
+ if (nwrite < 0) return -1;
435
+ if (nwrite != KEY_SIZE) {
436
+ errno = EIO; /* Lies but whatever */
430
437
  return -1;
431
438
  }
432
439
 
433
- return (nbytes == (ssize_t)xattr_key_size && \
434
- key->version == current_version && \
435
- key->os_version == os_version && \
436
- key->compile_option == current_compile_option_crc32 && \
437
- key->ruby_revision == current_ruby_revision);
440
+ nwrite = write(fd, RSTRING_PTR(data), RSTRING_LEN(data));
441
+ if (nwrite < 0) return -1;
442
+ if (nwrite != RSTRING_LEN(data)) {
443
+ errno = EIO; /* Lies but whatever */
444
+ return -1;
445
+ }
446
+
447
+ close(fd);
448
+ return rename(tmp_path, path);
438
449
  }
439
450
 
440
451
  /*
441
- * Read an entire file into a char*
442
- * contents must be freed with xfree() when done.
452
+ * Given an errno value (converted to a ruby Fixnum), return the corresponding
453
+ * Errno::* constant. If none is found, return StandardError instead.
443
454
  */
444
- static size_t
455
+ static VALUE
456
+ prot_exception_for_errno(VALUE err)
457
+ {
458
+ if (err != INT2FIX(0)) {
459
+ VALUE mErrno = rb_const_get(rb_cObject, rb_intern("Errno"));
460
+ VALUE constants = rb_funcall(mErrno, rb_intern("constants"), 0);
461
+ VALUE which = rb_funcall(constants, rb_intern("[]"), 1, err);
462
+ return rb_funcall(mErrno, rb_intern("const_get"), 1, which);
463
+ }
464
+ return rb_eStandardError;
465
+ }
466
+
467
+
468
+ /* Read contents from an fd, whose contents are asserted to be +size+ bytes
469
+ * long, into a buffer */
470
+ static ssize_t
445
471
  bs_read_contents(int fd, size_t size, char ** contents)
446
472
  {
447
473
  *contents = ALLOC_N(char, size);
448
474
  return read(fd, *contents, size);
449
475
  }
450
476
 
451
- static int
452
- bs_close_and_unclobber_times(int * fd, const char * path, time_t atime, time_t mtime)
477
+ /*
478
+ * This is the meat of the extension. bs_fetch is
479
+ * Bootsnap::CompileCache::Native.fetch.
480
+ *
481
+ * There are three "formats" in use here:
482
+ * 1. "input" format, which is what we load from the source file;
483
+ * 2. "storage" format, which we write to the cache;
484
+ * 3. "output" format, which is what we return.
485
+ *
486
+ * E.g., For ISeq compilation:
487
+ * input: ruby source, as text
488
+ * storage: binary string (RubyVM::InstructionSequence#to_binary)
489
+ * output: Instance of RubyVM::InstructionSequence
490
+ *
491
+ * And for YAML:
492
+ * input: yaml as text
493
+ * storage: MessagePack or Marshal text
494
+ * output: ruby object, loaded from yaml/messagepack/marshal
495
+ *
496
+ * The handler passed in must support three messages:
497
+ * * storage_to_output(s) -> o
498
+ * * input_to_output(i) -> o
499
+ * * input_to_storage(i) -> s
500
+ * (input_to_storage may raise Bootsnap::CompileCache::Uncompilable, which
501
+ * will prevent caching and cause output to be generated with
502
+ * input_to_output)
503
+ *
504
+ * The semantics of this function are basically:
505
+ *
506
+ * return storage_to_output(cache[path]) if cache[path]
507
+ * storage = input_to_storage(input)
508
+ * cache[path] = storage
509
+ * return storage_to_output(storage)
510
+ *
511
+ * Or expanded a bit:
512
+ *
513
+ * - Check if the cache file exists and is up to date.
514
+ * - If it is, load this data to storage_data.
515
+ * - return storage_to_output(storage_data)
516
+ * - Read the file to input_data
517
+ * - Generate storage_data using input_to_storage(input_data)
518
+ * - Write storage_data, with a cache key, to the cache file.
519
+ * - Return storage_to_output(storage_data)
520
+ */
521
+ static VALUE
522
+ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler)
453
523
  {
454
- struct utimbuf times = {
455
- .actime = atime,
456
- .modtime = mtime,
457
- };
458
- if (close(*fd) == -1) {
459
- return -1;
524
+ struct bs_cache_key cached_key, current_key;
525
+ char * contents = NULL;
526
+ int cache_fd = -1, current_fd = -1;
527
+ int res, valid_cache, exception_tag = 0;
528
+
529
+ VALUE input_data; /* data read from source file, e.g. YAML or ruby source */
530
+ VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
531
+ VALUE output_data; /* return data, e.g. ruby hash or loaded iseq */
532
+
533
+ VALUE exception; /* ruby exception object to raise instead of returning */
534
+
535
+ /* Open the source file and generate a cache key for it */
536
+ current_fd = open_current_file(path, &current_key);
537
+ if (current_fd < 0) goto fail_errno;
538
+
539
+ /* Open the cache file if it exists, and read its cache key in */
540
+ cache_fd = open_cache_file(cache_path, &cached_key);
541
+ if (cache_fd < 0 && cache_fd != CACHE_MISSING_OR_INVALID) goto fail_errno;
542
+
543
+ /* True if the cache existed and no invalidating changes have occurred since
544
+ * it was generated. */
545
+ valid_cache = cache_key_equal(&current_key, &cached_key);
546
+
547
+ if (valid_cache) {
548
+ /* Fetch the cache data and return it if we're able to load it successfully */
549
+ res = fetch_cached_data(cache_fd, (ssize_t)cached_key.data_size, handler, &output_data, &exception_tag);
550
+ if (exception_tag != 0) goto raise;
551
+ else if (res == CACHE_MISSING_OR_INVALID) valid_cache = 0;
552
+ else if (res == ERROR_WITH_ERRNO) goto fail_errno;
553
+ else if (!NIL_P(output_data)) goto succeed; /* fast-path, goal */
460
554
  }
461
- *fd = 0;
462
- return utime(path, &times);
555
+ close(cache_fd);
556
+ cache_fd = -1;
557
+ /* Cache is stale, invalid, or missing. Regenerate and write it out. */
558
+
559
+ /* Read the contents of the source file into a buffer */
560
+ if (bs_read_contents(current_fd, current_key.size, &contents) < 0) goto fail_errno;
561
+ input_data = rb_str_new_static(contents, current_key.size);
562
+
563
+ /* Try to compile the input_data using input_to_storage(input_data) */
564
+ exception_tag = bs_input_to_storage(handler, input_data, path_v, &storage_data);
565
+ if (exception_tag != 0) goto raise;
566
+ /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
567
+ * to cache anything; just return input_to_output(input_data) */
568
+ if (storage_data == uncompilable) {
569
+ bs_input_to_output(handler, input_data, &output_data, &exception_tag);
570
+ if (exception_tag != 0) goto raise;
571
+ goto succeed;
572
+ }
573
+ /* If storage_data isn't a string, we can't cache it */
574
+ if (!RB_TYPE_P(storage_data, T_STRING)) goto invalid_type_storage_data;
575
+
576
+ /* Write the cache key and storage_data to the cache directory */
577
+ res = atomic_write_cache_file(cache_path, &current_key, storage_data);
578
+ if (res < 0) goto fail_errno;
579
+
580
+ /* Having written the cache, now convert storage_data to output_data */
581
+ exception_tag = bs_storage_to_output(handler, storage_data, &output_data);
582
+ if (exception_tag != 0) goto raise;
583
+
584
+ /* If output_data is nil, delete the cache entry and generate the output
585
+ * using input_to_output */
586
+ if (NIL_P(output_data)) {
587
+ if (unlink(cache_path) < 0) goto fail_errno;
588
+ bs_input_to_output(handler, input_data, &output_data, &exception_tag);
589
+ if (exception_tag != 0) goto raise;
590
+ }
591
+
592
+ goto succeed; /* output_data is now the correct return. */
593
+
594
+ #define CLEANUP \
595
+ if (contents != NULL) xfree(contents); \
596
+ if (current_fd >= 0) close(current_fd); \
597
+ if (cache_fd >= 0) close(cache_fd);
598
+
599
+ succeed:
600
+ CLEANUP;
601
+ return output_data;
602
+ fail_errno:
603
+ CLEANUP;
604
+ exception = rb_protect(prot_exception_for_errno, INT2FIX(errno), &res);
605
+ if (res) exception = rb_eStandardError;
606
+ rb_exc_raise(exception);
607
+ __builtin_unreachable();
608
+ raise:
609
+ CLEANUP;
610
+ rb_jump_tag(exception_tag);
611
+ __builtin_unreachable();
612
+ invalid_type_storage_data:
613
+ CLEANUP;
614
+ Check_Type(storage_data, T_STRING);
615
+ __builtin_unreachable();
616
+
617
+ #undef CLEANUP
463
618
  }
464
619
 
620
+ /*****************************************************************************/
621
+ /********************* Handler Wrappers **************************************/
622
+ /*****************************************************************************
623
+ * Everything after this point in the file is just wrappers to deal with ruby's
624
+ * clunky method of handling exceptions from ruby methods invoked from C.
625
+ */
626
+
627
+ struct s2o_data {
628
+ VALUE handler;
629
+ VALUE storage_data;
630
+ };
631
+
632
+ struct i2o_data {
633
+ VALUE handler;
634
+ VALUE input_data;
635
+ };
636
+
637
+ struct i2s_data {
638
+ VALUE handler;
639
+ VALUE input_data;
640
+ VALUE pathval;
641
+ };
642
+
465
643
  static VALUE
466
- prot_exception_for_errno(VALUE err)
644
+ prot_storage_to_output(VALUE arg)
467
645
  {
468
- if (err != INT2FIX(0)) {
469
- VALUE mErrno = rb_const_get(rb_cObject, rb_intern("Errno"));
470
- VALUE constants = rb_funcall(mErrno, rb_intern("constants"), 0);
471
- VALUE which = rb_funcall(constants, rb_intern("[]"), 1, err);
472
- return rb_funcall(mErrno, rb_intern("const_get"), 1, which);
473
- }
474
- return rb_eStandardError;
646
+ struct s2o_data * data = (struct s2o_data *)arg;
647
+ return rb_funcall(data->handler, rb_intern("storage_to_output"), 1, data->storage_data);
475
648
  }
476
649
 
477
- static VALUE
478
- prot_input_to_output(VALUE arg)
650
+ static int
651
+ bs_storage_to_output(VALUE handler, VALUE storage_data, VALUE * output_data)
479
652
  {
480
- struct i2o_data * data = (struct i2o_data *)arg;
481
- return rb_funcall(data->handler, rb_intern("input_to_output"), 1, data->input_data);
653
+ int state;
654
+ struct s2o_data s2o_data = {
655
+ .handler = handler,
656
+ .storage_data = storage_data,
657
+ };
658
+ *output_data = rb_protect(prot_storage_to_output, (VALUE)&s2o_data, &state);
659
+ return state;
482
660
  }
483
661
 
484
662
  static void
@@ -491,6 +669,13 @@ bs_input_to_output(VALUE handler, VALUE input_data, VALUE * output_data, int * e
491
669
  *output_data = rb_protect(prot_input_to_output, (VALUE)&i2o_data, exception_tag);
492
670
  }
493
671
 
672
+ static VALUE
673
+ prot_input_to_output(VALUE arg)
674
+ {
675
+ struct i2o_data * data = (struct i2o_data *)arg;
676
+ return rb_funcall(data->handler, rb_intern("input_to_output"), 1, data->input_data);
677
+ }
678
+
494
679
  static VALUE
495
680
  try_input_to_storage(VALUE arg)
496
681
  {
@@ -526,36 +711,3 @@ bs_input_to_storage(VALUE handler, VALUE input_data, VALUE pathval, VALUE * stor
526
711
  *storage_data = rb_protect(prot_input_to_storage, (VALUE)&i2s_data, &state);
527
712
  return state;
528
713
  }
529
-
530
- static VALUE
531
- prot_storage_to_output(VALUE arg)
532
- {
533
- struct s2o_data * data = (struct s2o_data *)arg;
534
- return rb_funcall(data->handler, rb_intern("storage_to_output"), 1, data->storage_data);
535
- }
536
-
537
- static int
538
- bs_storage_to_output(VALUE handler, VALUE storage_data, VALUE * output_data)
539
- {
540
- int state;
541
- struct s2o_data s2o_data = {
542
- .handler = handler,
543
- .storage_data = storage_data,
544
- };
545
- *output_data = rb_protect(prot_storage_to_output, (VALUE)&s2o_data, &state);
546
- return state;
547
- }
548
-
549
- /* default no if empty, yes if present, no if "0" */
550
- static int
551
- logging_enabled()
552
- {
553
- char * log = getenv("OPT_AOT_LOG");
554
- if (log == 0) {
555
- return 0;
556
- } else if (log[0] == '0') {
557
- return 0;
558
- } else {
559
- return 1;
560
- }
561
- }