bootsnap 1.4.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.github/CODEOWNERS +2 -0
  3. data/.github/probots.yml +2 -0
  4. data/.gitignore +17 -0
  5. data/.rubocop.yml +20 -0
  6. data/.travis.yml +21 -0
  7. data/CHANGELOG.md +122 -0
  8. data/CODE_OF_CONDUCT.md +74 -0
  9. data/CONTRIBUTING.md +21 -0
  10. data/Gemfile +9 -0
  11. data/LICENSE.txt +21 -0
  12. data/README.jp.md +231 -0
  13. data/README.md +304 -0
  14. data/Rakefile +13 -0
  15. data/bin/ci +10 -0
  16. data/bin/console +15 -0
  17. data/bin/setup +8 -0
  18. data/bin/test-minimal-support +7 -0
  19. data/bin/testunit +8 -0
  20. data/bootsnap.gemspec +46 -0
  21. data/dev.yml +10 -0
  22. data/ext/bootsnap/bootsnap.c +829 -0
  23. data/ext/bootsnap/bootsnap.h +6 -0
  24. data/ext/bootsnap/extconf.rb +19 -0
  25. data/lib/bootsnap.rb +48 -0
  26. data/lib/bootsnap/bundler.rb +15 -0
  27. data/lib/bootsnap/compile_cache.rb +43 -0
  28. data/lib/bootsnap/compile_cache/iseq.rb +73 -0
  29. data/lib/bootsnap/compile_cache/yaml.rb +63 -0
  30. data/lib/bootsnap/explicit_require.rb +50 -0
  31. data/lib/bootsnap/load_path_cache.rb +78 -0
  32. data/lib/bootsnap/load_path_cache/cache.rb +208 -0
  33. data/lib/bootsnap/load_path_cache/change_observer.rb +63 -0
  34. data/lib/bootsnap/load_path_cache/core_ext/active_support.rb +107 -0
  35. data/lib/bootsnap/load_path_cache/core_ext/kernel_require.rb +93 -0
  36. data/lib/bootsnap/load_path_cache/core_ext/loaded_features.rb +18 -0
  37. data/lib/bootsnap/load_path_cache/loaded_features_index.rb +148 -0
  38. data/lib/bootsnap/load_path_cache/path.rb +114 -0
  39. data/lib/bootsnap/load_path_cache/path_scanner.rb +50 -0
  40. data/lib/bootsnap/load_path_cache/realpath_cache.rb +32 -0
  41. data/lib/bootsnap/load_path_cache/store.rb +90 -0
  42. data/lib/bootsnap/setup.rb +39 -0
  43. data/lib/bootsnap/version.rb +4 -0
  44. data/shipit.rubygems.yml +0 -0
  45. metadata +174 -0
@@ -0,0 +1,304 @@
1
+ # Bootsnap [![Build Status](https://travis-ci.org/Shopify/bootsnap.svg?branch=master)](https://travis-ci.org/Shopify/bootsnap)
2
+
3
+ Bootsnap is a library that plugs into Ruby, with optional support for `ActiveSupport` and `YAML`,
4
+ to optimize and cache expensive computations. See [How Does This Work](#how-does-this-work).
5
+
6
+ #### Performance
7
+
8
+ - [Discourse](https://github.com/discourse/discourse) reports a boot time reduction of approximately
9
+ 50%, from roughly 6 to 3 seconds on one machine;
10
+ - One of our smaller internal apps also sees a reduction of 50%, from 3.6 to 1.8 seconds;
11
+ - The core Shopify platform -- a rather large monolithic application -- boots about 75% faster,
12
+ dropping from around 25s to 6.5s.
13
+ * In Shopify core (a large app), about 25% of this gain can be attributed to `compile_cache_*`
14
+ features; 75% to path caching, and ~1% to `disable_trace`. This is fairly representative.
15
+
16
+ ## Usage
17
+
18
+ This gem works on macOS and Linux.
19
+
20
+ Add `bootsnap` to your `Gemfile`:
21
+
22
+ ```ruby
23
+ gem 'bootsnap', require: false
24
+ ```
25
+
26
+ If you are using Rails, add this to `config/boot.rb` immediately after `require 'bundler/setup'`:
27
+
28
+ ```ruby
29
+ require 'bootsnap/setup'
30
+ ```
31
+
32
+ Note that bootsnap writes to `tmp/cache`, and that directory *must* be writable. Rails will fail to
33
+ boot if it is not. If this is unacceptable (e.g. you are running in a read-only container and
34
+ unwilling to mount in a writable tmpdir), you should remove this line or wrap it in a conditional.
35
+
36
+ **Note also that bootsnap will never clean up its own cache: this is left up to you. Depending on your
37
+ deployment strategy, you may need to periodically purge `tmp/cache/bootsnap*`. If you notice deploys
38
+ getting progressively slower, this is almost certainly the cause.**
39
+
40
+ It's technically possible to simply specify `gem 'bootsnap', require: 'bootsnap/setup'`, but it's
41
+ important to load Bootsnap as early as possible to get maximum performance improvement.
42
+
43
+ You can see how this require works [here](https://github.com/Shopify/bootsnap/blob/master/lib/bootsnap/setup.rb).
44
+
45
+ If you are not using Rails, or if you are but want more control over things, add this to your
46
+ application setup immediately after `require 'bundler/setup'` (i.e. as early as possible: the sooner
47
+ this is loaded, the sooner it can start optimizing things):
48
+
49
+ ```ruby
50
+ require 'bootsnap'
51
+ env = ENV['RAILS_ENV'] || "development"
52
+ Bootsnap.setup(
53
+ cache_dir: 'tmp/cache', # Path to your cache
54
+ development_mode: env == 'development', # Current working environment, e.g. RACK_ENV, RAILS_ENV, etc
55
+ load_path_cache: true, # Optimize the LOAD_PATH with a cache
56
+ autoload_paths_cache: true, # Optimize ActiveSupport autoloads with cache
57
+ disable_trace: true, # Set `RubyVM::InstructionSequence.compile_option = { trace_instruction: false }`
58
+ compile_cache_iseq: true, # Compile Ruby code into ISeq cache, breaks coverage reporting.
59
+ compile_cache_yaml: true # Compile YAML into a cache
60
+ )
61
+ ```
62
+
63
+ **Note that `disable_trace` will break debuggers and tracing.**
64
+
65
+ **Protip:** You can replace `require 'bootsnap'` with `BootLib::Require.from_gem('bootsnap',
66
+ 'bootsnap')` using [this trick](https://github.com/Shopify/bootsnap/wiki/Bootlib::Require). This
67
+ will help optimize boot time further if you have an extremely large `$LOAD_PATH`.
68
+
69
+ Note: Bootsnap and [Spring](https://github.com/rails/spring) are orthogonal tools. While Bootsnap
70
+ speeds up the loading of individual source files, Spring keeps a copy of a pre-booted Rails process
71
+ on hand to completely skip parts of the boot process the next time it's needed. The two tools work
72
+ well together, and are both included in newly-generated Rails applications by default.
73
+
74
+ ### Environments
75
+
76
+ All Bootsnap features are enabled in development, test, production, and all other environments according to the configuration in the setup. At Shopify, we use this gem safely in all environments without issue.
77
+
78
+ If you would like to disable any feature for a certain environment, we suggest changing the configuration to take into account the appropriate ENV var or configuration according to your needs.
79
+
80
+ ## How does this work?
81
+
82
+ Bootsnap optimizes methods to cache results of expensive computations; these optimizations can be grouped
83
+ into two broad categories:
84
+
85
+ * [Path Pre-Scanning](#path-pre-scanning)
86
+ * `Kernel#require` and `Kernel#load` are modified to eliminate `$LOAD_PATH` scans.
87
+ * `ActiveSupport::Dependencies.{autoloadable_module?,load_missing_constant,depend_on}` are
88
+ overridden to eliminate scans of `ActiveSupport::Dependencies.autoload_paths`.
89
+ * [Compilation caching](#compilation-caching)
90
+ * `RubyVM::InstructionSequence.load_iseq` is implemented to cache the result of ruby bytecode
91
+ compilation.
92
+ * `YAML.load_file` is modified to cache the result of loading a YAML object in MessagePack format
93
+ (or Marshal, if the message uses types unsupported by MessagePack).
94
+
95
+ ### Path Pre-Scanning
96
+
97
+ *(This work is a minor evolution of [bootscale](https://github.com/byroot/bootscale)).*
98
+
99
+ Upon initialization of bootsnap or modification of the path (e.g. `$LOAD_PATH`),
100
+ `Bootsnap::LoadPathCache` will fetch a list of requirable entries from a cache, or, if necessary,
101
+ perform a full scan and cache the result.
102
+
103
+ Later, when we run (e.g.) `require 'foo'`, ruby *would* iterate through every item on our
104
+ `$LOAD_PATH` `['x', 'y', ...]`, looking for `x/foo.rb`, `y/foo.rb`, and so on. Bootsnap instead
105
+ looks at all the cached requirables for each `$LOAD_PATH` entry and substitutes the full expanded
106
+ path of the match ruby would have eventually chosen.
107
+
108
+ If you look at the syscalls generated by this behaviour, the net effect is that what would
109
+ previously look like this:
110
+
111
+ ```
112
+ open x/foo.rb # (fail)
113
+ # (imagine this with 500 $LOAD_PATH entries instead of two)
114
+ open y/foo.rb # (success)
115
+ close y/foo.rb
116
+ open y/foo.rb
117
+ ...
118
+ ```
119
+
120
+ becomes this:
121
+
122
+ ```
123
+ open y/foo.rb
124
+ ...
125
+ ```
126
+
127
+ Exactly the same strategy is employed for methods that traverse
128
+ `ActiveSupport::Dependencies.autoload_paths` if the `autoload_paths_cache` option is given to
129
+ `Bootsnap.setup`.
130
+
131
+ The following diagram flowcharts the overrides that make the `*_path_cache` features work.
132
+
133
+ ![Flowchart explaining
134
+ Bootsnap](https://cloud.githubusercontent.com/assets/3074765/24532120/eed94e64-158b-11e7-9137-438d759b2ac8.png)
135
+
136
+ Bootsnap classifies path entries into two categories: stable and volatile. Volatile entries are
137
+ scanned each time the application boots, and their caches are only valid for 30 seconds. Stable
138
+ entries do not expire -- once their contents have been scanned, they are assumed to never change.
139
+
140
+ The only directories considered "stable" are things under the Ruby install prefix
141
+ (`RbConfig::CONFIG['prefix']`, e.g. `/usr/local/ruby` or `~/.rubies/x.y.z`), and things under the
142
+ `Gem.path` (e.g. `~/.gem/ruby/x.y.z`) or `Bundler.bundle_path`. Everything else is considered
143
+ "volatile".
144
+
145
+ In addition to the [`Bootsnap::LoadPathCache::Cache`
146
+ source](https://github.com/Shopify/bootsnap/blob/master/lib/bootsnap/load_path_cache/cache.rb),
147
+ this diagram may help clarify how entry resolution works:
148
+
149
+ ![How path searching works](https://cloud.githubusercontent.com/assets/3074765/25388270/670b5652-299b-11e7-87fb-975647f68981.png)
150
+
151
+
152
+ It's also important to note how expensive `LoadError`s can be. If ruby invokes
153
+ `require 'something'`, but that file isn't on `$LOAD_PATH`, it takes `2 *
154
+ $LOAD_PATH.length` filesystem accesses to determine that. Bootsnap caches this
155
+ result too, raising a `LoadError` without touching the filesystem at all.
156
+
157
+ ### Compilation Caching
158
+
159
+ *(A more readable implementation of this concept can be found in
160
+ [yomikomu](https://github.com/ko1/yomikomu)).*
161
+
162
+ Ruby has complex grammar and parsing it is not a particularly cheap operation. Since 1.9, Ruby has
163
+ translated ruby source to an internal bytecode format, which is then executed by the Ruby VM. Since
164
+ 2.3.0, Ruby [exposes an API](https://ruby-doc.org/core-2.3.0/RubyVM/InstructionSequence.html) that
165
+ allows caching that bytecode. This allows us to bypass the relatively-expensive compilation step on
166
+ subsequent loads of the same file.
167
+
168
+ We also noticed that we spend a lot of time loading YAML documents during our application boot, and
169
+ that MessagePack and Marshal are *much* faster at deserialization than YAML, even with a fast
170
+ implementation. We use the same strategy of compilation caching for YAML documents, with the
171
+ equivalent of Ruby's "bytecode" format being a MessagePack document (or, in the case of YAML
172
+ documents with types unsupported by MessagePack, a Marshal stream).
173
+
174
+ These compilation results are stored in a cache directory, with filenames generated by taking a hash
175
+ of the full expanded path of the input file (FNV1a-64).
176
+
177
+ Whereas before, the sequence of syscalls generated to `require` a file would look like:
178
+
179
+ ```
180
+ open /c/foo.rb -> m
181
+ fstat64 m
182
+ close m
183
+ open /c/foo.rb -> o
184
+ fstat64 o
185
+ fstat64 o
186
+ read o
187
+ read o
188
+ ...
189
+ close o
190
+ ```
191
+
192
+ With bootsnap, we get:
193
+
194
+ ```
195
+ open /c/foo.rb -> n
196
+ fstat64 n
197
+ close n
198
+ open /c/foo.rb -> n
199
+ fstat64 n
200
+ open (cache) -> m
201
+ read m
202
+ read m
203
+ close m
204
+ close n
205
+ ```
206
+
207
+ This may look worse at a glance, but underlies a large performance difference.
208
+
209
+ *(The first three syscalls in both listings -- `open`, `fstat64`, `close` -- are not inherently
210
+ useful. [This ruby patch](https://bugs.ruby-lang.org/issues/13378) optimizes them out when coupled
211
+ with bootsnap.)*
212
+
213
+ Bootsnap writes a cache file containing a 64 byte header followed by the cache contents. The header
214
+ is a cache key including several fields:
215
+
216
+ * `version`, hardcoded in bootsnap. Essentially a schema version;
217
+ * `ruby_platform`, a hash of the `RUBY_PLATFORM` variable (e.g. x86_64-linux-gnu) and the glibc version (on Linux) or OS version (`uname -v` on BSD, macOS);
218
+ * `compile_option`, which changes when `RubyVM::InstructionSequence.compile_option` does;
219
+ * `ruby_revision`, the version of Ruby this was compiled with;
220
+ * `size`, the size of the source file;
221
+ * `mtime`, the last-modification timestamp of the source file when it was compiled; and
222
+ * `data_size`, the number of bytes following the header, which we need to read it into a buffer.
223
+
224
+ If the key is valid, the result is loaded from the value. Otherwise, it is regenerated and clobbers
225
+ the current cache.
226
+
227
+ ### Putting it all together
228
+
229
+ Imagine we have this file structure:
230
+
231
+ ```
232
+ /
233
+ ├── a
234
+ ├── b
235
+ └── c
236
+ └── foo.rb
237
+ ```
238
+
239
+ And this `$LOAD_PATH`:
240
+
241
+ ```
242
+ ["/a", "/b", "/c"]
243
+ ```
244
+
245
+ When we call `require 'foo'` without bootsnap, Ruby would generate this sequence of syscalls:
246
+
247
+
248
+ ```
249
+ open /a/foo.rb -> -1
250
+ open /b/foo.rb -> -1
251
+ open /c/foo.rb -> n
252
+ close n
253
+ open /c/foo.rb -> m
254
+ fstat64 m
255
+ close m
256
+ open /c/foo.rb -> o
257
+ fstat64 o
258
+ fstat64 o
259
+ read o
260
+ read o
261
+ ...
262
+ close o
263
+ ```
264
+
265
+ With bootsnap, we get:
266
+
267
+ ```
268
+ open /c/foo.rb -> n
269
+ fstat64 n
270
+ close n
271
+ open /c/foo.rb -> n
272
+ fstat64 n
273
+ open (cache) -> m
274
+ read m
275
+ read m
276
+ close m
277
+ close n
278
+ ```
279
+
280
+ If we call `require 'nope'` without bootsnap, we get:
281
+
282
+ ```
283
+ open /a/nope.rb -> -1
284
+ open /b/nope.rb -> -1
285
+ open /c/nope.rb -> -1
286
+ open /a/nope.bundle -> -1
287
+ open /b/nope.bundle -> -1
288
+ open /c/nope.bundle -> -1
289
+ ```
290
+
291
+ ...and if we call `require 'nope'` *with* bootsnap, we get...
292
+
293
+ ```
294
+ # (nothing!)
295
+ ```
296
+
297
+ ## When not to use Bootsnap
298
+
299
+ *Alternative engines*: Bootsnap is pretty reliant on MRI features, and parts are disabled entirely on alternative ruby
300
+ engines.
301
+
302
+ *Non-local filesystems*: Bootsnap depends on `tmp/cache` (or whatever you set its cache directory
303
+ to) being on a relatively fast filesystem. If you put it on a network mount, bootsnap is very likely
304
+ to slow your application down quite a lot.
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+ require('rake/extensiontask')
3
+ require('bundler/gem_tasks')
4
+
5
+ gemspec = Gem::Specification.load('bootsnap.gemspec')
6
+ Rake::ExtensionTask.new do |ext|
7
+ ext.name = 'bootsnap'
8
+ ext.ext_dir = 'ext/bootsnap'
9
+ ext.lib_dir = 'lib/bootsnap'
10
+ ext.gem_spec = gemspec
11
+ end
12
+
13
+ task(default: :compile)
data/bin/ci ADDED
@@ -0,0 +1,10 @@
1
+ #!/bin/bash
2
+
3
+ set -euxo pipefail
4
+
5
+ if [[ "${MINIMAL_SUPPORT-0}" -eq 1 ]]; then
6
+ exec bin/test-minimal-support
7
+ else
8
+ rake
9
+ exec bin/testunit
10
+ fi
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require("bundler/setup")
5
+ require("bootsnap")
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require("irb")
15
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+
3
+ set -euxo pipefail
4
+
5
+ cd test/minimal_support
6
+ bundle
7
+ BOOTSNAP_CACHE_DIR=/tmp bundle exec ruby -w -I ../../lib bootsnap_setup.rb
@@ -0,0 +1,8 @@
1
+ #!/bin/bash
2
+
3
+ if [[ $# -eq 0 ]]; then
4
+ exec ruby -I"test" -w -e 'Dir.glob("./test/**/*_test.rb").each { |f| require f }' -- "$@"
5
+ else
6
+ path=$1
7
+ exec ruby -I"test" -w -e "require '${path#test/}'" -- "$@"
8
+ fi
@@ -0,0 +1,46 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require('bootsnap/version')
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = "bootsnap"
9
+ spec.version = Bootsnap::VERSION
10
+ spec.authors = ["Burke Libbey"]
11
+ spec.email = ["burke.libbey@shopify.com"]
12
+
13
+ spec.license = "MIT"
14
+
15
+ spec.summary = "Boot large ruby/rails apps faster"
16
+ spec.description = spec.summary
17
+ spec.homepage = "https://github.com/Shopify/bootsnap"
18
+
19
+ spec.metadata = {
20
+ 'bug_tracker_uri' => 'https://github.com/Shopify/bootsnap/issues',
21
+ 'changelog_uri' => 'https://github.com/Shopify/bootsnap/blob/master/CHANGELOG.md',
22
+ 'source_code_uri' => 'https://github.com/Shopify/bootsnap',
23
+ }
24
+
25
+ spec.files = %x(git ls-files -z).split("\x0").reject do |f|
26
+ f.match(%r{^(test|spec|features)/})
27
+ end
28
+ spec.require_paths = %w(lib)
29
+
30
+ spec.required_ruby_version = '>= 2.3.0'
31
+
32
+ if RUBY_PLATFORM =~ /java/
33
+ spec.platform = 'java'
34
+ else
35
+ spec.platform = Gem::Platform::RUBY
36
+ spec.extensions = ['ext/bootsnap/extconf.rb']
37
+ end
38
+
39
+ spec.add_development_dependency("bundler")
40
+ spec.add_development_dependency('rake', '~> 10.0')
41
+ spec.add_development_dependency('rake-compiler', '~> 0')
42
+ spec.add_development_dependency("minitest", "~> 5.0")
43
+ spec.add_development_dependency("mocha", "~> 1.2")
44
+
45
+ spec.add_runtime_dependency("msgpack", "~> 1.0")
46
+ end
data/dev.yml ADDED
@@ -0,0 +1,10 @@
1
+ env:
2
+ BOOTSNAP_PEDANTIC: '1'
3
+
4
+ up:
5
+ - ruby: 2.6.0
6
+ - bundler
7
+ commands:
8
+ build: rake compile
9
+ test: 'rake compile && exec bin/testunit'
10
+ style: 'exec rubocop -D'
@@ -0,0 +1,829 @@
1
+ /*
2
+ * Suggested reading order:
3
+ * 1. Skim Init_bootsnap
4
+ * 2. Skim bs_fetch
5
+ * 3. The rest of everything
6
+ *
7
+ * Init_bootsnap sets up the ruby objects and binds bs_fetch to
8
+ * Bootsnap::CompileCache::Native.fetch.
9
+ *
10
+ * bs_fetch is the ultimate caller for just about every other function in
11
+ * here.
12
+ */
13
+
14
+ #include "bootsnap.h"
15
+ #include "ruby.h"
16
+ #include <stdint.h>
17
+ #include <sys/types.h>
18
+ #include <errno.h>
19
+ #include <fcntl.h>
20
+ #include <sys/stat.h>
21
+ #ifndef _WIN32
22
+ #include <sys/utsname.h>
23
+ #endif
24
+ #ifdef __GLIBC__
25
+ #include <gnu/libc-version.h>
26
+ #endif
27
+
28
+ /* 1000 is an arbitrary limit; FNV64 plus some slashes brings the cap down to
29
+ * 981 for the cache dir */
30
+ #define MAX_CACHEPATH_SIZE 1000
31
+ #define MAX_CACHEDIR_SIZE 981
32
+
33
+ #define KEY_SIZE 64
34
+
35
/*
 * Header written as the first KEY_SIZE (64) bytes of each cache file.
 *
 * The first six members form the actual cache "key":
 *   - size and mtime detect changes to the source file's contents;
 *   - version, ruby_platform, compile_option and ruby_revision detect changes
 *     to the environment that could invalidate a compile result even though
 *     the source file itself did not change.
 *
 * data_size is not part of the key proper; it is header information giving
 * the number of bytes that follow the header in the cache file (the size of
 * the cached artifact).
 *
 * The trailing pad member rounds the packed struct up to exactly 64 bytes.
 */
struct bs_cache_key {
  uint32_t version;        /* schema version of the cache format */
  uint32_t ruby_platform;  /* platform hash */
  uint32_t compile_option; /* compile_option checksum */
  uint32_t ruby_revision;  /* ruby revision (or a hash of it) */
  uint64_t size;           /* size of the source file */
  uint64_t mtime;          /* modification time of the source file */
  uint64_t data_size;      /* not used for equality */
  uint8_t  pad[24];        /* 4*4 + 3*8 + 24 == 64 */
} __attribute__((packed));
59
+
60
+ /*
61
+ * If the struct padding isn't correct to pad the key to 64 bytes, refuse to
62
+ * compile.
63
+ */
64
+ #define STATIC_ASSERT(X) STATIC_ASSERT2(X,__LINE__)
65
+ #define STATIC_ASSERT2(X,L) STATIC_ASSERT3(X,L)
66
+ #define STATIC_ASSERT3(X,L) STATIC_ASSERT_MSG(X,at_line_##L)
67
+ #define STATIC_ASSERT_MSG(COND,MSG) typedef char static_assertion_##MSG[(!!(COND))*2-1]
68
+ STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE);
69
+
70
+ /* Effectively a schema version. Bumping invalidates all previous caches */
71
+ static const uint32_t current_version = 2;
72
+
73
+ /* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a
74
+ * new OS ABI, etc. */
75
+ static uint32_t current_ruby_platform;
76
+ /* Invalidates cache when switching ruby versions */
77
+ static uint32_t current_ruby_revision;
78
+ /* Invalidates cache when RubyVM::InstructionSequence.compile_option changes */
79
+ static uint32_t current_compile_option_crc32 = 0;
80
+ /* Current umask */
81
+ static mode_t current_umask;
82
+
83
+ /* Bootsnap::CompileCache::{Native, Uncompilable} */
84
+ static VALUE rb_mBootsnap;
85
+ static VALUE rb_mBootsnap_CompileCache;
86
+ static VALUE rb_mBootsnap_CompileCache_Native;
87
+ static VALUE rb_eBootsnap_CompileCache_Uncompilable;
88
+ static ID uncompilable;
89
+
90
+ /* Functions exposed as module functions on Bootsnap::CompileCache::Native */
91
+ static VALUE bs_compile_option_crc32_set(VALUE self, VALUE crc32_v);
92
+ static VALUE bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler);
93
+
94
+ /* Helpers */
95
+ static uint64_t fnv1a_64(const char *str);
96
+ static void bs_cache_path(const char * cachedir, const char * path, char (* cache_path)[MAX_CACHEPATH_SIZE]);
97
+ static int bs_read_key(int fd, struct bs_cache_key * key);
98
+ static int cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2);
99
+ static VALUE bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler);
100
+ static int open_current_file(char * path, struct bs_cache_key * key, const char ** errno_provenance);
101
+ static int fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE * output_data, int * exception_tag, const char ** errno_provenance);
102
+ static uint32_t get_ruby_revision(void);
103
+ static uint32_t get_ruby_platform(void);
104
+
105
+ /*
106
+ * Helper functions to call ruby methods on handler object without crashing on
107
+ * exception.
108
+ */
109
+ static int bs_storage_to_output(VALUE handler, VALUE storage_data, VALUE * output_data);
110
+ static VALUE prot_storage_to_output(VALUE arg);
111
+ static VALUE prot_input_to_output(VALUE arg);
112
+ static void bs_input_to_output(VALUE handler, VALUE input_data, VALUE * output_data, int * exception_tag);
113
+ static VALUE prot_input_to_storage(VALUE arg);
114
+ static int bs_input_to_storage(VALUE handler, VALUE input_data, VALUE pathval, VALUE * storage_data);
115
+ struct s2o_data;
116
+ struct i2o_data;
117
+ struct i2s_data;
118
+
119
+ /* https://bugs.ruby-lang.org/issues/13667 */
120
+ extern VALUE rb_get_coverages(void);
121
+ static VALUE
122
+ bs_rb_coverage_running(VALUE self)
123
+ {
124
+ VALUE cov = rb_get_coverages();
125
+ return RTEST(cov) ? Qtrue : Qfalse;
126
+ }
127
+
128
+ /*
129
+ * Ruby C extensions are initialized by calling Init_<extname>.
130
+ *
131
+ * This sets up the module hierarchy and attaches functions as methods.
132
+ *
133
+ * We also populate some semi-static information about the current OS and so on.
134
+ */
135
+ void
136
+ Init_bootsnap(void)
137
+ {
138
+ rb_mBootsnap = rb_define_module("Bootsnap");
139
+ rb_mBootsnap_CompileCache = rb_define_module_under(rb_mBootsnap, "CompileCache");
140
+ rb_mBootsnap_CompileCache_Native = rb_define_module_under(rb_mBootsnap_CompileCache, "Native");
141
+ rb_eBootsnap_CompileCache_Uncompilable = rb_define_class_under(rb_mBootsnap_CompileCache, "Uncompilable", rb_eStandardError);
142
+
143
+ current_ruby_revision = get_ruby_revision();
144
+ current_ruby_platform = get_ruby_platform();
145
+
146
+ uncompilable = rb_intern("__bootsnap_uncompilable__");
147
+
148
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "coverage_running?", bs_rb_coverage_running, 0);
149
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "fetch", bs_rb_fetch, 3);
150
+ rb_define_module_function(rb_mBootsnap_CompileCache_Native, "compile_option_crc32=", bs_compile_option_crc32_set, 1);
151
+
152
+ current_umask = umask(0777);
153
+ umask(current_umask);
154
+ }
155
+
156
+ /*
157
+ * Bootsnap's ruby code registers a hook that notifies us via this function
158
+ * when compile_option changes. These changes invalidate all existing caches.
159
+ *
160
+ * Note that on 32-bit platforms, a CRC32 can't be represented in a Fixnum, but
161
+ * can be represented by a uint.
162
+ */
163
+ static VALUE
164
+ bs_compile_option_crc32_set(VALUE self, VALUE crc32_v)
165
+ {
166
+ if (!RB_TYPE_P(crc32_v, T_BIGNUM) && !RB_TYPE_P(crc32_v, T_FIXNUM)) {
167
+ Check_Type(crc32_v, T_FIXNUM);
168
+ }
169
+ current_compile_option_crc32 = NUM2UINT(crc32_v);
170
+ return Qnil;
171
+ }
172
+
173
/*
 * We use FNV1a-64 to derive cache paths. The choice is somewhat arbitrary but
 * it has several nice properties:
 *
 *   - Tiny implementation
 *   - No external dependency
 *   - Solid performance
 *   - Solid randomness
 *   - 32 bits doesn't feel collision-resistant enough; 64 is nice.
 *
 * fnv1a_64_iter folds the bytes of the NUL-terminated string `str` into the
 * running hash state `h` and returns the new state. Feeding the result back
 * in as `h` continues the hash, so hashing "foo" and then "bar" gives the
 * same value as hashing "foobar" in one call. An empty string returns `h`
 * unchanged.
 */
static uint64_t
fnv1a_64_iter(uint64_t h, const char *str)
{
  /* 2^40 + 2^8 + 0xb3; equal to the shift-add form
   * h + (h<<1) + (h<<4) + (h<<5) + (h<<7) + (h<<8) + (h<<40). */
  const uint64_t fnv_prime = 0x100000001b3ULL;
  /* Read bytes as unsigned so values >= 0x80 are not sign-extended. */
  const unsigned char *s = (const unsigned char *)str;

  while (*s) {
    h ^= (uint64_t)*s++;
    h *= fnv_prime;
  }

  return h;
}
195
+
196
/*
 * Hash a NUL-terminated string with FNV1a-64, starting from the initial
 * state 0xcbf29ce484222325.
 */
static uint64_t
fnv1a_64(const char *str)
{
  const uint64_t initial_state = (uint64_t)0xcbf29ce484222325ULL;

  return fnv1a_64_iter(initial_state, str);
}
202
+
203
+ /*
204
+ * Ruby's revision may be Integer or String. CRuby 2.7 or later uses
205
+ * Git commit ID as revision. It's String.
206
+ */
207
+ static uint32_t
208
+ get_ruby_revision(void)
209
+ {
210
+ VALUE ruby_revision;
211
+
212
+ ruby_revision = rb_const_get(rb_cObject, rb_intern("RUBY_REVISION"));
213
+ if (RB_TYPE_P(ruby_revision, RUBY_T_FIXNUM)) {
214
+ return FIX2INT(ruby_revision);
215
+ } else {
216
+ uint64_t hash;
217
+
218
+ hash = fnv1a_64(StringValueCStr(ruby_revision));
219
+ return (uint32_t)(hash >> 32);
220
+ }
221
+ }
222
+
223
+ /*
224
+ * When ruby's version doesn't change, but it's recompiled on a different OS
225
+ * (or OS version), we need to invalidate the cache.
226
+ *
227
+ * We actually factor in some extra information here, to be extra confident
228
+ * that we don't try to re-use caches that will not be compatible, by factoring
229
+ * in utsname.version.
230
+ */
231
+ static uint32_t
232
+ get_ruby_platform(void)
233
+ {
234
+ uint64_t hash;
235
+ VALUE ruby_platform;
236
+
237
+ ruby_platform = rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM"));
238
+ hash = fnv1a_64(RSTRING_PTR(ruby_platform));
239
+
240
+ #ifdef _WIN32
241
+ return (uint32_t)(hash >> 32) ^ (uint32_t)GetVersion();
242
+ #elif defined(__GLIBC__)
243
+ hash = fnv1a_64_iter(hash, gnu_get_libc_version());
244
+ return (uint32_t)(hash >> 32);
245
+ #else
246
+ struct utsname utsname;
247
+
248
+ /* Not worth crashing if this fails; lose extra cache invalidation potential */
249
+ if (uname(&utsname) >= 0) {
250
+ hash = fnv1a_64_iter(hash, utsname.version);
251
+ }
252
+
253
+ return (uint32_t)(hash >> 32);
254
+ #endif
255
+ }
256
+
257
+ /*
258
+ * Given a cache root directory and the full path to a file being cached,
259
+ * generate a path under the cache directory at which the cached artifact will
260
+ * be stored.
261
+ *
262
+ * The path will look something like: <cachedir>/12/34567890abcdef
263
+ */
264
+ static void
265
+ bs_cache_path(const char * cachedir, const char * path, char (* cache_path)[MAX_CACHEPATH_SIZE])
266
+ {
267
+ uint64_t hash = fnv1a_64(path);
268
+
269
+ uint8_t first_byte = (hash >> (64 - 8));
270
+ uint64_t remainder = hash & 0x00ffffffffffffff;
271
+
272
+ sprintf(*cache_path, "%s/%02x/%014llx", cachedir, first_byte, remainder);
273
+ }
274
+
275
+ /*
276
+ * Test whether a newly-generated cache key based on the file as it exists on
277
+ * disk matches the one that was generated when the file was cached (or really
278
+ * compare any two keys).
279
+ *
280
+ * The data_size member is not compared, as it serves more of a "header"
281
+ * function.
282
+ */
283
+ static int
284
+ cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2)
285
+ {
286
+ return (
287
+ k1->version == k2->version &&
288
+ k1->ruby_platform == k2->ruby_platform &&
289
+ k1->compile_option == k2->compile_option &&
290
+ k1->ruby_revision == k2->ruby_revision &&
291
+ k1->size == k2->size &&
292
+ k1->mtime == k2->mtime
293
+ );
294
+ }
295
+
296
+ /*
297
+ * Entrypoint for Bootsnap::CompileCache::Native.fetch. The real work is done
298
+ * in bs_fetch; this function just performs some basic typechecks and
299
+ * conversions on the ruby VALUE arguments before passing them along.
300
+ */
301
+ static VALUE
302
+ bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler)
303
+ {
304
+ FilePathValue(path_v);
305
+
306
+ Check_Type(cachedir_v, T_STRING);
307
+ Check_Type(path_v, T_STRING);
308
+
309
+ if (RSTRING_LEN(cachedir_v) > MAX_CACHEDIR_SIZE) {
310
+ rb_raise(rb_eArgError, "cachedir too long");
311
+ }
312
+
313
+ char * cachedir = RSTRING_PTR(cachedir_v);
314
+ char * path = RSTRING_PTR(path_v);
315
+ char cache_path[MAX_CACHEPATH_SIZE];
316
+
317
+ /* generate cache path to cache_path */
318
+ bs_cache_path(cachedir, path, &cache_path);
319
+
320
+ return bs_fetch(path, path_v, cache_path, handler);
321
+ }
322
+
323
+ /*
324
+ * Open the file we want to load/cache and generate a cache key for it if it
325
+ * was loaded.
326
+ */
327
+ static int
328
+ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_provenance)
329
+ {
330
+ struct stat statbuf;
331
+ int fd;
332
+
333
+ fd = open(path, O_RDONLY);
334
+ if (fd < 0) {
335
+ *errno_provenance = "bs_fetch:open_current_file:open";
336
+ return fd;
337
+ }
338
+ #ifdef _WIN32
339
+ setmode(fd, O_BINARY);
340
+ #endif
341
+
342
+ if (fstat(fd, &statbuf) < 0) {
343
+ *errno_provenance = "bs_fetch:open_current_file:fstat";
344
+ close(fd);
345
+ return -1;
346
+ }
347
+
348
+ key->version = current_version;
349
+ key->ruby_platform = current_ruby_platform;
350
+ key->compile_option = current_compile_option_crc32;
351
+ key->ruby_revision = current_ruby_revision;
352
+ key->size = (uint64_t)statbuf.st_size;
353
+ key->mtime = (uint64_t)statbuf.st_mtime;
354
+
355
+ return fd;
356
+ }
357
+
358
+ #define ERROR_WITH_ERRNO -1
359
+ #define CACHE_MISSING_OR_INVALID -2
360
+
361
+ /*
362
+ * Read the cache key from the given fd, which must have position 0 (e.g.
363
+ * freshly opened file).
364
+ *
365
+ * Possible return values:
366
+ * - 0 (OK, key was loaded)
367
+ * - CACHE_MISSING_OR_INVALID (-2)
368
+ * - ERROR_WITH_ERRNO (-1, errno is set)
369
+ */
370
+ static int
371
+ bs_read_key(int fd, struct bs_cache_key * key)
372
+ {
373
+ ssize_t nread = read(fd, key, KEY_SIZE);
374
+ if (nread < 0) return ERROR_WITH_ERRNO;
375
+ if (nread < KEY_SIZE) return CACHE_MISSING_OR_INVALID;
376
+ return 0;
377
+ }
378
+
379
+ /*
380
+ * Open the cache file at a given path, if it exists, and read its key into the
381
+ * struct.
382
+ *
383
+ * Possible return values:
384
+ * - 0 (OK, key was loaded)
385
+ * - CACHE_MISSING_OR_INVALID (-2)
386
+ * - ERROR_WITH_ERRNO (-1, errno is set)
387
+ */
388
+ static int
389
+ open_cache_file(const char * path, struct bs_cache_key * key, const char ** errno_provenance)
390
+ {
391
+ int fd, res;
392
+
393
+ fd = open(path, O_RDONLY);
394
+ if (fd < 0) {
395
+ *errno_provenance = "bs_fetch:open_cache_file:open";
396
+ if (errno == ENOENT) return CACHE_MISSING_OR_INVALID;
397
+ return ERROR_WITH_ERRNO;
398
+ }
399
+ #ifdef _WIN32
400
+ setmode(fd, O_BINARY);
401
+ #endif
402
+
403
+ res = bs_read_key(fd, key);
404
+ if (res < 0) {
405
+ *errno_provenance = "bs_fetch:open_cache_file:read";
406
+ close(fd);
407
+ return res;
408
+ }
409
+
410
+ return fd;
411
+ }
412
+
413
+ /*
414
+ * The cache file is laid out like:
415
+ * 0...64 : bs_cache_key
416
+ * 64..-1 : cached artifact
417
+ *
418
+ * This function takes a file descriptor whose position is pre-set to 64, and
419
+ * the data_size (corresponding to the remaining number of bytes) listed in the
420
+ * cache header.
421
+ *
422
+ * We load the text from this file into a buffer, and pass it to the ruby-land
423
+ * handler with exception handling via the exception_tag param.
424
+ *
425
+ * Data is returned via the output_data parameter, which, if there's no error
426
+ * or exception, will be the final data returnable to the user.
427
+ */
428
+ static int
429
+ fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE * output_data, int * exception_tag, const char ** errno_provenance)
430
+ {
431
+ char * data = NULL;
432
+ ssize_t nread;
433
+ int ret;
434
+
435
+ VALUE storage_data;
436
+
437
+ if (data_size > 100000000000) {
438
+ *errno_provenance = "bs_fetch:fetch_cached_data:datasize";
439
+ errno = EINVAL; /* because wtf? */
440
+ ret = -1;
441
+ goto done;
442
+ }
443
+ data = ALLOC_N(char, data_size);
444
+ nread = read(fd, data, data_size);
445
+ if (nread < 0) {
446
+ *errno_provenance = "bs_fetch:fetch_cached_data:read";
447
+ ret = -1;
448
+ goto done;
449
+ }
450
+ if (nread != data_size) {
451
+ ret = CACHE_MISSING_OR_INVALID;
452
+ goto done;
453
+ }
454
+
455
+ storage_data = rb_str_new_static(data, data_size);
456
+
457
+ *exception_tag = bs_storage_to_output(handler, storage_data, output_data);
458
+ ret = 0;
459
+ done:
460
+ if (data != NULL) xfree(data);
461
+ return ret;
462
+ }
463
+
464
/*
 * Like mkdir -p for the parents of a file: every directory prefix of
 * file_path is created in turn, while the final component is left alone.
 * E.g. given /a/b/c, creates /a and /a/b.
 *
 * file_path is edited in place while walking (each '/' is briefly replaced
 * by a NUL) and is restored before returning. A prefix that already exists
 * (EEXIST) is not an error.
 *
 * Returns 0 on success, or -1 with errno set by the failing mkdir().
 */
static int
mkpath(char * file_path, mode_t mode)
{
  /* Walking forward from the first component is simpler than searching
   * backwards for the deepest existing directory, and the number of
   * components is small. */
  char * sep = strchr(file_path + 1, '/');

  while (sep != NULL) {
    int made;
    int failed;

    *sep = '\0';
#ifdef _WIN32
    made = mkdir(file_path);
#else
    made = mkdir(file_path, mode);
#endif
    failed = (made == -1 && errno != EEXIST);
    *sep = '/';

    if (failed) {
      return -1;
    }
    sep = strchr(sep + 1, '/');
  }
  return 0;
}
491
+
492
+ /*
493
+ * Write a cache header/key and a compiled artifact to a given cache path by
494
+ * writing to a tmpfile and then renaming the tmpfile over top of the final
495
+ * path.
496
+ */
497
+ static int
498
+ atomic_write_cache_file(char * path, struct bs_cache_key * key, VALUE data, const char ** errno_provenance)
499
+ {
500
+ char template[MAX_CACHEPATH_SIZE + 20];
501
+ char * tmp_path;
502
+ int fd, ret;
503
+ ssize_t nwrite;
504
+
505
+ tmp_path = strncpy(template, path, MAX_CACHEPATH_SIZE);
506
+ strcat(tmp_path, ".tmp.XXXXXX");
507
+
508
+ // mkstemp modifies the template to be the actual created path
509
+ fd = mkstemp(tmp_path);
510
+ if (fd < 0) {
511
+ if (mkpath(tmp_path, 0775) < 0) {
512
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:mkpath";
513
+ return -1;
514
+ }
515
+ fd = open(tmp_path, O_WRONLY | O_CREAT, 0664);
516
+ if (fd < 0) {
517
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:open";
518
+ return -1;
519
+ }
520
+ }
521
+ #ifdef _WIN32
522
+ setmode(fd, O_BINARY);
523
+ #endif
524
+
525
+ key->data_size = RSTRING_LEN(data);
526
+ nwrite = write(fd, key, KEY_SIZE);
527
+ if (nwrite < 0) {
528
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:write";
529
+ return -1;
530
+ }
531
+ if (nwrite != KEY_SIZE) {
532
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:keysize";
533
+ errno = EIO; /* Lies but whatever */
534
+ return -1;
535
+ }
536
+
537
+ nwrite = write(fd, RSTRING_PTR(data), RSTRING_LEN(data));
538
+ if (nwrite < 0) return -1;
539
+ if (nwrite != RSTRING_LEN(data)) {
540
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:writelength";
541
+ errno = EIO; /* Lies but whatever */
542
+ return -1;
543
+ }
544
+
545
+ close(fd);
546
+ ret = rename(tmp_path, path);
547
+ if (ret < 0) {
548
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:rename";
549
+ return -1;
550
+ }
551
+ ret = chmod(path, 0664 & ~current_umask);
552
+ if (ret < 0) {
553
+ *errno_provenance = "bs_fetch:atomic_write_cache_file:chmod";
554
+ }
555
+ return ret;
556
+ }
557
+
558
+
559
+ /* Read contents from an fd, whose contents are asserted to be +size+ bytes
560
+ * long, into a buffer */
561
+ static ssize_t
562
+ bs_read_contents(int fd, size_t size, char ** contents, const char ** errno_provenance)
563
+ {
564
+ ssize_t nread;
565
+ *contents = ALLOC_N(char, size);
566
+ nread = read(fd, *contents, size);
567
+ if (nread < 0) {
568
+ *errno_provenance = "bs_fetch:bs_read_contents:read";
569
+ }
570
+ return nread;
571
+ }
572
+
573
+ /*
574
+ * This is the meat of the extension. bs_fetch is
575
+ * Bootsnap::CompileCache::Native.fetch.
576
+ *
577
+ * There are three "formats" in use here:
578
+ * 1. "input" format, which is what we load from the source file;
579
+ * 2. "storage" format, which we write to the cache;
580
+ * 3. "output" format, which is what we return.
581
+ *
582
+ * E.g., For ISeq compilation:
583
+ * input: ruby source, as text
584
+ * storage: binary string (RubyVM::InstructionSequence#to_binary)
585
+ * output: Instance of RubyVM::InstructionSequence
586
+ *
587
+ * And for YAML:
588
+ * input: yaml as text
589
+ * storage: MessagePack or Marshal text
590
+ * output: ruby object, loaded from yaml/messagepack/marshal
591
+ *
592
+ * A handler<I,S,O> passed in must support three messages:
593
+ * * storage_to_output(S) -> O
594
+ * * input_to_output(I) -> O
595
+ * * input_to_storage(I) -> S
596
+ * (input_to_storage may raise Bootsnap::CompileCache::Uncompilable, which
597
+ * will prevent caching and cause output to be generated with
598
+ * input_to_output)
599
+ *
600
+ * The semantics of this function are basically:
601
+ *
602
+ * return storage_to_output(cache[path]) if cache[path]
603
+ * storage = input_to_storage(input)
604
+ * cache[path] = storage
605
+ * return storage_to_output(storage)
606
+ *
607
+ * Or expanded a bit:
608
+ *
609
+ * - Check if the cache file exists and is up to date.
610
+ * - If it is, load this data to storage_data.
611
+ * - return storage_to_output(storage_data)
612
+ * - Read the file to input_data
613
+ * - Generate storage_data using input_to_storage(input_data)
614
+ * - Write storage_data data, with a cache key, to the cache file.
615
+ * - Return storage_to_output(storage_data)
616
+ */
617
+ static VALUE
618
+ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler)
619
+ {
620
+ struct bs_cache_key cached_key, current_key;
621
+ char * contents = NULL;
622
+ int cache_fd = -1, current_fd = -1;
623
+ int res, valid_cache = 0, exception_tag = 0;
624
+ const char * errno_provenance = NULL;
625
+
626
+ VALUE input_data; /* data read from source file, e.g. YAML or ruby source */
627
+ VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */
628
+ VALUE output_data; /* return data, e.g. ruby hash or loaded iseq */
629
+
630
+ VALUE exception; /* ruby exception object to raise instead of returning */
631
+
632
+ /* Open the source file and generate a cache key for it */
633
+ current_fd = open_current_file(path, &current_key, &errno_provenance);
634
+ if (current_fd < 0) goto fail_errno;
635
+
636
+ /* Open the cache key if it exists, and read its cache key in */
637
+ cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance);
638
+ if (cache_fd == CACHE_MISSING_OR_INVALID) {
639
+ /* This is ok: valid_cache remains false, we re-populate it. */
640
+ } else if (cache_fd < 0) {
641
+ goto fail_errno;
642
+ } else {
643
+ /* True if the cache existed and no invalidating changes have occurred since
644
+ * it was generated. */
645
+ valid_cache = cache_key_equal(&current_key, &cached_key);
646
+ }
647
+
648
+ if (valid_cache) {
649
+ /* Fetch the cache data and return it if we're able to load it successfully */
650
+ res = fetch_cached_data(
651
+ cache_fd, (ssize_t)cached_key.data_size, handler,
652
+ &output_data, &exception_tag, &errno_provenance
653
+ );
654
+ if (exception_tag != 0) goto raise;
655
+ else if (res == CACHE_MISSING_OR_INVALID) valid_cache = 0;
656
+ else if (res == ERROR_WITH_ERRNO) goto fail_errno;
657
+ else if (!NIL_P(output_data)) goto succeed; /* fast-path, goal */
658
+ }
659
+ close(cache_fd);
660
+ cache_fd = -1;
661
+ /* Cache is stale, invalid, or missing. Regenerate and write it out. */
662
+
663
+ /* Read the contents of the source file into a buffer */
664
+ if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail_errno;
665
+ input_data = rb_str_new_static(contents, current_key.size);
666
+
667
+ /* Try to compile the input_data using input_to_storage(input_data) */
668
+ exception_tag = bs_input_to_storage(handler, input_data, path_v, &storage_data);
669
+ if (exception_tag != 0) goto raise;
670
+ /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
671
+ * to cache anything; just return input_to_output(input_data) */
672
+ if (storage_data == uncompilable) {
673
+ bs_input_to_output(handler, input_data, &output_data, &exception_tag);
674
+ if (exception_tag != 0) goto raise;
675
+ goto succeed;
676
+ }
677
+ /* If storage_data isn't a string, we can't cache it */
678
+ if (!RB_TYPE_P(storage_data, T_STRING)) goto invalid_type_storage_data;
679
+
680
+ /* Write the cache key and storage_data to the cache directory */
681
+ res = atomic_write_cache_file(cache_path, &current_key, storage_data, &errno_provenance);
682
+ if (res < 0) goto fail_errno;
683
+
684
+ /* Having written the cache, now convert storage_data to output_data */
685
+ exception_tag = bs_storage_to_output(handler, storage_data, &output_data);
686
+ if (exception_tag != 0) goto raise;
687
+
688
+ /* If output_data is nil, delete the cache entry and generate the output
689
+ * using input_to_output */
690
+ if (NIL_P(output_data)) {
691
+ if (unlink(cache_path) < 0) {
692
+ errno_provenance = "bs_fetch:unlink";
693
+ goto fail_errno;
694
+ }
695
+ bs_input_to_output(handler, input_data, &output_data, &exception_tag);
696
+ if (exception_tag != 0) goto raise;
697
+ }
698
+
699
+ goto succeed; /* output_data is now the correct return. */
700
+
701
+ #define CLEANUP \
702
+ if (contents != NULL) xfree(contents); \
703
+ if (current_fd >= 0) close(current_fd); \
704
+ if (cache_fd >= 0) close(cache_fd);
705
+
706
+ succeed:
707
+ CLEANUP;
708
+ return output_data;
709
+ fail_errno:
710
+ CLEANUP;
711
+ exception = rb_syserr_new(errno, errno_provenance);
712
+ rb_exc_raise(exception);
713
+ __builtin_unreachable();
714
+ raise:
715
+ CLEANUP;
716
+ rb_jump_tag(exception_tag);
717
+ __builtin_unreachable();
718
+ invalid_type_storage_data:
719
+ CLEANUP;
720
+ Check_Type(storage_data, T_STRING);
721
+ __builtin_unreachable();
722
+
723
+ #undef CLEANUP
724
+ }
725
+
726
+ /*****************************************************************************/
727
+ /********************* Handler Wrappers **************************************/
728
+ /*****************************************************************************
729
+ * Everything after this point in the file is just wrappers to deal with ruby's
730
+ * clunky method of handling exceptions from ruby methods invoked from C:
731
+ *
732
+ * In order to call a ruby method from C, while protecting against crashing in
733
+ * the event of an exception, we must call the method with rb_protect().
734
+ *
735
+ * rb_protect takes a C function and precisely one argument; however, we want
736
+ * to pass multiple arguments, so we must create structs to wrap them up.
737
+ *
738
+ * These functions return an exception_tag, which, if non-zero, indicates an
739
+ * exception that should be jumped to with rb_jump_tag after cleaning up
740
+ * allocated resources.
741
+ */
742
+
743
+ struct s2o_data {
744
+ VALUE handler;
745
+ VALUE storage_data;
746
+ };
747
+
748
+ struct i2o_data {
749
+ VALUE handler;
750
+ VALUE input_data;
751
+ };
752
+
753
+ struct i2s_data {
754
+ VALUE handler;
755
+ VALUE input_data;
756
+ VALUE pathval;
757
+ };
758
+
759
+ static VALUE
760
+ prot_storage_to_output(VALUE arg)
761
+ {
762
+ struct s2o_data * data = (struct s2o_data *)arg;
763
+ return rb_funcall(data->handler, rb_intern("storage_to_output"), 1, data->storage_data);
764
+ }
765
+
766
+ static int
767
+ bs_storage_to_output(VALUE handler, VALUE storage_data, VALUE * output_data)
768
+ {
769
+ int state;
770
+ struct s2o_data s2o_data = {
771
+ .handler = handler,
772
+ .storage_data = storage_data,
773
+ };
774
+ *output_data = rb_protect(prot_storage_to_output, (VALUE)&s2o_data, &state);
775
+ return state;
776
+ }
777
+
778
+ static void
779
+ bs_input_to_output(VALUE handler, VALUE input_data, VALUE * output_data, int * exception_tag)
780
+ {
781
+ struct i2o_data i2o_data = {
782
+ .handler = handler,
783
+ .input_data = input_data,
784
+ };
785
+ *output_data = rb_protect(prot_input_to_output, (VALUE)&i2o_data, exception_tag);
786
+ }
787
+
788
+ static VALUE
789
+ prot_input_to_output(VALUE arg)
790
+ {
791
+ struct i2o_data * data = (struct i2o_data *)arg;
792
+ return rb_funcall(data->handler, rb_intern("input_to_output"), 1, data->input_data);
793
+ }
794
+
795
+ static VALUE
796
+ try_input_to_storage(VALUE arg)
797
+ {
798
+ struct i2s_data * data = (struct i2s_data *)arg;
799
+ return rb_funcall(data->handler, rb_intern("input_to_storage"), 2, data->input_data, data->pathval);
800
+ }
801
+
802
+ static VALUE
803
+ rescue_input_to_storage(VALUE arg)
804
+ {
805
+ return uncompilable;
806
+ }
807
+
808
+ static VALUE
809
+ prot_input_to_storage(VALUE arg)
810
+ {
811
+ struct i2s_data * data = (struct i2s_data *)arg;
812
+ return rb_rescue2(
813
+ try_input_to_storage, (VALUE)data,
814
+ rescue_input_to_storage, Qnil,
815
+ rb_eBootsnap_CompileCache_Uncompilable, 0);
816
+ }
817
+
818
+ static int
819
+ bs_input_to_storage(VALUE handler, VALUE input_data, VALUE pathval, VALUE * storage_data)
820
+ {
821
+ int state;
822
+ struct i2s_data i2s_data = {
823
+ .handler = handler,
824
+ .input_data = input_data,
825
+ .pathval = pathval,
826
+ };
827
+ *storage_data = rb_protect(prot_input_to_storage, (VALUE)&i2s_data, &state);
828
+ return state;
829
+ }