rigortype 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +195 -21
  3. data/data/builtins/ruby_core/encoding.yml +210 -0
  4. data/data/builtins/ruby_core/exception.yml +641 -0
  5. data/data/builtins/ruby_core/numeric.yml +3 -2
  6. data/data/builtins/ruby_core/proc.yml +731 -0
  7. data/data/builtins/ruby_core/random.yml +166 -0
  8. data/data/builtins/ruby_core/re.yml +689 -0
  9. data/data/builtins/ruby_core/struct.yml +449 -0
  10. data/lib/rigor/analysis/diagnostic.rb +28 -2
  11. data/lib/rigor/analysis/runner.rb +19 -3
  12. data/lib/rigor/builtins/imported_refinements.rb +6 -1
  13. data/lib/rigor/cache/descriptor.rb +278 -0
  14. data/lib/rigor/cache/rbs_class_ancestor_table.rb +63 -0
  15. data/lib/rigor/cache/rbs_class_type_param_names.rb +60 -0
  16. data/lib/rigor/cache/rbs_constant_table.rb +47 -0
  17. data/lib/rigor/cache/rbs_descriptor.rb +53 -0
  18. data/lib/rigor/cache/rbs_environment.rb +52 -0
  19. data/lib/rigor/cache/rbs_environment_marshal_patch.rb +40 -0
  20. data/lib/rigor/cache/rbs_known_class_names.rb +43 -0
  21. data/lib/rigor/cache/store.rb +325 -0
  22. data/lib/rigor/cli.rb +88 -7
  23. data/lib/rigor/environment/rbs_hierarchy.rb +18 -5
  24. data/lib/rigor/environment/rbs_loader.rb +148 -25
  25. data/lib/rigor/environment.rb +11 -2
  26. data/lib/rigor/flow_contribution.rb +128 -0
  27. data/lib/rigor/inference/builtins/encoding_catalog.rb +67 -0
  28. data/lib/rigor/inference/builtins/exception_catalog.rb +92 -0
  29. data/lib/rigor/inference/builtins/proc_catalog.rb +122 -0
  30. data/lib/rigor/inference/builtins/random_catalog.rb +58 -0
  31. data/lib/rigor/inference/builtins/re_catalog.rb +81 -0
  32. data/lib/rigor/inference/builtins/struct_catalog.rb +55 -0
  33. data/lib/rigor/inference/expression_typer.rb +26 -1
  34. data/lib/rigor/inference/method_dispatcher/constant_folding.rb +16 -1
  35. data/lib/rigor/inference/method_dispatcher/literal_string_folding.rb +87 -0
  36. data/lib/rigor/inference/method_dispatcher.rb +2 -0
  37. data/lib/rigor/inference/narrowing.rb +29 -14
  38. data/lib/rigor/rbs_extended.rb +55 -0
  39. data/lib/rigor/type/combinator.rb +72 -0
  40. data/lib/rigor/type/refined.rb +50 -2
  41. data/lib/rigor/version.rb +1 -1
  42. data/lib/rigor.rb +9 -0
  43. data/sig/rigor.rbs +3 -1
  44. metadata +24 -1
@@ -0,0 +1,325 @@
1
# frozen_string_literal: true

require "digest"
require "fileutils"
require "json"
require "securerandom"

module Rigor
  module Cache
    # Filesystem-backed cache store. Schema, layout, file format,
    # atomicity, and locking are fixed by [ADR-6](../../../docs/adr/6-cache-persistence-backend.md);
    # callers see the [`Rigor::Cache::Descriptor`](descriptor.rb)
    # value object plus this class' `#fetch_or_compute` entry point
    # and nothing else.
    #
    # On-disk layout is `<root>/<producer_id>/<key[0,2]>/<key[2..]>.entry`
    # plus a `schema_version.txt` marker directly under the root. Each
    # entry file is: HEADER, varint-prefixed descriptor bytes,
    # varint-prefixed value bytes, then a 32-byte SHA-256 trailer over
    # everything preceding it.
    #
    # Read failures (missing file, bad magic, format-version mismatch,
    # corrupt SHA-256 trailer, unmarshal-able payload) are silently
    # treated as cache misses; the producer block reruns and the
    # next write replaces the bad entry. The trailing SHA-256 catches
    # accidental corruption (partial writes, FS errors); it is **not**
    # a security boundary, per ADR-2's trusted-gem trust model.
    class Store # rubocop:disable Metrics/ClassLength
      # Header literal: 5-byte ASCII magic, 1-byte separator, 1-byte
      # format version. Bumped on incompatible on-disk format changes
      # (independent of {Descriptor::SCHEMA_VERSION}, which covers
      # the descriptor schema rather than the byte layout).
      HEADER = "RIGOR\x00\x01".b.freeze

      # Producer ids become directory names, so only lowercase,
      # filesystem-safe tokens are accepted.
      VALID_PRODUCER_ID = /\A[a-z][a-z0-9._-]*\z/

      # @param root [#to_s] directory that will hold the cache tree;
      #   created lazily by the first {#fetch_or_compute}.
      def initialize(root:)
        @root = root.to_s.dup.freeze
        @hits = 0
        @misses = 0
        @writes = 0
        @by_producer = Hash.new { |h, k| h[k] = { hits: 0, misses: 0, writes: 0 } }
        # Schema reconciliation touches the filesystem, so do it at
        # most once per instance (see #ensure_schema_version!).
        @schema_version_checked = false
      end

      attr_reader :root

      # Returns a frozen snapshot of this Store's per-run hit / miss /
      # write counters. The bookkeeping is in-memory only — every new
      # `Store.new` starts at zero — so the counters reflect activity
      # against this specific instance rather than the on-disk cache
      # state. Disk-level state is reported separately by
      # {.disk_inventory}.
      #
      # @return [Hash] `{ hits:, misses:, writes:, by_producer: { id => { hits:, misses:, writes: } } }`
      def stats
        per_producer = @by_producer.transform_values { |counts| counts.dup.freeze }.freeze
        { hits: @hits, misses: @misses, writes: @writes, by_producer: per_producer }.freeze
      end

      # Walks the on-disk cache rooted at `root` and reports a
      # producer-level inventory. Used by `rigor check --cache-stats`
      # to surface cache size and per-producer entry counts without
      # depending on in-process counters (which only reflect the
      # current run).
      #
      # @return [Hash] `{ root:, schema_version:, total_entries:,
      #   total_bytes:, producers: [{ id:, entries:, bytes: }, ...] }`.
      #   When the root does not exist or has no schema-version
      #   marker, `schema_version` is nil and the producer list is
      #   empty.
      def self.disk_inventory(root:)
        root_s = root.to_s
        marker = File.join(root_s, "schema_version.txt")
        schema = File.file?(marker) ? File.read(marker).strip : nil

        producers = collect_producers(root_s)
        {
          root: root_s,
          schema_version: schema,
          total_entries: producers.sum { |p| p[:entries] },
          total_bytes: producers.sum { |p| p[:bytes] },
          producers: producers
        }
      end

      # Scans one level of producer directories under +root+ and
      # counts `*.entry` files (recursively) per producer. Producers
      # with no entries are omitted from the listing.
      def self.collect_producers(root)
        return [] unless File.directory?(root)

        Dir.children(root).sort.filter_map do |child|
          subdir = File.join(root, child)
          next nil unless File.directory?(subdir)

          entries = Dir.glob(File.join(subdir, "**", "*.entry"))
          next nil if entries.empty?

          { id: child, entries: entries.size, bytes: entries.sum { |e| File.size(e) } }
        end
      end
      private_class_method :collect_producers

      # @param producer_id [String] stable cache namespace; only
      #   `[a-z][a-z0-9._-]*` is accepted.
      # @param params [Hash] producer inputs; mixed into the cache key
      #   via {Descriptor#cache_key_for}.
      # @param descriptor [Rigor::Cache::Descriptor] the invalidation
      #   descriptor for the value being cached.
      # @param serialize [#call, nil] optional callable that turns the
      #   producer's return value into a binary `String`. Defaults to
      #   `Marshal.dump(value).b`. Producers whose return values are
      #   not `Marshal`-clean (RBS-native objects with `RBS::Location`
      #   members, raw `IO`, …) MUST provide a serialiser. The pair
      #   `(serialize, deserialize)` MUST round-trip — a producer that
      #   reads with one strategy and writes with another corrupts
      #   its own cache slice.
      # @param deserialize [#call, nil] optional callable that turns
      #   bytes back into the producer's value. Defaults to
      #   `Marshal.load`. Any exception (`StandardError`) raised by
      #   the deserialiser is treated as a cache miss — the entry is
      #   considered corrupt, the producer block reruns, and the
      #   next write overwrites it. This is consistent with the
      #   fault-tolerance contract for the default `Marshal.load`
      #   path.
      # @yieldreturn the value to cache.
      # @return the cached value (loaded from disk on hit; produced by
      #   the block on miss).
      def fetch_or_compute(producer_id:, params:, descriptor:,
                           serialize: nil, deserialize: nil, &block)
        validate_producer_id!(producer_id)
        ensure_schema_version!

        key = descriptor.cache_key_for(producer_id: producer_id, params: params)
        path = entry_path(producer_id, key)

        cached = read_entry(path, deserialize: deserialize)
        unless cached.nil?
          record(:hits, producer_id)
          return cached.value
        end

        record(:misses, producer_id)
        value = block.call
        write_entry(path, descriptor, value, serialize: serialize)
        record(:writes, producer_id)
        value
      end

      private

      # Internal read result: the raw descriptor bytes stored
      # alongside the deserialised value.
      Entry = Data.define(:descriptor_bytes, :value)
      private_constant :Entry

      # Bumps both the global and the per-producer counter for one of
      # :hits / :misses / :writes.
      def record(counter, producer_id)
        case counter
        when :hits then @hits += 1
        when :misses then @misses += 1
        when :writes then @writes += 1
        end
        @by_producer[producer_id][counter] += 1
      end

      def validate_producer_id!(producer_id)
        return if producer_id.is_a?(String) && producer_id.match?(VALID_PRODUCER_ID)

        raise ArgumentError,
              "producer_id must match #{VALID_PRODUCER_ID.inspect}, got #{producer_id.inspect}"
      end

      # Fans entries out over 256 two-hex-char subdirectories so no
      # single directory grows unboundedly.
      def entry_path(producer_id, key)
        File.join(@root, producer_id, key[0, 2], "#{key[2..]}.entry")
      end

      # Reads and validates one entry file. Any failure (missing,
      # short, bad magic, bad version, bad checksum, unmarshal-able)
      # returns nil so the caller treats it as a cache miss.
      def read_entry(path, deserialize: nil)
        return nil unless File.file?(path)

        bytes = File.binread(path)
        return nil unless envelope_valid?(bytes)

        body = bytes.byteslice(HEADER.bytesize, bytes.bytesize - HEADER.bytesize - 32)
        descriptor_bytes, value_bytes = parse_body(body)
        return nil if descriptor_bytes.nil?

        value = safe_load(value_bytes, deserialize)
        return nil if value.equal?(LOAD_FAILED)

        Entry.new(descriptor_bytes, value)
      end

      # Validates the magic + format-version header and the trailing
      # SHA-256 over everything before the trailer.
      def envelope_valid?(bytes)
        return false if bytes.bytesize < HEADER.bytesize + 32
        return false unless bytes.byteslice(0, HEADER.bytesize) == HEADER

        trailer = bytes.byteslice(bytes.bytesize - 32, 32)
        Digest::SHA256.digest(bytes.byteslice(0, bytes.bytesize - 32)) == trailer
      end

      # Splits the body into (descriptor_bytes, value_bytes). Returns
      # `[nil, nil]` on a malformed varint or length-overrun.
      def parse_body(body)
        offset = 0
        descriptor_len, offset = read_varint(body, offset)
        return [nil, nil] if descriptor_len.nil? || offset + descriptor_len > body.bytesize

        descriptor_bytes = body.byteslice(offset, descriptor_len)
        offset += descriptor_len

        value_len, offset = read_varint(body, offset)
        return [nil, nil] if value_len.nil? || offset + value_len != body.bytesize

        [descriptor_bytes, body.byteslice(offset, value_len)]
      end

      # Sentinel distinguishing "deserialiser raised" from a
      # legitimately cached nil/false value.
      LOAD_FAILED = Object.new.freeze
      private_constant :LOAD_FAILED

      def safe_load(bytes, deserialize)
        if deserialize
          deserialize.call(bytes)
        else
          # Entries are written by this gem itself (ADR-2 trusted-gem
          # model), so Marshal is acceptable here.
          Marshal.load(bytes) # rubocop:disable Security/MarshalLoad
        end
      rescue StandardError
        LOAD_FAILED
      end

      # Serialises +value+ and assembles the full on-disk envelope
      # (header, varint-prefixed sections, SHA-256 trailer).
      def write_entry(path, descriptor, value, serialize: nil)
        FileUtils.mkdir_p(File.dirname(path))

        descriptor_bytes = descriptor.to_canonical_bytes
        value_bytes = serialize_value(value, serialize)

        body = +"".b
        body << HEADER
        write_varint(body, descriptor_bytes.bytesize)
        body << descriptor_bytes
        write_varint(body, value_bytes.bytesize)
        body << value_bytes
        body << Digest::SHA256.digest(body)

        atomically_replace(path, body)
      end

      def serialize_value(value, serialize)
        return Marshal.dump(value).b if serialize.nil?

        bytes = serialize.call(value)
        unless bytes.is_a?(String)
          raise TypeError,
                "custom serialize must return a String, got #{bytes.class}"
        end

        bytes.b
      end

      # Writes +body+ to a unique temp file next to +path+, fsyncs it,
      # and renames it over +path+ under an exclusive flock so readers
      # never observe a partially-written entry.
      #
      # FIX: the temp file is now unlinked when the write or rename
      # raises, so failed writes no longer leave orphaned `*.tmp.*`
      # files behind in the cache tree.
      def atomically_replace(path, body)
        File.open(path, File::RDWR | File::CREAT, 0o644) do |lock_fd|
          lock_fd.flock(File::LOCK_EX)
          tmp = "#{path}.tmp.#{Process.pid}.#{SecureRandom.hex(4)}"
          begin
            File.open(tmp, "wb") do |f|
              f.write(body)
              f.fsync
            end
            File.rename(tmp, path)
          ensure
            # No-op on success (the rename consumed the temp file).
            File.unlink(tmp) if File.exist?(tmp)
          end
        end
      end

      # Reconciles the on-disk schema marker with the current
      # {Descriptor::SCHEMA_VERSION}: a mismatch wipes the whole cache
      # root before the marker is (re)written. Runs at most once per
      # Store instance — repeated fetches skip the filesystem checks.
      def ensure_schema_version!
        return if @schema_version_checked

        FileUtils.mkdir_p(@root)
        marker = File.join(@root, "schema_version.txt")
        current = Descriptor::SCHEMA_VERSION.to_s
        on_disk = File.file?(marker) ? File.read(marker).strip : nil

        unless on_disk == current
          # A stale (non-nil) marker means the layout changed; drop
          # every existing entry before stamping the new version.
          clear_cache_root! unless on_disk.nil?
          File.write(marker, "#{current}\n")
        end
        @schema_version_checked = true
      end

      # Removes every child of the cache root (entries and marker)
      # while keeping the root directory itself.
      def clear_cache_root!
        Dir.children(@root).each do |entry|
          FileUtils.rm_rf(File.join(@root, entry))
        end
      end

      # LEB128 unsigned varint encoder. Lengths fit easily in
      # five bytes (cap at 2^35); the cache layer never writes a value
      # larger than that in practice.
      def write_varint(bytes, value)
        raise ArgumentError, "varint must be non-negative" if value.negative?

        loop do
          if value < 0x80
            bytes << [value].pack("C")
            return
          end

          bytes << [(value & 0x7F) | 0x80].pack("C")
          value >>= 7
        end
      end

      # LEB128 unsigned varint decoder. Returns `[value, new_offset]`,
      # or `[nil, offset]` on truncation or an over-long encoding.
      def read_varint(bytes, offset)
        result = 0
        shift = 0
        loop do
          return [nil, offset] if offset >= bytes.bytesize

          byte = bytes.getbyte(offset)
          offset += 1
          result |= (byte & 0x7F) << shift
          return [result, offset] if byte < 0x80

          shift += 7
          return [nil, offset] if shift > 35
        end
      end
    end
  end
end
data/lib/rigor/cli.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "fileutils"
3
4
  require "json"
4
5
  require "optionparser"
5
6
  require "yaml"
@@ -65,27 +66,107 @@ module Rigor
65
66
 
66
67
  def run_check
67
68
  require_relative "analysis/runner"
69
+ require_relative "cache/store"
68
70
 
71
+ options = parse_check_options
72
+
73
+ cache_root = ".rigor/cache"
74
+ handle_clear_cache(cache_root) if options.fetch(:clear_cache)
75
+ cache_store = options.fetch(:no_cache) ? nil : Cache::Store.new(root: cache_root)
76
+
77
+ configuration = Configuration.load(options.fetch(:config))
78
+ paths = @argv.empty? ? configuration.paths : @argv
79
+ runner = Analysis::Runner.new(
80
+ configuration: configuration,
81
+ explain: options.fetch(:explain),
82
+ cache_store: cache_store
83
+ )
84
+ result = runner.run(paths)
85
+
86
+ write_result(result, options.fetch(:format))
87
+ write_cache_stats(cache_root, runner.cache_store) if options.fetch(:cache_stats)
88
+ result.success? ? 0 : 1
89
+ end
90
+
91
# Parses the `rigor check` flags out of @argv (destructively, so only
# the positional path arguments remain afterwards).
#
# @return [Hash] values for :config, :format, :explain, :cache_stats,
#   :clear_cache and :no_cache, pre-filled with their defaults.
def parse_check_options
  parsed = {
    config: Configuration::DEFAULT_PATH,
    format: "text",
    explain: false,
    cache_stats: false,
    clear_cache: false,
    no_cache: false
  }
  OptionParser.new do |opts|
    opts.banner = "Usage: rigor check [options] [paths]"
    opts.on("--config=PATH", "Path to the Rigor configuration file") { |value| parsed[:config] = value }
    opts.on("--format=FORMAT", "Output format: text or json") { |value| parsed[:format] = value }
    opts.on("--explain", "Surface fail-soft fallback events as :info diagnostics") { parsed[:explain] = true }
    opts.on("--cache-stats", "Print on-disk cache inventory at end of run") { parsed[:cache_stats] = true }
    opts.on("--clear-cache", "Remove the .rigor/cache directory before running") { parsed[:clear_cache] = true }
    opts.on("--no-cache", "Disable the persistent cache for this run") { parsed[:no_cache] = true }
  end.parse!(@argv)
  parsed
end
82
112
 
83
- configuration = Configuration.load(options.fetch(:config))
84
- paths = @argv.empty? ? configuration.paths : @argv
85
- result = Analysis::Runner.new(configuration: configuration, explain: options.fetch(:explain)).run(paths)
113
# Removes the persistent cache directory if it exists, reporting the
# outcome either way on @out.
def handle_clear_cache(cache_root)
  unless File.directory?(cache_root)
    @out.puts("Cache already empty: #{cache_root}")
    return
  end

  FileUtils.rm_rf(cache_root)
  @out.puts("Cleared cache: #{cache_root}")
end
86
121
 
87
- write_result(result, options.fetch(:format))
88
- result.success? ? 0 : 1
122
# Prints the cache report: root, schema marker, on-disk inventory,
# and — when a store was active this run — in-process counters.
def write_cache_stats(cache_root, runtime_store)
  inventory = Cache::Store.disk_inventory(root: cache_root)

  @out.puts("")
  @out.puts("Cache (root: #{inventory.fetch(:root)})")
  marker = inventory.fetch(:schema_version)
  @out.puts(" schema_version: #{marker.nil? ? 'absent' : marker}")
  write_disk_inventory(inventory)
  write_runtime_stats(runtime_store) if runtime_store
end
132
+
133
# Prints the on-disk inventory section: either "(empty)" or a totals
# line followed by one line per producer.
def write_disk_inventory(inv)
  total = inv.fetch(:total_entries)
  if total.zero?
    @out.puts(" (empty)")
    return
  end

  @out.puts(" #{total} entries, #{format_bytes(inv.fetch(:total_bytes))}")
  inv.fetch(:producers).each do |producer|
    size = format_bytes(producer.fetch(:bytes))
    @out.puts(" #{producer.fetch(:id)}: #{producer.fetch(:entries)} entries, #{size}")
  end
end
145
+
146
# Prints this run's hit/miss/write counters — overall first, then per
# producer — using `plural` for grammatical number.
def write_runtime_stats(store)
  stats = store.stats
  render = lambda do |counts|
    h = counts.fetch(:hits)
    m = counts.fetch(:misses)
    w = counts.fetch(:writes)
    "#{h} #{plural(h, 'hit')}, #{m} #{plural(m, 'miss', 'misses')}, #{w} #{plural(w, 'write')}"
  end

  @out.puts(" this run: #{render.call(stats)}")
  stats.fetch(:by_producer).each do |id, counts|
    @out.puts(" #{id}: #{render.call(counts)}")
  end
end
160
+
161
# Picks the grammatical number for +count+. The plural form defaults
# to the singular with an "s" suffix; irregular plurals ("misses")
# are passed explicitly by the caller.
def plural(count, singular, plural = "#{singular}s")
  if count == 1
    singular
  else
    plural
  end
end
164
+
165
# Formats a byte count for human display: plain bytes below 1 KiB,
# one-decimal KiB below 1 MiB, one-decimal MiB beyond that.
def format_bytes(bytes)
  kib = 1024
  mib = kib * kib
  if bytes < kib
    "#{bytes} B"
  elsif bytes < mib
    format("%.1f KiB", bytes / 1024.0)
  else
    format("%.1f MiB", bytes / (1024.0 * 1024.0))
  end
end
90
171
 
91
172
  def run_init
@@ -45,15 +45,28 @@ module Rigor
45
45
  key = normalize_name(class_name)
46
46
  return @ancestor_names_cache[key] if @ancestor_names_cache.key?(key)
47
47
 
48
- definition = loader.instance_definition(key)
49
48
  @ancestor_names_cache[key] =
50
- if definition
51
- definition.ancestors.ancestors.map { |ancestor| normalize_name(ancestor.name.to_s) }.uniq.freeze
49
+ if loader.cache_store
50
+ ancestor_table.fetch(key, [].freeze)
52
51
  else
53
- [].freeze
52
+ compute_ancestor_names(key)
54
53
  end
54
+ end
55
+
56
# Resolves the flattened RBS ancestor-name chain for +key+ through
# the loader. Unknown classes and any RBS resolution error degrade to
# a frozen empty array instead of raising.
def compute_ancestor_names(key)
  defn = loader.instance_definition(key)
  if defn.nil?
    [].freeze
  else
    defn.ancestors.ancestors.map { |anc| normalize_name(anc.name.to_s) }.uniq.freeze
  end
rescue StandardError
  [].freeze
end
64
+
65
+ def ancestor_table
66
+ @ancestor_table ||= begin
67
+ require_relative "../cache/rbs_class_ancestor_table"
68
+ Cache::RbsClassAncestorTable.fetch(loader: loader, store: loader.cache_store)
69
+ end
57
70
  end
58
71
 
59
72
  def normalize_name(name)