rigortype 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
module Rigor
  module Cache
    # Cache producer that materialises every RBS-declared constant
    # to its translated `Rigor::Type` form and stores the result as
    # a `Hash<String, Rigor::Type>` keyed by canonical constant name.
    # This is the v0.0.8 first cached producer per ADR-6 § 7; it
    # caches a post-translation artefact so the cache value is
    # `Marshal`-clean (RBS-native objects carry `RBS::Location`,
    # which lacks `_dump_data`).
    #
    # Cache descriptor:
    #
    # - `gems`: the `rbs` gem (with the locked version) so a gem
    #   upgrade invalidates the table — bundled core + stdlib
    #   signatures live inside the gem.
    # - `files`: the digest of every `.rbs` file under the loader's
    #   `signature_paths` (project-supplied signatures that the
    #   gem's locked version cannot cover).
    # - `configs`: the SHA-256 of the loader's libraries list so
    #   adding/removing a stdlib library invalidates.
    class RbsConstantTable
      PRODUCER_ID = "rbs.constant_type_table"

      # Returns the constant-type table, computing it through the
      # loader only when the store has no valid entry for the
      # current descriptor.
      #
      # @param loader [Rigor::Environment::RbsLoader]
      # @param store [Rigor::Cache::Store]
      # @return [Hash{String => Rigor::Type}]
      def self.fetch(loader:, store:)
        descriptor = build_descriptor(loader)
        store.fetch_or_compute(producer_id: PRODUCER_ID, params: {}, descriptor: descriptor) do
          compute(loader)
        end
      end

      # Assembles the invalidation descriptor from the three input
      # families listed in the class comment.
      def self.build_descriptor(loader)
        Descriptor.new(
          gems: [rbs_gem_entry],
          files: file_entries(loader),
          configs: [libraries_entry(loader)]
        )
      end

      # Translates every constant the loader knows about; names
      # whose translation is nil are left out of the table.
      def self.compute(loader)
        loader.constant_names.each_with_object({}) do |name, table|
          translated = loader.constant_type(name)
          table[name] = translated unless translated.nil?
        end
      end

      # Descriptor entry pinning the loaded `rbs` gem version.
      def self.rbs_gem_entry
        Descriptor::GemEntry.new(name: "rbs", requirement: ">= 0", locked: ::RBS::VERSION.to_s)
      end

      # One digest entry per regular `.rbs` file below each existing
      # signature root; roots that are not directories contribute
      # nothing.
      def self.file_entries(loader)
        loader.signature_paths.flat_map do |root|
          next [] unless root.directory?

          # `base:` keeps the root out of the glob pattern, so roots
          # containing glob metacharacters (`sig[v2]`, `{a,b}`) are
          # taken literally instead of being interpreted as patterns.
          Dir.glob(File.join("**", "*.rbs"), base: root.to_s).filter_map do |relative|
            path = root.join(relative).to_s
            # A directory may itself be named `something.rbs`; only
            # regular files can be digested.
            next unless File.file?(path)

            Descriptor::FileEntry.new(
              path: path,
              comparator: :digest,
              value: Digest::SHA256.file(path).hexdigest
            )
          end
        end
      end

      # Hashes the library list order-independently (names are
      # stringified and sorted before hashing).
      def self.libraries_entry(loader)
        sorted = loader.libraries.map(&:to_s).sort
        Descriptor::ConfigEntry.new(
          key: "rbs.libraries",
          value_hash: Digest::SHA256.hexdigest(sorted.join("\n"))
        )
      end

      private_class_method :build_descriptor, :compute,
                           :rbs_gem_entry, :file_entries, :libraries_entry
    end
  end
end
@@ -0,0 +1,261 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require "fileutils"
5
+ require "json"
6
+ require "securerandom"
7
+
8
+ module Rigor
9
+ module Cache
10
+ # Filesystem-backed cache store. Schema, layout, file format,
11
+ # atomicity, and locking are fixed by [ADR-6](../../../docs/adr/6-cache-persistence-backend.md);
12
+ # callers see the [`Rigor::Cache::Descriptor`](descriptor.rb)
13
+ # value object plus this class' `#fetch_or_compute` entry point
14
+ # and nothing else.
15
+ #
16
+ # Read failures (missing file, bad magic, format-version mismatch,
17
+ # corrupt SHA-256 trailer, unmarshal-able payload) are silently
18
+ # treated as cache misses; the producer block reruns and the
19
+ # next write replaces the bad entry. The trailing SHA-256 catches
20
+ # accidental corruption (partial writes, FS errors); it is **not**
21
+ # a security boundary, per ADR-2's trusted-gem trust model.
22
+ class Store # rubocop:disable Metrics/ClassLength
23
+ # Header literal: 5-byte ASCII magic, 1-byte separator, 1-byte
24
+ # format version. Bumped on incompatible on-disk format changes
25
+ # (independent of {Descriptor::SCHEMA_VERSION}, which covers
26
+ # the descriptor schema rather than the byte layout).
27
+ HEADER = "RIGOR\x00\x01".b.freeze
28
+
29
+ VALID_PRODUCER_ID = /\A[a-z][a-z0-9._-]*\z/
30
+
31
+ def initialize(root:)
32
+ @root = root.to_s.dup.freeze
33
+ end
34
+
35
+ attr_reader :root
36
+
37
+ # Walks the on-disk cache rooted at `root` and reports a
38
+ # producer-level inventory. Used by `rigor check --cache-stats`
39
+ # to surface cache size and per-producer entry counts without
40
+ # depending on in-process counters (which only reflect the
41
+ # current run).
42
+ #
43
+ # @return [Hash] `{ root:, schema_version:, total_entries:,
44
+ # total_bytes:, producers: [{ id:, entries:, bytes: }, ...] }`.
45
+ # When the root does not exist or has no schema-version
46
+ # marker, `schema_version` is nil and the producer list is
47
+ # empty.
48
+ def self.disk_inventory(root:)
49
+ root_s = root.to_s
50
+ marker = File.join(root_s, "schema_version.txt")
51
+ schema = File.file?(marker) ? File.read(marker).strip : nil
52
+
53
+ producers = collect_producers(root_s)
54
+ total_entries = producers.sum { |p| p[:entries] }
55
+ total_bytes = producers.sum { |p| p[:bytes] }
56
+
57
+ {
58
+ root: root_s,
59
+ schema_version: schema,
60
+ total_entries: total_entries,
61
+ total_bytes: total_bytes,
62
+ producers: producers
63
+ }
64
+ end
65
+
66
+ def self.collect_producers(root)
67
+ return [] unless File.directory?(root)
68
+
69
+ Dir.children(root).sort.filter_map do |child|
70
+ subdir = File.join(root, child)
71
+ next nil unless File.directory?(subdir)
72
+
73
+ entries = Dir.glob(File.join(subdir, "**", "*.entry"))
74
+ next nil if entries.empty?
75
+
76
+ { id: child, entries: entries.size, bytes: entries.sum { |e| File.size(e) } }
77
+ end
78
+ end
79
+ private_class_method :collect_producers
80
+
81
+ # @param producer_id [String] stable cache namespace; only
82
+ # `[a-z][a-z0-9._-]*` is accepted.
83
+ # @param params [Hash] producer inputs; mixed into the cache key
84
+ # via {Descriptor#cache_key_for}.
85
+ # @param descriptor [Rigor::Cache::Descriptor] the invalidation
86
+ # descriptor for the value being cached.
87
+ # @yieldreturn the value to cache (must be `Marshal.dump`-able).
88
+ # @return the cached value (loaded from disk on hit; produced by
89
+ # the block on miss).
90
+ def fetch_or_compute(producer_id:, params:, descriptor:, &block)
91
+ validate_producer_id!(producer_id)
92
+ ensure_schema_version!
93
+
94
+ key = descriptor.cache_key_for(producer_id: producer_id, params: params)
95
+ path = entry_path(producer_id, key)
96
+
97
+ cached = read_entry(path)
98
+ return cached.value unless cached.nil?
99
+
100
+ value = block.call
101
+ write_entry(path, descriptor, value)
102
+ value
103
+ end
104
+
105
+ private
106
+
107
+ Entry = Data.define(:descriptor_bytes, :value)
108
+ private_constant :Entry
109
+
110
+ def validate_producer_id!(producer_id)
111
+ return if producer_id.is_a?(String) && producer_id.match?(VALID_PRODUCER_ID)
112
+
113
+ raise ArgumentError,
114
+ "producer_id must match #{VALID_PRODUCER_ID.inspect}, got #{producer_id.inspect}"
115
+ end
116
+
117
+ def entry_path(producer_id, key)
118
+ File.join(@root, producer_id, key[0, 2], "#{key[2..]}.entry")
119
+ end
120
+
121
+ # Reads and validates one entry file. Any failure (missing,
122
+ # short, bad magic, bad version, bad checksum, unmarshal-able)
123
+ # returns nil so the caller treats it as a cache miss.
124
+ def read_entry(path)
125
+ return nil unless File.file?(path)
126
+
127
+ bytes = File.binread(path)
128
+ return nil unless envelope_valid?(bytes)
129
+
130
+ body = bytes.byteslice(HEADER.bytesize, bytes.bytesize - HEADER.bytesize - 32)
131
+ descriptor_bytes, value_bytes = parse_body(body)
132
+ return nil if descriptor_bytes.nil?
133
+
134
+ value = safe_marshal_load(value_bytes)
135
+ return nil if value.equal?(MARSHAL_LOAD_FAILED)
136
+
137
+ Entry.new(descriptor_bytes, value)
138
+ end
139
+
140
+ # Validates the magic + format-version header and the trailing
141
+ # SHA-256 over everything before the trailer.
142
+ def envelope_valid?(bytes)
143
+ return false if bytes.bytesize < HEADER.bytesize + 32
144
+ return false unless bytes.byteslice(0, HEADER.bytesize) == HEADER
145
+
146
+ trailer = bytes.byteslice(bytes.bytesize - 32, 32)
147
+ Digest::SHA256.digest(bytes.byteslice(0, bytes.bytesize - 32)) == trailer
148
+ end
149
+
150
+ # Splits the body into (descriptor_bytes, value_bytes). Returns
151
+ # `[nil, nil]` on a malformed varint or length-overrun.
152
+ def parse_body(body)
153
+ offset = 0
154
+ descriptor_len, offset = read_varint(body, offset)
155
+ return [nil, nil] if descriptor_len.nil? || offset + descriptor_len > body.bytesize
156
+
157
+ descriptor_bytes = body.byteslice(offset, descriptor_len)
158
+ offset += descriptor_len
159
+
160
+ value_len, offset = read_varint(body, offset)
161
+ return [nil, nil] if value_len.nil? || offset + value_len != body.bytesize
162
+
163
+ value_bytes = body.byteslice(offset, value_len)
164
+ [descriptor_bytes, value_bytes]
165
+ end
166
+
167
+ MARSHAL_LOAD_FAILED = Object.new.freeze
168
+ private_constant :MARSHAL_LOAD_FAILED
169
+
170
+ def safe_marshal_load(bytes)
171
+ Marshal.load(bytes) # rubocop:disable Security/MarshalLoad
172
+ rescue StandardError
173
+ MARSHAL_LOAD_FAILED
174
+ end
175
+
176
+ def write_entry(path, descriptor, value)
177
+ FileUtils.mkdir_p(File.dirname(path))
178
+
179
+ descriptor_bytes = descriptor.to_canonical_bytes
180
+ value_bytes = Marshal.dump(value).b
181
+
182
+ body = +"".b
183
+ body << HEADER
184
+ write_varint(body, descriptor_bytes.bytesize)
185
+ body << descriptor_bytes
186
+ write_varint(body, value_bytes.bytesize)
187
+ body << value_bytes
188
+ body << Digest::SHA256.digest(body)
189
+
190
+ atomically_replace(path, body)
191
+ end
192
+
193
+ def atomically_replace(path, body)
194
+ File.open(path, File::RDWR | File::CREAT, 0o644) do |lock_fd|
195
+ lock_fd.flock(File::LOCK_EX)
196
+ tmp = "#{path}.tmp.#{Process.pid}.#{SecureRandom.hex(4)}"
197
+ File.open(tmp, "wb") do |f|
198
+ f.write(body)
199
+ f.fsync
200
+ end
201
+ File.rename(tmp, path)
202
+ end
203
+ end
204
+
205
+ def ensure_schema_version!
206
+ FileUtils.mkdir_p(@root)
207
+ marker = File.join(@root, "schema_version.txt")
208
+ current = Descriptor::SCHEMA_VERSION.to_s
209
+
210
+ if File.file?(marker)
211
+ on_disk = File.read(marker).strip
212
+ return if on_disk == current
213
+
214
+ clear_cache_root!
215
+ end
216
+
217
+ FileUtils.mkdir_p(@root)
218
+ File.write(marker, "#{current}\n")
219
+ end
220
+
221
+ def clear_cache_root!
222
+ Dir.children(@root).each do |entry|
223
+ FileUtils.rm_rf(File.join(@root, entry))
224
+ end
225
+ end
226
+
227
+ # LEB128 unsigned varint encoder/decoder. Lengths fit easily in
228
+ # five bytes (cap at 2^35); the cache layer never writes a value
229
+ # larger than that in practice.
230
+ def write_varint(bytes, value)
231
+ raise ArgumentError, "varint must be non-negative" if value.negative?
232
+
233
+ loop do
234
+ if value < 0x80
235
+ bytes << [value].pack("C")
236
+ return
237
+ end
238
+
239
+ bytes << [(value & 0x7F) | 0x80].pack("C")
240
+ value >>= 7
241
+ end
242
+ end
243
+
244
+ def read_varint(bytes, offset)
245
+ result = 0
246
+ shift = 0
247
+ loop do
248
+ return [nil, offset] if offset >= bytes.bytesize
249
+
250
+ byte = bytes.getbyte(offset)
251
+ offset += 1
252
+ result |= (byte & 0x7F) << shift
253
+ return [result, offset] if byte < 0x80
254
+
255
+ shift += 7
256
+ return [nil, offset] if shift > 35
257
+ end
258
+ end
259
+ end
260
+ end
261
+ end
data/lib/rigor/cli.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "fileutils"
3
4
  require "json"
4
5
  require "optionparser"
5
6
  require "yaml"
@@ -65,27 +66,75 @@ module Rigor
65
66
 
66
67
  def run_check
67
68
  require_relative "analysis/runner"
69
+ require_relative "cache/store"
68
70
 
71
+ options = parse_check_options
72
+
73
+ cache_root = ".rigor/cache"
74
+ handle_clear_cache(cache_root) if options.fetch(:clear_cache)
75
+
76
+ configuration = Configuration.load(options.fetch(:config))
77
+ paths = @argv.empty? ? configuration.paths : @argv
78
+ result = Analysis::Runner.new(configuration: configuration, explain: options.fetch(:explain)).run(paths)
79
+
80
+ write_result(result, options.fetch(:format))
81
+ write_cache_stats(cache_root) if options.fetch(:cache_stats)
82
+ result.success? ? 0 : 1
83
+ end
84
+
85
# Parses the `rigor check` flags out of `@argv`. Recognised flags
# are removed from `@argv` in place, leaving only positional paths.
#
# @return [Hash] `:config`, `:format`, `:explain`, `:cache_stats`
#   and `:clear_cache`, with defaults for flags that were not given.
def parse_check_options
  settings = {
    config: Configuration::DEFAULT_PATH,
    format: "text",
    explain: false,
    cache_stats: false,
    clear_cache: false
  }

  cli = OptionParser.new
  cli.banner = "Usage: rigor check [options] [paths]"
  cli.on("--config=PATH", "Path to the Rigor configuration file") { |path| settings[:config] = path }
  cli.on("--format=FORMAT", "Output format: text or json") { |name| settings[:format] = name }
  cli.on("--explain", "Surface fail-soft fallback events as :info diagnostics") { settings[:explain] = true }
  cli.on("--cache-stats", "Print on-disk cache inventory at end of run") { settings[:cache_stats] = true }
  cli.on("--clear-cache", "Remove the .rigor/cache directory before running") { settings[:clear_cache] = true }
  cli.parse!(@argv)

  settings
end
82
104
 
83
- configuration = Configuration.load(options.fetch(:config))
84
- paths = @argv.empty? ? configuration.paths : @argv
85
- result = Analysis::Runner.new(configuration: configuration, explain: options.fetch(:explain)).run(paths)
105
# Deletes the cache directory when it exists and reports on `@out`
# which of the two cases applied.
#
# @param cache_root [String] path of the cache directory.
def handle_clear_cache(cache_root)
  unless File.directory?(cache_root)
    @out.puts("Cache already empty: #{cache_root}")
    return
  end

  FileUtils.rm_rf(cache_root)
  @out.puts("Cleared cache: #{cache_root}")
end
86
113
 
87
- write_result(result, options.fetch(:format))
88
- result.success? ? 0 : 1
114
# Prints the on-disk cache inventory for `cache_root` to `@out`:
# a blank separator line, the root, the schema version ("absent"
# when there is no marker), then either "(empty)" or the totals
# followed by one line per producer.
#
# @param cache_root [String] path of the cache directory.
def write_cache_stats(cache_root)
  inventory = Cache::Store.disk_inventory(root: cache_root)

  @out.puts("")
  @out.puts("Cache (root: #{inventory.fetch(:root)})")
  version = inventory.fetch(:schema_version)
  version = "absent" if version.nil?
  @out.puts(" schema_version: #{version}")

  entry_total = inventory.fetch(:total_entries)
  if entry_total.zero?
    @out.puts(" (empty)")
    return
  end

  @out.puts(" #{entry_total} entries, #{format_bytes(inventory.fetch(:total_bytes))}")
  inventory.fetch(:producers).each do |row|
    size = format_bytes(row.fetch(:bytes))
    @out.puts(" #{row.fetch(:id)}: #{row.fetch(:entries)} entries, #{size}")
  end
end
132
+
133
# Renders a byte count for humans using binary units with one
# decimal place ("512 B", "1.5 KiB", "3.5 MiB", "5.0 GiB").
#
# The unit is promoted whenever the value would otherwise be printed
# as 1024.0 of the smaller unit, so a size just below a boundary
# reads "1.0 MiB" rather than "1024.0 KiB".
#
# @param bytes [Integer] non-negative size in bytes.
# @return [String]
def format_bytes(bytes)
  return "#{bytes} B" if bytes < 1024

  scaled = bytes / 1024.0
  unit = "KiB"
  %w[MiB GiB TiB].each do |larger|
    # Compare the value as it will be displayed (one decimal place).
    break if scaled.round(1) < 1024

    scaled /= 1024.0
    unit = larger
  end

  format("%.1f %s", scaled, unit)
end
90
139
 
91
140
  def run_init
@@ -144,6 +144,17 @@ module Rigor
144
144
  @hierarchy.class_ordering(lhs, rhs)
145
145
  end
146
146
 
147
# Lists the names of all constants declared in the loaded RBS
# environment, stringified (top-level prefixed, e.g.
# `"::Math::PI"`). This feeds the cache producer that materialises
# the constant-type table; for point lookups keep using
# {#constant_type}.
#
# Fail-soft: any `StandardError` raised while reading the
# environment yields an empty list.
#
# @return [Array<String>]
def constant_names
  declarations = env.constant_decls
  declarations.keys.map { |name| name.to_s }
rescue StandardError
  []
end
157
+
147
158
  # Slice A constant-value lookup. Returns the translated
148
159
  # `Rigor::Type` for a non-class constant declaration
149
160
  # (`BUCKETS: Array[Symbol]`, `DEFAULT_PATH: String`, ...) or
@@ -52,6 +52,12 @@ module Rigor
52
52
  # Literals
53
53
  Prism::IntegerNode => :type_of_literal_value,
54
54
  Prism::FloatNode => :type_of_literal_value,
55
+ # `1i` / `2.5ri` lift via `node.value` which is already a
56
+ # `Complex` Ruby value; same for `1r` / `1.5r` whose
57
+ # value is a `Rational`. `Type::Constant` accepts both
58
+ # via `SCALAR_CLASSES`.
59
+ Prism::ImaginaryNode => :type_of_literal_value,
60
+ Prism::RationalNode => :type_of_literal_value,
55
61
  Prism::SymbolNode => :symbol_type_for,
56
62
  Prism::StringNode => :string_type_for,
57
63
  Prism::TrueNode => :type_of_true,
@@ -401,7 +407,13 @@ module Rigor
401
407
  # so callers stay backward compatible.
402
408
  def type_of_hash(node)
403
409
  elements = node.respond_to?(:elements) ? node.elements : []
404
- return Type::Combinator.nominal_of(Hash) if elements.empty?
410
+ # v0.0.7 `{}` resolves to the empty `HashShape{}` carrier
411
+ # rather than `Nominal[Hash]`, mirroring the v0.0.6 empty-
412
+ # array literal change. Both forms erase to plain `Hash`,
413
+ # but `HashShape{}` pins the literal's known size (zero)
414
+ # so HashShape projections (`empty?`, `first`, `count`,
415
+ # …) fold against it.
416
+ return Type::Combinator.hash_shape_of({}) if elements.empty?
405
417
 
406
418
  shape = static_hash_shape_for(elements)
407
419
  return shape if shape
@@ -691,7 +703,18 @@ module Rigor
691
703
  Type::Combinator.constant_of(Range.new(left, right, node.exclude_end?))
692
704
  end
693
705
 
694
- def type_of_regexp(_node)
706
# v0.0.7 — a non-interpolated regex literal becomes
# `Constant<Regexp>` so that `Constant<String>#scan(/regex/)`,
# `#match(/regex/)` and friends can fold through the catalog tier.
# Any other node (e.g. the interpolated
# `Prism::InterpolatedRegularExpressionNode` form, `/foo#{x}/`)
# keeps the conservative `Nominal[Regexp]` answer, as does a literal
# whose source cannot be compiled into a `Regexp`.
def type_of_regexp(node)
  if node.is_a?(Prism::RegularExpressionNode)
    Type::Combinator.constant_of(Regexp.new(node.unescaped, node.options))
  else
    Type::Combinator.nominal_of(Regexp)
  end
rescue StandardError
  Type::Combinator.nominal_of(Regexp)
end
  end
697
720