jsx_rosetta 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +128 -11
  3. data/CLAUDE.md +70 -0
  4. data/README.md +50 -0
  5. data/agents/jsx-rosetta-resolve-todo-file.md +90 -0
  6. data/lib/jsx_rosetta/ast/inflector.rb +17 -0
  7. data/lib/jsx_rosetta/backend/phlex.rb +1078 -77
  8. data/lib/jsx_rosetta/backend/rails_view.rb +1 -1
  9. data/lib/jsx_rosetta/backend/view_component/expression_translator.rb +73 -20
  10. data/lib/jsx_rosetta/backend/view_component.rb +48 -2
  11. data/lib/jsx_rosetta/cli.rb +175 -37
  12. data/lib/jsx_rosetta/icons/lucide.json +37 -0
  13. data/lib/jsx_rosetta/icons.rb +44 -0
  14. data/lib/jsx_rosetta/ir/lowering.rb +720 -31
  15. data/lib/jsx_rosetta/ir/radix_registry.rb +84 -0
  16. data/lib/jsx_rosetta/ir/types.rb +187 -3
  17. data/lib/jsx_rosetta/ir.rb +5 -4
  18. data/lib/jsx_rosetta/pages_routing.rb +640 -0
  19. data/lib/jsx_rosetta/version.rb +1 -1
  20. data/lib/jsx_rosetta.rb +8 -6
  21. data/plans/nextjs_pages_to_rails.md +200 -0
  22. data/plans/nextjs_pages_to_rails_slice_2.md +118 -0
  23. data/plans/nextjs_pages_to_rails_slice_3.md +121 -0
  24. data/plans/nextjs_pages_to_rails_slice_4.md +301 -0
  25. data/plans/translator_widening_and_pages_followups.md +120 -0
  26. data/plans/translator_widening_slice_a.md +208 -0
  27. data/skills/jsx-rosetta-resolve-todos/SKILL.md +206 -0
  28. data/skills/jsx-rosetta-resolve-todos/data/design_tokens.template.yml +71 -0
  29. data/skills/jsx-rosetta-resolve-todos/data/target_app_conventions.template.yml +107 -0
  30. data/skills/jsx-rosetta-resolve-todos/examples/design_tokens.ant_design_v5.yml +190 -0
  31. data/skills/jsx-rosetta-resolve-todos/recipes/01_design_tokens.md +74 -0
  32. data/skills/jsx-rosetta-resolve-todos/recipes/02_promoted_ivar.md +49 -0
  33. data/skills/jsx-rosetta-resolve-todos/recipes/03_react_hooks.md +34 -0
  34. data/skills/jsx-rosetta-resolve-todos/recipes/04_apollo_hooks.md +34 -0
  35. data/skills/jsx-rosetta-resolve-todos/recipes/05_event_handlers.md +45 -0
  36. data/skills/jsx-rosetta-resolve-todos/recipes/06_module_constants.md +29 -0
  37. data/skills/jsx-rosetta-resolve-todos/recipes/07_nextjs_navigation.md +44 -0
  38. data/skills/jsx-rosetta-resolve-todos/recipes/08_generic_js_bailouts.md +55 -0
  39. data/skills/jsx-rosetta-resolve-todos/tools/apply_promoted_ivar.rb +189 -0
  40. data/skills/jsx-rosetta-resolve-todos/tools/apply_substitutions.rb +292 -0
  41. data/skills/jsx-rosetta-resolve-todos/tools/diff_corpus.rb +161 -0
  42. data/skills/jsx-rosetta-resolve-todos/tools/discover_bailouts.rb +211 -0
  43. metadata +29 -1
@@ -0,0 +1,292 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # apply_substitutions.rb
5
+ #
6
+ # Mechanical resolution of jsx_rosetta TODOs of the form:
7
+ # # TODO: (attribute|style declaration) "<name>" dropped — couldn't translate: <RHS>
8
+ #
9
+ # A user-supplied YAML config declares:
10
+ # - match: a regex with one capture group, applied to <RHS>; the capture
11
+ # becomes the lookup key
12
+ # - tokens: a map from lookup key → { value, tailwind?, category?, notes? }
13
+ #
14
+ # When <RHS> matches and the captured key is in `tokens`, the value is spliced
15
+ # into the `render Foo.new(...)` immediately below the TODO. Only single-line
16
+ # render calls with string-literal `style:` (or no `style:`) are touched;
17
+ # anything more complex is left untouched.
18
+ #
19
+ # Always re-parses the post-edit file with `ruby -c`; if parsing fails the
20
+ # file is left unwritten and the run is reported as `parse_failed`.
21
+ #
22
+ # Usage:
23
+ # apply_substitutions.rb --config <yaml> [--dry-run] [--quiet] <file_or_dir>...
24
+
25
require 'json'
require 'optparse'
require 'rbconfig'
require 'tempfile'
require 'yaml'
29
+
30
+ # --- config ------------------------------------------------------------------
31
+
32
+ CONFIG_SCHEMA_HINT = <<~MSG.freeze
33
+ Config must be YAML with:
34
+ match: '<regex with one capture group>'
35
+ tokens:
36
+ <capture>: { value: <int|string|null>, ... }
37
+ ...
38
+ MSG
39
+
40
# Load and validate the substitution config.
#
# path - path to a YAML file with a `match` string and a `tokens` map.
#
# Returns the parsed config Hash, frozen, with the compiled `match` regex
# stored under the extra key '_user_re'.
#
# Aborts (Kernel#abort) when the YAML cannot be parsed, does not have the
# expected shape, when `match` is not a valid regex, or when `match` has no
# numbered capture group to use as the token lookup key.
def load_config(path)
  begin
    raw = YAML.safe_load(File.read(path))
  rescue Psych::Exception => e
    abort "invalid YAML in #{path}: #{e.message}\n#{CONFIG_SCHEMA_HINT}"
  end

  unless raw.is_a?(Hash) && raw['match'].is_a?(String) && raw['tokens'].is_a?(Hash)
    abort "invalid config at #{path}\n#{CONFIG_SCHEMA_HINT}"
  end

  begin
    user_re = Regexp.new(raw['match'])
  rescue RegexpError => e
    abort "invalid `match` regex in #{path}: #{e.message}\n#{CONFIG_SCHEMA_HINT}"
  end

  # Ruby stops numbering plain groups once a named group is present, which
  # would shift the numbered captures the TODO regex depends on.
  if user_re.names.any?
    abort "`match` in #{path} must use a plain (numbered) capture group, not a named one\n#{CONFIG_SCHEMA_HINT}"
  end

  # Union with an empty pattern always matches "", so the MatchData size
  # reveals how many capture groups the user's regex declares.
  capture_count = Regexp.union(user_re, //).match('').size - 1
  if capture_count.zero?
    abort "`match` in #{path} needs one capture group for the token key\n#{CONFIG_SCHEMA_HINT}"
  end

  raw['_user_re'] = user_re
  raw['tokens'].freeze
  raw.freeze
  raw
end
54
+
55
+ # --- regex -------------------------------------------------------------------
56
+
57
+ # Built dynamically from config so the user's `match:` controls what gets
58
+ # touched. Captures: 1=indent, 2=kind, 3=attr_name, 4+=user-regex captures
59
+ # (capture 4 is the lookup key by convention).
60
# Build the regex that recognises one dropped-expression TODO line.
#
# user_re_source - source text of the user's `match` regex.
#
# Captures: 1=indent, 2=kind, 3=attr_name, 4+=the user regex's own groups.
# The user pattern is wrapped in a non-capturing group so that a top-level
# `|` inside it cannot split the whole expression and escape the \A...\z
# anchoring; capture numbering is unaffected.
def build_todo_re(user_re_source)
  /\A(\s*)# TODO: (attribute|style declaration) "(.+?)" dropped — couldn't translate: (?:#{user_re_source})\s*\z/
end
63
+
64
+ # Single-line `render Foo.new(...)` (with optional `do ...`).
65
+ RENDER_RE = /\A(\s*)render\s+([A-Z][A-Za-z0-9_:]*)\.new(\((.*)\))?(\s+do\b.*)?\s*\z/.freeze
66
+
67
+ RUBY_IDENT_RE = /\A[a-z_][a-z0-9_]*\z/.freeze
68
+
69
+ Drop = Struct.new(:kind, :name, :key, :line_index, :raw_line, keyword_init: true)
70
+
71
+ # --- helpers -----------------------------------------------------------------
72
+
73
# Convert a camelCase or kebab-case name to snake_case: an underscore is
# inserted wherever a lowercase letter or digit is followed by an uppercase
# letter, then the result is downcased and hyphens become underscores.
def camel_to_snake(s)
  with_breaks = s.gsub(/([a-z0-9])([A-Z])/, '\1_\2')
  with_breaks.downcase.tr('-', '_')
end
76
+
77
# True when `name` matches RUBY_IDENT_RE, i.e. can be written as a bare
# `name:` keyword argument.
def valid_ruby_kwarg?(name)
  RUBY_IDENT_RE.match?(name)
end
78
+
79
# Render `s` as Ruby string-literal source that evaluates back to exactly `s`.
#
# Single quotes are preferred. Double quotes are used only when `s` contains
# a single quote and nothing a double-quoted literal would interpret (no `"`,
# no backslash, no `#`). Everything else is emitted single-quoted with
# backslashes and single quotes escaped, so values such as `it's #{x}` or a
# trailing backslash can neither interpolate nor break the literal.
def quote_string(s)
  if s.include?("'") && !s.match?(/["\\#]/)
    "\"#{s}\""
  else
    # Block form avoids gsub's replacement-string backslash rules.
    "'#{s.gsub(/[\\']/) { |ch| "\\#{ch}" }}'"
  end
end
88
+
89
# Format a token value for an inline CSS declaration: numbers get a `px`
# suffix, anything else is stringified as-is.
def render_inline_style(value)
  return "#{value}px" if value.is_a?(Numeric)

  value.to_s
end
92
+
93
# Format a token value as Ruby source for a keyword argument.
# Numbers are written bare, strings go through quote_string, and any other
# type yields nil (the caller treats nil as "cannot resolve").
def render_kwarg_value(value)
  return value.to_s if value.is_a?(Numeric)
  return quote_string(value) if value.is_a?(String)

  nil
end
99
+
100
# Fetch the `value` configured for token `key`.
# Returns nil when the key is absent or its entry is not a Hash.
def lookup(config, key)
  entry = config['tokens'][key]
  entry.is_a?(Hash) ? entry['value'] : nil
end
105
+
106
# Try to read `line` as a dropped-expression TODO.
#
# Returns a Drop built from the regex captures (2=kind, 3=name, 4=lookup key)
# plus the line index and the raw line text, or nil when `todo_re` does not
# match.
def parse_todo(line, idx, todo_re)
  match = todo_re.match(line)
  return nil unless match

  kind = match[2] == 'attribute' ? :attribute : :style_decl
  Drop.new(kind: kind, name: match[3], key: match[4], line_index: idx, raw_line: line)
end
112
+
113
+ # --- splice ------------------------------------------------------------------
114
+
115
# Rewrite one single-line `render Foo.new(...)` call so that it carries the
# values of the TODO drops that can be resolved from `config`.
#
# render_line - the source line following the pending TODO comments.
# drops       - Drop structs collected from those TODO comments.
# config      - loaded config (see load_config).
#
# Returns nil when the line is not a single-line render call or when nothing
# could be resolved. Otherwise returns
#   { new_line: String, resolved: [Drop], unresolved: [Drop] }
# with both drop lists in the same order as `drops`, so TODO comments that
# remain are re-emitted in their original order.
#
# A drop is left unresolved (never guessed at) when:
#   * its key has no configured value;
#   * it is an attribute whose value type or name cannot be written as a
#     keyword argument, or whose keyword is already present in the call;
#   * it is a style declaration and the call already has a `style:` that is
#     not a plain string literal, or merging into the existing literal would
#     need escaping.
def splice(render_line, drops, config)
  m = RENDER_RE.match(render_line)
  return nil unless m

  indent = m[1]
  klass = m[2]
  has_parens = !m[3].nil?
  args = m[4] || ''
  trailing = m[5] || ''

  style_adds = [] # [drop, "css-name: value"]
  attr_adds = []  # [drop, "kwarg: literal"]

  drops.each do |d|
    val = lookup(config, d.key)
    next if val.nil?

    case d.kind
    when :style_decl
      style_adds << [d, "#{d.name}: #{render_inline_style(val)}"]
    when :attribute
      literal = render_kwarg_value(val)
      kwarg = camel_to_snake(d.name)
      next if literal.nil? || !valid_ruby_kwarg?(kwarg)

      # Adding a keyword that is already there would silently override it.
      already_passed = args.match?(/(?<!\w)#{Regexp.escape(kwarg)}:(?!:)/) ||
                       attr_adds.any? { |_, text| text.start_with?("#{kwarg}: ") }
      next if already_passed

      attr_adds << [d, "#{kwarg}: #{literal}"]
    end
  end

  return nil if style_adds.empty? && attr_adds.empty?

  new_args = args.dup

  if style_adds.any?
    additions = style_adds.map(&:last)
    # \b keeps kwargs such as `data_style:` from being mistaken for `style:`.
    literal_style = new_args.match(/\bstyle:\s*(['"])(.*?)\1/)

    if literal_style
      quote = literal_style[1]
      needs_escaping = quote == '"' ? /["\\]|#[{$@]/ : /['\\]/
      if additions.any? { |text| text.match?(needs_escaping) }
        style_adds = []
      else
        existing = literal_style[2].split(';').map(&:strip).reject(&:empty?)
        combined = (existing + additions).join('; ') + ';'
        # Plain concatenation: no replacement-string backslash handling.
        new_args = "#{literal_style.pre_match}style: #{quote}#{combined}#{quote}#{literal_style.post_match}"
      end
    elsif new_args.match?(/\bstyle:/)
      # Hash, variable, ivar, symbol... - do not try to merge.
      style_adds = []
    else
      kwarg = "style: #{quote_string(additions.join('; ') + ';')}"
      new_args = new_args.empty? ? kwarg : "#{kwarg}, #{new_args}"
    end
  end

  if attr_adds.any?
    extra = attr_adds.map(&:last).join(', ')
    new_args = new_args.empty? ? extra : "#{new_args}, #{extra}"
  end

  applied = (style_adds + attr_adds).map(&:first)
  return nil if applied.empty?

  # Partition by object identity so two drops with equal fields stay distinct.
  resolved, unresolved = drops.partition { |d| applied.any? { |a| a.equal?(d) } }

  call_args = (has_parens || !new_args.empty?) ? "(#{new_args})" : ''
  new_line = "#{indent}render #{klass}.new#{call_args}#{trailing}\n"

  { new_line: new_line, resolved: resolved, unresolved: unresolved }
end
188
+
189
+ # --- file processing ---------------------------------------------------------
190
+
191
# Resolve the TODOs of one generated file in memory.
#
# path    - file to read (it is never written here; the caller decides).
# config  - loaded config (see load_config).
# todo_re - regex from build_todo_re.
#
# Consecutive TODO lines are collected and handed, together with the first
# non-TODO line after them, to `splice`. TODOs that cannot be resolved are
# re-emitted unchanged in front of that line.
#
# Returns a Hash:
#   nothing resolved   -> { file:, resolved: 0, skipped: }
#   rewrite invalid    -> { file:, resolved: 0, skipped:, parse_failed: true }
#   otherwise          -> { file:, resolved:, skipped:, new_content: }
def process_file(path, config, todo_re)
  out = []
  pending = []
  resolved = 0
  skipped = 0

  # Put TODO comments we could not handle back into the output untouched.
  keep = lambda do |drops|
    drops.each { |d| out << d.raw_line }
    skipped += drops.length
  end

  # Read as UTF-8 explicitly: the TODO regex contains a non-ASCII dash and
  # must not depend on the caller's locale.
  File.read(path, encoding: 'UTF-8').each_line.with_index do |line, idx|
    if (drop = parse_todo(line, idx, todo_re))
      pending << drop
      next
    end

    if pending.empty?
      out << line
      next
    end

    result = splice(line, pending, config)
    if result
      keep.call(result[:unresolved])
      out << result[:new_line]
      resolved += result[:resolved].length
    else
      keep.call(pending)
      out << line
    end
    pending = []
  end

  # TODOs at the very end of the file have no render line to attach to.
  keep.call(pending)

  return { file: path, resolved: 0, skipped: skipped } if resolved.zero?

  new_content = out.join
  syntax_ok = Tempfile.create(['validate', '.rb']) do |tf|
    tf.write(new_content)
    tf.flush
    # Check with the interpreter running this script rather than whatever
    # `ruby` (if any) happens to be first on PATH.
    system(RbConfig.ruby, '-c', tf.path, out: File::NULL, err: File::NULL)
  end

  unless syntax_ok
    return { file: path, resolved: 0, skipped: skipped + resolved, parse_failed: true }
  end

  { file: path, resolved: resolved, skipped: skipped, new_content: new_content }
end
240
+
241
+ # --- CLI ---------------------------------------------------------------------
242
+
243
# Command-line entry point: parse options, expand the file/directory
# arguments into a list of .rb files, process each one and print a summary.

opts = { dry_run: false, quiet: false, config: nil }
cli = OptionParser.new
cli.banner = "usage: apply_substitutions.rb --config <yaml> [--dry-run] [--quiet] <file_or_dir>..."
cli.on('--config PATH') { |value| opts[:config] = value }
cli.on('--dry-run') { opts[:dry_run] = true }
cli.on('--quiet') { opts[:quiet] = true }
cli.parse!(ARGV)

abort "missing --config <yaml>\n#{CONFIG_SCHEMA_HINT}" unless opts[:config]
abort "config not found: #{opts[:config]}" unless File.file?(opts[:config])
abort "no input files" if ARGV.empty?

config = load_config(opts[:config])
todo_re = build_todo_re(config['_user_re'].source)

# Directories contribute every .rb file beneath them; plain files are taken
# as given; anything else is reported and skipped.
paths = []
ARGV.each do |arg|
  if File.directory?(arg)
    paths.concat(Dir.glob(File.join(arg, '**', '*.rb')))
  elsif File.file?(arg)
    paths << arg
  else
    warn "skip: #{arg} not found"
  end
end

totals = { files: paths.length, modified: 0, resolved: 0, skipped: 0, parse_failed: 0 }

paths.each do |path|
  report = process_file(path, config, todo_re)
  totals[:resolved] += report[:resolved]
  totals[:skipped] += report[:skipped]

  if report[:parse_failed]
    totals[:parse_failed] += 1
    puts JSON.generate(file: report[:file], parse_failed: true) unless opts[:quiet]
    next
  end

  next unless report[:resolved] > 0

  totals[:modified] += 1
  # The file is only touched when this is not a dry run.
  File.write(path, report[:new_content]) unless opts[:dry_run]
  puts JSON.generate(file: report[:file], resolved: report[:resolved], skipped: report[:skipped]) unless opts[:quiet]
end

dry_run_note = opts[:dry_run] ? ' (dry-run)' : ''
puts ''
puts "config: #{opts[:config]}"
puts "files scanned: #{totals[:files]}"
puts "files modified: #{totals[:modified]}#{dry_run_note}"
puts "TODOs resolved: #{totals[:resolved]}"
puts "TODOs left: #{totals[:skipped]}"
puts "parse failures: #{totals[:parse_failed]}"
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # diff_corpus.rb
5
+ #
6
+ # Tally `# TODO:` comments by category in two directories (typically a
7
+ # "before" and "after" snapshot of generated output) and report the
8
+ # per-category delta. Lets the user see the impact of a resolve-todos
9
+ # pipeline pass and confirm the skill's claims aren't pure marketing.
10
+ #
11
+ # Usage:
12
+ # diff_corpus.rb [--json] <before_dir> <after_dir>
13
+ #
14
+ # Categories match the SKILL.md routing table. Sharpened TODOs (those
15
+ # tagged `# TODO[category]:`) are counted separately so the delta shows
16
+ # both removal of original TODOs and creation of sharpened replacements.
17
+
18
+ require 'json'
19
+ require 'optparse'
20
+
21
+ # Original-form TODOs the gem emits. Order matters: more specific patterns
22
+ # first, generic last.
23
+ CATEGORIES = [
24
+ # "chain_ref" = drops whose RHS starts with `<ident>.<ident>...` (token systems,
25
+ # GraphQL field accesses, prop chains). `apply_substitutions.rb` resolves the
26
+ # subset of these that match its configured token regex.
27
+ [:chain_ref_drop, /^\s*# TODO: (?:attribute|style declaration) ".+?" dropped — couldn't translate: \w+\.\w/],
28
+ [:other_drop, /^\s*# TODO: (?:attribute|style declaration) ".+?" dropped/],
29
+ [:promoted_ivar, /^\s*# TODO: render condition references binding\(s\) promoted to @ivar/],
30
+ [:react_hooks, /^\s*# TODO: React hooks detected/],
31
+ [:apollo_hooks, /^\s*# TODO: Apollo data-fetching hooks detected/],
32
+ [:nextjs_navigation, /^\s*# TODO: Next\.js navigation hooks detected/],
33
+ [:event_handler, /^\s*# TODO: translate the original JSX `\w+` handler/],
34
+ [:module_constants, /^\s*# TODO: module-level constants/],
35
+ [:generic_js_bailout, /^\s*# TODO: translate JS to Ruby — original/],
36
+ [:generic_translate, /^\s*# TODO: translate "/],
37
+ [:generic_condition, /^\s*# TODO: translate condition:/],
38
+ [:other_todo, /^\s*# TODO:/]
39
+ ].freeze
40
+
41
+ # Sharpened TODOs emitted by recipes. Captured separately to show
42
+ # "compression" — original removed, sharpened added.
43
+ SHARPENED_RE = /^\s*# TODO\[([a-z_:-]+)\]:/.freeze
44
+
45
# Count TODO comments in every .rb file under `dir`.
#
# Returns { categories:, sharpened:, files_with_todos: } where
#   categories       - Hash (default 0) of CATEGORIES symbol => count, plus
#                      :_sharpened_total for all `# TODO[tag]:` lines;
#   sharpened        - Hash (default 0) of tag symbol => count;
#   files_with_todos - number of files containing at least one TODO.
#
# Each line is counted once: a sharpened TODO wins, otherwise the first
# matching entry of CATEGORIES. Files are read as UTF-8 and invalid bytes are
# scrubbed, so one badly encoded file or a non-UTF-8 locale cannot abort the
# whole tally.
def tally(dir)
  counts = Hash.new(0)
  sharp_counts = Hash.new(0)
  todo_files = 0

  Dir.glob(File.join(dir, '**', '*.rb')).each do |path|
    file_had_todo = false

    File.foreach(path, encoding: 'UTF-8') do |raw|
      line = raw.scrub

      if (m = SHARPENED_RE.match(line))
        sharp_counts[m[1].to_sym] += 1
        counts[:_sharpened_total] += 1
        file_had_todo = true
        next
      end

      category, = CATEGORIES.find { |_, re| re.match?(line) }
      next unless category

      counts[category] += 1
      file_had_todo = true
    end

    todo_files += 1 if file_had_todo
  end

  { categories: counts, sharpened: sharp_counts, files_with_todos: todo_files }
end
70
+
71
# Format a count delta for the table's last column.
# Zero is shown as ".", positive values get an explicit "+"; every result is
# right-aligned to six characters so the column lines up for all three cases.
def fmt_delta(n)
  text =
    if n.zero?
      '.'
    elsif n.positive?
      "+#{n}"
    else
      n.to_s
    end
  text.rjust(6)
end
74
+
75
# Print the before/after comparison as a plain-text table.
#
# before, after         - results of `tally` for the two directories.
# before_dir, after_dir - the directory names, echoed in the header.
#
# Rows follow the order of CATEGORIES; categories that are zero on both
# sides are omitted. Sharpened TODOs get their own row and are included in
# the total. A per-tag breakdown follows when any sharpened TODO exists.
def emit_text(before, after, before_dir, after_dir)
  rule = " ─────────────────────────────────────────────"
  print_row = lambda do |label, b, a|
    puts " %-22s %6d %6d %s" % [label, b, a, fmt_delta(a - b)]
  end
  seen = (before[:categories].keys + after[:categories].keys).uniq

  puts "before: #{before_dir}"
  puts "after: #{after_dir}"
  puts ""
  puts " before after delta"
  puts rule

  grand_before = 0
  grand_after = 0
  CATEGORIES.each do |sym, _pattern|
    next unless seen.include?(sym)

    b = before[:categories][sym]
    a = after[:categories][sym]
    grand_before += b
    grand_after += a
    print_row.call(sym, b, a) unless b.zero? && a.zero?
  end

  # :_sharpened_total is the synthetic counter for all `# TODO[tag]:` lines.
  sharp_before = before[:categories][:_sharpened_total]
  sharp_after = after[:categories][:_sharpened_total]
  grand_before += sharp_before
  grand_after += sharp_after
  if sharp_before.positive? || sharp_after.positive?
    print_row.call('sharpened (compressed)', sharp_before, sharp_after)
  end

  puts rule
  print_row.call('total', grand_before, grand_after)
  puts ""
  puts " files with any TODO: #{before[:files_with_todos]} → #{after[:files_with_todos]}"

  return if before[:sharpened].empty? && after[:sharpened].empty?

  puts ""
  puts " Sharpened-TODO breakdown by tag:"
  (before[:sharpened].keys + after[:sharpened].keys).uniq.sort.each do |tag|
    print_row.call(tag, before[:sharpened][tag], after[:sharpened][tag])
  end
end
120
+
121
# Print the before/after comparison as pretty-printed JSON.
#
# The document has the keys before_dir, after_dir, files_with_todos,
# categories and sharpened_by_tag; the last two map each name (sorted) to
# { before, after, delta }.
def emit_json(before, after, before_dir, after_dir)
  compare = lambda do |section|
    names = (before[section].keys + after[section].keys).uniq.sort
    names.to_h do |name|
      was = before[section][name]
      now = after[section][name]
      [name, { before: was, after: now, delta: now - was }]
    end
  end

  report = {
    before_dir: before_dir,
    after_dir: after_dir,
    files_with_todos: { before: before[:files_with_todos], after: after[:files_with_todos] },
    categories: compare.call(:categories),
    sharpened_by_tag: compare.call(:sharpened)
  }
  puts JSON.pretty_generate(report)
end
140
+
141
+ # --- CLI ---
142
+
143
# Command-line entry point: validate the two directory arguments, tally
# each, and print the comparison as text or (with --json) as JSON.

opts = { json: false }
cli = OptionParser.new
cli.banner = "usage: diff_corpus.rb [--json] <before_dir> <after_dir>"
cli.on('--json') { opts[:json] = true }
cli.parse!(ARGV)

abort "expected two directory arguments" unless ARGV.size == 2
before_dir, after_dir = ARGV
[before_dir, after_dir].each do |dir|
  abort "not a directory: #{dir}" unless File.directory?(dir)
end

before = tally(before_dir)
after = tally(after_dir)

if opts[:json]
  emit_json(before, after, before_dir, after_dir)
else
  emit_text(before, after, before_dir, after_dir)
end
@@ -0,0 +1,211 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # discover_bailouts.rb
5
+ #
6
+ # Scan a corpus of jsx_rosetta-generated .rb files and surface the dropped-
7
+ # expression RHS values that show up most often. Used to:
8
+ # - decide which substitutions are worth automating (apply_substitutions.rb)
9
+ # - spot clusters that look like design-system tokens (e.g. many `theme.X`,
10
+ # `vars.X`, `tokens.X.Y` references) and suggest a `match:` regex
11
+ #
12
+ # This tool makes no judgements about *what* a reference means — only what
13
+ # patterns repeat. Decisions about how to map them belong to the human (or to
14
+ # the LLM-driven recipes in the skill).
15
+ #
16
+ # Usage:
17
+ # discover_bailouts.rb [--top N] [--json] [--all-todos] <file_or_dir>...
18
+ #
19
+ # --top N Show top N entries per section (default 25).
20
+ # --json Emit machine-readable JSON instead of text.
21
+ # --all-todos Also tally non-attribute/style TODO categories (hooks,
22
+ # module constants, generic JS bailouts) so you can see
23
+ # the full distribution. Off by default.
24
+
25
+ require 'json'
26
+ require 'optparse'
27
+
28
+ # Match a "dropped" TODO. Captures: kind, attr_name, RHS.
29
+ DROP_RE = /\A\s*# TODO: (attribute|style declaration) "(.+?)" dropped — couldn't translate: (.+?)\s*\z/.freeze
30
+
31
+ # Other generic TODO categories — counted only when --all-todos is set.
32
+ OTHER_RE = /\A\s*# TODO: (.+?)\s*\z/.freeze
33
+
34
+ # Detect a "<root>.<member>" or "<root>.<member>.<member>..." chain at the
35
+ # start of an RHS. Captures: root, full chain.
36
+ CHAIN_RE = /\A(\$?\b[a-z_][A-Za-z0-9_]*)((?:\.[A-Za-z_][A-Za-z0-9_]*)+)/.freeze
37
+
38
+ # Detect a `${...}` interpolation inside a backtick template literal; captures the interpolated expression (e.g. `theme.colors.primary`).
39
+ TPL_RE = /`[^`]*\$\{([^}]+)\}/.freeze
40
+
41
+ # --- collection --------------------------------------------------------------
42
+
43
# Read every file in `paths` and collect its TODO comments.
#
# Returns [drops, others]:
#   drops  - Array of { kind:, attr:, rhs:, file:, line: } for each line
#            matching DROP_RE (line numbers start at 1);
#   others - Hash (default 0) counting every other `# TODO:` line by its
#            leading phrase, i.e. the text before the first "—" or ":".
#
# Files are read as UTF-8 with invalid bytes scrubbed so a stray byte or a
# non-UTF-8 locale cannot abort the scan. A TODO whose text starts with a
# separator is counted under the empty phrase instead of raising.
def gather(paths)
  drops = []
  others = Hash.new(0)

  paths.each do |path|
    File.foreach(path, encoding: 'UTF-8').with_index(1) do |raw, lineno|
      line = raw.scrub

      if (m = DROP_RE.match(line))
        drops << { kind: m[1], attr: m[2], rhs: m[3], file: path, line: lineno }
      elsif (m = OTHER_RE.match(line))
        # Always yields a String (possibly empty), unlike split(...).first.
        head = m[1][/\A[^—:]*/].strip
        others[head] += 1
      end
    end
  end

  [drops, others]
end
61
+
62
+ # --- analysis ----------------------------------------------------------------
63
+
64
# Root identifier of the member chain at the start of `rhs` (first capture
# of CHAIN_RE), or nil when `rhs` does not start with such a chain.
def chain_root(rhs)
  rhs[CHAIN_RE, 1]
end
69
+
70
# The whole member chain at the start of `rhs` (root plus every `.member`),
# or nil when `rhs` does not start with one.
def chain_full(rhs)
  CHAIN_RE.match(rhs)&.values_at(1, 2)&.join
end
75
+
76
# Member chains referenced from `${...}` interpolations inside backtick
# template literals in `rhs`.
#
# Returns an Array of chain strings in order of appearance (empty when there
# are none). Every interpolation of every literal is inspected - not just the
# last one per literal - and text between two literals is ignored.
def template_chains(rhs)
  # Splitting on backticks leaves the literal bodies at the odd positions;
  # an unterminated literal still contributes its (final) body.
  literal_bodies = rhs.split('`', -1).each_slice(2).filter_map { |_outside, body| body }

  literal_bodies.flat_map do |body|
    body.scan(/\$\{([^}]+)\}/).filter_map { |(expr)| chain_full(expr.strip) }
  end
end
79
+
80
+ # Group RHS strings by likely "system" — the root identifier of a member chain.
81
+ # Returns: { "token" => { count: N, distinct_keys: [...], sample_rhs: [...] }, ... }
82
# Group dropped RHS expressions by the root identifier of the member chains
# they reference.
#
# Returns a Hash keyed by root; each value is
#   { count: Integer, keys: { chain => occurrences }, samples: [rhs, ...] }
# with at most five sample RHS strings per root. Both the chain at the start
# of an RHS and chains found inside its template literals are recorded.
def cluster_by_root(drops)
  clusters = Hash.new { |h, k| h[k] = { count: 0, keys: Hash.new(0), samples: [] } }

  record = lambda do |root, chain, rhs|
    bucket = clusters[root]
    bucket[:count] += 1
    bucket[:keys][chain] += 1
    bucket[:samples] << rhs if bucket[:samples].size < 5
  end

  drops.each do |drop|
    rhs = drop[:rhs]

    # Plain chain at the start of the RHS.
    leading_root = chain_root(rhs)
    record.call(leading_root, chain_full(rhs), rhs) if leading_root

    # Chains inside template literals (e.g. `1px solid ${token.colorBorder}`).
    template_chains(rhs).each do |chain|
      record.call(chain.split('.').first, chain, rhs)
    end
  end

  clusters
end
107
+
108
# Propose a `match:` regex source for the chains of one root.
#
# root   - root identifier (regex-escaped in the result).
# chains - Hash of chain string => count; only the keys are used.
#
# The most common chain depth (number of dot-separated segments) decides the
# shape: for depths 2-4 the last segment is captured and the ones in between
# are matched with `\w+`; for any other depth the whole tail is captured.
def suggest_regex(root, chains)
  segment_counts = chains.keys.map { |chain| chain.split('.').size }
  common_depth = segment_counts.tally.max_by { |_, freq| freq }.first
  prefix = Regexp.escape(root)

  return "#{prefix}((?:\\.\\w+)+)" unless (2..4).cover?(common_depth)

  middle = '\\.\\w+' * (common_depth - 2)
  "#{prefix}#{middle}\\.(\\w+)"
end
120
+
121
+ # --- output ------------------------------------------------------------------
122
+
123
# Print the human-readable report.
#
# drops     - Array of drop hashes from `gather`.
# others    - Hash of leading phrase => count from `gather`.
# clusters  - Hash from `cluster_by_root`.
# top       - maximum number of entries per section.
# all_todos - when true, also list the non-drop TODO categories.
#
# Sections: drop totals by kind, most frequent RHS values (truncated to 90
# characters), member-chain clusters with a suggested regex and up to eight
# keys each, and optionally the other TODO categories.
def emit_text(drops, others, clusters, top:, all_todos:)
  most_frequent = ->(counts) { counts.sort_by { |_, n| -n } }

  puts "=== Dropped expressions ==="
  puts " total: #{drops.size}"
  drops.map { |d| d[:kind] }.tally.each do |kind, n|
    puts " #{kind}: #{n}"
  end
  puts

  puts "=== Top #{top} dropped RHS values (verbatim) ==="
  most_frequent.call(drops.map { |d| d[:rhs] }.tally).first(top).each do |rhs, n|
    shown = rhs.length > 90 ? rhs[0, 87] + '...' : rhs
    puts " #{n.to_s.rjust(5)} #{shown}"
  end
  puts

  puts "=== Repeating member chains (likely design-system / lookup tables) ==="
  if clusters.empty?
    puts " (none)"
  else
    ranked = clusters.sort_by { |_, info| -info[:count] }
    ranked.first(top).each do |root, info|
      puts ""
      puts " root: #{root} total refs: #{info[:count]} distinct keys: #{info[:keys].size}"
      puts " suggested match regex: '#{suggest_regex(root, info[:keys])}'"
      puts " top keys:"
      most_frequent.call(info[:keys]).first(8).each do |key, n|
        puts " #{n.to_s.rjust(4)} #{key}"
      end
    end
  end
  puts

  return unless all_todos

  puts "=== Other TODO categories (count by leading phrase) ==="
  most_frequent.call(others).first(top).each do |head, n|
    puts " #{n.to_s.rjust(5)} #{head}"
  end
end
160
+
161
# Print the report as pretty-printed JSON.
#
# The document has `drops` (total, by_kind, the 50 most frequent RHS values)
# and `chains` (per root: count, distinct_keys, suggested_match, the 20 most
# frequent keys), plus `other_todos` (50 most frequent) when `all_todos` is
# true. Roots are ordered by descending reference count.
def emit_json(drops, others, clusters, all_todos:)
  most_frequent = ->(counts, limit) { counts.sort_by { |_, n| -n }.first(limit).to_h }

  drop_report = {
    total: drops.size,
    by_kind: drops.map { |d| d[:kind] }.tally,
    top_rhs: most_frequent.call(drops.map { |d| d[:rhs] }.tally, 50)
  }

  chain_report = clusters.sort_by { |_, info| -info[:count] }.to_h do |root, info|
    [root, {
      count: info[:count],
      distinct_keys: info[:keys].size,
      suggested_match: suggest_regex(root, info[:keys]),
      top_keys: most_frequent.call(info[:keys], 20)
    }]
  end

  report = { drops: drop_report, chains: chain_report }
  report[:other_todos] = most_frequent.call(others, 50) if all_todos
  puts JSON.pretty_generate(report)
end
180
+
181
+ # --- CLI ---------------------------------------------------------------------
182
+
183
# Command-line entry point: parse options, expand the file/directory
# arguments into a list of .rb files, gather and cluster their TODOs, then
# print the report as text or (with --json) as JSON.

opts = { top: 25, json: false, all_todos: false }
cli = OptionParser.new
cli.banner = "usage: discover_bailouts.rb [--top N] [--json] [--all-todos] <file_or_dir>..."
cli.on('--top N', Integer) { |value| opts[:top] = value }
cli.on('--json') { opts[:json] = true }
cli.on('--all-todos') { opts[:all_todos] = true }
cli.parse!(ARGV)

abort "no input files" if ARGV.empty?

# Directories contribute every .rb file beneath them; plain files are taken
# as given; anything else is reported and skipped.
paths = []
ARGV.each do |arg|
  if File.directory?(arg)
    paths.concat(Dir.glob(File.join(arg, '**', '*.rb')))
  elsif File.file?(arg)
    paths << arg
  else
    warn "skip: #{arg} not found"
  end
end

drops, others = gather(paths)
clusters = cluster_by_root(drops)

if opts[:json]
  emit_json(drops, others, clusters, all_todos: opts[:all_todos])
else
  emit_text(drops, others, clusters, top: opts[:top], all_todos: opts[:all_todos])
end