jsx_rosetta 0.3.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,9 @@ module JsxRosetta
18
18
  def emit(component)
19
19
  prop_names = component.props.map(&:name)
20
20
  prop_names << component.rest_prop_name if component.rest_prop_name
21
- translator = ExpressionTranslator.new(prop_names: prop_names)
21
+ translator = ExpressionTranslator.new(
22
+ prop_names: prop_names, local_binding_names: component.local_binding_names
23
+ )
22
24
 
23
25
  @stimulus_identifier = component.stimulus_methods.any? ? stimulus_identifier(component) : nil
24
26
 
@@ -18,6 +18,10 @@ module JsxRosetta
18
18
  # to the bare snake_case identifier.
19
19
  # * Names in `prop_names` translate to a `@snake_case` instance
20
20
  # variable.
21
+ # * Names in `local_binding_names` (consts/destructures captured at
22
+ # lowering time but not modeled in IR) translate to a `nil`
23
+ # placeholder with an inline `# TODO: local 'name'` marker — the
24
+ # file still loads, but the reviewer sees what to fill in.
21
25
  # * Anything else translates to the bare snake_case identifier and
22
26
  # is recorded as unresolved.
23
27
  #
@@ -25,18 +29,58 @@ module JsxRosetta
25
29
  # entry shadows lower entries.
26
30
  class ExpressionTranslator
27
31
  IDENTIFIER = /\A[a-zA-Z_$][a-zA-Z_$0-9]*\z/
28
- STRING_LITERAL = /\A(['"])(.*)\1\z/m
32
+ # Tighter than `\A(['"])(.*)\1\z/m` — the greedy `.*` previously
33
+ # matched expressions like `"X" ? "Y" : "Z"` as a single quoted
34
+ # string. Now the body excludes unescaped quotes of the same kind,
35
+ # so only true string literals match.
36
+ STRING_LITERAL = /\A(?:"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')\z/m
29
37
  NUMBER_LITERAL = /\A-?\d+(\.\d+)?\z/
30
38
  TEMPLATE_LITERAL = /\A`(.*)`\z/m
31
- TEMPLATE_INTERPOLATION = /\$\{([a-zA-Z_$][a-zA-Z_$0-9]*(?:\.[a-zA-Z_$][a-zA-Z_$0-9]*)*)\}/
32
- MEMBER_CHAIN = /\A(?<root>[a-zA-Z_$][a-zA-Z_$0-9]*)(?<rest>(?:\.[a-zA-Z_$][a-zA-Z_$0-9]*)+)\z/
39
+ TEMPLATE_INTERPOLATION = /\$\{([a-zA-Z_$][a-zA-Z_$0-9]*(?:\??\.[a-zA-Z_$][a-zA-Z_$0-9]*)*)\}/
40
+ MEMBER_CHAIN = /\A(?<root>[a-zA-Z_$][a-zA-Z_$0-9]*)(?<rest>(?:\??\.[a-zA-Z_$][a-zA-Z_$0-9]*)+)\z/
41
+ MEMBER_SEGMENT = /(\??\.)([a-zA-Z_$][a-zA-Z_$0-9]*)/
42
+ # Class-component access patterns. `this.props.X` is the same shape as
43
+ # a function-component prop reference, so it maps to `@x` plus any
44
+ # trailing member chain. `this.state.X` has no Ruby analog (state
45
+ # without a backing data source) so we surface a `nil` placeholder
46
+ # with a TODO marker — the file still loads.
47
+ THIS_PROPS_CHAIN = /\Athis\.props\.(?<rest>[a-zA-Z_$][a-zA-Z_$0-9]*(?:\??\.[a-zA-Z_$][a-zA-Z_$0-9]*)*)\z/
48
+ THIS_STATE_CHAIN = /\Athis\.state\.(?<rest>[a-zA-Z_$][a-zA-Z_$0-9]*(?:\??\.[a-zA-Z_$][a-zA-Z_$0-9]*)*)\z/
33
49
  UNARY = /\A(?<op>!+|-|\+)(?<operand>.+)\z/m
34
50
  SIMPLE_LITERALS = { "null" => "nil", "undefined" => "nil", "true" => "true", "false" => "false" }.freeze
35
51
 
52
+ # Binary operators we translate, grouped by precedence (lowest first).
53
+ # We split on the *lowest*-precedence top-level operator and recurse
54
+ # on each side, mirroring how a recursive-descent parser would treat
55
+ # the source: `a > 0 && b < 5` splits on `&&` first, then each side
56
+ # splits on its relational operator.
57
+ #
58
+ # Arithmetic operators (`+`, `-`, `*`, `/`, `%`) aren't included —
59
+ # `-x` and `+x` are unary at the start of an operand, and string-
60
+ # scanning can't disambiguate without operator-state tracking that
61
+ # mirrors a parser. Real JSX conditions rarely need arithmetic in
62
+ # tests; comparison + logical covers the bulk of them.
63
+ BINARY_PRECEDENCE = [
64
+ %w[|| ??],
65
+ %w[&&],
66
+ %w[=== !== == !=],
67
+ %w[<= >= < >]
68
+ ].freeze
69
+
70
+ QUOTE_CHARS = ['"', "'", "`"].freeze
71
+ OPEN_BRACKETS = ["(", "[", "{"].freeze
72
+ CLOSE_BRACKETS = [")", "]", "}"].freeze
73
+
36
74
  Result = Data.define(:ruby, :unresolved_identifiers)
37
75
 
38
- def initialize(prop_names:)
76
+ # prop_aliases maps a local-binding name (the alias) to the
77
+ # underlying prop name. `"data-testid": dataTestId` records
78
+ # `{ "dataTestId" => "data-testid" }` so the use site of
79
+ # `dataTestId` resolves to the prop's `@data_testid` ivar.
80
+ def initialize(prop_names:, local_binding_names: [], prop_aliases: {})
39
81
  @prop_names = prop_names.to_set
82
+ @local_binding_names = local_binding_names.to_set
83
+ @prop_aliases = prop_aliases.dup
40
84
  @local_stack = []
41
85
  end
42
86
 
@@ -58,43 +102,281 @@ module JsxRosetta
58
102
  private
59
103
 
60
104
  def translate_ruby(source, unresolved)
61
- if SIMPLE_LITERALS.key?(source) then SIMPLE_LITERALS[source]
62
- elsif source.match?(NUMBER_LITERAL) || source.match?(STRING_LITERAL) then source
63
- elsif source.match?(IDENTIFIER) then translate_identifier(source, unresolved)
64
- elsif (m = MEMBER_CHAIN.match(source)) then translate_member_chain(m[:root], m[:rest], unresolved)
105
+ source = unwrap_outer_parens(source.strip)
106
+ translate_simple_form(source, unresolved) || translate_compound_form(source, unresolved)
107
+ end
108
+
109
# Translate the non-recursive shapes: simple literals, numbers, string
# literals, `this.props.X` / `this.state.X` accessors, and bare
# identifiers. The checks run in that order and the first hit wins.
#
# Returns the translated Ruby source, or nil when none of the simple
# shapes match (the caller then tries the compound-form dispatcher).
def translate_simple_form(source, unresolved)
  if SIMPLE_LITERALS.key?(source)
    SIMPLE_LITERALS[source]
  elsif NUMBER_LITERAL.match?(source)
    source
  elsif STRING_LITERAL.match?(source)
    reemit_string_literal(source)
  elsif (props = THIS_PROPS_CHAIN.match(source))
    translate_this_props_chain(props[:rest])
  elsif (state = THIS_STATE_CHAIN.match(source))
    translate_this_state_chain(state[:rest])
  elsif IDENTIFIER.match?(source)
    translate_identifier(source, unresolved)
  end
end
122
+
123
# Convert a JS string-literal source (`"foo"` or `'foo'`) into Ruby
# source that denotes the same string.
#
# * A body with no backslashes, no single quotes, and no control
#   characters is emitted single-quoted. This is the rubocop-preferred
#   form, and it also keeps a JS-literal `#{` inert: passing `"a#{b}"`
#   through verbatim would turn it into live Ruby interpolation.
# * A single-quoted body whose only escapes are `\\` and `\'` is kept
#   verbatim, since Ruby single-quoted strings read those identically.
# * A single-quoted body with any other escape (`\n`, `\t`, ...) is
#   re-emitted double-quoted, because Ruby single-quoted strings would
#   treat the escape as two literal characters.
# * Everything else stays double-quoted, with any unescaped `#` that
#   would start Ruby interpolation (`#{`, `#$`, `#@`) escaped as `\#`.
#
# `source` is expected to have already matched STRING_LITERAL.
# Returns the Ruby literal as a String.
def reemit_string_literal(source)
  quote = source[0]
  inner = source[1...-1]

  # No escapes at all: the single-quoted Ruby form is byte-for-byte literal.
  unless inner.include?("\\") || inner.include?("'") || inner.match?(/[\x00-\x1f\x7f]/)
    return "'#{inner}'"
  end

  if quote == "'"
    # Strip the escapes Ruby single quotes understand; any backslash left
    # over belongs to an escape that only a double-quoted string honours.
    return source unless inner.gsub(/\\[\\']/, "").include?("\\")

    # Re-quote for a double-quoted literal: `\'` no longer needs its
    # backslash, and bare `"` now does. Escape pairs are consumed as one
    # token so an escaped backslash is never misread.
    inner = inner.gsub(/\\.|"/m) do |token|
      case token
      when "\\'" then "'"
      when '"' then '\\"'
      else token
      end
    end
  end

  # Defuse Ruby interpolation markers that were plain text in the JS source.
  neutralized = inner.gsub(/\\.|#(?=[{$@])/m) { |token| token == "#" ? "\\#" : token }
  %("#{neutralized}")
end
143
+
144
+ # Handle the recursive / multi-segment shapes: member chains,
145
+ # template literals, unary operators, and the binary fallthrough.
146
+ def translate_compound_form(source, unresolved)
147
+ if (m = MEMBER_CHAIN.match(source)) then translate_member_chain(m[:root], m[:rest], unresolved)
65
148
  elsif (m = TEMPLATE_LITERAL.match(source)) then translate_template_literal(m[1], unresolved)
66
- elsif (m = UNARY.match(source))
67
- translate_unary(m[:op], m[:operand], unresolved)
149
+ elsif (m = UNARY.match(source)) then translate_unary(m[:op], m[:operand], unresolved)
150
+ else translate_binary(source, unresolved)
151
+ end
152
+ end
153
+
154
# Translate the part of a `this.props.<rest>` access that follows
# `this.props.`. The leading segment becomes an `@snake_case` instance
# variable; every further segment is snake_cased and appended, with
# optional chaining (`?.`) rewritten to Ruby safe navigation (`&.`).
# Nothing is recorded as unresolved here.
def translate_this_props_chain(rest)
  prop_name = rest.split(/\??\./, 2).first
  trailing = rest[prop_name.length..]
  accessors = trailing.gsub(MEMBER_SEGMENT) do
    separator, member = ::Regexp.last_match(1), ::Regexp.last_match(2)
    navigator = separator == "?." ? "&." : "."
    "#{navigator}#{AST::Inflector.underscore(member)}"
  end
  "@#{AST::Inflector.underscore(prop_name)}#{accessors}"
end
169
+
170
# `this.state.X` is not translated: whatever follows `this.state.` is
# ignored and the Ruby source `nil` is emitted as a placeholder, so the
# generated file still loads and a reviewer can wire up real state.
def translate_this_state_chain(_rest) = "nil"
70
177
 
71
178
  def translate_unary(operator, operand, unresolved)
72
- inner = translate_ruby(operand.strip, unresolved)
179
+ operand_clean = operand.strip
180
+ # `!fieldValue` where `fieldValue` is a known-but-unresolved local
181
+ # would translate to `!nil` (always true) and silently flip the
182
+ # condition's truthiness. Bail so the caller emits a TODO with
183
+ # the verbatim source.
184
+ return nil if unresolvable_local?(operand_clean)
185
+
186
+ inner = translate_ruby(operand_clean, unresolved)
73
187
  inner && "#{operator}#{inner}"
74
188
  end
75
189
 
190
# Try each precedence tier of BINARY_PRECEDENCE in order (lowest
# precedence first). For a tier with a top-level operator in `source`,
# split at the operator reported by find_top_level_operator and
# translate both sides. The first tier that yields a translation wins;
# a tier whose split fails to translate is skipped and the next tier is
# tried. Returns nil when no tier produces a result.
def translate_binary(source, unresolved)
  BINARY_PRECEDENCE.lazy.filter_map do |tier|
    split_point = find_top_level_operator(source, tier)
    split_point && translate_binary_at(source, split_point, unresolved)
  end.first
end
205
+
206
# Translate `source` as a binary expression split at `match`, a
# `[start_index, end_index, operator]` triple.
#
# Returns nil when either operand is empty, is a known-but-unmodelled
# local (which would otherwise become `nil` inside a comparison), or
# fails to translate. Otherwise returns the Ruby expression.
def translate_binary_at(source, match, unresolved)
  op_start, op_end, js_op = match
  left = source[0...op_start].strip
  right = source[op_end..].strip
  return nil if left.empty? || right.empty?
  return nil if [left, right].any? { |operand| unresolvable_local?(operand) }

  # Both sides are always translated, even if the first one fails.
  left_ruby = translate_ruby(left, unresolved)
  right_ruby = translate_ruby(right, unresolved)
  return nil unless left_ruby && right_ruby

  # `x === null || x === undefined` yields the same Ruby on both sides;
  # a logical operator applied to identical operands collapses to one.
  logical = ["||", "&&"].include?(js_op)
  return left_ruby if logical && left_ruby == right_ruby

  nil_check = rewrite_nil_comparison(left_ruby, right_ruby, js_op)
  return nil_check if nil_check

  [left_ruby, ruby_binary_operator(js_op), right_ruby].join(" ")
end
225
+
226
# Rewrite an equality comparison against `nil` into the `.nil?` form:
# `x == nil` becomes `x.nil?`, and the negated operators (`!==`, `!=`)
# become `!x.nil?`. Applies only when the operator is one of
# `=== !== == !=` and exactly one operand is the literal text "nil";
# in every other case returns nil so the caller emits the plain
# comparison.
def rewrite_nil_comparison(lhs_ruby, rhs_ruby, js_op)
  return unless %w[=== !== == !=].include?(js_op)
  return unless [lhs_ruby, rhs_ruby].count("nil") == 1

  subject = lhs_ruby == "nil" ? rhs_ruby : lhs_ruby
  predicate = "#{subject}.nil?"
  js_op.start_with?("!") ? "!#{predicate}" : predicate
end
240
+
241
# Map a JS binary operator to the Ruby operator emitted for it:
# `===` to `==`, `!==` to `!=`, `??` to `||`. Any other operator is
# returned unchanged.
def ruby_binary_operator(js_op)
  { "===" => "==", "!==" => "!=", "??" => "||" }.fetch(js_op, js_op)
end
249
+
250
# Find the rightmost occurrence in `source` of any operator from
# `operators` that sits at bracket depth zero and outside a quoted
# span. Operators are tried longest-first at each position, so `>=` is
# preferred over `>` and `===` over `==`.
#
# Returns `[start_index, end_index, operator]`, or nil when no operator
# qualifies.
def find_top_level_operator(source, operators)
  longest_first = operators.sort_by { |op| -op.length }
  cursor = { depth: 0, quote: nil, i: 0, last_match: nil }
  until cursor[:i] >= source.length
    # Each helper advances cursor[:i]; later matches overwrite earlier
    # ones, so the final value of :last_match is the rightmost hit.
    if cursor[:quote].nil?
      scan_one_position(source, longest_first, cursor)
    else
      advance_quoted(source, cursor)
    end
  end
  cursor[:last_match]
end
267
+
268
# Advance the scan cursor by one step while inside a quoted span.
# A backslash skips itself and the character it escapes; a character
# equal to the opening quote (`state[:quote]`) closes the span.
# Mutates `state[:i]` and, when the span closes, `state[:quote]`.
def advance_quoted(source, state)
  char = source[state[:i]]
  escape = char == "\\"
  state[:quote] = nil if !escape && char == state[:quote]
  state[:i] += escape ? 2 : 1
end
277
+
278
# Process the character at `state[:i]` while outside any quoted span.
#
# * A quote character opens a quoted span and consumes one character.
# * An opening or closing bracket adjusts `state[:depth]`.
# * At depth zero, the first operator in `sorted_ops` that matches at
#   this position is recorded in `state[:last_match]` as
#   `[start, end, operator]` and consumed whole.
# * Otherwise one character is consumed.
def scan_one_position(source, sorted_ops, state)
  pos = state[:i]
  char = source[pos]
  if QUOTE_CHARS.include?(char)
    state[:quote] = char
    state[:i] = pos + 1
    return
  end

  if OPEN_BRACKETS.include?(char)
    state[:depth] += 1
  elsif CLOSE_BRACKETS.include?(char)
    state[:depth] -= 1
  end

  operator = nil
  operator = sorted_ops.find { |op| source[pos, op.length] == op } if state[:depth].zero?
  if operator
    state[:last_match] = [pos, pos + operator.length, operator]
    state[:i] = pos + operator.length
  else
    state[:i] = pos + 1
  end
end
296
+
297
# Remove one layer of parentheses when that pair encloses the entire
# source (`(a > b)` becomes `a > b`), stripping whitespace from the
# unwrapped body. When the opening paren closes before the end, as in
# `(a > b) && c`, the source is returned untouched.
def unwrap_outer_parens(source)
  enclosed = source.start_with?("(") && source.end_with?(")") && outer_parens_balanced?(source)
  enclosed ? source[1...-1].strip : source
end
307
+
308
# Report whether the `(` that opens `source` is closed by the final
# character, i.e. the parenthesis depth never returns to zero before
# the end. Parentheses inside quoted spans (`"`, `'`, `` ` ``) are
# ignored.
#
# Escapes inside a quoted span are tracked as pairs: a backslash
# consumes the following character. Looking only at the previous
# character would misread the closing quote in `"a\\"` (an escaped
# backslash followed by the real terminator) as escaped and leave the
# span open for the rest of the source.
#
# Returns true or false.
def outer_parens_balanced?(source)
  depth = 0
  quote = nil
  escaped = false
  final_index = source.length - 1
  source.each_char.with_index do |char, index|
    if quote
      if escaped
        escaped = false
      elsif char == "\\"
        escaped = true
      elsif char == quote
        quote = nil
      end
      next
    end

    if QUOTE_CHARS.include?(char)
      quote = char
    elsif char == "("
      depth += 1
    elsif char == ")"
      depth -= 1
      # Depth hit zero early: the leading paren closed mid-source.
      return false if depth.zero? && index != final_index
    end
  end
  true
end
325
+
76
326
  def in_local_scope?(name)
77
327
  @local_stack.any? { |scope| scope.include?(name) }
78
328
  end
79
329
 
80
- def translate_identifier(name, unresolved)
330
+ def translate_identifier(name, unresolved, member_chain_root: false)
81
331
  snake = AST::Inflector.underscore(name)
82
332
  if in_local_scope?(name)
83
333
  snake
334
+ elsif @prop_aliases.key?(name)
335
+ "@#{AST::Inflector.underscore(@prop_aliases[name])}"
84
336
  elsif @prop_names.include?(name)
85
337
  "@#{snake}"
338
+ elsif @local_binding_names.include?(name)
339
+ # We know this binding exists locally (destructure, hook tuple)
340
+ # but can't model its value. As a leaf identifier, return `nil`
341
+ # so the file loads (a bare snake_case ref would NameError).
342
+ # As a member-chain root, `nil.member` would NoMethodError and
343
+ # the bare-snake fallback would NameError — both crash at render
344
+ # time. Bail so the whole expression fails translation and the
345
+ # caller emits a TODO comment with the verbatim source.
346
+ return nil if member_chain_root
347
+
348
+ "nil"
86
349
  else
87
350
  unresolved << name
88
351
  snake
89
352
  end
90
353
  end
91
354
 
355
# True when `source` is a bare identifier that is a known local binding
# but resolves to nothing usable: it is listed in @local_binding_names
# and is not shadowed by a local scope, a prop alias, or a prop name.
#
# A leaf use of such a name translates to `nil`, which is safe in value
# positions; compound contexts (unary, binary, member chain) call this
# to bail out so the caller can emit a TODO instead of silently
# changing semantics.
#
# The exclusions follow the order translate_identifier resolves names
# in. In particular, a name present in @prop_aliases translates to the
# aliased prop's instance variable, so it must not be reported as
# unresolvable even when it is also a local binding.
def unresolvable_local?(source)
  return false unless source.match?(IDENTIFIER)
  return false if in_local_scope?(source)
  return false if @prop_aliases.key?(source) || @prop_names.include?(source)

  @local_binding_names.include?(source)
end
368
+
92
369
  def translate_member_chain(root, rest, unresolved)
93
- translated_root = translate_identifier(root, unresolved)
370
+ translated_root = translate_identifier(root, unresolved, member_chain_root: true)
371
+ return nil unless translated_root
372
+
94
373
  # Underscore each chain segment so JS camelCase identifiers map to
95
- # Ruby snake_case (`post.coverImage` → `post.cover_image`).
96
- ruby_rest = rest.gsub(/\.([a-zA-Z_$][a-zA-Z_$0-9]*)/) do
97
- ".#{AST::Inflector.underscore(::Regexp.last_match(1))}"
374
+ # Ruby snake_case (`post.coverImage` → `post.cover_image`). Map
375
+ # optional-chaining `?.` to Ruby's safe-nav `&.` so a nil receiver
376
+ # short-circuits to nil instead of raising NoMethodError.
377
+ ruby_rest = rest.gsub(MEMBER_SEGMENT) do
378
+ op = ::Regexp.last_match(1) == "?." ? "&." : "."
379
+ "#{op}#{AST::Inflector.underscore(::Regexp.last_match(2))}"
98
380
  end
99
381
  "#{translated_root}#{ruby_rest}"
100
382
  end
@@ -103,16 +385,47 @@ module JsxRosetta
103
385
  return nil if content.include?("\\`")
104
386
  return nil if content.scan("${").size != content.scan(TEMPLATE_INTERPOLATION).size
105
387
 
106
- ruby_content = content.gsub(TEMPLATE_INTERPOLATION) do |_match|
107
- captured = ::Regexp.last_match(1)
108
- translated = if (m = MEMBER_CHAIN.match(captured))
109
- translate_member_chain(m[:root], m[:rest], unresolved)
110
- else
111
- translate_identifier(captured, unresolved)
112
- end
113
- "\#{#{translated}}"
388
+ parts = []
389
+ last_pos = 0
390
+ content.to_enum(:scan, TEMPLATE_INTERPOLATION).each do
391
+ match = ::Regexp.last_match
392
+ literal = content[last_pos...match.begin(0)]
393
+ parts << escape_ruby_string_literal(literal) unless literal.empty?
394
+ translated = translate_template_interpolation(match[1], unresolved)
395
+ # An interpolation segment that itself fails translation (nil)
396
+ # or resolves to literal `nil` (known-but-unresolvable local)
397
+ # would emit `\#{}` / `\#{nil}` — empty or semantically empty
398
+ # interpolation that rubocop flags and that loses the source's
399
+ # intent. Bail so the whole template literal falls through to
400
+ # the caller's TODO path with the verbatim JS source visible.
401
+ return nil if translated.nil? || translated == "nil"
402
+
403
+ parts << "\#{#{translated}}"
404
+ last_pos = match.end(0)
114
405
  end
115
- %("#{ruby_content}")
406
+ trailing = content[last_pos..]
407
+ parts << escape_ruby_string_literal(trailing) unless trailing.empty?
408
+ %("#{parts.join}")
409
+ end
410
+
411
# Translate the expression captured from one `${...}` template
# interpolation. A dotted member chain goes through
# translate_member_chain; anything else is treated as a bare
# identifier. Returns whatever the chosen translator returns.
def translate_template_interpolation(captured, unresolved)
  chain = MEMBER_CHAIN.match(captured)
  return translate_identifier(captured, unresolved) unless chain

  translate_member_chain(chain[:root], chain[:rest], unresolved)
end
421
+
422
# Escape the literal portion of a translated template literal so it can
# sit inside a Ruby double-quoted string without changing its meaning:
#
# * every backslash and double quote gets a leading backslash, so the
#   text cannot terminate the surrounding string;
# * a `#` that would start Ruby interpolation (`#{`, `#$`, `#@`) is
#   escaped as `\#`. A `#` at the very end of the text is left alone,
#   so a literal `#` directly before a real interpolation still works.
#
# Newlines stay literal, which Ruby double-quoted strings allow.
#
# The block form of gsub is deliberate. With a replacement *string*,
# gsub collapses `\\` back to one backslash, so
# `gsub("\\", "\\\\")` leaves the text unchanged.
#
# NOTE(review): backslashes are treated as raw text, so a JS escape
# such as `\n` inside the template comes out as a literal backslash
# followed by `n` -- confirm that is the intended rendering.
def escape_ruby_string_literal(text)
  text.gsub(/[\\"]|#(?=[{$@])/) { |char| "\\#{char}" }
end
117
430
  end
118
431
  end