jsx_rosetta 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,19 +29,58 @@ module JsxRosetta
29
29
  # entry shadows lower entries.
30
30
  class ExpressionTranslator
31
31
  IDENTIFIER = /\A[a-zA-Z_$][a-zA-Z_$0-9]*\z/
32
- STRING_LITERAL = /\A(['"])(.*)\1\z/m
32
+ # Tighter than `\A(['"])(.*)\1\z/m` — the greedy `.*` previously
33
+ # matched expressions like `"X" ? "Y" : "Z"` as a single quoted
34
+ # string. Now the body excludes unescaped quotes of the same kind,
35
+ # so only true string literals match.
36
+ STRING_LITERAL = /\A(?:"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')\z/m
33
37
  NUMBER_LITERAL = /\A-?\d+(\.\d+)?\z/
34
38
  TEMPLATE_LITERAL = /\A`(.*)`\z/m
35
- TEMPLATE_INTERPOLATION = /\$\{([a-zA-Z_$][a-zA-Z_$0-9]*(?:\.[a-zA-Z_$][a-zA-Z_$0-9]*)*)\}/
36
- MEMBER_CHAIN = /\A(?<root>[a-zA-Z_$][a-zA-Z_$0-9]*)(?<rest>(?:\.[a-zA-Z_$][a-zA-Z_$0-9]*)+)\z/
39
+ TEMPLATE_INTERPOLATION = /\$\{([a-zA-Z_$][a-zA-Z_$0-9]*(?:\??\.[a-zA-Z_$][a-zA-Z_$0-9]*)*)\}/
40
+ MEMBER_CHAIN = /\A(?<root>[a-zA-Z_$][a-zA-Z_$0-9]*)(?<rest>(?:\??\.[a-zA-Z_$][a-zA-Z_$0-9]*)+)\z/
41
+ MEMBER_SEGMENT = /(\??\.)([a-zA-Z_$][a-zA-Z_$0-9]*)/
42
+ # Class-component access patterns. `this.props.X` is the same shape as
43
+ # a function-component prop reference, so it maps to `@x` plus any
44
+ # trailing member chain. `this.state.X` has no Ruby analog (state
45
+ # without a backing data source) so we surface a `nil` placeholder
46
+ # with a TODO marker — the file still loads.
47
+ THIS_PROPS_CHAIN = /\Athis\.props\.(?<rest>[a-zA-Z_$][a-zA-Z_$0-9]*(?:\??\.[a-zA-Z_$][a-zA-Z_$0-9]*)*)\z/
48
+ THIS_STATE_CHAIN = /\Athis\.state\.(?<rest>[a-zA-Z_$][a-zA-Z_$0-9]*(?:\??\.[a-zA-Z_$][a-zA-Z_$0-9]*)*)\z/
37
49
  UNARY = /\A(?<op>!+|-|\+)(?<operand>.+)\z/m
38
50
  SIMPLE_LITERALS = { "null" => "nil", "undefined" => "nil", "true" => "true", "false" => "false" }.freeze
39
51
 
52
+ # Binary operators we translate, grouped by precedence (lowest first).
53
+ # We split on the *lowest*-precedence top-level operator and recurse
54
+ # on each side, mirroring how a recursive-descent parser would treat
55
+ # the source: `a > 0 && b < 5` splits on `&&` first, then each side
56
+ # splits on its relational operator.
57
+ #
58
+ # Arithmetic operators (`+`, `-`, `*`, `/`, `%`) aren't included —
59
+ # `-x` and `+x` are unary at the start of an operand, and string-
60
+ # scanning can't disambiguate without operator-state tracking that
61
+ # mirrors a parser. Real JSX conditions rarely need arithmetic in
62
+ # tests; comparison + logical covers the bulk of them.
63
+ BINARY_PRECEDENCE = [
64
+ %w[|| ??],
65
+ %w[&&],
66
+ %w[=== !== == !=],
67
+ %w[<= >= < >]
68
+ ].freeze
69
+
70
+ QUOTE_CHARS = ['"', "'", "`"].freeze
71
+ OPEN_BRACKETS = ["(", "[", "{"].freeze
72
+ CLOSE_BRACKETS = [")", "]", "}"].freeze
73
+
40
74
  Result = Data.define(:ruby, :unresolved_identifiers)
41
75
 
42
- def initialize(prop_names:, local_binding_names: [])
76
+ # prop_aliases maps a local-binding name (the alias) to the
77
+ # underlying prop name. `"data-testid": dataTestId` records
78
+ # `{ "dataTestId" => "data-testid" }` so the use site of
79
+ # `dataTestId` resolves to the prop's `@data_testid` ivar.
80
+ def initialize(prop_names:, local_binding_names: [], prop_aliases: {})
43
81
  @prop_names = prop_names.to_set
44
82
  @local_binding_names = local_binding_names.to_set
83
+ @prop_aliases = prop_aliases.dup
45
84
  @local_stack = []
46
85
  end
47
86
 
@@ -63,21 +102,227 @@ module JsxRosetta
63
102
  private
64
103
 
65
104
  def translate_ruby(source, unresolved)
66
- if SIMPLE_LITERALS.key?(source) then SIMPLE_LITERALS[source]
67
- elsif source.match?(NUMBER_LITERAL) || source.match?(STRING_LITERAL) then source
68
- elsif source.match?(IDENTIFIER) then translate_identifier(source, unresolved)
69
- elsif (m = MEMBER_CHAIN.match(source)) then translate_member_chain(m[:root], m[:rest], unresolved)
105
+ source = unwrap_outer_parens(source.strip)
106
+ translate_simple_form(source, unresolved) || translate_compound_form(source, unresolved)
107
+ end
108
+
109
+ # Handle the shapes that don't recurse: literals, identifiers, and
110
+ # the class-component `this.props.X` / `this.state.X` accessors.
111
+ # Returns nil when the source needs the compound-form dispatcher.
112
+ def translate_simple_form(source, unresolved)
113
+ return SIMPLE_LITERALS[source] if SIMPLE_LITERALS.key?(source)
114
+ return source if source.match?(NUMBER_LITERAL)
115
+ return reemit_string_literal(source) if source.match?(STRING_LITERAL)
116
+ return translate_this_props_chain(::Regexp.last_match(:rest)) if THIS_PROPS_CHAIN.match(source)
117
+ return translate_this_state_chain(::Regexp.last_match(:rest)) if THIS_STATE_CHAIN.match(source)
118
+ return translate_identifier(source, unresolved) if source.match?(IDENTIFIER)
119
+
120
+ nil
121
+ end
122
+
123
+ # Convert a JS string-literal source (`"foo"` or `'foo'`) into its
124
+ # rubocop-preferred Ruby form: single-quoted when the body has no
125
+ # escapes, embedded quotes of the matching kind, or non-printable
126
+ # characters. Keeps the verbatim source otherwise — JS and Ruby
127
+ # double-quoted escape sequences mostly overlap, so passing through
128
+ # preserves semantics; rewriting `\n` from JS-single-quoted to Ruby
129
+ # would corrupt the literal.
130
+ def reemit_string_literal(source)
131
+ quote = source[0]
132
+ inner = source[1...-1]
133
+ # Backslashes, embedded single quotes, and interpolation markers
134
+ # all complicate the rewrite — keep the original literal as-is.
135
+ # Non-ASCII (emojis, unicode) is fine in single-quoted Ruby
136
+ # strings, so we don't bail for that.
137
+ return source if inner.include?("\\") || inner.include?("'") ||
138
+ inner.include?("\#{") || inner.match?(/[\x00-\x1f\x7f]/)
139
+ return source if quote == "'"
140
+
141
+ "'#{inner}'"
142
+ end
143
+
144
+ # Handle the recursive / multi-segment shapes: member chains,
145
+ # template literals, unary operators, and the binary fallthrough.
146
+ def translate_compound_form(source, unresolved)
147
+ if (m = MEMBER_CHAIN.match(source)) then translate_member_chain(m[:root], m[:rest], unresolved)
70
148
  elsif (m = TEMPLATE_LITERAL.match(source)) then translate_template_literal(m[1], unresolved)
71
- elsif (m = UNARY.match(source))
72
- translate_unary(m[:op], m[:operand], unresolved)
149
+ elsif (m = UNARY.match(source)) then translate_unary(m[:op], m[:operand], unresolved)
150
+ else translate_binary(source, unresolved)
151
+ end
152
+ end
153
+
154
+ # `this.props.X` → `@x` (plus any trailing member chain segments,
155
+ # snake_cased and with `?.` → `&.`). The first segment IS the prop;
156
+ # subsequent segments are accessed off the prop's value. We don't
157
+ # add to `unresolved` here — class-component prop synthesis at
158
+ # lowering time has already registered the name.
159
+ def translate_this_props_chain(rest)
160
+ first = rest.split(/\??\./, 2).first
161
+ ivar = "@#{AST::Inflector.underscore(first)}"
162
+ remainder = rest[first.length..]
163
+ ruby_rest = remainder.gsub(MEMBER_SEGMENT) do
164
+ op = ::Regexp.last_match(1) == "?." ? "&." : "."
165
+ "#{op}#{AST::Inflector.underscore(::Regexp.last_match(2))}"
73
166
  end
167
+ "#{ivar}#{ruby_rest}"
168
+ end
169
+
170
+ # `this.state.X` has no direct Ruby analog — class-component state
171
+ # mutations don't map cleanly to a Phlex render. Emit `nil` so the
172
+ # file loads; reviewers wire up real state via controller-passed
173
+ # props or Stimulus values.
174
+ def translate_this_state_chain(_rest)
175
+ "nil"
74
176
  end
75
177
 
76
178
  def translate_unary(operator, operand, unresolved)
77
- inner = translate_ruby(operand.strip, unresolved)
179
+ operand_clean = operand.strip
180
+ # `!fieldValue` where `fieldValue` is a known-but-unresolved local
181
+ # would translate to `!nil` (always true) and silently flip the
182
+ # condition's truthiness. Bail so the caller emits a TODO with
183
+ # the verbatim source.
184
+ return nil if unresolvable_local?(operand_clean)
185
+
186
+ inner = translate_ruby(operand_clean, unresolved)
78
187
  inner && "#{operator}#{inner}"
79
188
  end
80
189
 
190
+ # Walk source left-to-right looking for a top-level binary operator
191
+ # at the lowest precedence level present. Split there and recurse on
192
+ # each side. When two operators of the same precedence appear (e.g.
193
+ # `a || b || c`), the rightmost is chosen — the recursion on `lhs`
194
+ # then keeps splitting, yielding left-associative grouping.
195
+ def translate_binary(source, unresolved)
196
+ BINARY_PRECEDENCE.each do |operators|
197
+ match = find_top_level_operator(source, operators)
198
+ next unless match
199
+
200
+ result = translate_binary_at(source, match, unresolved)
201
+ return result if result
202
+ end
203
+ nil
204
+ end
205
+
206
+ def translate_binary_at(source, match, unresolved)
207
+ start_idx, end_idx, js_op = match
208
+ lhs = source[0...start_idx].strip
209
+ rhs = source[end_idx..].strip
210
+ return nil if lhs.empty? || rhs.empty?
211
+ # `count > 0` where `count` is a known-but-unresolved local
212
+ # translates to `nil > 0` and NoMethodErrors at render time.
213
+ return nil if unresolvable_local?(lhs) || unresolvable_local?(rhs)
214
+
215
+ lhs_ruby = translate_ruby(lhs, unresolved)
216
+ rhs_ruby = translate_ruby(rhs, unresolved)
217
+ return nil unless lhs_ruby && rhs_ruby
218
+ # `value === null || value === undefined` both translate to
219
+ # `@value.nil?` — collapse idempotent duplication for `||` / `&&`.
220
+ return lhs_ruby if %w[|| &&].include?(js_op) && lhs_ruby == rhs_ruby
221
+
222
+ rewrite_nil_comparison(lhs_ruby, rhs_ruby, js_op) ||
223
+ "#{lhs_ruby} #{ruby_binary_operator(js_op)} #{rhs_ruby}"
224
+ end
225
+
226
+ # `x === null` / `x === undefined` → `x.nil?` (and `!==` → `!x.nil?`).
227
+ # JSX commonly compares values against `null`/`undefined`; emitting
228
+ # the literal `x == nil` form is valid Ruby but trips the
229
+ # Style/NilComparison cop. The `.nil?` form is idiomatic and reads
230
+ # better, so rewrite when either side is literally `nil`.
231
+ def rewrite_nil_comparison(lhs_ruby, rhs_ruby, js_op)
232
+ return nil unless %w[=== !== == !=].include?(js_op)
233
+
234
+ if rhs_ruby == "nil" && lhs_ruby != "nil"
235
+ js_op.start_with?("!") ? "!#{lhs_ruby}.nil?" : "#{lhs_ruby}.nil?"
236
+ elsif lhs_ruby == "nil" && rhs_ruby != "nil"
237
+ js_op.start_with?("!") ? "!#{rhs_ruby}.nil?" : "#{rhs_ruby}.nil?"
238
+ end
239
+ end
240
+
241
+ def ruby_binary_operator(js_op)
242
+ case js_op
243
+ when "===" then "=="
244
+ when "!==" then "!="
245
+ when "??" then "||"
246
+ else js_op
247
+ end
248
+ end
249
+
250
+ # Scan `source` for the rightmost occurrence of any operator from
251
+ # `operators` at lexical top level — outside any (), [], {}, or
252
+ # string literal. Returns `[start_index, end_index, operator]` or nil.
253
+ # Operators are tried longest-first at each position so `>=` beats
254
+ # `>` and `===` beats `==`.
255
+ def find_top_level_operator(source, operators)
256
+ sorted_ops = operators.sort_by { |op| -op.length }
257
+ state = { depth: 0, quote: nil, i: 0, last_match: nil }
258
+ while state[:i] < source.length
259
+ if state[:quote]
260
+ advance_quoted(source, state)
261
+ else
262
+ scan_one_position(source, sorted_ops, state)
263
+ end
264
+ end
265
+ state[:last_match]
266
+ end
267
+
268
+ def advance_quoted(source, state)
269
+ c = source[state[:i]]
270
+ if c == "\\"
271
+ state[:i] += 2
272
+ else
273
+ state[:quote] = nil if c == state[:quote]
274
+ state[:i] += 1
275
+ end
276
+ end
277
+
278
+ def scan_one_position(source, sorted_ops, state)
279
+ c = source[state[:i]]
280
+ if QUOTE_CHARS.include?(c)
281
+ state[:quote] = c
282
+ state[:i] += 1
283
+ return
284
+ end
285
+ state[:depth] += 1 if OPEN_BRACKETS.include?(c)
286
+ state[:depth] -= 1 if CLOSE_BRACKETS.include?(c)
287
+
288
+ matched = state[:depth].zero? && sorted_ops.find { |op| source[state[:i], op.length] == op }
289
+ if matched
290
+ state[:last_match] = [state[:i], state[:i] + matched.length, matched]
291
+ state[:i] += matched.length
292
+ else
293
+ state[:i] += 1
294
+ end
295
+ end
296
+
297
+ # Strip a single layer of outer parens when they wrap the entire
298
+ # source (`(a > b)` → `a > b`). When the leading `(` closes mid-
299
+ # source — e.g. `(a > b) && c` — leave the source alone since the
300
+ # parens are structurally meaningful. Trims surrounding whitespace.
301
+ def unwrap_outer_parens(source)
302
+ return source unless source.start_with?("(") && source.end_with?(")")
303
+ return source unless outer_parens_balanced?(source)
304
+
305
+ source[1...-1].strip
306
+ end
307
+
308
+ def outer_parens_balanced?(source)
309
+ depth = 0
310
+ quote = nil
311
+ source.each_char.with_index do |c, i|
312
+ if quote
313
+ quote = nil if c == quote && source[i - 1] != "\\"
314
+ next
315
+ end
316
+ quote = c if QUOTE_CHARS.include?(c)
317
+ depth += 1 if c == "("
318
+ if c == ")"
319
+ depth -= 1
320
+ return false if depth.zero? && i != source.length - 1
321
+ end
322
+ end
323
+ true
324
+ end
325
+
81
326
  def in_local_scope?(name)
82
327
  @local_stack.any? { |scope| scope.include?(name) }
83
328
  end
@@ -86,35 +331,52 @@ module JsxRosetta
86
331
  snake = AST::Inflector.underscore(name)
87
332
  if in_local_scope?(name)
88
333
  snake
334
+ elsif @prop_aliases.key?(name)
335
+ "@#{AST::Inflector.underscore(@prop_aliases[name])}"
89
336
  elsif @prop_names.include?(name)
90
337
  "@#{snake}"
91
338
  elsif @local_binding_names.include?(name)
92
339
  # We know this binding exists locally (destructure, hook tuple)
93
340
  # but can't model its value. As a leaf identifier, return `nil`
94
341
  # so the file loads (a bare snake_case ref would NameError).
95
- # As a member-chain root, `nil.member` would NoMethodError at
96
- # render time — worse. Fall back to the snake_case bare ref
97
- # and let it surface as a NameError (caller adds an unresolved
98
- # marker), which is at least debuggable. The TODO marker for
99
- # the binding source already lives in the comment block.
100
- if member_chain_root
101
- unresolved << name
102
- snake
103
- else
104
- "nil"
105
- end
342
+ # As a member-chain root, `nil.member` would NoMethodError and
343
+ # the bare-snake fallback would NameError — both crash at render
344
+ # time. Bail so the whole expression fails translation and the
345
+ # caller emits a TODO comment with the verbatim source.
346
+ return nil if member_chain_root
347
+
348
+ "nil"
106
349
  else
107
350
  unresolved << name
108
351
  snake
109
352
  end
110
353
  end
111
354
 
355
+ # An identifier that we know to be a local binding (e.g. destructured
356
+ # from an untranslatable init) but whose value we can't model. The
357
+ # leaf-translates-to-nil path is safe in value positions (attribute
358
+ # kwargs, leaf interpolations) but compound contexts (unary, binary,
359
+ # member chain) must bail so callers emit a TODO instead of silently
360
+ # changing semantics.
361
+ def unresolvable_local?(source)
362
+ return false unless source.match?(IDENTIFIER)
363
+
364
+ @local_binding_names.include?(source) &&
365
+ !in_local_scope?(source) &&
366
+ !@prop_names.include?(source)
367
+ end
368
+
112
369
  def translate_member_chain(root, rest, unresolved)
113
370
  translated_root = translate_identifier(root, unresolved, member_chain_root: true)
371
+ return nil unless translated_root
372
+
114
373
  # Underscore each chain segment so JS camelCase identifiers map to
115
- # Ruby snake_case (`post.coverImage` → `post.cover_image`).
116
- ruby_rest = rest.gsub(/\.([a-zA-Z_$][a-zA-Z_$0-9]*)/) do
117
- ".#{AST::Inflector.underscore(::Regexp.last_match(1))}"
374
+ # Ruby snake_case (`post.coverImage` → `post.cover_image`). Map
375
+ # optional-chaining `?.` to Ruby's safe-nav `&.` so a nil receiver
376
+ # short-circuits to nil instead of raising NoMethodError.
377
+ ruby_rest = rest.gsub(MEMBER_SEGMENT) do
378
+ op = ::Regexp.last_match(1) == "?." ? "&." : "."
379
+ "#{op}#{AST::Inflector.underscore(::Regexp.last_match(2))}"
118
380
  end
119
381
  "#{translated_root}#{ruby_rest}"
120
382
  end
@@ -129,7 +391,16 @@ module JsxRosetta
129
391
  match = ::Regexp.last_match
130
392
  literal = content[last_pos...match.begin(0)]
131
393
  parts << escape_ruby_string_literal(literal) unless literal.empty?
132
- parts << "\#{#{translate_template_interpolation(match[1], unresolved)}}"
394
+ translated = translate_template_interpolation(match[1], unresolved)
395
+ # An interpolation segment that itself fails translation (nil)
396
+ # or resolves to literal `nil` (known-but-unresolvable local)
397
+ # would emit `\#{}` / `\#{nil}` — empty or semantically empty
398
+ # interpolation that rubocop flags and that loses the source's
399
+ # intent. Bail so the whole template literal falls through to
400
+ # the caller's TODO path with the verbatim JS source visible.
401
+ return nil if translated.nil? || translated == "nil"
402
+
403
+ parts << "\#{#{translated}}"
133
404
  last_pos = match.end(0)
134
405
  end
135
406
  trailing = content[last_pos..]