sas-linter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+ require "sas_lexer"
5
+
6
+ class SasLinter
7
+ module Rules
8
+ # Flag `if COND then S; else S;` where the THEN and ELSE bodies are
9
+ # identical token-for-token — the condition has no effect on the
10
+ # outcome, which is almost always a copy-paste error.
11
+ #
12
+ # Motivating bug (`docs/AK_LOC_HOME_CARE_SCALE_notes.txt` #1):
13
+ #
14
+ # if iK3 in (6,7,8) then NF1_2=0; else NF1_2=0;
15
+ #
16
+ # Both branches assign `NF1_2 = 0`; the THEN should have been `=1`.
17
+ #
18
+ # Scope: simple-statement bodies only (`then STMT; else STMT;`). The
19
+ # block form (`then do; ... end; else do; ... end;`) is ignored — it's
20
+ # rare and the equivalence check would need to span an unbounded body.
21
+ class IdenticalIfElseBranches < Rule
22
+ rule_id :identical_if_else_branches
23
+ description "`if ... then S; else S;` — THEN and ELSE bodies are " \
24
+ "identical, so the condition has no effect."
25
+ severity :warning
26
+
27
+ TT = SasLexer::Lexer::TokenType
28
+
29
# Report every `then STMT; else STMT;` pair whose two simple-statement
# bodies are equivalent. One finding is emitted per pair, positioned at the
# `else` keyword; scanning then resumes after the ELSE body's terminating
# `;`. A branch whose first token is `do` is never compared.
#
# `all_tokens` and `source` belong to the shared rule signature and are not
# read here.
def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  # For the keyword at `keyword_idx`, returns [body_tokens, index_after_semi]
  # when a simple (non-`do`) statement body follows it, nil otherwise.
  simple_body_after = lambda do |keyword_idx|
    head = tokens[keyword_idx + 1]
    next nil if head.nil? || head[:type] == TT::KW_DO

    body, following = collect_simple_body(tokens, keyword_idx + 1)
    body && [body, following]
  end

  findings = []
  pos = 0

  while pos < tokens.length
    then_part = tokens[pos][:type] == TT::KW_THEN ? simple_body_after.call(pos) : nil
    else_idx = then_part&.last
    else_tok = else_idx && tokens[else_idx]
    else_part = else_tok && else_tok[:type] == TT::KW_ELSE ? simple_body_after.call(else_idx) : nil

    unless else_part && bodies_equivalent?(then_part.first, else_part.first)
      pos += 1
      next
    end

    findings << finding(
      line: else_tok[:start_line],
      column: else_tok[:start_column] + 1,
      message: "`if ... then #{render_body(then_part.first)}; else #{render_body(else_part.first)};` — " \
               "branches are identical; the condition has no effect.",
      path: path
    )
    # Jump past the ELSE body so its tokens are not rescanned.
    pos = else_part.last
  end

  findings
end
68
+
69
+ private
70
+
71
# Gather the tokens of one statement that begins at `start_idx`, stopping
# just before its terminating SEMI.
#
# Returns `[body_tokens, index_after_semi]` on success, or
# `[nil, start_idx]` when the token stream ends before any SEMI is seen.
def collect_simple_body(tokens, start_idx)
  remaining = tokens[start_idx..] || []
  semi_offset = remaining.index { |candidate| candidate[:type] == TT::SEMI }
  return [nil, start_idx] unless semi_offset

  [remaining.first(semi_offset), start_idx + semi_offset + 1]
end
86
+
87
# Decide whether two statement bodies are the same statement.
#
# Bodies match when they have the same number of tokens and every token
# pair has the same type and the same text. SAS identifiers and keywords
# are case-insensitive, so their text is compared after downcasing.
#
# Quoted string literals are the exception: `'A'` and `'a'` are different
# values, so STRING_LITERAL text must match exactly. Without this,
# `if c then x='A'; else x='a';` would be reported as identical branches.
#
# @param a [Array<Hash>] tokens carrying `:type` and `:text`
# @param b [Array<Hash>] tokens carrying `:type` and `:text`
# @return [Boolean]
def bodies_equivalent?(a, b)
  return false unless a.length == b.length

  a.zip(b).all? do |ta, tb|
    next false unless ta[:type] == tb[:type]

    if ta[:type] == TT::STRING_LITERAL
      ta[:text] == tb[:text]
    else
      ta[:text].downcase == tb[:text].downcase
    end
  end
end
98
+
99
# Render a token list as compact text for use in finding messages: token
# texts are joined with single spaces, then whitespace directly before
# `,` `;` `(` `)` and directly after `,` `(` is removed.
def render_body(body)
  spaced = body.map { |token| token[:text] }.join(" ")
  without_leading_gaps = spaced.gsub(/\s+([,;()])/, '\1')
  without_leading_gaps.gsub(/([,(])\s+/, '\1')
end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+
5
+ class SasLinter
6
+ module Rules
7
+ # Flag non-standard line endings in SAS sources. Two patterns
8
+ # appear in legacy SAS sources and tend to be hand-fixed when
9
+ # they show up:
10
+ #
11
+ # 1. `\r\r\n` — double CR before LF. Word/Outlook copy-paste
12
+ # injects an extra CR; SAS Viya tolerates it but downstream
13
+ # tools and diffs treat the file as if every line had a
14
+ # trailing literal CR character.
15
+ #
16
+ # 2. Lone `\r` (CR not followed by LF) — old-Mac CR-only
17
+ # endings. SAS Viya treats the entire file as one logical
18
+ # line, breaking saspy's shard-based submission flow.
19
+ #
20
+ # Autofix collapses `\r\r\n` to `\r\n` unconditionally and maps
21
+ # every lone `\r` to the file's dominant ending: `\r\n` if the
22
+ # source has any CRLF (i.e. it's a Windows file with stragglers),
23
+ # `\n` otherwise (i.e. pure-CR file → POSIX).
24
+ #
25
+ # Recognized config options:
26
+ # autofix: true | false (default: false)
27
+ class LineEndings < Rule
28
+ rule_id :line_endings
29
+ description "Source has non-standard line endings (double-CR or lone CR)."
30
+ severity :warning
31
+
32
# Reports that this rule provides an `autofix` implementation.
def self.supports_autofix?
  true
end
35
+
36
# Walk the raw bytes of `source` and report each `\r\r\n` sequence and each
# lone `\r` (a CR not followed by LF). Positions are tracked by counting
# bytes: every recognized terminator (`\r\r\n`, `\r\n`, lone `\r`, `\n`)
# advances the line and resets the column to 1.
#
# Returns [] when no source text is supplied. The token arguments are not
# used by this rule.
def check(_tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  return [] unless source

  raw = source.b.bytes
  total = raw.length
  reports = []
  row = 1
  column = 1
  pos = 0

  # Records a finding at the current row/column (read at call time).
  report = lambda do |message|
    reports << finding(line: row, column: column, message: message, path: path)
  end

  while pos < total
    if raw[pos] == 0x0D
      if raw[pos + 1] == 0x0D && raw[pos + 2] == 0x0A
        report.call("double CR before LF (\\r\\r\\n)#{autofix? ? ' (autofixed)' : ''}")
        pos += 3
      elsif raw[pos + 1] == 0x0A
        # Ordinary CRLF: a line break, nothing to report.
        pos += 2
      else
        report.call("lone CR (\\r)#{autofix? ? ' (autofixed)' : ''}")
        pos += 1
      end
      row += 1
      column = 1
    elsif raw[pos] == 0x0A
      row += 1
      column = 1
      pos += 1
    else
      column += 1
      pos += 1
    end
  end

  reports
end
83
+
84
# Normalize line endings in `source` and return the rewritten text.
#
# * `\r\r\n` is collapsed to `\r\n`.
# * Every remaining lone `\r` (one not followed by `\n`) is replaced with
#   `\r\n` if any CRLF exists after the collapse, otherwise with `\n`.
#
# The work is done on a binary copy so byte sequences are matched as-is;
# the result is tagged with the original string's encoding again.
#
# @param source [String, nil] file contents
# @return [String, nil] rewritten contents; nil/empty input is returned
#   unchanged, mirroring how `check` tolerates a missing source
def autofix(source)
  return source if source.nil? || source.empty?

  # Step 1: drop the duplicate CR in `\r\r\n`.
  collapsed = source.b.gsub(/\r\r\n/, "\r\n")

  # Step 2: any surviving CRLF makes CRLF the target terminator.
  replacement = collapsed.include?("\r\n") ? "\r\n" : "\n"

  # Step 3: the negative lookahead leaves genuine CRLF pairs untouched.
  collapsed.gsub(/\r(?!\n)/, replacement).force_encoding(source.encoding)
end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,291 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+ require "sas_lexer"
5
+
6
+ class SasLinter
7
+ module Rules
8
+ # Validate that `if ... then` conditions form well-shaped boolean
9
+ # expressions. Catches authoring mistakes the lexer cheerfully
10
+ # accepts but that won't run, e.g.
11
+ #
12
+ # if A1 = 1 A2 = 2 then ... * missing `and`/`or`
13
+ # if A1 = 1 and then ... * trailing operator
14
+ # if = 1 then ... * leading operator, no left operand
15
+ # if then ... * empty condition
16
+ # A1 = 1 then ... * missing `if`
17
+ # if (a = 1 and b = 2 then ... * unbalanced parens
18
+ #
19
+ # Strategy: at each `KW_IF`, walk forward to the matching top-level
20
+ # `KW_THEN` (or `;` for a subsetting `if`) running a tiny
21
+ # operand/operator state machine. Top-level only — anything inside
22
+ # parens is treated as a single sub-expression so function calls
23
+ # and `in (...)` lists don't trigger false positives.
24
+ #
25
+ # An orphan `KW_THEN` (one not consumed by an enclosing `if`) is
26
+ # reported as a likely missing `if`.
27
+ class MalformedIfCondition < Rule
28
+ rule_id :malformed_if_condition
29
+ description "Validate `if ... then` conditions form a well-shaped " \
30
+ "boolean expression (no missing operators, operands, " \
31
+ "or `if` keyword; balanced parens)."
32
+ severity :warning
33
+
34
+ TT = SasLexer::Lexer::TokenType
35
+
36
+ COMPARISON_OPS = [
37
+ TT::ASSIGN, TT::KW_EQ, TT::KW_NE, TT::NE, TT::KW_LT, TT::LT, TT::KW_LE, TT::LE,
38
+ TT::KW_GT, TT::GT, TT::KW_GE, TT::GE, TT::KW_IN, TT::SOUNDS_LIKE, TT::GTLT, TT::LTGT,
39
+ TT::KW_EQT, TT::KW_GTT, TT::KW_LTT, TT::KW_GET, TT::KW_LET, TT::KW_NET
40
+ ].freeze
41
+
42
+ LOGICAL_OPS = [TT::KW_AND, TT::KW_OR, TT::AMP, TT::PIPE, TT::PIPE2].freeze
43
+
44
+ ARITHMETIC_OPS = [TT::PLUS, TT::MINUS, TT::STAR, TT::FSLASH, TT::STAR2,
45
+ TT::EXCL, TT::EXCL2, TT::BPIPE, TT::BPIPE2].freeze
46
+
47
+ BINOPS = (COMPARISON_OPS + LOGICAL_OPS + ARITHMETIC_OPS).to_set.freeze
48
+
49
+ # `+`/`-` are also binary; the state machine disambiguates by
50
+ # checking whether we currently expect an operand.
51
+ UNARY_PREFIXES = [TT::KW_NOT, TT::NOT, TT::MINUS, TT::PLUS].to_set.freeze
52
+
53
+ OPERAND_TOKENS = [
54
+ TT::IDENTIFIER,
55
+ TT::INTEGER_LITERAL, TT::FLOAT_LITERAL, TT::FLOAT_EXPONENT_LITERAL,
56
+ TT::STRING_LITERAL, TT::HEX_STRING_LITERAL, TT::BIT_TESTING_LITERAL,
57
+ TT::DATE_LITERAL, TT::DATE_TIME_LITERAL, TT::TIME_LITERAL, TT::NAME_LITERAL,
58
+ TT::MACRO_VAR_RESOLVE, TT::MACRO_IDENTIFIER, TT::MACRO_STRING,
59
+ TT::STRING_EXPR_START
60
+ ].to_set.freeze
61
+
62
# Scan the token stream. Each `KW_IF` is handed to `analyze_if`, which
# returns the index to resume from plus any findings for that condition.
# Any `KW_THEN` reached by this loop that `analyze_if` did not mark as
# consumed is reported as a `then` with no `if`.
#
# `all_tokens` and `source` belong to the shared rule signature and are not
# read here.
def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  reports = []
  claimed_thens = {}
  cursor = 0

  until cursor >= tokens.length
    current = tokens[cursor]
    kind = current[:type]

    if kind == TT::KW_IF
      cursor, nested = analyze_if(tokens, cursor, path, claimed_thens)
      reports.concat(nested)
      next
    end

    orphan_then = kind == TT::KW_THEN && !claimed_thens[cursor]
    if orphan_then
      reports << finding(
        line: current[:start_line],
        column: current[:start_column] + 1,
        message: "`then` without a preceding `if` condition — likely missing `if`.",
        path: path
      )
    end

    cursor += 1
  end

  reports
end
91
+
92
+ private
93
+
94
# Walk from the `if` at `tokens[start]` to its terminating top-level `then`
# (or `;` for a subsetting `if`), validating the shape of the condition
# with a two-state operand/operator machine. Anything inside parens or
# brackets is treated as one opaque operand.
#
# Emits at most ONE finding per `if`: a single structural defect (e.g.
# `iK2g in 0,1)` — missing `(` after `in`) would otherwise cascade into
# further unbalanced-paren / orphan-`then` reports. After the first finding
# the walk switches to recovery mode: it skips forward to the statement's
# `;`, marking every `KW_THEN` it passes as consumed so the caller's
# orphan-`then` detector does not fire on the same broken statement.
#
# When the defect is discovered AT a `;` (a paren still open when the
# statement ends), the statement is already over, so the method returns
# right there instead of skipping to the next `;` — which would swallow the
# following statement unexamined.
#
# Mutates `consumed_thens` (index => true).
#
# @return [Array(Integer, Array)] `[next_index, findings]`; `next_index` is
#   `tokens.length` when EOF is reached without a terminator
def analyze_if(tokens, start, path, consumed_thens)
  findings = []
  state = :expect_operand
  paren_depth = 0
  open_parens = []
  cond_started = false
  last_op_tok = nil
  broken = false

  add_finding = lambda do |tok, message|
    findings << finding(
      line: tok[:start_line], column: tok[:start_column] + 1,
      message: message, path: path
    )
    broken = true
  end

  i = start + 1
  while i < tokens.length
    t = tokens[i]
    type = t[:type]

    # Recovery mode: skip ahead to the next `;`, marking any `then`s we
    # pass over as consumed. Paren state is intentionally ignored — once a
    # finding has been emitted the depth counter is no longer trusted.
    if broken
      consumed_thens[i] = true if type == TT::KW_THEN
      return [i + 1, findings] if type == TT::SEMI

      i += 1
      next
    end

    if type == TT::KW_THEN && paren_depth.zero?
      flag_terminal(findings, path, state, cond_started, last_op_tok, t, "then")
      consumed_thens[i] = true
      return [i + 1, findings]
    end

    if type == TT::SEMI && paren_depth.zero?
      flag_terminal(findings, path, state, cond_started, last_op_tok, t, "subsetting `if`")
      return [i + 1, findings]
    end

    # `then` / `;` inside open parens means a paren never closed. Flag at
    # the outermost still-open paren.
    if (type == TT::KW_THEN || type == TT::SEMI) && paren_depth.positive?
      lp = open_parens.first
      add_finding.call(lp, "unbalanced `(` in `if` condition (no matching `)` " \
                           "before `#{t[:text]}`).")
      # This `;` ends the statement: stop here so the next statement is
      # analyzed normally rather than skipped by recovery mode.
      return [i + 1, findings] if type == TT::SEMI

      consumed_thens[i] = true
      i += 1
      next
    end

    if type == TT::LPAREN || type == TT::LBRACK
      cond_started = true
      paren_depth += 1
      open_parens.push(t)
      i += 1
      next
    end

    if type == TT::RPAREN || type == TT::RBRACK
      if paren_depth.zero?
        add_finding.call(t, "unbalanced `#{t[:text]}` in `if` condition.")
        i += 1
        next
      end
      paren_depth -= 1
      open_parens.pop
      # A parenthesized sub-expression, function-call arg list, or array
      # subscript that just closed counts as one completed operand.
      state = :expect_operator if paren_depth.zero?
      i += 1
      next
    end

    # Inside parens nothing is validated — the whole `(...)` is one atom
    # at the top level.
    if paren_depth.positive?
      i += 1
      next
    end

    # `,` at top level only appears inside `in (...)`, which is
    # paren-wrapped. Treat as a no-op if it leaks through.
    if type == TT::COMMA
      i += 1
      next
    end

    cond_started = true

    if state == :expect_operand
      # Checked before BINOPS so a leading `+`/`-` reads as a sign.
      if UNARY_PREFIXES.include?(type)
        i += 1
        next
      end

      if OPERAND_TOKENS.include?(type)
        state = :expect_operator
        i += 1
        next
      end

      if BINOPS.include?(type)
        msg = if last_op_tok.nil?
                "operator `#{t[:text]}` at start of `if` condition with no left operand."
              else
                "operator `#{t[:text]}` follows operator `#{last_op_tok[:text]}` " \
                  "with no operand between them."
              end
        add_finding.call(t, msg)
        last_op_tok = t
        i += 1
        next
      end

      # Unknown token in operand position — treat opaquely as one operand
      # to keep walking. Reduces false positives on SAS shapes that are
      # not fully modeled (e.g. DOT for missing values, format references).
      state = :expect_operator
      i += 1
      next
    end

    # state == :expect_operator
    if BINOPS.include?(type)
      last_op_tok = t
      state = :expect_operand
      i += 1
      next
    end

    # Negated comparisons: `not eq`, `not in`, `not lt`, `^=`, `^in`, `^<`,
    # etc. The lexer splits these into a NOT/`^` token and a comparison
    # op; recognize the pair as one binary operator so the state machine
    # doesn't see two operators in a row.
    if (type == TT::KW_NOT || type == TT::NOT) && (nxt = tokens[i + 1]) &&
       COMPARISON_OPS.include?(nxt[:type])
      last_op_tok = nxt
      state = :expect_operand
      i += 2
      next
    end

    if OPERAND_TOKENS.include?(type) || UNARY_PREFIXES.include?(type)
      add_finding.call(t,
                       "missing operator before `#{t[:text]}` in `if` condition — " \
                       "perhaps a missing `and`/`or`?")
      i += 1
      next
    end

    # Anything else in operator position is skipped without judgement.
    i += 1
  end

  # Reached EOF without seeing `then` or `;`.
  [i, findings]
end
272
+
273
# Called when a condition reaches its terminator (`then` or `;`). Appends
# at most one finding to `findings`:
#
# * the condition never started            → "empty condition" at the terminator
# * the walk still expects an operand after
#   an operator                            → "no right operand" at that operator
#
# `where` is either a bare keyword such as "then", rendered as
# "`if then`", or an already-formatted phrase containing backticks such as
# "subsetting `if`", used as-is. Wrapping the latter again produced the
# garbled "`if subsetting `if``".
def flag_terminal(findings, path, state, cond_started, last_op_tok, terminator, where)
  if cond_started
    return unless state == :expect_operand && last_op_tok

    findings << finding(
      line: last_op_tok[:start_line], column: last_op_tok[:start_column] + 1,
      message: "operator `#{last_op_tok[:text]}` has no right operand before " \
               "`#{terminator[:text]}`.",
      path: path
    )
  else
    subject = where.include?("`") ? where : "`if #{where}`"
    findings << finding(
      line: terminator[:start_line], column: terminator[:start_column] + 1,
      message: "#{subject} with empty condition.",
      path: path
    )
  end
end
289
+ end
290
+ end
291
+ end
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+ require "sas_lexer"
5
+
6
+ class SasLinter
7
+ module Rules
8
+ # Flag assignment statements whose terminating `;` is missing,
9
+ # causing the inline `**`-style comment marker to be lexed as
10
+ # the SAS exponentiation operator and absorbed into the RHS
11
+ # expression.
12
+ #
13
+ # The motivating bug is in MDS2.0_CAP_FEEDTB_G2_V2.1_P_2012-03-15.txt:
14
+ #
15
+ # B1 = B1 ** Comatose; ← missing `;` before `**`
16
+ # B4 = B4; ** Daily decision-making;
17
+ # K5b = K5b; ** Tube feeding;
18
+ #
19
+ # SAS lexes the first line as `B1 := B1 ^ Comatose`, where
20
+ # `Comatose` is an undefined variable — the assignment silently
21
+ # produces a missing value at runtime instead of the identity
22
+ # mapping the author intended.
23
+ #
24
+ # Detection: a STAR2 (`**`) token where
25
+ # * the line containing it does NOT start with `**` (which
26
+ # would put us in a header / boxed-comment context, where
27
+ # `**` is part of the comment-statement opener, not an
28
+ # operator); AND
29
+ # * the previous default-channel token is an IDENTIFIER (the
30
+ # RHS variable in the assignment); AND
31
+ # * the next default-channel token is an IDENTIFIER (the prose
32
+ # start of what should have been an inline comment).
33
+ #
34
+ # Legitimate `X = Y ** 2` exponentiation has a numeric literal
35
+ # after the `**`, not an identifier, so it doesn't match.
36
+ class MissingAssignmentSemicolon < Rule
37
+ rule_id :missing_assignment_semicolon
38
+ description "Assignment missing terminating `;` — the inline " \
39
+ "`**` comment marker was lexed as exponentiation."
40
+ severity :warning
41
+
42
+ TT = SasLexer::Lexer::TokenType
43
+
44
# Reports that this rule provides an `autofix` implementation.
def self.supports_autofix?
  true
end
47
+
48
# Report each STAR2 (`**`) token that sits directly between two IDENTIFIER
# tokens, unless the source line holding it begins (after leading
# whitespace) with `**` — that shape is a header / boxed comment rather
# than an expression.
#
# The header test needs the line text, so nothing is reported when `source`
# is absent or the token's line cannot be found in it.
def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  source_lines = (source || "").split("\n", -1)

  # Slide a 3-token window so the middle token always has both neighbours.
  tokens.each_cons(3).each_with_object([]) do |(before, star, after), reports|
    next unless star[:type] == TT::STAR2

    text = source_lines[star[:start_line] - 1]
    next if text.nil? || text.lstrip.start_with?("**")
    next unless before[:type] == TT::IDENTIFIER && after[:type] == TT::IDENTIFIER

    reports << finding(
      line: star[:start_line],
      column: star[:start_column] + 1,
      message: "`**` parsed as exponentiation in `#{before[:text]} ** #{after[:text]}` — " \
               "looks like a missing `;` before an inline `** ... ;` comment.",
      path: path
    )
  end
end
77
+
78
# Insert the missing `;` immediately after the RHS identifier of every
# flaggable `IDENT ** IDENT` occurrence (same detection as `check`; the
# source is re-lexed here and hidden/comment-channel tokens are dropped).
#
# Spacing rules, chosen to keep inline-comment columns aligned:
# * two or more spaces before `**` — the first space is replaced by `;`
#   (`B1 = B1   ** ...;` becomes `B1 = B1;  ** ...;`);
# * exactly one space — `;` is inserted and the space kept (`; **`);
# * no space — `; ` is inserted.
#
# Edits are applied right-to-left. Each edit's column was measured on the
# unmodified line, and an insertion shifts everything to its right, so
# applying them left-to-right would corrupt any line with two occurrences.
#
# @param source [String, nil] file contents
# @return [String, nil] rewritten contents; nil/empty input is returned as-is
def autofix(source)
  return source if source.nil? || source.empty?

  lexer = SasLexer::Lexer.new
  begin
    all_tokens = lexer.tokenize(source)
  ensure
    lexer.free
  end
  tokens = all_tokens.reject do |t|
    t[:channel] == SasLexer::Lexer::TokenChannel::HIDDEN ||
      t[:channel] == SasLexer::Lexer::TokenChannel::COMMENT
  end

  source_lines = source.split("\n", -1)
  edits = []

  tokens.each_with_index do |t, i|
    next unless t[:type] == TT::STAR2

    line = source_lines[t[:start_line] - 1]
    next if line.nil? || line.lstrip.start_with?("**")

    prev_t = tokens[i - 1] if i.positive?
    next_t = tokens[i + 1]
    next unless prev_t && next_t
    next unless prev_t[:type] == TT::IDENTIFIER && next_t[:type] == TT::IDENTIFIER

    # Edit position: the column just past the RHS identifier.
    edits << [t[:start_line] - 1, prev_t[:end_column]]
  end

  # Tokens arrive in source order, so reversing yields right-to-left edits
  # within each line and keeps the recorded columns valid.
  edits.reverse_each do |line_idx, col|
    line = source_lines[line_idx]
    replacement =
      if col + 1 < line.length && line[col] == " " && line[col + 1] == " "
        # Two or more spaces: trade one for the `;` to preserve alignment.
        ";#{line[(col + 1)..]}"
      elsif col < line.length && line[col] == " "
        # Exactly one space: keep it after the `;` (`; **`, not `;**`).
        "; #{line[(col + 1)..]}"
      else
        # No space at all (rare). Inject `; `.
        "; #{line[col..]}"
      end
    source_lines[line_idx] = "#{line[0...col]}#{replacement}"
  end

  source_lines.join("\n")
end
139
+ end
140
+ end
141
+ end