sas-linter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,290 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require_relative "../../sas_linter"
5
+ require "sas_lexer"
6
+
7
+ class SasLinter
8
+ module Rules
9
+ # Restore the standard 90-char `**...**;` header convention to broken SAS
10
+ # source files. Detects header lines that *look* like `**`-comments
11
+ # but produce DEFAULT-channel tokens, and re-wraps them as proper
12
+ # `** ... **;` rows.
13
+ #
14
+ # Working sources use a uniform 90-char-wide header where each
15
+ # line is its own self-contained `*` comment statement:
16
+ #
17
+ # ****************************************************************************************;
18
+ # ** PROGRAM: ... **;
19
+ # ** BY: ... **;
20
+ #
21
+ # Broken sources have lines that look like comments (start with
22
+ # `**`) but produce DEFAULT-channel tokens. Two flavors:
23
+ #
24
+ # A. Missing trailing `;` on every header line — the whole
25
+ # header is one giant unterminated `*` comment until the
26
+ # first inline `;` ends it, leaking the rest of that
27
+ # physical line and following lines onto DEFAULT.
28
+ #
29
+ # B. Trailing `**;` is present but an inline `;` (e.g. a
30
+ # semicolon-separated list like `First Reviewer; Second
31
+ # Reviewer`) terminates the comment in the middle of the
32
+ # line — what follows the inline `;` ends up on DEFAULT
33
+ # even though the line *looks* terminated.
34
+ #
35
+ # Some files also have header continuation lines (text that
36
+ # should be inside a `**` comment) that lost their `**` prefix
37
+ # during a text-conversion step. Those are detected only inside
38
+ # the file's leading header block — *before* the first KW_DATA /
39
+ # KW_PROC token the lexer reports — so legitimate body code
40
+ # sandwiched between `**` marker comments is left alone.
41
+ #
42
+ # Recognized config options:
43
+ # autofix: true | false (default: false)
44
+ class SourceHeaders < Rule
45
+ rule_id :source_headers
46
+ description "Header lines look like `**`-comments but lex as code; will be re-wrapped."
47
+ severity :warning
48
+
49
+ TARGET_WIDTH = 90
50
+ PAD_TO = TARGET_WIDTH - 3 # leave 3 chars for trailing `**;`
51
+
52
+ DEFAULT_CHANNEL = SasLexer::Lexer::TokenChannel::DEFAULT
53
+ KW_DATA = SasLexer::Lexer::TokenType::KW_DATA
54
+ KW_PROC = SasLexer::Lexer::TokenType.const_get(:KW_PROC) if SasLexer::Lexer::TokenType.const_defined?(:KW_PROC)
55
+ C_STYLE_COMMENT = SasLexer::Lexer::TokenType::C_STYLE_COMMENT
56
+ IDENTIFIER = SasLexer::Lexer::TokenType::IDENTIFIER
57
+ SEMI = SasLexer::Lexer::TokenType::SEMI
58
+ ASSIGN = SasLexer::Lexer::TokenType::ASSIGN
59
+
60
# Advertises that this rule implements `autofix`.
#
# @return [Boolean] always +true+
def self.supports_autofix?
  true
end
63
+
64
# Report one finding per broken header line in `source`.
#
# @param _tokens [Object] unused; the rule re-lexes `source` itself
# @param path [String] path attached to every finding
# @param all_tokens [Object, nil] unused
# @param source [String, nil] raw file contents; nothing is reported without it
# @return [Array] findings anchored at column 1 of each broken line (1-indexed)
def check(_tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  return [] unless source

  broken_header_lines(source).map do |zero_based|
    suffix = autofix? ? " (autofixed)" : ""
    finding(
      line: zero_based.succ,
      column: 1,
      message: "broken header line#{suffix}",
      path: path
    )
  end
end
76
+
77
# Repair broken header lines in `source` and return the rewritten text.
#
# @param source [String] raw file contents
# @return [String] text with tabs expanded and broken header rows re-wrapped
def autofix(source)
  # Step 0: expand every tab to 4 spaces. Tabs in SAS source headers
  # often come from Word docs and break the column alignment of the
  # header box; expanding first gives every downstream check consistent
  # column offsets. The width is spelled as `" " * 4` so the literal
  # cannot silently lose spaces to whitespace normalisation.
  text = source.gsub("\t", " " * 4)

  # Re-lex after every rewrite: fixing one row can change how later
  # rows tokenize. Bounded at 10 passes so a rewrite that never
  # converges cannot loop forever.
  10.times do
    tokens = tokenize(text)
    skip = c_comment_lines(tokens)
    bad = broken_lines_for(text, tokens, skip) |
          asterisk_rows_missing_semi_for(text, skip)
    break if bad.empty?

    text = rewrite(text, bad)
  end
  text
end
95
+
96
+ # 0-indexed line numbers the lexer thinks are broken header
97
+ # text in `source`. Public so the rule's `check` can produce
98
+ # findings without re-tokenizing on its own.
99
# @param source [String] raw file contents
# @return [Set<Integer>] 0-indexed numbers of the broken header lines
def broken_header_lines(source)
  lexed = tokenize(source)
  skipped = c_comment_lines(lexed)
  broken_lines_for(source, lexed, skipped)
end
103
+
104
+ private
105
+
106
+ # Lex `text`. The Rust lexer demands valid UTF-8; some legacy SAS
107
+ # sources ship with stray Windows-1252 bytes (smart quotes). We
108
+ # make a UTF-8-safe copy for the lexer call, then operate on
109
+ # the original byte string for offset math — the byte positions
110
+ # line up because we only replace bytes, never insert or delete.
111
# @param text [String] source text, possibly containing non-UTF-8 bytes
# @return [Object] whatever `SasLexer::Lexer#tokenize` returns for the
#   UTF-8-safe copy of `text`
def tokenize(text)
  utf8 = text.dup.force_encoding(Encoding::UTF_8)
  unless utf8.valid_encoding?
    # `scrub("?")` collapses a multi-byte invalid run (e.g. a truncated
    # UTF-8 sequence) into a single `?`, which would shift every later
    # byte offset. Emit one `?` per invalid byte so the copy stays
    # byte-aligned with the original.
    utf8 = utf8.scrub { |bytes| "?" * bytes.bytesize }
  end

  lexer = SasLexer::Lexer.new
  begin
    lexer.tokenize(utf8)
  ensure
    # Always release the lexer, even when tokenizing raises.
    lexer.free
  end
end
121
+
122
+ # 0-indexed line number of the first body keyword (KW_DATA /
123
+ # KW_PROC). Lines at or after this cutoff are body code, not
124
+ # header. Falls back to `total_lines` for fragments that have
125
+ # no data/proc step.
126
# @param tokens [Enumerable<Hash>] lexer tokens with `:type` and 1-indexed `:start_line`
# @param total_lines [Integer] value returned when no body keyword exists
# @return [Integer] 0-indexed line of the first KW_DATA / KW_PROC token,
#   or `total_lines` when there is none
def header_cutoff_line(tokens, total_lines)
  # KW_PROC is only assigned when the lexer exposes it. Referencing an
  # undefined constant raises NameError (it does not evaluate to nil),
  # so probe with `defined?` before touching it.
  body_types = [KW_DATA]
  body_types << KW_PROC if defined?(KW_PROC) && KW_PROC

  first_body = tokens.find { |tok| body_types.include?(tok[:type]) }
  first_body ? first_body[:start_line] - 1 : total_lines
end
132
+
133
+ # Set of 0-indexed line numbers that fall inside a `/* ... */`
134
+ # C_STYLE_COMMENT token. Legacy SAS sources sometimes embed
135
+ # large code blocks in such comments; header repair must skip
136
+ # those lines.
137
# @param tokens [Enumerable<Hash>] lexer tokens with `:type`, `:start_line`, `:end_line`
# @return [Set<Integer>] 0-indexed lines covered by any C_STYLE_COMMENT token
def c_comment_lines(tokens)
  tokens.each_with_object(Set.new) do |tok, covered|
    next unless tok[:type] == C_STYLE_COMMENT

    first = tok[:start_line] - 1
    last = tok[:end_line] - 1
    covered.merge(first..last)
  end
end
146
+
147
+ # A line is "prose-only" iff its DEFAULT-channel tokens contain
148
+ # no SAS-syntax control tokens (no `;`, no `=`). Real body code
149
+ # always has at least one of those; prose ("CHECK WITH AUTHOR
150
+ # FOR OTHERS") has neither.
151
# @param tokens [Enumerable<Hash>] lexer tokens with `:start_line`, `:channel`, `:type`
# @param line_idx [Integer] 0-indexed line to inspect
# @return [Boolean] true when the line has at least one DEFAULT-channel
#   token and none of them is a SEMI or ASSIGN
def prose_only_line?(tokens, line_idx)
  on_line = tokens.select do |tok|
    tok[:start_line] - 1 == line_idx && tok[:channel] == DEFAULT_CHANNEL
  end
  return false if on_line.empty?

  on_line.none? { |tok| tok[:type] == SEMI || tok[:type] == ASSIGN }
end
162
+
163
# Find header lines whose text leaked onto the DEFAULT channel.
#
# @param text [String] source text
# @param tokens [Enumerable<Hash>] lexer tokens for `text`
# @param skip_lines [#include?] 0-indexed lines to ignore
# @return [Set<Integer>] 0-indexed broken line numbers
def broken_lines_for(text, tokens, skip_lines)
  rows = text.split("\n", -1)
  cutoff = header_cutoff_line(tokens, rows.length)

  # Pattern A: a DEFAULT-channel IDENTIFIER is the diagnostic shape.
  # Prose that leaks past an inline `;` (e.g. a `;`-separated reviewer
  # list) lexes as variable references. A bare DEFAULT SEMI such as the
  # one in `**A; **B; ;` is a harmless null statement, so only
  # IDENTIFIERs mark a line as a candidate.
  candidates = tokens.each_with_object(Set.new) do |tok, acc|
    acc << (tok[:start_line] - 1) if tok[:channel] == DEFAULT_CHANNEL && tok[:type] == IDENTIFIER
  end

  # There is deliberately no textual heuristic for "header-shaped lines
  # without a trailing `;`" (formerly Pattern C): the lexer accepts
  # shapes such as `** START OF SAS CODE **`, `**...**:` and rows with
  # an inline `;` plus a proper terminator, and Pattern A already
  # catches every line where code actually leaks. Cosmetic-only
  # re-padding is not worth the diff churn.
  candidates.each_with_object(Set.new) do |idx, flagged|
    next if skip_lines.include?(idx)

    row = rows[idx]
    next unless row

    if row.lstrip.start_with?("**")
      # A row already ending in `**;` only lexes as code because
      # something upstream is unterminated (e.g. a missing `;` after
      # `value foo 0='x' 1='y'`); re-padding it would not help.
      flagged << idx unless row.rstrip.end_with?("**;")
    elsif idx < cutoff
      above = nearest_nonblank(rows, idx, -1)
      below = nearest_nonblank(rows, idx, +1)
      sandwiched = above&.lstrip&.start_with?("**") && below&.lstrip&.start_with?("**")
      # Being sandwiched is not enough: the row itself must be prose
      # only, which protects body code (`A=0;`, `if x then y;`) sitting
      # between `**` marker comments.
      flagged << idx if sandwiched && prose_only_line?(tokens, idx)
    end
  end
end
221
+
222
# Walk from `from` in direction `step` and return the first line that is
# not blank.
#
# @param lines [Array<String>] all lines of the file
# @param from [Integer] index to start from (exclusive)
# @param step [Integer] +1 to look down, -1 to look up
# @return [String, nil] the nearest non-blank line, or nil if none exists
def nearest_nonblank(lines, from, step)
  cursor = from + step
  until cursor.negative? || cursor >= lines.length
    candidate = lines[cursor]
    return candidate unless candidate.strip.empty?

    cursor += step
  end
  nil
end
231
+
232
# Find divider rows made only of `*` characters, which therefore lack
# the closing `;`.
#
# @param text [String] source text
# @param skip_lines [#include?] 0-indexed lines to ignore
# @return [Set<Integer>] 0-indexed numbers of unterminated asterisk rows
def asterisk_rows_missing_semi_for(text, skip_lines)
  rows = text.split("\n", -1)
  # A row that is nothing but asterisks cannot also end in `;`, so the
  # all-asterisk test is the whole condition.
  hits = rows.each_index.select do |idx|
    !skip_lines.include?(idx) && rows[idx].strip.match?(/\A\*+\z/)
  end
  Set.new(hits)
end
241
+
242
# Replace every line listed in `bad` with its repaired form.
#
# @param text [String] source text
# @param bad [#include?] 0-indexed line numbers to rewrite
# @return [String] text re-joined with "\n"; one bad line may expand
#   into several output lines
def rewrite(text, bad)
  repaired = text.split("\n", -1).each_with_index.flat_map do |row, idx|
    bad.include?(idx) ? rewrite_line(row) : [row]
  end
  repaired.join("\n")
end
254
+
255
+ # Rewrite one broken line into one or more proper `** ... **;`
256
+ # lines.
257
# @param line [String] one broken header line (without its newline)
# @return [Array<String>] one or more `** ... **;` rows padded to the
#   header width, or the asterisk divider with `;` appended
def rewrite_line(line)
  stripped = line.rstrip

  # Pure asterisk divider: only the terminator is missing. The detector
  # matches on the fully stripped row, so do the same here and keep any
  # indentation instead of wrapping the row as prose.
  return ["#{stripped};"] if stripped.lstrip.match?(/\A\*+\z/)

  # The detectors test `line.lstrip.start_with?("**")`, so the rewrite
  # must also ignore leading whitespace. Otherwise an indented
  # `** text` row gets a second `**` prefix and then splits into a
  # bogus empty row.
  content = stripped.lstrip

  # Continuation line missing its `**` prefix: re-add it.
  content = "** #{content}" unless content.start_with?("**")

  # Drop an existing trailing `**;` or `;` so every row is re-padded
  # the same way.
  content =
    if content.end_with?("**;")
      content[0...-3].rstrip
    elsif content.end_with?(";")
      content[0...-1].rstrip
    else
      content
    end

  # Split only on `\s+\*\*\s+`, the signature of two `**...**;`
  # comments that lost their line break. An inline `;` in prose is kept
  # as-is: once the trailing `**;` is appended, the lexer's `**...**;`
  # recognition consumes the whole row as one comment token.
  content.split(/\s+\*\*\s+/).each_with_index.map do |segment, idx|
    row = idx.zero? ? segment.rstrip : "** #{segment.strip}"
    row = "** #{row}" unless row.start_with?("**")
    row = row.ljust(PAD_TO) if row.length < PAD_TO
    "#{row}**;"
  end
end
288
+ end
289
+ end
290
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+
5
+ class SasLinter
6
+ module Rules
7
+ # Flag literal TAB (`\t`) characters in source. SAS authoring
8
+ # conventions strongly prefer spaces — tabs render at different
9
+ # widths in different editors and break the column alignment
10
+ # SAS sources often rely on for readability.
11
+ #
12
+ # When `autofix` is true, each tab is replaced with the number
13
+ # of spaces needed to reach the next column-aligned tab stop
14
+ # (i.e., the standard `expand(1)` semantics with the configured
15
+ # width). A tab in column N expands to `width - (N % width)`
16
+ # spaces, so leading whitespace, mid-line alignment, and pre-
17
+ # token padding all stay column-aligned post-fix.
18
+ #
19
+ # Recognized config options:
20
+ # width: integer (default 8)
21
+ # autofix: true | false (default false)
22
+ class TabExpansion < Rule
23
+ rule_id :tab_expansion
24
+ description "Line contains a literal TAB character; will be expanded to spaces."
25
+ severity :warning
26
+
27
+ DEFAULT_WIDTH = 8
28
+
29
# Advertises that this rule implements `autofix`.
#
# @return [Boolean] always +true+
def self.supports_autofix?
  true
end
32
+
33
# Build the rule from a config hash whose keys may be strings or symbols.
#
# @param opts [Hash] recognised keys: "width" (default DEFAULT_WIDTH)
#   and "autofix" (any truthy value enables it)
# @return [Object] the result of `new` with the normalised options
# @raise [ArgumentError, TypeError] when `Integer()` rejects the width
def self.from_config(opts = {})
  settings = opts.transform_keys(&:to_s)
  raw_width = settings.fetch("width", DEFAULT_WIDTH)
  wants_fix = settings["autofix"] ? true : false

  new(width: Integer(raw_width), autofix: wants_fix)
end
40
+
41
+ attr_reader :width
42
+
43
# @param width [Integer, #to_i] tab-stop width; must be at least 1
# @param autofix [Boolean] forwarded to the superclass initializer
# @raise [ArgumentError] when `width` is smaller than 1
def initialize(width: DEFAULT_WIDTH, autofix: false)
  super(autofix: autofix)
  unless width.to_i >= 1
    raise ArgumentError, "width must be positive (got #{width})"
  end

  @width = Integer(width)
end
49
+
50
# Report one finding per literal TAB character in `source`.
#
# @param _tokens [Object] unused
# @param path [String] path attached to every finding
# @param all_tokens [Object, nil] unused
# @param source [String, nil] raw file contents; nothing is reported without it
# @return [Array] findings with 1-indexed line and character column of each tab
def check(_tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  return [] unless source

  findings = []
  source.each_line.with_index(1) do |raw, line_no|
    body = raw.sub(/\r?\n\z/, "")
    at = -1
    # Jump from tab to tab instead of visiting every character.
    while (at = body.index("\t", at + 1))
      note = autofix? ? " (expanded to #{@width}-space tab stop)" : ""
      findings << finding(
        line: line_no,
        column: at + 1,
        message: "tab character#{note}",
        path: path
      )
    end
  end
  findings
end
71
+
72
+ # Replace every tab with `width - (col % width)` spaces, where
73
+ # `col` is the post-expansion column of the tab. Re-counts per
74
+ # line so the line terminator resets the column.
75
# @param source [String] raw file contents
# @return [String] `source` with each line passed through `expand_line`
def autofix(source)
  pieces = []
  source.each_line { |line| pieces << expand_line(line) }
  pieces.join
end
78
+
79
+ private
80
+
81
# Expand the tabs of a single line to the next multiple of `@width`.
#
# @param line [String] one line, optionally ending in LF or CRLF
# @return [String] the expanded line with its original terminator
def expand_line(line)
  terminator = line[/\r?\n\z/] || ""
  body = line[0, line.length - terminator.length]

  # Every piece except the last was followed by a tab in the original.
  pieces = body.split("\t", -1)
  tail = pieces.pop
  expanded = +""
  pieces.each do |piece|
    expanded << piece
    expanded << (" " * (@width - (expanded.length % @width)))
  end
  expanded << tail.to_s

  expanded + terminator
end
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+
5
+ class SasLinter
6
+ module Rules
7
+ # Flag end-of-line trailing whitespace (spaces or tabs that
8
+ # appear before the line terminator). Trailing whitespace is
9
+ # invisible noise — it inflates diffs, fights with editor
10
+ # auto-trim, and hides intent. Supports `autofix` to strip the
11
+ # offending bytes in place.
12
+ #
13
+ # Recognized config options:
14
+ # autofix: true | false (default: false)
15
+ class TrailingWhitespace < Rule
16
+ rule_id :trailing_whitespace
17
+ description "Line has trailing whitespace before the newline."
18
+ severity :warning
19
+
20
+ TRAILING_WS = /([ \t]+)(\r?\n|\z)/
21
+
22
# Advertises that this rule implements `autofix`.
#
# @return [Boolean] always +true+
def self.supports_autofix?
  true
end
25
+
26
# Report each line that ends in spaces or tabs.
#
# @param _tokens [Object] unused
# @param path [String] path attached to every finding
# @param all_tokens [Object, nil] unused
# @param source [String, nil] raw file contents; nothing is reported without it
# @return [Array] one finding per offending line, positioned at the
#   first character of the trailing whitespace run
def check(_tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  return [] unless source

  source.each_line.with_index(1).each_with_object([]) do |(raw, line_no), findings|
    body = raw.sub(/\r?\n\z/, "")
    ws_start = body.index(/[ \t]+\z/)
    next unless ws_start

    suffix = autofix? ? " (autofixed)" : ""
    findings << finding(
      line: line_no,
      column: ws_start + 1,
      message: "trailing whitespace#{suffix}",
      path: path
    )
  end
end
44
+
45
+ # Strip end-of-line trailing whitespace while preserving the
46
+ # original line terminator (LF or CRLF) and the trailing
47
+ # newline (or its absence) on the final line.
48
# @param source [String] raw file contents
# @return [String] copy of `source` with each end-of-line whitespace run
#   removed; the captured terminator (LF, CRLF or end of input) is kept
def autofix(source)
  source.gsub(TRAILING_WS, '\2')
end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,202 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+ require "sas_lexer"
5
+
6
+ class SasLinter
7
+ module Rules
8
+ # Flag inner branches whose comparison values are excluded by an
9
+ # enclosing `if VAR in (...) then do; ... end;` guard.
10
+ #
11
+ # Motivating shape: an outer guard
12
+ # `if RANK in (0,1,2,3,4,5,6,8) then do;` omits 7, while an inner
13
+ # `if RANK in (5,6,7,8) then cOut = 2;` lists 7. Value 7 falls
14
+ # through the outer guard, so the inner branch can never fire for it
15
+ # and cOut silently stays missing.
16
+ #
17
+ # Detection: outer guard pushes a {var, allowed_set} frame; inner
18
+ # `if VAR in (...)`, `if VAR = N`, or `if VAR eq N` references inside
19
+ # the same DO block are checked against that set. Values absent from
20
+ # the outer set produce a finding.
21
+ class UnreachableInnerBranchValue < Rule
22
+ rule_id :unreachable_inner_branch_value
23
+ description "Inner branch references a value that the enclosing " \
24
+ "outer guard excludes — branch is unreachable for that value."
25
+ severity :warning
26
+
27
+ TT = SasLexer::Lexer::TokenType
28
+
29
+ # Outer guard pattern: KW_IF IDENT KW_IN LPAREN <lits...> RPAREN KW_THEN KW_DO SEMI
30
+ # Inner check patterns:
31
+ # KW_IF IDENT(V) KW_IN LPAREN <lits...> RPAREN
32
+ # KW_IF IDENT(V) KW_EQ <lit>
33
+ # KW_IF IDENT(V) ASSIGN <lit> (SAS uses `=` as comparison in IF)
34
+
35
# Scan the token stream once, tracking DO-block depth and the stack of
# active guards, and collect the findings produced by `analyze_if`.
#
# @param tokens [Array<Hash>] lexer tokens with a `:type` key
# @param path [String] path forwarded to `analyze_if`
# @param all_tokens [Object, nil] unused
# @param source [Object, nil] unused
# @return [Array] all findings, in token order
def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  findings = []
  guard_stack = [] # frames of {var:, allowed:, depth:}
  do_depth = 0
  pos = 0

  until pos >= tokens.length
    kind = tokens[pos][:type]
    advance = 1

    if kind == TT::KW_IF
      advance, frame, hits = analyze_if(tokens, pos, do_depth, guard_stack, path)
      findings.concat(hits)
      if frame
        # A guard's `then do;` was consumed by analyze_if, so account
        # for the block it opened here.
        guard_stack.push(frame)
        do_depth += 1
      end
    elsif kind == TT::KW_DO
      # bare `do;` or an iterative `do i = 1 to N;`
      do_depth += 1
    elsif kind == TT::KW_END
      do_depth -= 1 if do_depth > 0
      # Retire every guard whose block has just been closed.
      guard_stack.pop while guard_stack.last && guard_stack.last[:depth] > do_depth
    end

    pos += advance
  end

  findings
end
75
+
76
+ private
77
+
78
+ # Returns [tokens_consumed, new_guard_frame_or_nil, findings].
79
+ # Skips ahead through the entire condition expression but not the body.
80
# Analyze the `if` statement whose KW_IF token sits at `tokens[i]`.
#
# @param tokens [Array<Hash>] lexer tokens
# @param i [Integer] index of the KW_IF token
# @param do_depth [Integer] DO-block depth at the `if`
# @param guard_stack [Array<Hash>] active guard frames, innermost last
# @param path [String] path attached to findings
# @return [Array(Integer, Hash, Array)] tokens consumed, a new guard
#   frame (or nil), and the findings for this `if`
def analyze_if(tokens, i, do_depth, guard_stack, path)
  ident = tokens[i + 1]
  return [1, nil, []] unless ident && ident[:type] == TT::IDENTIFIER

  var = ident[:text].downcase
  values, end_of_cond, simple = parse_comparison(tokens, i + 2, var, ident[:text])
  return [1, nil, []] unless simple

  # `then do;` right after the condition makes this `if` a guard.
  k = end_of_cond
  opens_guard =
    tokens[k] && tokens[k][:type] == TT::KW_THEN &&
    tokens[k + 1] && tokens[k + 1][:type] == TT::KW_DO &&
    tokens[k + 2] && tokens[k + 2][:type] == TT::SEMI

  # Check the values against the nearest enclosing guard on the same
  # variable. This also applies when this `if` opens a DO block itself:
  # its frame is not on the stack yet, so `active` can only be a true
  # outer guard, and a nested `if V in (...) then do;` is just as
  # unreachable for excluded values as a single-statement branch.
  active = guard_stack.reverse_each.find { |frame| frame[:var] == var }
  findings = []
  if active
    values.each do |val|
      next if active[:allowed].include?(val[:key])

      findings << finding(
        line: val[:line],
        column: val[:column],
        message: "value #{val[:display]} for #{ident[:text]} is excluded by " \
                 "the enclosing `if #{ident[:text]} in (...)` guard at line #{active[:line]}; " \
                 "this branch is unreachable.",
        path: path
      )
    end
  end

  return [end_of_cond - i, nil, findings] unless opens_guard

  new_frame = {
    var: var,
    allowed: values.map { |v| v[:key] }.to_set,
    depth: do_depth + 1,
    line: tokens[i][:start_line]
  }
  # Consume through the SEMI of `then do;`.
  [(k + 3) - i, new_frame, findings]
end
136
+
137
+ # Parse one of:
138
+ # KW_IN LPAREN <lits...> RPAREN
139
+ # KW_EQ <lit>
140
+ # ASSIGN <lit>
141
+ # Returns [values, index_after_condition, simple?].
142
+ # `values` is array of {key:, display:, line:, column:}.
143
+ # `simple?` is false if the condition contains anything we can't reason
144
+ # about (macros, references, expressions) — caller bails.
145
# @param tokens [Array<Hash>] lexer tokens
# @param op_idx [Integer] index of the operator token after the variable
# @param _var [String] unused
# @param _orig_text [String] unused
# @return [Array(Array<Hash>, Integer, Boolean)] parsed values, index just
#   past the condition, and whether the condition was understood; on
#   failure the result is `[[], op_idx, false]`
def parse_comparison(tokens, op_idx, _var, _orig_text)
  bail = [[], op_idx, false]
  op = tokens[op_idx]
  return bail unless op

  case op[:type]
  when TT::KW_IN
    opener = tokens[op_idx + 1]
    return bail unless opener && opener[:type] == TT::LPAREN

    collected = []
    cursor = op_idx + 2
    while (tok = tokens[cursor])
      return [collected, cursor + 1, true] if tok[:type] == TT::RPAREN

      unless tok[:type] == TT::COMMA
        # Anything that is not a plain literal (macro, identifier,
        # expression) makes the whole list unparseable.
        parsed = literal_value(tok)
        return bail unless parsed

        collected << parsed
      end
      cursor += 1
    end
    # Ran out of tokens before the closing parenthesis.
    bail
  when TT::KW_EQ, TT::ASSIGN
    parsed = literal_value(tokens[op_idx + 1])
    parsed ? [[parsed], op_idx + 2, true] : bail
  else
    bail
  end
end
183
+
184
# Convert a literal token into a comparable value descriptor.
#
# @param tok [Hash, nil] token with `:type`, `:text`, `:start_line`, `:start_column`
# @return [Hash, nil] `{key:, display:, line:, column:}` for integer,
#   float and string literals; nil for anything else or for text that
#   does not parse as a number
def literal_value(tok)
  return nil unless tok

  key =
    case tok[:type]
    when TT::INTEGER_LITERAL
      n = Integer(tok[:text], exception: false)
      n && ["int", n]
    when TT::FLOAT_LITERAL
      f = Float(tok[:text], exception: false)
      if f.nil?
        nil
      elsif f.finite? && f == f.to_i
        # Treat 5.0 as equivalent to 5 for set membership.
        ["int", f.to_i]
      else
        # Out-of-range literals such as 1e999 parse to Infinity;
        # `to_i` would raise FloatDomainError on them, so keep them as
        # floats.
        ["float", f]
      end
    when TT::STRING_LITERAL
      ["str", tok[:text]]
    end
  return nil unless key

  { key: key, display: tok[:text], line: tok[:start_line], column: tok[:start_column] + 1 }
end
200
+ end
201
+ end
202
+ end