sas-linter 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -1
- data/lib/sas_linter/rules/format_for_unknown_variable.rb +163 -0
- data/lib/sas_linter/rules/unterminated_comment.rb +101 -0
- data/lib/sas_linter/version.rb +1 -1
- data/lib/sas_linter.rb +2 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b12c05da17de3409a5eb163f3e3325cb7e2e24770d3de42e01efb786fc796d60
|
|
4
|
+
data.tar.gz: b14b81be912177959a039484c70ea7bf71f209910cf995822d8083c8d6ea2897
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5fa28c6588e96bdd973197577e0e3fb1aa689fd641455ed10225f0354e0243e41fdb1e682b20d0de02d2a443b30ae9d263f419a31d42ea12763d91320bef7078
|
|
7
|
+
data.tar.gz: aa8eb2ffe598911984fd57acff5ea350136865ccc64da7d7003b23033840013ac5c544a9bfd56d7f57bcb67c1f01de5af74c5b32f59d87300eac3525ae9e9bfe
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# sas-linter
|
|
2
2
|
|
|
3
|
-
A configurable lint engine for SAS source files. Built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)) and ships with
|
|
3
|
+
A configurable lint engine for SAS source files. Built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)) and ships with fifteen pluggable rules covering structural defects, cosmetic issues, and source-header conventions.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -63,10 +63,17 @@ rules:
|
|
|
63
63
|
enabled: true
|
|
64
64
|
autofix: false # rule supports autofix; off by default
|
|
65
65
|
|
|
66
|
+
unterminated_comment:
|
|
67
|
+
enabled: true
|
|
68
|
+
autofix: false # append `;` to `**…**` lines whose missing terminator eats the next line
|
|
69
|
+
|
|
66
70
|
inconsistent_variable_case:
|
|
67
71
|
enabled: true
|
|
68
72
|
autofix: false # rewrite every minority casing to the most-common form
|
|
69
73
|
|
|
74
|
+
format_for_unknown_variable:
|
|
75
|
+
enabled: true # skipped automatically when the file uses set/merge/update/infile/input
|
|
76
|
+
|
|
70
77
|
variable_value_out_of_known_range:
|
|
71
78
|
enabled: true
|
|
72
79
|
csv_paths: # empty list = rule is a no-op
|
|
@@ -138,8 +145,10 @@ findings = linter.lint_file("path/to/source.sas")
|
|
|
138
145
|
| `encoding_issues` | Smart-quote / em-dash / Win-1252 byte sequences that confuse downstream tooling. |
|
|
139
146
|
| `malformed_if_condition` | Empty conditions, missing operators, orphan `then`, unbalanced parens, etc. |
|
|
140
147
|
| `missing_assignment_semicolon` | Assignment statements followed by an inline `**` comment but no terminating `;`. |
|
|
148
|
+
| `unterminated_comment` | A standalone `** … **` comment whose missing `;` lets the SAS lexer extend the comment into the next line of real code, silently swallowing it. Autofix appends the `;`. |
|
|
141
149
|
| `variable_value_out_of_known_range` | `if VAR = N` / `if VAR in (...)` literals fall outside the variable's documented acceptable values. Loads the catalog from one or more CSVs with configurable column names and column separator (`,`, `;`, tab). |
|
|
142
150
|
| `inconsistent_variable_case` | Identifier appears with more than one casing in the same file (`myVar` vs `MyVar`). SAS treats both as the same variable; autofix rewrites every minority spelling to the most-common form. Skips proc-format definitions and `format.` / `lib.member` references. |
|
|
151
|
+
| `format_for_unknown_variable` | `format` / `informat` / `attrib` statement assigns a format to a variable that's referenced nowhere else in the file — almost always a typo. Skipped on files that pull in columns via `set` / `merge` / `update` / `infile` / `input`. |
|
|
143
152
|
|
|
144
153
|
`bin/sas_lint --list-rules` prints the same set with autofix capability.
|
|
145
154
|
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../sas_linter"
|
|
4
|
+
require "sas_lexer"
|
|
5
|
+
|
|
6
|
+
class SasLinter
  module Rules
    # Reports variables that are given a format by a `format`, `informat`
    # or `attrib` statement but are never mentioned by any non-declaration
    # statement in the same file. The usual cause is a misspelling such as
    # `attrib totalscore format=flagx.;` in a program that otherwise only
    # uses `total_score`. SAS accepts this and attaches the format to a
    # phantom column, but tools that resolve variable references (for
    # example sas-ruby) reject the file.
    #
    # To stay conservative, the rule emits nothing at all for a file that
    # contains a `set`, `merge`, `update`, `infile` or `input` statement:
    # those bring in columns the linter cannot see, so a format target may
    # be perfectly valid.
    class FormatForUnknownVariable < Rule
      rule_id :format_for_unknown_variable
      description "Variable named in a `format` / `informat` / `attrib` " \
                  "statement is referenced nowhere else in the file — " \
                  "almost always a typo."
      severity :warning

      TT = SasLexer::Lexer::TokenType

      # Opener token type => label used in the finding message.
      FORMAT_KIND_BY_TYPE = {
        TT::KW_FORMAT => :format,
        TT::KW_INFORMAT => :informat,
        TT::KW_ATTRIB => :attrib
      }.freeze

      # Statement openers that import variables from outside the file.
      EXTERNAL_INPUT_TYPES = [
        TT::KW_SET, TT::KW_MERGE, TT::KW_UPDATE, TT::KW_INFILE, TT::KW_INPUT
      ].freeze

      # Keyword-typed openers of declaration-only statements. A name that
      # shows up solely in such statements is not treated as a real use.
      DECLARATION_TYPES = [
        TT::KW_FORMAT, TT::KW_INFORMAT, TT::KW_ATTRIB,
        TT::KW_LABEL, TT::KW_LENGTH, TT::KW_KEEP, TT::KW_DROP, TT::KW_ARRAY
      ].freeze

      # Declaration openers that the lexer reports as plain IDENTIFIER
      # tokens: `retain` (data step) and the `proc format` body openers
      # `value` / `invalue` / `picture`.
      DECLARATION_TEXT = %w[retain value invalue picture].freeze

      # Scans the token stream statement by statement and returns one
      # finding per format target whose (case-insensitive) name is never
      # used by a non-declaration statement. Returns an empty array when
      # any statement opens with an external-input keyword.
      #
      # `all_tokens` and `source` are accepted for interface compatibility
      # and are not read here.
      def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
        saw_external_input = false
        format_targets = []
        referenced = Set.new

        each_statement(tokens) do |statement|
          head = statement.first
          head_type = head[:type]

          if EXTERNAL_INPUT_TYPES.include?(head_type)
            saw_external_input = true
          elsif (format_kind = FORMAT_KIND_BY_TYPE[head_type])
            collect_targets(statement, format_kind, format_targets)
          elsif !declaration_statement?(head)
            collect_uses(statement, referenced)
          end
        end

        return [] if saw_external_input

        format_targets
          .reject { |token, _kind| referenced.include?(token[:text].downcase) }
          .filter_map { |token, format_kind| unknown_variable_finding(token, format_kind, path) }
      end

      private

      # Builds the finding for a single unreferenced format target. The
      # token's start column is shifted by one before being reported.
      def unknown_variable_finding(token, format_kind, path)
        finding(
          line: token[:start_line],
          column: token[:start_column] + 1,
          message: "`#{format_kind}` assigns a format to `#{token[:text]}` but " \
                   "that variable is not referenced anywhere else in " \
                   "this file — likely a typo.",
          path: path
        )
      end

      # Yields the tokens of every `;`-terminated statement, without the
      # terminator itself. Empty statements (stray semicolons) are not
      # yielded, and tokens after the final `;` are never yielded.
      def each_statement(tokens)
        statement_start = 0
        tokens.each_with_index do |token, index|
          next if token[:type] != TT::SEMI

          statement = tokens[statement_start...index]
          statement_start = index + 1
          yield statement unless statement.empty?
        end
      end

      # True when the opener starts a declaration-only statement, either by
      # keyword token type or by an IDENTIFIER whose text is listed in
      # DECLARATION_TEXT (compared case-insensitively).
      def declaration_statement?(opener)
        opener_type = opener[:type]
        return true if DECLARATION_TYPES.include?(opener_type)

        opener_type == TT::IDENTIFIER && DECLARATION_TEXT.include?(opener[:text].downcase)
      end

      # Appends a `[token, kind]` pair to `targets` for every identifier in
      # the statement that names a variable rather than a format.
      def collect_targets(stmt, kind, targets)
        stmt.each_index do |index|
          token = stmt[index]
          targets << [token, kind] if token[:type] == TT::IDENTIFIER && variable_target?(stmt, index)
        end
      end

      # An identifier is NOT a target when it is a format name: either it
      # is immediately followed (no gap) by a `.` (`date9.`, `flagx.`), or
      # it directly follows an `=` (as in `format=flagx.`). Any other
      # identifier counts as a variable target.
      def variable_target?(stmt, i)
        return false if followed_by_adjacent_dot?(stmt, i)

        preceding = stmt[i - 1] if i.positive?
        !(preceding && preceding[:type] == TT::ASSIGN)
      end

      # Adds the lower-cased text of every identifier that counts as a
      # variable use to `names`.
      def collect_uses(stmt, names)
        stmt.each_with_index do |token, index|
          names << token[:text].downcase if token[:type] == TT::IDENTIFIER && variable_use?(stmt, index)
        end
      end

      # An identifier is not a variable use when it touches a following `.`
      # (a format reference) or touches a preceding `.` (the member half of
      # `lib.member`). Everything else is a use.
      def variable_use?(stmt, i)
        return false if followed_by_adjacent_dot?(stmt, i)

        preceding = stmt[i - 1] if i.positive?
        !(preceding && preceding[:type] == TT::DOT && preceding[:end] == stmt[i][:start])
      end

      # True when the token after index `i` is a DOT that begins exactly
      # where the token at `i` ends.
      def followed_by_adjacent_dot?(stmt, i)
        following = stmt[i + 1]
        return false unless following

        following[:type] == TT::DOT && following[:start] == stmt[i][:end]
      end
    end
  end
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../sas_linter"
|
|
4
|
+
require "sas_lexer"
|
|
5
|
+
|
|
6
|
+
class SasLinter
  module Rules
    # Flag a `** ... **` comment line whose missing `;` causes the SAS
    # lexer to extend the comment statement into the following line(s) of
    # real code, silently swallowing it.
    #
    # SAS `*` / `**` comment statements are terminated by the next `;` —
    # the closing `**` is just prose. So
    #
    #   ** SOME COMMENT **
    #   y = x + 1;
    #
    # lexes as a single comment token covering both lines, and the
    # `y = x + 1;` assignment never executes.
    #
    # Detection: a COMMENT-channel `PREDICTED_COMMENT_STAT` token whose
    # `start_line` is before its `end_line` AND whose first source line,
    # with trailing whitespace removed, ends with `**`. That shape is the
    # boxed-comment closer the user clearly intended — they only forgot
    # the `;`. Multi-line `*...;` prose whose first line ends with plain
    # text is left alone.
    #
    # Autofix: append `;` to the end of each flagged first line, keeping
    # that line's original line ending.
    class UnterminatedComment < Rule
      rule_id :unterminated_comment
      description "`**` comment missing its terminating `;` — consumes following code."
      severity :warning

      TT = SasLexer::Lexer::TokenType
      COMMENT_CHANNEL = SasLexer::Lexer::TokenChannel::COMMENT

      # This rule can rewrite the source; see #autofix.
      def self.supports_autofix?
        true
      end

      # Returns one finding per flagged comment line.
      #
      # _tokens    - unused; the rule reads `all_tokens` instead.
      # path       - passed through to each finding.
      # all_tokens - token hashes that carry `:channel`; presumably the
      #              unfiltered stream including comments (verify against
      #              the caller).
      # source     - full source text, used to inspect each comment's
      #              first line.
      #
      # Returns [] when either `all_tokens` or `source` is missing.
      def check(_tokens, path:, all_tokens: nil, source: nil)
        return [] unless all_tokens && source

        lines = source.split("\n", -1)
        unterminated_comment_lines(all_tokens, lines).map do |i|
          finding_for_line(lines[i], i, path)
        end
      end

      # Returns `source` with a `;` appended to every flagged line, or
      # `source` itself when it is nil, empty, or has nothing to fix.
      # Trailing whitespace on a fixed line is dropped before the `;`.
      def autofix(source)
        return source if source.nil? || source.empty?

        # Limit -1 keeps trailing empty fields, so join("\n") round-trips.
        lines = source.split("\n", -1)
        bad = unterminated_comment_lines(tokenize(source), lines)
        return source if bad.empty?

        bad.each do |i|
          line = lines[i]
          # Splitting on "\n" leaves the "\r" of a CRLF ending on the line,
          # and rstrip would remove it. Put it back after the `;` so the
          # fixed line keeps the same line ending as the rest of the file.
          line_ending = line.end_with?("\r") ? "\r" : ""
          lines[i] = "#{line.rstrip};#{line_ending}"
        end
        lines.join("\n")
      end

      private

      # Builds the finding for the 0-indexed line `idx`. The reported line
      # is `idx + 1` and the column points at the first non-whitespace
      # character of the line.
      def finding_for_line(line, idx, path)
        finding(
          line: idx + 1,
          column: line.length - line.lstrip.length + 1,
          message: "`**` comment missing `;` — consumes the next line of code as comment text.",
          path: path
        )
      end

      # Lexes `source` with a fresh lexer and always frees the lexer
      # afterwards, even if tokenizing raises.
      def tokenize(source)
        lexer = SasLexer::Lexer.new
        begin
          lexer.tokenize(source)
        ensure
          lexer.free
        end
      end

      # 0-indexed source lines that hold a `** ... **` comment whose
      # missing `;` made the lexer extend it into the next line.
      # Token `:start_line` values are treated as 1-based here.
      def unterminated_comment_lines(all_tokens, lines)
        all_tokens.each_with_object([]) do |t, bad|
          next unless t[:channel] == COMMENT_CHANNEL
          next unless t[:type] == TT::PREDICTED_COMMENT_STAT
          next unless t[:start_line] < t[:end_line]

          index = t[:start_line] - 1
          first = lines[index]
          # Skip tokens whose start line is not present in `lines`.
          next unless first
          next unless first.rstrip.end_with?("**")

          bad << index
        end
      end
    end
  end
end
|
data/lib/sas_linter/version.rb
CHANGED
data/lib/sas_linter.rb
CHANGED
|
@@ -310,3 +310,5 @@ require_relative "sas_linter/rules/missing_assignment_semicolon"
|
|
|
310
310
|
require_relative "sas_linter/rules/variable_value_out_of_known_range"
|
|
311
311
|
require_relative "sas_linter/rules/invalid_numeric_literal"
|
|
312
312
|
require_relative "sas_linter/rules/inconsistent_variable_case"
|
|
313
|
+
require_relative "sas_linter/rules/format_for_unknown_variable"
|
|
314
|
+
require_relative "sas_linter/rules/unterminated_comment"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sas-linter
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.3
|
|
4
|
+
version: 0.2.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Craig McNamara
|
|
@@ -63,6 +63,7 @@ files:
|
|
|
63
63
|
- lib/sas_linter/rules/choose_one_template.rb
|
|
64
64
|
- lib/sas_linter/rules/commented_out_guard.rb
|
|
65
65
|
- lib/sas_linter/rules/encoding_issues.rb
|
|
66
|
+
- lib/sas_linter/rules/format_for_unknown_variable.rb
|
|
66
67
|
- lib/sas_linter/rules/identical_if_else_branches.rb
|
|
67
68
|
- lib/sas_linter/rules/inconsistent_variable_case.rb
|
|
68
69
|
- lib/sas_linter/rules/invalid_numeric_literal.rb
|
|
@@ -73,6 +74,7 @@ files:
|
|
|
73
74
|
- lib/sas_linter/rules/tab_expansion.rb
|
|
74
75
|
- lib/sas_linter/rules/trailing_whitespace.rb
|
|
75
76
|
- lib/sas_linter/rules/unreachable_inner_branch_value.rb
|
|
77
|
+
- lib/sas_linter/rules/unterminated_comment.rb
|
|
76
78
|
- lib/sas_linter/rules/variable_value_out_of_known_range.rb
|
|
77
79
|
- lib/sas_linter/version.rb
|
|
78
80
|
homepage: https://github.com/mes-amis/sas-linter
|