sas-linter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +661 -0
- data/README.md +140 -0
- data/Rakefile +11 -0
- data/bin/sas_lint +79 -0
- data/lib/sas_linter/rules/choose_one_template.rb +61 -0
- data/lib/sas_linter/rules/commented_out_guard.rb +59 -0
- data/lib/sas_linter/rules/encoding_issues.rb +322 -0
- data/lib/sas_linter/rules/identical_if_else_branches.rb +104 -0
- data/lib/sas_linter/rules/line_endings.rb +105 -0
- data/lib/sas_linter/rules/malformed_if_condition.rb +291 -0
- data/lib/sas_linter/rules/missing_assignment_semicolon.rb +141 -0
- data/lib/sas_linter/rules/source_headers.rb +290 -0
- data/lib/sas_linter/rules/tab_expansion.rb +98 -0
- data/lib/sas_linter/rules/trailing_whitespace.rb +53 -0
- data/lib/sas_linter/rules/unreachable_inner_branch_value.rb +202 -0
- data/lib/sas_linter/rules/variable_value_out_of_known_range.rb +280 -0
- data/lib/sas_linter/version.rb +5 -0
- data/lib/sas_linter.rb +287 -0
- metadata +96 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../sas_linter"
|
|
4
|
+
require "sas_lexer"
|
|
5
|
+
require "csv"
|
|
6
|
+
|
|
7
|
+
class SasLinter
|
|
8
|
+
module Rules
|
|
9
|
+
# Flag conditional comparisons against literal values that fall outside a
|
|
10
|
+
# variable's documented acceptable values, e.g.:
|
|
11
|
+
#
|
|
12
|
+
# if AGE in (0,1,2,99) then ... # AGE takes 0..2 — `99` is dead
|
|
13
|
+
# if SCORE = 99 then ... # SCORE takes 0..5
|
|
14
|
+
# if RANK eq 7 then ... # RANK takes 0..6
|
|
15
|
+
#
|
|
16
|
+
# Catches typos and stale literals where the source compares a variable
|
|
17
|
+
# against a value the variable can never actually take, so the branch is
|
|
18
|
+
# unreachable. Only fires inside `if`-conditions (between KW_IF and the
|
|
19
|
+
# next KW_THEN or SEMI) — assignments to the variable are not flagged.
|
|
20
|
+
#
|
|
21
|
+
# Acceptable values are loaded from one or more CSV files with two
|
|
22
|
+
# configurable columns: a name column and an acceptable-values column.
|
|
23
|
+
# Recognized value formats:
|
|
24
|
+
#
|
|
25
|
+
# 0-5 → integer range
|
|
26
|
+
# 1,2,3 → integer set
|
|
27
|
+
# 0-4,7,8 → range plus extras
|
|
28
|
+
# 0-90 (99) → range plus parenthesized extras
|
|
29
|
+
#
|
|
30
|
+
# Variables whose values column is free text or a date pattern are
|
|
31
|
+
# silently skipped (no findings, no errors).
|
|
32
|
+
#
|
|
33
|
+
# Recognized config options:
|
|
34
|
+
#   csv_paths: ["metadata/variables.csv", ...]  # optional; the rule is a no-op when empty
|
|
35
|
+
# name_column: "Variable" # default: "Variable"
|
|
36
|
+
# values_column: "Acceptable Values" # default: "Acceptable Values"
|
|
37
|
+
# name_match: case_insensitive | exact # default: case_insensitive
|
|
38
|
+
#   delimiter: ","                              # CSV column separator, default: ","
#   autofix: false                              # this rule has no autofix
|
|
39
|
+
#
|
|
40
|
+
# When `csv_paths` is empty the rule is a no-op — useful so projects
|
|
41
|
+
# without a variable catalog can keep the rule registered without it
|
|
42
|
+
# firing.
|
|
43
|
+
class VariableValueOutOfKnownRange < Rule
|
|
44
|
+
rule_id :variable_value_out_of_known_range
|
|
45
|
+
description "Comparison literal falls outside a variable's documented " \
|
|
46
|
+
"acceptable values — branch is unreachable."
|
|
47
|
+
severity :warning
|
|
48
|
+
|
|
49
|
+
TT = SasLexer::Lexer::TokenType
|
|
50
|
+
|
|
51
|
+
DEFAULT_NAME_COLUMN = "Variable"
|
|
52
|
+
DEFAULT_VALUES_COLUMN = "Acceptable Values"
|
|
53
|
+
DEFAULT_DELIMITER = ","
|
|
54
|
+
|
|
55
|
+
def initialize(csv_paths: [],
|
|
56
|
+
name_column: DEFAULT_NAME_COLUMN,
|
|
57
|
+
values_column: DEFAULT_VALUES_COLUMN,
|
|
58
|
+
name_match: :case_insensitive,
|
|
59
|
+
delimiter: DEFAULT_DELIMITER,
|
|
60
|
+
autofix: false)
|
|
61
|
+
super(autofix: autofix)
|
|
62
|
+
@csv_paths = Array(csv_paths)
|
|
63
|
+
@name_column = name_column
|
|
64
|
+
@values_column = values_column
|
|
65
|
+
@delimiter = delimiter
|
|
66
|
+
unless %i[case_insensitive exact].include?(name_match)
|
|
67
|
+
raise ArgumentError, "name_match must be :case_insensitive or :exact (got #{name_match.inspect})"
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
@name_match = name_match
|
|
71
|
+
@specs = nil
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# Build the rule from a config hash (string or symbol keys).
#
# Every entry of `csv_paths` is expanded with `File.expand_path`; options
# that are missing fall back to the class defaults.
#
# @param opts [Hash] rule options as read from the lint config
# @return [VariableValueOutOfKnownRange]
def self.from_config(opts = {})
  settings = opts.transform_keys(&:to_s)
  absolute_paths = Array(settings["csv_paths"]).map { |csv| File.expand_path(csv) }
  match_mode = settings["name_match"] || "case_insensitive"

  new(
    csv_paths: absolute_paths,
    name_column: settings["name_column"] || DEFAULT_NAME_COLUMN,
    values_column: settings["values_column"] || DEFAULT_VALUES_COLUMN,
    name_match: match_mode.to_sym,
    delimiter: settings["delimiter"] || DEFAULT_DELIMITER,
    autofix: !!settings["autofix"]
  )
end
|
|
85
|
+
|
|
86
|
+
# Walk the default-channel tokens and report comparison literals that fall
# outside a cataloged variable's acceptable values.
#
# Only identifiers seen between a KW_IF and the next KW_THEN or SEMI are
# examined. When `check_comparison` recognizes a comparison it reports how
# many tokens it consumed and the walk skips over them.
#
# @param tokens [Array<Hash>] default-channel tokens
# @param path [String] file path used in the findings
# @return [Array] findings (empty when no specs were loaded)
def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  return [] if specs.empty?

  collected = []
  inside_if = false
  cursor = 0

  until cursor >= tokens.length
    current = tokens[cursor]
    step = 1

    case current[:type]
    when TT::KW_IF
      inside_if = true
    when TT::KW_THEN, TT::SEMI
      inside_if = false
    else
      follower = tokens[cursor + 1]
      if inside_if && follower && current[:type] == TT::IDENTIFIER
        used, hits = check_comparison(tokens, cursor, current, follower, path)
        collected.concat(hits)
        # A zero count means "no comparison here": advance by one token.
        step = used if used > 0
      end
    end

    cursor += step
  end

  collected
end
|
|
124
|
+
|
|
125
|
+
private
|
|
126
|
+
|
|
127
|
+
# Inspect the comparison that starts at the identifier `ident`.
#
# Recognized forms are `IDENT in ( ... )` and `IDENT = literal` /
# `IDENT eq literal`. Identifiers without a loaded spec, and any other
# operator, are ignored.
#
# @param tokens [Array<Hash>] default-channel tokens
# @param ident_idx [Integer] index of `ident` within `tokens`
# @param ident [Hash] the identifier token
# @param op [Hash] the token that follows the identifier
# @param path [String] file path used in the findings
# @return [Array(Integer, Array)] `[tokens_consumed, findings]`;
#   `tokens_consumed` is 0 when no recognized comparison starts here
def check_comparison(tokens, ident_idx, ident, op, path)
  spec = lookup_spec(ident[:text])
  return [0, []] unless spec

  case op[:type]
  when TT::KW_IN
    scan_in_list(tokens, ident_idx, ident, spec, path)
  when TT::KW_EQ, TT::ASSIGN
    compare_literal(tokens, ident_idx, ident, spec, path)
  else
    [0, []]
  end
end

# Check every literal of `IDENT in ( ... )`. The scan stops at the closing
# parenthesis. If the list is unterminated it stops at the next KW_THEN or
# SEMI (left unconsumed so the caller can close the condition) or at the end
# of input, instead of running on into unrelated statements.
def scan_in_list(tokens, ident_idx, ident, spec, path)
  lparen = tokens[ident_idx + 2]
  return [0, []] unless lparen && lparen[:type] == TT::LPAREN

  findings = []
  k = ident_idx + 3
  while k < tokens.length
    t = tokens[k]
    case t[:type]
    when TT::RPAREN
      return [k - ident_idx + 1, findings]
    when TT::KW_THEN, TT::SEMI
      return [k - ident_idx, findings]
    when TT::COMMA
      # separators carry no value
    else
      lit = literal_value(t)
      findings << out_of_range_finding(t, ident, lit, spec, path) if lit && !value_allowed?(spec, lit[:value])
    end
    k += 1
  end

  [k - ident_idx, findings]
end

# Check the single literal of `IDENT = literal` / `IDENT eq literal`.
def compare_literal(tokens, ident_idx, ident, spec, path)
  lit_tok = tokens[ident_idx + 2]
  lit = literal_value(lit_tok)
  return [0, []] unless lit
  return [3, []] if value_allowed?(spec, lit[:value])

  [3, [out_of_range_finding(lit_tok, ident, lit, spec, path)]]
end

# Build the finding for one offending literal token. The reported column is
# the token's `start_column` plus one.
def out_of_range_finding(tok, ident, lit, spec, path)
  finding(
    line: tok[:start_line],
    column: tok[:start_column] + 1,
    message: format_message(ident[:text], lit[:display], spec),
    path: path
  )
end
|
|
180
|
+
|
|
181
|
+
# Look up the spec for an identifier, honoring the configured name matching:
# the text is used verbatim for `:exact`, lowercased otherwise.
#
# @param text [String] identifier text from the token stream
# @return [Hash, nil] the spec, or nil when the variable is not cataloged
def lookup_spec(text)
  return specs[text] if @name_match == :exact

  specs[text.downcase]
end
|
|
185
|
+
|
|
186
|
+
# Memoized catalog of specs; `load_specs` runs on the first call only.
#
# @return [Hash] lookup key => spec
def specs
  return @specs if @specs

  @specs = load_specs
end
|
|
189
|
+
|
|
190
|
+
# Read every configured CSV and build the lookup table of specs.
#
# Paths that are not regular files are skipped silently. Rows with a blank
# name, a blank values cell, or a values cell that `parse_values` does not
# recognize are skipped. When a name appears more than once, the last row
# read wins.
#
# Names are stripped before they are stored, so a padded CSV cell
# (" AGE ") still matches the identifier `AGE`.
#
# @return [Hash{String => Hash}] lookup key (lowercased unless name matching
#   is `:exact`) => spec
def load_specs
  @csv_paths.each_with_object({}) do |csv_path, catalog|
    next unless File.file?(csv_path)

    CSV.foreach(csv_path, headers: true, col_sep: @delimiter) do |row|
      name = row[@name_column]&.strip
      values_text = row[@values_column]&.strip
      next if name.nil? || name.empty?
      next if values_text.nil? || values_text.empty?

      spec = parse_values(values_text)
      next unless spec

      key = @name_match == :exact ? name : name.downcase
      catalog[key] = spec
    end
  end
end
|
|
210
|
+
|
|
211
|
+
# Parse an acceptable-values cell into a spec.
#
# Recognized formats:
#
#   "0-5"        => { type: :range, in: 0..5 }
#   "1,2,3"      => { type: :set,   in: [1, 2, 3] }
#   "0-4,7,8"    => { type: :set,   in: [0, 1, 2, 3, 4, 7, 8] }
#   "0-90 (99)"  => { type: :set,   in: [0, ..., 90, 99] }
#
# Anything else (free text, date patterns, alpha ranges) returns nil and the
# row is skipped. To avoid false findings the parser is deliberately strict:
#
# * a range whose lower bound exceeds its upper bound (e.g. the date-like
#   "2020-01") is not a value range and is skipped, because an empty range
#   would flag every literal;
# * every "extra" must be a plain decimal integer. If any one is not
#   ("99 = unknown"), the whole cell is skipped rather than dropping the
#   entry and later flagging a value that is in fact documented;
# * extras are read in base 10, so "010" means ten (not octal eight),
#   matching the `to_i` used for the range bounds.
#
# @param text [String] stripped cell text
# @return [Hash, nil] `{ type: :range|:set, in: Range|Array<Integer> }` or nil
def parse_values(text)
  case text
  when /\A(-?\d+)-(-?\d+)\z/
    low = Regexp.last_match(1).to_i
    high = Regexp.last_match(2).to_i
    return nil if low > high

    { type: :range, in: low..high }
  when /\A-?\d+(?:\s*,\s*-?\d+)+\z/
    { type: :set, in: text.split(/\s*,\s*/).map(&:to_i) }
  when /\A(\d+)-(\d+)\s+\(([^)]+)\)\z/, /\A(\d+)-(\d+)\s*,\s*(.+)\z/
    # Regexp.last_match refers to whichever of the two patterns matched.
    low = Regexp.last_match(1).to_i
    high = Regexp.last_match(2).to_i
    return nil if low > high

    pieces = Regexp.last_match(3).split(/\s*,\s*/)
    extras = pieces.map { |piece| Integer(piece, 10, exception: false) }
    return nil if extras.empty? || extras.any?(&:nil?)

    { type: :set, in: (low..high).to_a + extras }
  end
end
|
|
231
|
+
|
|
232
|
+
# Decide whether a literal value is covered by a spec.
#
# Specs only ever contain integers, so a quoted literal whose content is a
# plain decimal integer (`'1'`, `" 2 "`) is compared by its numeric value.
# Without that, `if SEX = '1'` would be reported even though 1 is documented.
# Any other string is compared as-is and therefore never matches.
#
# @param spec [Hash] `{ type: :range|:set, in: Range|Array }`
# @param value [Integer, Float, String] literal value from `literal_value`
# @return [Boolean] true when the value is allowed, or when the spec has no
#   `:in` entry or an unknown `:type`
def value_allowed?(spec, value)
  allowed = spec[:in]
  return true if allowed.nil?

  candidate = value
  if value.is_a?(String)
    numeric = Integer(value.strip, 10, exception: false)
    candidate = numeric unless numeric.nil?
  end

  case spec[:type]
  when :range
    allowed.cover?(candidate)
  when :set
    allowed.include?(candidate)
  else
    true
  end
end
|
|
245
|
+
|
|
246
|
+
# Convert a literal token into its comparable value.
#
# * Integer literals are read in base 10, so a zero-padded literal such as
#   `010` is ten (a base-detecting parse would read it as octal 8 and reject
#   `08` outright).
# * Float literals with an integral value (`2.0`) collapse to an Integer so
#   they compare equal to integer specs. Non-finite results (an overflowing
#   literal such as `1e999`) are treated as "not a literal" instead of
#   raising FloatDomainError from `to_i`.
# * String literals lose one leading and one trailing quote character.
#
# @param tok [Hash, nil] token with `:type` and `:text`
# @return [Hash, nil] `{ value:, display: }`, or nil when the token is
#   missing, is not a literal, or its text cannot be parsed
def literal_value(tok)
  return nil unless tok

  text = tok[:text]
  case tok[:type]
  when TT::INTEGER_LITERAL
    number = Integer(text, 10, exception: false)
    number && { value: number, display: text }
  when TT::FLOAT_LITERAL
    number = Float(text, exception: false)
    return nil unless number&.finite?

    { value: (number == number.to_i ? number.to_i : number), display: text }
  when TT::STRING_LITERAL
    { value: text.gsub(/\A['"]|['"]\z/, ""), display: text }
  end
end
|
|
268
|
+
|
|
269
|
+
# Compose the finding message for one offending literal.
#
# A range spec is shown as `0..5`, a set spec as `{1, 2, 3}`; any other
# spec type leaves the parentheses empty.
#
# @param ident [String] identifier text as written in the source
# @param display [String] literal text as written in the source
# @param spec [Hash] the variable's spec
# @return [String]
def format_message(ident, display, spec)
  allowed_values = spec[:in]
  allowed_str =
    if spec[:type] == :range
      allowed_values.to_s
    elsif spec[:type] == :set
      "{#{allowed_values.join(', ')}}"
    end

  "value #{display} for #{ident} is outside the documented acceptable " \
    "values (#{allowed_str}); this branch is unreachable."
end
|
|
278
|
+
end
|
|
279
|
+
end
|
|
280
|
+
end
|
data/lib/sas_linter.rb
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "sas_lexer"
|
|
4
|
+
require "set"
|
|
5
|
+
require "yaml"
|
|
6
|
+
|
|
7
|
+
require_relative "sas_linter/version"
|
|
8
|
+
|
|
9
|
+
# Configurable lint engine for SAS source files. Walks the token stream
|
|
10
|
+
# produced by `SasLexer::Lexer` and applies a set of pluggable rules.
|
|
11
|
+
#
|
|
12
|
+
# Each rule is a subclass of `SasLinter::Rule` and is auto-registered when
|
|
13
|
+
# its file is required. Use `SasLinter.new(rules: [...])` to constrain the
|
|
14
|
+
# rule set, or `SasLinter.from_config(config_hash)` to honor a YAML config.
|
|
15
|
+
class SasLinter
|
|
16
|
+
DEFAULT_CONFIG_PATH = "config/lint.yaml"
|
|
17
|
+
|
|
18
|
+
# One lint result: where it was found (`path`, `line`, `column`), which
# `rule` produced it, the human-readable `message` and its `severity`.
# Members are supplied by keyword.
Finding = Struct.new(:path, :line, :column, :rule, :message, :severity, keyword_init: true) do
  # Render as `path:line:column: [rule] message`.
  def to_s
    location = [path, line, column].map(&:to_s).join(":")
    "#{location}: [#{rule}] #{message}"
  end
end
|
|
23
|
+
|
|
24
|
+
# Base class for lint rules. A subclass registers itself in the shared
# registry as soon as it declares an id with `rule_id :some_id`.
class Rule
  class << self
    # Rule classes keyed by rule id (Symbol); created on first use.
    def registry
      @registry ||= {}
    end

    # Add `klass` to the registry under its declared id.
    #
    # @raise [ArgumentError] when the class has no rule id
    def register(klass)
      key = klass.rule_id
      raise ArgumentError, "Rule #{klass} did not declare a rule_id" if key.nil?

      registry.store(key, klass)
    end

    # @return [Array<Class>] every registered rule class
    def all
      registry.values
    end

    # Look up a rule class by id (anything responding to `to_sym`).
    #
    # @raise [ArgumentError] when no rule is registered under that id
    def fetch(id)
      key = id.to_sym
      return registry[key] if registry.key?(key)

      raise ArgumentError, "Unknown lint rule: #{id.inspect}. Known: #{registry.keys.join(', ')}"
    end

    def inherited(subclass)
      super
      # Nothing to do here: subclasses self-register once they declare an
      # id via `rule_id :foo`.
    end

    # With an argument: declare this rule's id and register the class with
    # `Rule`. Without: return the declared id (nil when none was declared).
    def rule_id(value = nil)
      return @rule_id unless value

      @rule_id = value.to_sym
      Rule.register(self)
      @rule_id
    end

    # Declare (with an argument) or read (without) the rule description.
    def description(value = nil)
      return @description unless value

      @description = value
    end

    # Declare (with an argument) or read (without) the rule severity;
    # reads fall back to `:warning`.
    def severity(value = nil)
      @severity = value if value
      @severity || :warning
    end

    # Build a rule instance from a config hash. Subclasses with constructor
    # arguments should override this; the default forwards `autofix`
    # (the only generic option) and ignores everything else.
    def from_config(opts = {})
      settings = opts.transform_keys(&:to_s)
      return new unless settings.key?("autofix")

      new(autofix: settings["autofix"] ? true : false)
    end

    # Whether this rule can rewrite source to fix what it reports. Rules
    # that override `#autofix(source)` should override this to return true.
    def supports_autofix?
      false
    end
  end

  # @param autofix [Boolean] stored as the instance's autofix flag. Every
  #   rule accepts it so `from_config` can forward it uniformly.
  def initialize(autofix: false)
    @autofix = autofix
  end

  # The instance's autofix flag, as passed to the constructor.
  def autofix?
    @autofix
  end

  # Subclasses must implement check.
  #
  # @param _tokens [Array<Hash>] default-channel tokens (no whitespace, no comments)
  # @param path [String] file path used in Finding output
  # @param all_tokens [Array<Hash>, nil] every lexer token, including the
  #   comment and whitespace channels, for rules that inspect comments
  # @param source [String, nil] raw source text, for rules that work at the
  #   byte level
  # @raise [NotImplementedError] always, in the base class
  def check(_tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
    raise NotImplementedError
  end

  # Override in subclasses that can rewrite source; return the modified
  # source string. The base implementation returns its input unchanged.
  def autofix(source) # rubocop:disable Lint/UnusedMethodArgument
    source
  end

  protected

  # Build a Finding stamped with this rule's id and severity.
  def finding(line:, column:, message:, path:)
    Finding.new(
      path: path,
      line: line,
      column: column,
      rule: self.class.rule_id,
      message: message,
      severity: self.class.severity
    )
  end
end
|
|
136
|
+
|
|
137
|
+
# @param rules [Array<Symbol|Class|Rule>, nil] When nil, every registered
#   rule runs with default options. Rule instances are used as-is, classes
#   are instantiated with `.new`, and anything else is treated as a rule id
#   and resolved through `Rule.fetch`.
def initialize(rules: nil)
  selected = rules.nil? ? Rule.all : rules

  @rules = selected.map do |entry|
    if entry.is_a?(Rule)
      entry
    elsif entry.is_a?(Class)
      entry.new
    else
      Rule.fetch(entry).new
    end
  end
end
|
|
156
|
+
|
|
157
|
+
# Build a linter from a parsed config hash. Schema:
#
#   rules:
#     <rule_id>:
#       enabled: true|false   # default: true
#       <option>: <value>     # passed to Rule.from_config
#
# A rule entry may also be a bare boolean: `<rule_id>: false` disables the
# rule and `<rule_id>: true` (or an empty entry) enables it with default
# options. Previously `false` silently enabled the rule and `true` crashed
# with NoMethodError.
#
# Rules omitted from the config default to enabled with no options, so
# adding a new rule to the gem won't silently disable it for users with an
# existing config file. Configured rules come first, in config order,
# followed by the remaining registered rules.
#
# @param config [Hash, nil] parsed config (string or symbol keys)
# @return [SasLinter]
# @raise [ArgumentError] for an unknown enabled rule id (from `Rule.fetch`)
#   or a rule entry that is neither a mapping, a boolean, nor empty
def self.from_config(config)
  config = (config || {}).transform_keys(&:to_s)
  rules_config = (config["rules"] || {}).transform_keys(&:to_s)
  instances = []

  rules_config.each do |id, raw_opts|
    opts =
      case raw_opts
      when Hash then raw_opts.transform_keys(&:to_s)
      when nil, true then {}
      when false then { "enabled" => false }
      else
        raise ArgumentError,
              "Options for lint rule #{id.inspect} must be a mapping, true, false or empty " \
              "(got #{raw_opts.inspect})"
      end
    # Disabled entries are skipped before the lookup, so a disabled unknown
    # id does not raise.
    next if opts["enabled"] == false

    klass = Rule.fetch(id.to_sym)
    instances << klass.from_config(opts.reject { |key, _| key == "enabled" })
  end

  Rule.all.each do |klass|
    next if rules_config.key?(klass.rule_id.to_s)

    instances << klass.new
  end

  new(rules: instances)
end
|
|
188
|
+
|
|
189
|
+
# Load a YAML config file and return the parsed result. A missing file
# yields an empty hash, as does a file that parses to nil or false (for
# example an empty one), so the default `config/lint.yaml` stays optional.
#
# @param path [String] path to the YAML config
# @return [Hash]
def self.load_config_file(path)
  if File.file?(path)
    parsed = YAML.safe_load_file(path)
    parsed || {}
  else
    {}
  end
end
|
|
196
|
+
|
|
197
|
+
# Lint a SAS source string and return only the findings. `path` is used
# for finding location output. Use `lint_with_fixes` when the caller also
# wants the (possibly-modified) source back.
#
# @param source [String] SAS source text
# @param path [String] label used in the findings
# @return [Array] findings
def lint(source, path: "(string)")
  findings, _rewritten = lint_with_fixes(source, path: path)
  findings
end
|
|
203
|
+
|
|
204
|
+
# Lint a SAS source string and apply the autofixes of every rule whose
# instance flag `autofix?` is set and whose class reports
# `supports_autofix?`. Findings are computed from the original source;
# fixes are then applied in rule order, each receiving the previous rule's
# output.
#
# @param source [String] SAS source text
# @param path [String] label used in the findings
# @return [Array(Array, String)] `[findings, modified_source]`; the second
#   element is the input itself when no rule rewrote anything
def lint_with_fixes(source, path: "(string)")
  visible_tokens, every_token = tokenize(source)

  findings = @rules.flat_map do |rule|
    rule.check(visible_tokens, path: path, all_tokens: every_token, source: source)
  end

  fixers = @rules.select { |rule| rule.autofix? && rule.class.supports_autofix? }
  rewritten = fixers.inject(source) { |text, rule| rule.autofix(text) }

  [findings, rewritten]
end
|
|
222
|
+
|
|
223
|
+
# Lint a file by path. The source is obtained through `read_source`, which
# reads raw bytes and best-effort transcodes them to UTF-8 (sources are
# commonly Windows-1252 or ISO-8859-1, and the lexer requires valid UTF-8).
#
# If an autofix-enabled rule rewrote the source, the file is updated in
# place. The findings are returned either way.
#
# The comparison is made on raw bytes (`String#b`) so that a difference in
# encoding label alone (e.g. UTF-8 vs ASCII-8BIT) does not trigger a write.
# That can happen when an autofix returns a binary-tagged string without
# changing any bytes. Comparing without `.b` would rewrite the file with
# byte-identical contents.
#
# @param path [String] file to lint (and possibly rewrite)
# @return [Array] findings
def lint_file(path)
  before = read_source(path)
  findings, after = lint_with_fixes(before, path: path)

  bytes_changed = after.b != before.b
  File.write(path, after) if bytes_changed

  findings
end
|
|
243
|
+
|
|
244
|
+
private
|
|
245
|
+
|
|
246
|
+
# Read a source file as raw bytes and return it as UTF-8.
#
# Bytes that already form valid UTF-8 are only relabelled. Otherwise the
# bytes are interpreted as Windows-1252 and transcoded, with invalid or
# undefined sequences replaced by an apostrophe; ISO-8859-1 is the fallback
# should that transcoding raise.
#
# @param path [String] file to read
# @return [String] UTF-8 text
def read_source(path)
  bytes = File.read(path, encoding: "BINARY")
  utf8_probe = bytes.dup.force_encoding("UTF-8")
  return bytes.force_encoding("UTF-8") if utf8_probe.valid_encoding?

  substitution = { invalid: :replace, undef: :replace, replace: "'" }
  begin
    bytes.force_encoding("Windows-1252").encode("UTF-8", **substitution)
  rescue StandardError
    bytes.force_encoding("ISO-8859-1").encode("UTF-8", **substitution)
  end
end
|
|
256
|
+
|
|
257
|
+
# Lex `source` and return `[default_tokens, all_tokens]`.
#
# Most rules walk the default-channel tokens, i.e. everything whose channel
# is neither HIDDEN nor COMMENT. A few (`commented_out_guard` for one) need
# the comment tokens too, so the unfiltered list is returned alongside and
# handed to rules through the `all_tokens:` kwarg of `Rule#check`.
#
# The lexer is released with `free` even when tokenizing raises.
#
# @param source [String] SAS source text
# @return [Array(Array<Hash>, Array<Hash>)]
def tokenize(source)
  lexer = SasLexer::Lexer.new
  every_token =
    begin
      lexer.tokenize(source)
    ensure
      lexer.free
    end

  channels = SasLexer::Lexer::TokenChannel
  visible_tokens = every_token.select do |tok|
    tok[:channel] != channels::HIDDEN && tok[:channel] != channels::COMMENT
  end

  [visible_tokens, every_token]
end
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
require_relative "sas_linter/rules/unreachable_inner_branch_value"
|
|
277
|
+
require_relative "sas_linter/rules/identical_if_else_branches"
|
|
278
|
+
require_relative "sas_linter/rules/commented_out_guard"
|
|
279
|
+
require_relative "sas_linter/rules/choose_one_template"
|
|
280
|
+
require_relative "sas_linter/rules/trailing_whitespace"
|
|
281
|
+
require_relative "sas_linter/rules/tab_expansion"
|
|
282
|
+
require_relative "sas_linter/rules/source_headers"
|
|
283
|
+
require_relative "sas_linter/rules/line_endings"
|
|
284
|
+
require_relative "sas_linter/rules/encoding_issues"
|
|
285
|
+
require_relative "sas_linter/rules/malformed_if_condition"
|
|
286
|
+
require_relative "sas_linter/rules/missing_assignment_semicolon"
|
|
287
|
+
require_relative "sas_linter/rules/variable_value_out_of_known_range"
|
metadata
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: sas-linter
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Craig McNamara
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: sas-lexer
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0.1'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0.1'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: csv
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '0'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '0'
|
|
40
|
+
description: |
|
|
41
|
+
A configurable lint engine for SAS source files. Walks the token
|
|
42
|
+
stream produced by the `sas-lexer` gem and applies a set of pluggable
|
|
43
|
+
rules covering structural defects (malformed `if` conditions,
|
|
44
|
+
identical `then`/`else` branches, unreachable inner branches),
|
|
45
|
+
cosmetic issues (trailing whitespace, tab expansion, line endings,
|
|
46
|
+
encoding gremlins), and source-header conventions. Includes a
|
|
47
|
+
`bin/sas_lint` CLI and YAML-based config.
|
|
48
|
+
email:
|
|
49
|
+
- craig@monami.io
|
|
50
|
+
executables:
|
|
51
|
+
- sas_lint
|
|
52
|
+
extensions: []
|
|
53
|
+
extra_rdoc_files: []
|
|
54
|
+
files:
|
|
55
|
+
- LICENSE
|
|
56
|
+
- README.md
|
|
57
|
+
- Rakefile
|
|
58
|
+
- bin/sas_lint
|
|
59
|
+
- lib/sas_linter.rb
|
|
60
|
+
- lib/sas_linter/rules/choose_one_template.rb
|
|
61
|
+
- lib/sas_linter/rules/commented_out_guard.rb
|
|
62
|
+
- lib/sas_linter/rules/encoding_issues.rb
|
|
63
|
+
- lib/sas_linter/rules/identical_if_else_branches.rb
|
|
64
|
+
- lib/sas_linter/rules/line_endings.rb
|
|
65
|
+
- lib/sas_linter/rules/malformed_if_condition.rb
|
|
66
|
+
- lib/sas_linter/rules/missing_assignment_semicolon.rb
|
|
67
|
+
- lib/sas_linter/rules/source_headers.rb
|
|
68
|
+
- lib/sas_linter/rules/tab_expansion.rb
|
|
69
|
+
- lib/sas_linter/rules/trailing_whitespace.rb
|
|
70
|
+
- lib/sas_linter/rules/unreachable_inner_branch_value.rb
|
|
71
|
+
- lib/sas_linter/rules/variable_value_out_of_known_range.rb
|
|
72
|
+
- lib/sas_linter/version.rb
|
|
73
|
+
homepage: https://github.com/mes-amis/sas-linter
|
|
74
|
+
licenses:
|
|
75
|
+
- AGPL-3.0-or-later
|
|
76
|
+
metadata:
|
|
77
|
+
homepage_uri: https://github.com/mes-amis/sas-linter
|
|
78
|
+
rubygems_mfa_required: 'true'
|
|
79
|
+
rdoc_options: []
|
|
80
|
+
require_paths:
|
|
81
|
+
- lib
|
|
82
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
83
|
+
requirements:
|
|
84
|
+
- - ">="
|
|
85
|
+
- !ruby/object:Gem::Version
|
|
86
|
+
version: 3.4.0
|
|
87
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
88
|
+
requirements:
|
|
89
|
+
- - ">="
|
|
90
|
+
- !ruby/object:Gem::Version
|
|
91
|
+
version: '0'
|
|
92
|
+
requirements: []
|
|
93
|
+
rubygems_version: 4.0.6
|
|
94
|
+
specification_version: 4
|
|
95
|
+
summary: Configurable lint engine for SAS source files.
|
|
96
|
+
test_files: []
|