sas-linter 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/sas_linter/rules/format_for_unknown_variable.rb +163 -0
- data/lib/sas_linter/rules/inconsistent_variable_case.rb +12 -2
- data/lib/sas_linter/version.rb +1 -1
- data/lib/sas_linter.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b10cf15e62fe7e0a81d8f9d2e2c9eb61af36ee00d10f32e9ce8354a7ebe3e40f
|
|
4
|
+
data.tar.gz: 9abe8249ab9d968fdc8aefe558dcdad321ba2879abe4bf08374b9ec998eecb5f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 38e745b6f3ec4c8a7cc2ce3c23883569bcd931d88d6f9dc0a6c2288ed7f0b39f199e066340e780e8556d0ceeaa7bfeb149185ef2d458f58574281c54dbbf60d3
|
|
7
|
+
data.tar.gz: 8e6b5f2751eb5b3b9e50b80f0476f953519763a321fba4dd17ce219d891a84a3f0fe3c1ea1fbc3c18f4a698bc0655ef794ff2ef71d1d859aa23195a9119d41bd
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# sas-linter
|
|
2
2
|
|
|
3
|
-
A configurable lint engine for SAS source files. Built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)) and ships with
|
|
3
|
+
A configurable lint engine for SAS source files. Built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)) and ships with fourteen pluggable rules covering structural defects, cosmetic issues, and source-header conventions.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -67,6 +67,9 @@ rules:
|
|
|
67
67
|
enabled: true
|
|
68
68
|
autofix: false # rewrite every minority casing to the most-common form
|
|
69
69
|
|
|
70
|
+
format_for_unknown_variable:
|
|
71
|
+
enabled: true # skipped automatically when the file uses set/merge/update/infile/input
|
|
72
|
+
|
|
70
73
|
variable_value_out_of_known_range:
|
|
71
74
|
enabled: true
|
|
72
75
|
csv_paths: # empty list = rule is a no-op
|
|
@@ -140,6 +143,7 @@ findings = linter.lint_file("path/to/source.sas")
|
|
|
140
143
|
| `missing_assignment_semicolon` | Assignment statements followed by an inline `**` comment but no terminating `;`. |
|
|
141
144
|
| `variable_value_out_of_known_range` | `if VAR = N` / `if VAR in (...)` literals fall outside the variable's documented acceptable values. Loads the catalog from one or more CSVs with configurable column names and column separator (`,`, `;`, tab). |
|
|
142
145
|
| `inconsistent_variable_case` | Identifier appears with more than one casing in the same file (`myVar` vs `MyVar`). SAS treats both as the same variable; autofix rewrites every minority spelling to the most-common form. Skips proc-format definitions and `format.` / `lib.member` references. |
|
|
146
|
+
| `format_for_unknown_variable` | `format` / `informat` / `attrib` statement assigns a format to a variable that's referenced nowhere else in the file — almost always a typo. Skipped on files that pull in columns via `set` / `merge` / `update` / `infile` / `input`. |
|
|
143
147
|
|
|
144
148
|
`bin/sas_lint --list-rules` prints the same set with autofix capability.
|
|
145
149
|
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../sas_linter"
|
|
4
|
+
require "sas_lexer"
|
|
5
|
+
|
|
6
|
+
class SasLinter
|
|
7
|
+
module Rules
|
|
8
|
+
# Flag `format` / `informat` / `attrib ... format=` statements that
|
|
9
|
+
# name a variable referenced nowhere else in the file. Almost always
|
|
10
|
+
# a typo (`attrib totalscore format=flagx.;` when every other use is
|
|
11
|
+
# `total_score`). SAS itself silently binds the format to a phantom
|
|
12
|
+
# column and runs; downstream tooling that resolves variable
|
|
13
|
+
# references (e.g. sas-ruby) refuses to compile such a file.
|
|
14
|
+
#
|
|
15
|
+
# The rule is conservative: a file that pulls variables in from an
|
|
16
|
+
# external source — `set`, `merge`, `update`, `infile`, `input` — is
|
|
17
|
+
# skipped entirely, since a format target may legitimately name a
|
|
18
|
+
# column the linter can't see.
|
|
19
|
+
class FormatForUnknownVariable < Rule
|
|
20
|
+
rule_id :format_for_unknown_variable
|
|
21
|
+
description "Variable named in a `format` / `informat` / `attrib` " \
|
|
22
|
+
"statement is referenced nowhere else in the file — " \
|
|
23
|
+
"almost always a typo."
|
|
24
|
+
severity :warning
|
|
25
|
+
|
|
26
|
+
TT = SasLexer::Lexer::TokenType
|
|
27
|
+
|
|
28
|
+
FORMAT_KIND_BY_TYPE = {
|
|
29
|
+
TT::KW_FORMAT => :format,
|
|
30
|
+
TT::KW_INFORMAT => :informat,
|
|
31
|
+
TT::KW_ATTRIB => :attrib
|
|
32
|
+
}.freeze
|
|
33
|
+
|
|
34
|
+
EXTERNAL_INPUT_TYPES = [
|
|
35
|
+
TT::KW_SET, TT::KW_MERGE, TT::KW_UPDATE, TT::KW_INFILE, TT::KW_INPUT
|
|
36
|
+
].freeze
|
|
37
|
+
|
|
38
|
+
# Statements that name variables for declaration only — the names
|
|
39
|
+
# they reference don't count as "real uses" because if a variable
|
|
40
|
+
# only appears in declaration statements it's still dead code.
|
|
41
|
+
# Keyword-typed openers:
|
|
42
|
+
DECLARATION_TYPES = [
|
|
43
|
+
TT::KW_FORMAT, TT::KW_INFORMAT, TT::KW_ATTRIB,
|
|
44
|
+
TT::KW_LABEL, TT::KW_LENGTH, TT::KW_KEEP, TT::KW_DROP, TT::KW_ARRAY
|
|
45
|
+
].freeze
|
|
46
|
+
|
|
47
|
+
# IDENTIFIER-typed openers (the lexer doesn't keyword these):
|
|
48
|
+
# `retain` is a data-step declaration;
|
|
49
|
+
# `value` / `invalue` / `picture` introduce a `proc format` body.
|
|
50
|
+
DECLARATION_TEXT = %w[retain value invalue picture].freeze
|
|
51
|
+
|
|
52
|
+
# Lint one token stream for `format` / `informat` / `attrib` targets that
# are never used anywhere else in the same stream.
#
# Statements are classified by their opening token:
#   * an opener in EXTERNAL_INPUT_TYPES means the file pulls columns in
#     from a source this rule cannot see, so the file yields no findings;
#   * an opener in FORMAT_KIND_BY_TYPE contributes its variable targets;
#   * any other declaration-only statement is ignored;
#   * every remaining statement contributes its identifiers as "uses".
#
# Names are compared case-insensitively (both sides are down-cased).
#
# @param tokens [Array] tokens indexable by :type, :text, :start_line and
#   :start_column
# @param path [Object] passed through unchanged to every finding
# @param all_tokens [Object, nil] accepted for interface compatibility; unused
# @param source [Object, nil] accepted for interface compatibility; unused
# @return [Array] one finding per target whose name never appears as a
#   use; empty when any statement opens with an external-input keyword
def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  targets = []
  use_names = Set.new

  each_statement(tokens) do |stmt|
    opener = stmt.first
    opener_type = opener[:type]

    # The verdict for the whole file is already known, so stop scanning
    # instead of collecting targets and uses that would be thrown away.
    return [] if EXTERNAL_INPUT_TYPES.include?(opener_type)

    kind = FORMAT_KIND_BY_TYPE[opener_type]
    if kind
      collect_targets(stmt, kind, targets)
    elsif !declaration_statement?(opener)
      collect_uses(stmt, use_names)
    end
  end

  targets.filter_map do |token, kind|
    next if use_names.include?(token[:text].downcase)

    finding(
      line: token[:start_line],
      # NOTE(review): presumably converts a zero-based lexer column to a
      # one-based report column — confirm against the lexer.
      column: token[:start_column] + 1,
      message: "`#{kind}` assigns a format to `#{token[:text]}` but " \
               "that variable is not referenced anywhere else in " \
               "this file — likely a typo.",
      path: path
    )
  end
end
|
|
91
|
+
|
|
92
|
+
private
|
|
93
|
+
|
|
94
|
+
# Yield each `; ... ;` slice (opener at index 0, no trailing `;`).
|
|
95
|
+
# Stray semicolons produce empty slices and are skipped.
|
|
96
|
+
# Split a token stream into statements at every `TT::SEMI` token.
#
# Each yielded slice holds the tokens between two semicolons: the opener
# is at index 0 and the terminating `;` is excluded. Stray semicolons
# produce empty slices, which are skipped. Tokens that follow the last
# semicolon are never yielded.
#
# @param tokens [Array] tokens indexable by :type
# @yieldparam statement [Array] a non-empty slice of +tokens+
# @return [Enumerator] when called without a block
def each_statement(tokens)
  # Follow the usual `each_*` convention rather than raising
  # LocalJumpError when no block is supplied.
  return enum_for(:each_statement, tokens) unless block_given?

  start = 0
  tokens.each_with_index do |token, index|
    next unless token[:type] == TT::SEMI

    statement = tokens[start...index]
    yield statement unless statement.empty?
    start = index + 1
  end
end
|
|
106
|
+
|
|
107
|
+
# Whether +opener+ starts a declaration-only statement.
#
# True when the opener's type is listed in DECLARATION_TYPES, or when it is
# an IDENTIFIER token whose down-cased text is listed in DECLARATION_TEXT.
#
# @param opener [Object] first token of a statement, indexable by :type
#   and :text
# @return [Boolean]
def declaration_statement?(opener)
  opener_type = opener[:type]
  keyword_opener = DECLARATION_TYPES.include?(opener_type)

  keyword_opener ||
    (opener_type == TT::IDENTIFIER && DECLARATION_TEXT.include?(opener[:text].downcase))
end
|
|
114
|
+
|
|
115
|
+
# Append every variable target named in a format-assigning statement to
# +targets+ as a `[token, kind]` pair.
#
# Only IDENTIFIER tokens that `variable_target?` accepts are recorded.
#
# @param stmt [Array] tokens of one statement
# @param kind [Object] label stored alongside each recorded token
# @param targets [Array] accumulator, mutated in place
def collect_targets(stmt, kind, targets)
  stmt.each_index do |position|
    token = stmt[position]
    next if token[:type] != TT::IDENTIFIER

    targets << [token, kind] if variable_target?(stmt, position)
  end
end
|
|
123
|
+
|
|
124
|
+
# In `format`/`informat`, the format name is the identifier
|
|
125
|
+
# byte-adjacent to a following `.` (`date9.`, `flagx.`). In
|
|
126
|
+
# `attrib`, the format name additionally follows `=`
|
|
127
|
+
# (`format=flagx.`). Everything else is a variable target.
|
|
128
|
+
# Whether the identifier at index +i+ names a variable rather than a
# format.
#
# It is rejected when the next token is a DOT whose start offset equals
# the identifier's end offset (a format name such as `date9.`), or when
# the previous token is an ASSIGN (the value side of `format=flagx.`).
#
# @param stmt [Array] tokens of one statement, indexable by :type, :start
#   and :end
# @param i [Integer] index of the identifier within +stmt+
# @return [Boolean]
def variable_target?(stmt, i)
  current = stmt[i]
  following = stmt[i + 1]
  preceding = stmt[i - 1] if i.positive?

  format_name = following &&
                following[:type] == TT::DOT &&
                following[:start] == current[:end]
  assigned_value = preceding && preceding[:type] == TT::ASSIGN

  !(format_name || assigned_value)
end
|
|
138
|
+
|
|
139
|
+
# Record the down-cased text of every identifier in +stmt+ that
# `variable_use?` accepts.
#
# @param stmt [Array] tokens of one statement
# @param names [#<<] accumulator of down-cased names, mutated in place
def collect_uses(stmt, names)
  stmt.each_index do |position|
    token = stmt[position]
    next if token[:type] != TT::IDENTIFIER

    names << token[:text].downcase if variable_use?(stmt, position)
  end
end
|
|
147
|
+
|
|
148
|
+
# Same exclusions as `inconsistent_variable_case`: byte-adjacent
|
|
149
|
+
# `<name>.` (format reference) and `<lib>.<member>` second halves
|
|
150
|
+
# don't name a variable in the current step.
|
|
151
|
+
# Whether the identifier at index +i+ counts as a variable use.
#
# It is rejected when a DOT token touches it on either side: a following
# DOT that starts where the identifier ends (`<name>.`), or a preceding
# DOT that ends where the identifier starts (second half of
# `<lib>.<member>`).
#
# @param stmt [Array] tokens of one statement, indexable by :type, :start
#   and :end
# @param i [Integer] index of the identifier within +stmt+
# @return [Boolean]
def variable_use?(stmt, i)
  current = stmt[i]
  following = stmt[i + 1]
  preceding = stmt[i - 1] if i.positive?

  dot_after = following &&
              following[:type] == TT::DOT &&
              following[:start] == current[:end]
  dot_before = preceding &&
               preceding[:type] == TT::DOT &&
               preceding[:end] == current[:start]

  !(dot_after || dot_before)
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
@@ -61,9 +61,19 @@ class SasLinter
|
|
|
61
61
|
def autofix(source)
|
|
62
62
|
return source if source.nil? || source.empty?
|
|
63
63
|
|
|
64
|
+
# If a previous rule's autofix returned ASCII-8BIT (e.g.
|
|
65
|
+
# EncodingIssues#autofix walks bytes and returns binary), tag
|
|
66
|
+
# it UTF-8 before slicing. The lexer treats the bytes as UTF-8
|
|
67
|
+
# and reports character offsets either way; only Ruby's
|
|
68
|
+
# `String#[]=` cares about the encoding label, and it indexes
|
|
69
|
+
# by bytes for ASCII-8BIT but by characters for UTF-8 — so a
|
|
70
|
+
# binary tag plus any multi-byte sequence earlier in the file
|
|
71
|
+
# would shift every replacement by the byte/char gap.
|
|
72
|
+
src = source.encoding == Encoding::UTF_8 ? source : source.dup.force_encoding("UTF-8")
|
|
73
|
+
|
|
64
74
|
lexer = SasLexer::Lexer.new
|
|
65
75
|
begin
|
|
66
|
-
all_tokens = lexer.tokenize(source)
|
|
76
|
+
all_tokens = lexer.tokenize(src)
|
|
67
77
|
ensure
|
|
68
78
|
lexer.free
|
|
69
79
|
end
|
|
@@ -78,7 +88,7 @@ class SasLinter
|
|
|
78
88
|
end
|
|
79
89
|
|
|
80
90
|
# Apply right-to-left so earlier offsets stay valid.
|
|
81
|
-
out = source.dup
|
|
91
|
+
out = src.dup
|
|
82
92
|
edits.sort_by! { |start, _, _| -start }
|
|
83
93
|
edits.each { |start, finish, repl| out[start...finish] = repl }
|
|
84
94
|
out
|
data/lib/sas_linter/version.rb
CHANGED
data/lib/sas_linter.rb
CHANGED
|
@@ -310,3 +310,4 @@ require_relative "sas_linter/rules/missing_assignment_semicolon"
|
|
|
310
310
|
require_relative "sas_linter/rules/variable_value_out_of_known_range"
|
|
311
311
|
require_relative "sas_linter/rules/invalid_numeric_literal"
|
|
312
312
|
require_relative "sas_linter/rules/inconsistent_variable_case"
|
|
313
|
+
require_relative "sas_linter/rules/format_for_unknown_variable"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sas-linter
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.2
|
|
4
|
+
version: 0.2.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Craig McNamara
|
|
@@ -63,6 +63,7 @@ files:
|
|
|
63
63
|
- lib/sas_linter/rules/choose_one_template.rb
|
|
64
64
|
- lib/sas_linter/rules/commented_out_guard.rb
|
|
65
65
|
- lib/sas_linter/rules/encoding_issues.rb
|
|
66
|
+
- lib/sas_linter/rules/format_for_unknown_variable.rb
|
|
66
67
|
- lib/sas_linter/rules/identical_if_else_branches.rb
|
|
67
68
|
- lib/sas_linter/rules/inconsistent_variable_case.rb
|
|
68
69
|
- lib/sas_linter/rules/invalid_numeric_literal.rb
|