sas-linter 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/lib/sas_linter/rules/malformed_label_statement.rb +127 -0
- data/lib/sas_linter/rules/unterminated_comment.rb +101 -0
- data/lib/sas_linter/version.rb +1 -1
- data/lib/sas_linter.rb +2 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 688281e326c33b3af52864a9d25c85f00967a2c4f8ac9233e57cf52a83407b18
|
|
4
|
+
data.tar.gz: a9f331a62f73a5bdb7b637bbd62783f3969b169746e7035adf687afc3030ceb4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d952062a88c0bca6c33ef3f3e92c6cfeeec3f2142053e47c71dde3c04202ba0209b4672cad326161d62d05df90fb0940665cd0f65b73a62412d97b0ff6c8a1da
|
|
7
|
+
data.tar.gz: 7c41bac13b275a4dc9fc8bbf374339595b2b185fb6058e3ea27b412b18124a44bdb5b901eec7bdc9c0d7db3d9239ea0d56a30e6896ad88f0b9386dc74c2b1687
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# sas-linter
|
|
2
2
|
|
|
3
|
-
A configurable lint engine for SAS source files. Built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)) and ships with
|
|
3
|
+
A configurable lint engine for SAS source files. Built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)) and ships with fifteen pluggable rules covering structural defects, cosmetic issues, and source-header conventions.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
@@ -63,6 +63,10 @@ rules:
|
|
|
63
63
|
enabled: true
|
|
64
64
|
autofix: false # rule supports autofix; off by default
|
|
65
65
|
|
|
66
|
+
unterminated_comment:
|
|
67
|
+
enabled: true
|
|
68
|
+
autofix: false # append `;` to `**…**` lines whose missing terminator eats the next line
|
|
69
|
+
|
|
66
70
|
inconsistent_variable_case:
|
|
67
71
|
enabled: true
|
|
68
72
|
autofix: false # rewrite every minority casing to the most-common form
|
|
@@ -141,6 +145,7 @@ findings = linter.lint_file("path/to/source.sas")
|
|
|
141
145
|
| `encoding_issues` | Smart-quote / em-dash / Win-1252 byte sequences that confuse downstream tooling. |
|
|
142
146
|
| `malformed_if_condition` | Empty conditions, missing operators, orphan `then`, unbalanced parens, etc. |
|
|
143
147
|
| `missing_assignment_semicolon` | Assignment statements followed by an inline `**` comment but no terminating `;`. |
|
|
148
|
+
| `unterminated_comment` | A standalone `** … **` comment whose missing `;` lets the SAS lexer extend the comment into the next line of real code, silently swallowing it. Autofix appends the `;`. |
|
|
144
149
|
| `variable_value_out_of_known_range` | `if VAR = N` / `if VAR in (...)` literals fall outside the variable's documented acceptable values. Loads the catalog from one or more CSVs with configurable column names and column separator (`,`, `;`, tab). |
|
|
145
150
|
| `inconsistent_variable_case` | Identifier appears with more than one casing in the same file (`myVar` vs `MyVar`). SAS treats both as the same variable; autofix rewrites every minority spelling to the most-common form. Skips proc-format definitions and `format.` / `lib.member` references. |
|
|
146
151
|
| `format_for_unknown_variable` | `format` / `informat` / `attrib` statement assigns a format to a variable that's referenced nowhere else in the file — almost always a typo. Skipped on files that pull in columns via `set` / `merge` / `update` / `infile` / `input`. |
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../sas_linter"
|
|
4
|
+
require "sas_lexer"
|
|
5
|
+
|
|
6
|
+
class SasLinter
|
|
7
|
+
module Rules
|
|
8
|
+
# Flag `label` statements where the `=` between the variable name
|
|
9
|
+
# and the string literal is missing.
|
|
10
|
+
#
|
|
11
|
+
# Motivating bug: `label aHSDELIRIUM 'Delirium Screener';` in
|
|
12
|
+
# SUITE9_HS_DELIRIUM_SCREENER_2014-04-15.TXT — SAS rejects this
|
|
13
|
+
# with "ERROR 22-322: Syntax error, expecting one of the following:
|
|
14
|
+
# =, ?", and the label is silently never attached. The treatment
|
|
15
|
+
# variant of the same algorithm shipped the same typo, and several
|
|
16
|
+
# other interRAI sources have shipped it over time.
|
|
17
|
+
#
|
|
18
|
+
# Detection: every `label` statement is a `KW_LABEL` keyword
|
|
19
|
+
# followed by one or more `IDENT '=' STRING_LITERAL` triples
|
|
20
|
+
# separated by whitespace, terminated by `;`. We walk each label
|
|
21
|
+
# statement and, for each IDENT inside it, require the next
|
|
22
|
+
# default-channel token to be `ASSIGN` (`=`). If instead the next
|
|
23
|
+
# token is a STRING_LITERAL, the `=` was dropped.
|
|
24
|
+
class MalformedLabelStatement < Rule
|
|
25
|
+
rule_id :malformed_label_statement
|
|
26
|
+
description "`label` statement missing `=` between variable and string literal."
|
|
27
|
+
severity :warning
|
|
28
|
+
|
|
29
|
+
TT = SasLexer::Lexer::TokenType
|
|
30
|
+
|
|
31
|
+
# Class-level capability flag: this rule implements `autofix`, so it
# always answers true.
#
# @return [Boolean] always true
def self.supports_autofix?
  true
end
|
|
34
|
+
|
|
35
|
+
# Report every `IDENT STRING_LITERAL` pair found inside a `label`
# statement, i.e. every label assignment whose `=` is missing.
#
# @param tokens [Array<Hash>] tokens handed to `each_label_violation`
# @param path [String] file path recorded on each finding
# @param all_tokens [Array<Hash>, nil] unused by this rule
# @param source [String, nil] unused by this rule
# @return [Array] one finding per violation, in the order they were yielded
def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
  # Gather the offending pairs first, then turn each into a finding.
  violations = []
  each_label_violation(tokens) { |ident_t, string_t| violations << [ident_t, string_t] }

  violations.map do |ident_t, string_t|
    finding(
      line: ident_t[:start_line],
      # `+ 1` shifts the lexer's start column by one for reporting.
      column: ident_t[:start_column] + 1,
      message: "`label #{ident_t[:text]} #{shorten(string_t[:text])}` is missing the `=` " \
               "between the variable name and the label string.",
      path: path
    )
  end
end
|
|
48
|
+
|
|
49
|
+
# Insert the missing `=` after every label variable that is directly
# followed by a string literal.
#
# The source is re-lexed here, HIDDEN- and COMMENT-channel tokens are
# dropped, and each violation's identifier end column is recorded per
# line. Edits are applied right-to-left within a line so earlier
# column offsets stay valid.
#
# NOTE(review): this treats `end_column` as the 0-based index of the
# character immediately after the identifier — confirm against the lexer.
#
# @param source [String, nil] full SAS source text
# @return [String, nil] fixed source; `source` itself when nil or empty
def autofix(source)
  return source if source.nil? || source.empty?

  lexer = SasLexer::Lexer.new
  begin
    all_tokens = lexer.tokenize(source)
  ensure
    lexer.free
  end
  hidden = SasLexer::Lexer::TokenChannel::HIDDEN
  comment = SasLexer::Lexer::TokenChannel::COMMENT
  tokens = all_tokens.reject { |t| t[:channel] == hidden || t[:channel] == comment }

  source_lines = source.split("\n", -1)
  # line index (0-based) => identifier end columns needing an `=`.
  edits_by_line = Hash.new { |h, k| h[k] = [] }
  each_label_violation(tokens) do |ident_t, _string_t|
    edits_by_line[ident_t[:start_line] - 1] << ident_t[:end_column]
  end

  edits_by_line.each do |line_idx, cols|
    line = source_lines[line_idx]
    next if line.nil?

    # `uniq` guards against the same identifier being reported twice,
    # which would otherwise produce `ident = = 'text'`.
    cols.uniq.sort.reverse_each do |col|
      head = line[0...col]
      tail = line[col..] || ""
      line =
        if tail.strip.empty?
          # Identifier is the last token on the line (the string literal
          # sits on the next line, or only whitespace / a CR follows):
          # add ` =` without a new trailing space and keep the tail
          # (e.g. the `\r` of a CRLF file) untouched.
          "#{head} =#{tail}"
        elsif tail.start_with?(" ")
          # `aHSDELIRIUM 'Delirium Screener'` -> `aHSDELIRIUM = 'Delirium Screener'`:
          # the single following space is replaced by ` = `.
          "#{head} = #{tail[1..]}"
        else
          "#{head} = #{tail}"
        end
    end
    source_lines[line_idx] = line
  end

  source_lines.join("\n")
end
|
|
96
|
+
|
|
97
|
+
private
|
|
98
|
+
|
|
99
|
+
# Yield `(identifier_token, string_token)` for every identifier inside
# a `label` statement that is immediately followed by a string literal
# (the `=` between them is missing).
#
# A statement starts at a `KW_LABEL` token and runs to the next `SEMI`
# (or the end of the token list). After a statement has been walked,
# scanning resumes at its end, so a `KW_LABEL` that appears inside an
# already-walked span (e.g. after a `label` statement that lost its
# `;`) does not start a second, overlapping walk. Each violation is
# therefore yielded exactly once, which keeps findings unique and
# prevents autofix from inserting `=` twice.
#
# @param tokens [Array<Hash>] tokens with at least a `:type` key
# @yieldparam ident_t [Hash] the identifier token
# @yieldparam string_t [Hash] the string-literal token that follows it
# @return [Array<Hash>] the `tokens` argument
def each_label_violation(tokens)
  i = 0
  while i < tokens.length
    unless tokens[i][:type] == TT::KW_LABEL
      i += 1
      next
    end

    # Walk forward through the label statement until SEMI.
    j = i + 1
    while j < tokens.length && tokens[j][:type] != TT::SEMI
      cur = tokens[j]
      nxt = tokens[j + 1]
      if cur[:type] == TT::IDENTIFIER && nxt && nxt[:type] == TT::STRING_LITERAL
        yield cur, nxt
        j += 2 # skip the string literal that was just reported
      else
        j += 1
      end
    end

    # Resume after the span just walked instead of at i + 1.
    i = j
  end
  tokens
end
|
|
119
|
+
|
|
120
|
+
# Trim `text` for display: anything longer than 40 characters is cut
# to its first 37 characters followed by `...`. `nil` and shorter
# strings are returned unchanged.
#
# @param text [String, nil]
# @return [String, nil]
def shorten(text)
  too_long = !text.nil? && text.length > 40
  too_long ? "#{text[0, 37]}..." : text
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../sas_linter"
|
|
4
|
+
require "sas_lexer"
|
|
5
|
+
|
|
6
|
+
class SasLinter
|
|
7
|
+
module Rules
|
|
8
|
+
# Flag a `** ... **` comment line whose missing `;` causes the
|
|
9
|
+
# SAS lexer to extend the comment statement into the following
|
|
10
|
+
# line(s) of real code, silently swallowing it.
|
|
11
|
+
#
|
|
12
|
+
# SAS `*` / `**` comment statements are terminated by the next
|
|
13
|
+
# `;` — the closing `**` is just prose. So
|
|
14
|
+
#
|
|
15
|
+
# ** SOME COMMENT **
|
|
16
|
+
# y = x + 1;
|
|
17
|
+
#
|
|
18
|
+
# lexes as a single comment token covering both lines, and the
|
|
19
|
+
# `y = x + 1;` assignment never executes.
|
|
20
|
+
#
|
|
21
|
+
# Detection: a COMMENT-channel `PREDICTED_COMMENT_STAT` token
|
|
22
|
+
# whose `start_line != end_line` AND whose first source line,
|
|
23
|
+
# rstripped, ends with `**`. That shape is the boxed-comment
|
|
24
|
+
# closer the user clearly intended — they only forgot the `;`.
|
|
25
|
+
# Legitimate multi-line `*...;` prose ends its first line with
|
|
26
|
+
# plain text, not `**`, so it's left alone.
|
|
27
|
+
#
|
|
28
|
+
# Autofix: append `;` to the end of each flagged first line.
|
|
29
|
+
class UnterminatedComment < Rule
|
|
30
|
+
rule_id :unterminated_comment
|
|
31
|
+
description "`**` comment missing its terminating `;` — consumes following code."
|
|
32
|
+
severity :warning
|
|
33
|
+
|
|
34
|
+
TT = SasLexer::Lexer::TokenType
|
|
35
|
+
COMMENT_CHANNEL = SasLexer::Lexer::TokenChannel::COMMENT
|
|
36
|
+
|
|
37
|
+
# Class-level capability flag: this rule implements `autofix`, so it
# always answers true.
#
# @return [Boolean] always true
def self.supports_autofix?
  true
end
|
|
40
|
+
|
|
41
|
+
# Produce one finding per source line flagged by
# `unterminated_comment_lines`.
#
# @param _tokens [Array<Hash>] ignored; this rule works on `all_tokens`
# @param path [String] file path recorded on each finding
# @param all_tokens [Array<Hash>, nil] token list including comment tokens
# @param source [String, nil] full source text
# @return [Array] findings; empty when `all_tokens` or `source` is missing
def check(_tokens, path:, all_tokens: nil, source: nil)
  return [] if !all_tokens || !source

  # `-1` keeps trailing empty strings so indices match source lines.
  source_lines = source.split("\n", -1)
  flagged = unterminated_comment_lines(all_tokens, source_lines)
  flagged.map { |idx| finding_for_line(source_lines[idx], idx, path) }
end
|
|
49
|
+
|
|
50
|
+
# Append the missing `;` to every line flagged by
# `unterminated_comment_lines`.
#
# Trailing whitespace on a fixed line is dropped before the `;` is
# added. Because the source is split on "\n", lines of a CRLF file
# still end in "\r"; that "\r" is put back after the `;` so the fix
# does not turn the line into an LF-only line and leave the file with
# mixed line endings.
#
# @param source [String, nil] full SAS source text
# @return [String, nil] fixed source; `source` itself when it is nil,
#   empty, or has no flagged lines
def autofix(source)
  return source if source.nil? || source.empty?

  lines = source.split("\n", -1)
  bad = unterminated_comment_lines(tokenize(source), lines)
  return source if bad.empty?

  bad.each do |i|
    original = lines[i]
    # `rstrip` also strips "\r"; remember it so it can be restored.
    carriage_return = original.end_with?("\r") ? "\r" : ""
    lines[i] = "#{original.rstrip};#{carriage_return}"
  end
  lines.join("\n")
end
|
|
62
|
+
|
|
63
|
+
private
|
|
64
|
+
|
|
65
|
+
# Build the finding for one flagged source line.
#
# The reported column is the 1-based position of the line's first
# character that `lstrip` does not remove.
#
# @param line [String] text of the flagged line
# @param idx [Integer] 0-based index of that line
# @param path [String] file path recorded on the finding
# @return [Object] whatever `finding` returns
def finding_for_line(line, idx, path)
  leading_width = line.length - line.lstrip.length
  finding(
    line: idx + 1,
    column: leading_width + 1,
    message: "`**` comment missing `;` — consumes the next line of code as comment text.",
    path: path
  )
end
|
|
73
|
+
|
|
74
|
+
# Lex `source` with a fresh `SasLexer::Lexer` and return its tokens.
# The lexer is freed whether or not tokenizing raises.
#
# @param source [String] SAS source text
# @return [Object] the result of `lexer.tokenize(source)`
def tokenize(source)
  lexer = SasLexer::Lexer.new
  lexer.tokenize(source)
ensure
  # `lexer` is still nil if the constructor itself raised; in that
  # case there is nothing to free.
  lexer&.free
end
|
|
82
|
+
|
|
83
|
+
# Collect the 0-based indices of source lines that open a flagged
# comment: a COMMENT-channel `PREDICTED_COMMENT_STAT` token that spans
# more than one line and whose first line, right-stripped, ends with
# `**`.
#
# @param all_tokens [Array<Hash>] tokens with `:channel`, `:type`,
#   `:start_line` and `:end_line` keys
# @param lines [Array<String>] the source split into lines
# @return [Array<Integer>] 0-based line indices, in token order
def unterminated_comment_lines(all_tokens, lines)
  all_tokens.each_with_object([]) do |tok, flagged|
    next unless tok[:channel] == COMMENT_CHANNEL && tok[:type] == TT::PREDICTED_COMMENT_STAT
    # Only comments that run past their first line are of interest.
    next unless tok[:start_line] < tok[:end_line]

    idx = tok[:start_line] - 1
    opening_line = lines[idx]
    next unless opening_line
    next unless opening_line.rstrip.end_with?("**")

    flagged << idx
  end
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
data/lib/sas_linter/version.rb
CHANGED
data/lib/sas_linter.rb
CHANGED
|
@@ -307,7 +307,9 @@ require_relative "sas_linter/rules/line_endings"
|
|
|
307
307
|
require_relative "sas_linter/rules/encoding_issues"
|
|
308
308
|
require_relative "sas_linter/rules/malformed_if_condition"
|
|
309
309
|
require_relative "sas_linter/rules/missing_assignment_semicolon"
|
|
310
|
+
require_relative "sas_linter/rules/malformed_label_statement"
|
|
310
311
|
require_relative "sas_linter/rules/variable_value_out_of_known_range"
|
|
311
312
|
require_relative "sas_linter/rules/invalid_numeric_literal"
|
|
312
313
|
require_relative "sas_linter/rules/inconsistent_variable_case"
|
|
313
314
|
require_relative "sas_linter/rules/format_for_unknown_variable"
|
|
315
|
+
require_relative "sas_linter/rules/unterminated_comment"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sas-linter
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.4
|
|
4
|
+
version: 0.2.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Craig McNamara
|
|
@@ -69,11 +69,13 @@ files:
|
|
|
69
69
|
- lib/sas_linter/rules/invalid_numeric_literal.rb
|
|
70
70
|
- lib/sas_linter/rules/line_endings.rb
|
|
71
71
|
- lib/sas_linter/rules/malformed_if_condition.rb
|
|
72
|
+
- lib/sas_linter/rules/malformed_label_statement.rb
|
|
72
73
|
- lib/sas_linter/rules/missing_assignment_semicolon.rb
|
|
73
74
|
- lib/sas_linter/rules/source_headers.rb
|
|
74
75
|
- lib/sas_linter/rules/tab_expansion.rb
|
|
75
76
|
- lib/sas_linter/rules/trailing_whitespace.rb
|
|
76
77
|
- lib/sas_linter/rules/unreachable_inner_branch_value.rb
|
|
78
|
+
- lib/sas_linter/rules/unterminated_comment.rb
|
|
77
79
|
- lib/sas_linter/rules/variable_value_out_of_known_range.rb
|
|
78
80
|
- lib/sas_linter/version.rb
|
|
79
81
|
homepage: https://github.com/mes-amis/sas-linter
|