sas-linter 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b10cf15e62fe7e0a81d8f9d2e2c9eb61af36ee00d10f32e9ce8354a7ebe3e40f
4
- data.tar.gz: 9abe8249ab9d968fdc8aefe558dcdad321ba2879abe4bf08374b9ec998eecb5f
3
+ metadata.gz: 688281e326c33b3af52864a9d25c85f00967a2c4f8ac9233e57cf52a83407b18
4
+ data.tar.gz: a9f331a62f73a5bdb7b637bbd62783f3969b169746e7035adf687afc3030ceb4
5
5
  SHA512:
6
- metadata.gz: 38e745b6f3ec4c8a7cc2ce3c23883569bcd931d88d6f9dc0a6c2288ed7f0b39f199e066340e780e8556d0ceeaa7bfeb149185ef2d458f58574281c54dbbf60d3
7
- data.tar.gz: 8e6b5f2751eb5b3b9e50b80f0476f953519763a321fba4dd17ce219d891a84a3f0fe3c1ea1fbc3c18f4a698bc0655ef794ff2ef71d1d859aa23195a9119d41bd
6
+ metadata.gz: d952062a88c0bca6c33ef3f3e92c6cfeeec3f2142053e47c71dde3c04202ba0209b4672cad326161d62d05df90fb0940665cd0f65b73a62412d97b0ff6c8a1da
7
+ data.tar.gz: 7c41bac13b275a4dc9fc8bbf374339595b2b185fb6058e3ea27b412b18124a44bdb5b901eec7bdc9c0d7db3d9239ea0d56a30e6896ad88f0b9386dc74c2b1687
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # sas-linter
2
2
 
3
- A configurable lint engine for SAS source files. Built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)) and ships with fourteen pluggable rules covering structural defects, cosmetic issues, and source-header conventions.
3
+ A configurable lint engine for SAS source files. Built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)), it ships with fifteen pluggable rules covering structural defects, cosmetic issues, and source-header conventions.
4
4
 
5
5
  ## Installation
6
6
 
@@ -63,6 +63,10 @@ rules:
63
63
  enabled: true
64
64
  autofix: false # rule supports autofix; off by default
65
65
 
66
+ unterminated_comment:
67
+ enabled: true
68
+ autofix: false # append `;` to `**…**` lines whose missing terminator eats the next line
69
+
66
70
  inconsistent_variable_case:
67
71
  enabled: true
68
72
  autofix: false # rewrite every minority casing to the most-common form
@@ -141,6 +145,7 @@ findings = linter.lint_file("path/to/source.sas")
141
145
  | `encoding_issues` | Smart-quote / em-dash / Win-1252 byte sequences that confuse downstream tooling. |
142
146
  | `malformed_if_condition` | Empty conditions, missing operators, orphan `then`, unbalanced parens, etc. |
143
147
  | `missing_assignment_semicolon` | Assignment statements followed by an inline `**` comment but no terminating `;`. |
148
+ | `unterminated_comment` | A standalone `** … **` comment whose missing `;` lets the SAS lexer extend the comment into the next line of real code, silently swallowing it. Autofix appends the `;`. |
144
149
  | `variable_value_out_of_known_range` | `if VAR = N` / `if VAR in (...)` literals fall outside the variable's documented acceptable values. Loads the catalog from one or more CSVs with configurable column names and column separator (`,`, `;`, tab). |
145
150
  | `inconsistent_variable_case` | Identifier appears with more than one casing in the same file (`myVar` vs `MyVar`). SAS treats both as the same variable; autofix rewrites every minority spelling to the most-common form. Skips proc-format definitions and `format.` / `lib.member` references. |
146
151
  | `format_for_unknown_variable` | `format` / `informat` / `attrib` statement assigns a format to a variable that's referenced nowhere else in the file — almost always a typo. Skipped on files that pull in columns via `set` / `merge` / `update` / `infile` / `input`. |
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+ require "sas_lexer"
5
+
6
class SasLinter
  module Rules
    # Flag `label` statements where the `=` between the variable name
    # and the string literal is missing.
    #
    # Motivating bug: `label aHSDELIRIUM 'Delirium Screener';` in
    # SUITE9_HS_DELIRIUM_SCREENER_2014-04-15.TXT — SAS rejects this
    # with "ERROR 22-322: Syntax error, expecting one of the following:
    # =, ?", and the label is silently never attached. The treatment
    # variant of the same algorithm shipped the same typo, and several
    # other interRAI sources have shipped it over time.
    #
    # Detection: starting at each `KW_LABEL` token we walk forward to
    # the terminating `SEMI` (or to the end of the token list if there
    # is none). Any `IDENTIFIER` whose immediately following token is a
    # `STRING_LITERAL` is reported: the `=` between them was dropped.
    #
    # Autofix: insert ` = ` directly after each offending identifier.
    class MalformedLabelStatement < Rule
      rule_id :malformed_label_statement
      description "`label` statement missing `=` between variable and string literal."
      severity :warning

      TT = SasLexer::Lexer::TokenType

      def self.supports_autofix?
        true
      end

      # Report every `IDENT 'string'` pair found inside a `label`
      # statement.
      #
      # @param tokens [Array<Hash>] tokens to scan; `autofix` passes the
      #   stream with HIDDEN and COMMENT channel tokens removed, so the
      #   caller is presumably expected to do the same here
      # @param path [String] file path recorded on each finding
      # @param all_tokens unused by this rule
      # @param source unused by this rule
      # @return [Array] one finding per offending identifier
      def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
        findings = []
        each_label_violation(tokens) do |ident_t, string_t|
          findings << finding(
            line: ident_t[:start_line],
            # Lexer columns appear to be 0-based; findings are reported 1-based.
            column: ident_t[:start_column] + 1,
            message: "`label #{ident_t[:text]} #{shorten(string_t[:text])}` is missing the `=` " \
                     "between the variable name and the label string.",
            path: path
          )
        end
        findings
      end

      # Return `source` with ` = ` inserted after every identifier that
      # `check` would flag.
      #
      # @param source [String, nil] SAS source text
      # @return [String, nil] the rewritten source; `nil` / empty input
      #   is returned unchanged
      def autofix(source)
        return source if source.nil? || source.empty?

        lexer = SasLexer::Lexer.new
        begin
          all_tokens = lexer.tokenize(source)
        ensure
          # Release the lexer even if tokenizing raises.
          lexer.free
        end
        tokens = all_tokens.reject do |t|
          t[:channel] == SasLexer::Lexer::TokenChannel::HIDDEN ||
            t[:channel] == SasLexer::Lexer::TokenChannel::COMMENT
        end

        source_lines = source.split("\n", -1)
        # 0-based line index => columns just past each offending identifier.
        edits_by_line = Hash.new { |h, k| h[k] = [] }
        each_label_violation(tokens) do |ident_t, _string_t|
          edits_by_line[ident_t[:start_line] - 1] << ident_t[:end_column]
        end

        edits_by_line.each do |line_idx, cols|
          line = source_lines[line_idx]
          next if line.nil?

          # Right-to-left so earlier insertions don't shift later columns.
          # `uniq` guarantees a column is never patched twice, which would
          # produce `x = = '...'`.
          cols.uniq.sort.reverse_each do |col|
            # Swallow one existing space after the identifier so
            # `aHSDELIRIUM 'Delirium Screener'` becomes
            # `aHSDELIRIUM = 'Delirium Screener'` (one space either side).
            rest_start = (col < line.length && line[col] == " ") ? col + 1 : col
            line = "#{line[0...col]} = #{line[rest_start..]}"
          end
          source_lines[line_idx] = line
        end

        source_lines.join("\n")
      end

      private

      # Yield `(identifier_token, string_token)` for every identifier
      # inside a `label` statement that is directly followed by a string
      # literal.
      #
      # A `label` keyword that falls inside a span already walked (for
      # example because the previous `label` statement never reached a
      # `;`) is skipped, so each offending pair is yielded exactly once.
      def each_label_violation(tokens)
        scanned_to = 0
        tokens.each_with_index do |t, i|
          next unless t[:type] == TT::KW_LABEL
          # Already covered by an earlier walk; rescanning would only
          # yield the same pairs again.
          next if i < scanned_to

          # Walk forward through the label statement until SEMI.
          j = i + 1
          while j < tokens.length && tokens[j][:type] != TT::SEMI
            cur = tokens[j]
            nxt = tokens[j + 1]
            if cur[:type] == TT::IDENTIFIER && nxt && nxt[:type] == TT::STRING_LITERAL
              yield cur, nxt
              # Step over both the identifier and its string.
              j += 2
            else
              j += 1
            end
          end
          scanned_to = j
        end
      end

      # Truncate `text` to at most 40 characters (37 plus `...`) for use
      # in a finding message; `nil` and short strings pass through.
      def shorten(text)
        return text if text.nil? || text.length <= 40

        "#{text[0, 37]}..."
      end
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+ require "sas_lexer"
5
+
6
class SasLinter
  module Rules
    # Flag a `** ... **` comment line whose missing `;` causes the
    # SAS lexer to extend the comment statement into the following
    # line(s) of real code, silently swallowing it.
    #
    # SAS `*` / `**` comment statements are terminated by the next
    # `;` — the closing `**` is just prose. So
    #
    #     ** SOME COMMENT **
    #     y = x + 1;
    #
    # lexes as a single comment token covering both lines, and the
    # `y = x + 1;` assignment never executes.
    #
    # Detection: a COMMENT-channel `PREDICTED_COMMENT_STAT` token
    # whose `start_line < end_line` AND whose first source line,
    # rstripped, ends with `**`. That shape is the boxed-comment
    # closer the user clearly intended — they only forgot the `;`.
    # Multi-line `*...;` prose whose first line ends with plain text
    # rather than `**` is left alone.
    #
    # NOTE(review): a multi-line banner comment whose first line is a
    # row of asterisks also ends with `**` and therefore matches this
    # shape — confirm whether flagging those is intended.
    #
    # Autofix: append `;` to the end of each flagged first line.
    class UnterminatedComment < Rule
      rule_id :unterminated_comment
      description "`**` comment missing its terminating `;` — consumes following code."
      severity :warning

      TT = SasLexer::Lexer::TokenType
      COMMENT_CHANNEL = SasLexer::Lexer::TokenChannel::COMMENT

      def self.supports_autofix?
        true
      end

      # Report one finding per unterminated `** ... **` comment line.
      #
      # @param _tokens unused; comment tokens are read from `all_tokens`
      # @param path [String] file path recorded on each finding
      # @param all_tokens [Array<Hash>, nil] full token stream
      # @param source [String, nil] raw source text
      # @return [Array] findings; empty when `all_tokens` or `source`
      #   is missing
      def check(_tokens, path:, all_tokens: nil, source: nil)
        return [] unless all_tokens && source

        lines = source.split("\n", -1)
        unterminated_comment_lines(all_tokens, lines).map do |i|
          finding_for_line(lines[i], i, path)
        end
      end

      # Return `source` with `;` appended to every flagged comment line.
      # Trailing whitespace on a fixed line is dropped, but a trailing
      # carriage return is kept so CRLF files don't end up with mixed
      # line endings.
      #
      # @param source [String, nil] SAS source text
      # @return [String, nil] the rewritten source; `nil` / empty input
      #   and sources with nothing to fix are returned unchanged
      def autofix(source)
        return source if source.nil? || source.empty?

        lines = source.split("\n", -1)
        bad = unterminated_comment_lines(tokenize(source), lines)
        return source if bad.empty?

        bad.each do |i|
          line = lines[i]
          # `rstrip` also removes "\r"; put it back after the `;`.
          eol = line.end_with?("\r") ? "\r" : ""
          lines[i] = "#{line.rstrip};#{eol}"
        end
        lines.join("\n")
      end

      private

      # Build the finding for 0-based line `idx`; the column is the
      # 1-based position of the line's first non-whitespace character.
      def finding_for_line(line, idx, path)
        finding(
          line: idx + 1,
          column: line.length - line.lstrip.length + 1,
          message: "`**` comment missing `;` — consumes the next line of code as comment text.",
          path: path
        )
      end

      # Tokenize `source` with a throwaway lexer, freeing it even if
      # tokenizing raises.
      def tokenize(source)
        lexer = SasLexer::Lexer.new
        begin
          lexer.tokenize(source)
        ensure
          lexer.free
        end
      end

      # 0-indexed source lines that hold a `** ... **` comment whose
      # missing `;` made the lexer extend it into the next line.
      def unterminated_comment_lines(all_tokens, lines)
        bad = []
        all_tokens.each do |t|
          next unless t[:channel] == COMMENT_CHANNEL
          next unless t[:type] == TT::PREDICTED_COMMENT_STAT
          # Single-line comments were terminated on their own line.
          next unless t[:start_line] < t[:end_line]

          first = lines[t[:start_line] - 1]
          next unless first
          next unless first.rstrip.end_with?("**")

          bad << (t[:start_line] - 1)
        end
        bad
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class SasLinter
4
- VERSION = "0.2.4"
4
+ VERSION = "0.2.6"
5
5
  end
data/lib/sas_linter.rb CHANGED
@@ -307,7 +307,9 @@ require_relative "sas_linter/rules/line_endings"
307
307
  require_relative "sas_linter/rules/encoding_issues"
308
308
  require_relative "sas_linter/rules/malformed_if_condition"
309
309
  require_relative "sas_linter/rules/missing_assignment_semicolon"
310
+ require_relative "sas_linter/rules/malformed_label_statement"
310
311
  require_relative "sas_linter/rules/variable_value_out_of_known_range"
311
312
  require_relative "sas_linter/rules/invalid_numeric_literal"
312
313
  require_relative "sas_linter/rules/inconsistent_variable_case"
313
314
  require_relative "sas_linter/rules/format_for_unknown_variable"
315
+ require_relative "sas_linter/rules/unterminated_comment"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sas-linter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Craig McNamara
@@ -69,11 +69,13 @@ files:
69
69
  - lib/sas_linter/rules/invalid_numeric_literal.rb
70
70
  - lib/sas_linter/rules/line_endings.rb
71
71
  - lib/sas_linter/rules/malformed_if_condition.rb
72
+ - lib/sas_linter/rules/malformed_label_statement.rb
72
73
  - lib/sas_linter/rules/missing_assignment_semicolon.rb
73
74
  - lib/sas_linter/rules/source_headers.rb
74
75
  - lib/sas_linter/rules/tab_expansion.rb
75
76
  - lib/sas_linter/rules/trailing_whitespace.rb
76
77
  - lib/sas_linter/rules/unreachable_inner_branch_value.rb
78
+ - lib/sas_linter/rules/unterminated_comment.rb
77
79
  - lib/sas_linter/rules/variable_value_out_of_known_range.rb
78
80
  - lib/sas_linter/version.rb
79
81
  homepage: https://github.com/mes-amis/sas-linter