sas-linter 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 306561a9219d046d164095dd03b92fdd2da003c4292188e9d5d75e19eae4c3c9
4
- data.tar.gz: 559eab2894b60b33f0a159df624603379a4285c61a6e49425ead209a991ae760
3
+ metadata.gz: 144b602b9b4eff14c301d9c04852f07f490e9557802f897eb4dd08acbf3d3fa4
4
+ data.tar.gz: 4aa7e953e1ed8a05cd1f4b94cf698086438a529604019120be0bcc0bb0530be3
5
5
  SHA512:
6
- metadata.gz: 7da26fa1fbf7cf1fc00753e157c6e5ba2e9ed16b7c2470dccd7585717a35f269ee2636754b572f0ca425f16d4058fea0e212927bc45dd7efc75d504da13202df
7
- data.tar.gz: 7f507e8df56ea74b355c0c471a3de0022064860c43b90e779d11a4b0f7ac1ebd20de09ea20f614e04a87c747b1bf774cb8eeda97e00045879f728f040a487e1c
6
+ metadata.gz: 74e7303ebdcfcfc616cd6e520f8984494d687b3c0c3af6dcba4efba91f7cbfefd61f511bfa535ae4ee9c4f6a481d4774a96b7a4ff20803ce2768782f2b39f5e6
7
+ data.tar.gz: 7a1dab42267ac076c9d992d7770871c386d52d6f4c6c2cc088372151b6f586e1ff12b963dcab26a47aa0b427396eef989c9f837805a4183e35c43a734b8fbd41
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # sas-linter
2
2
 
3
- A configurable lint engine for SAS source files. Built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)) and ships with eleven pluggable rules covering structural defects, cosmetic issues, and source-header conventions.
3
+ A configurable lint engine for SAS source files. It is built on the [`sas-lexer`](https://github.com/mes-amis/sas-lexer-rb) gem (a Ruby FFI binding to Misha Perlov's Rust [`sas-lexer`](https://github.com/mishamsk/sas-lexer)) and ships with thirteen pluggable rules covering structural defects, cosmetic issues, and source-header conventions.
4
4
 
5
5
  ## Installation
6
6
 
@@ -63,6 +63,10 @@ rules:
63
63
  enabled: true
64
64
  autofix: false # rule supports autofix; off by default
65
65
 
66
+ inconsistent_variable_case:
67
+ enabled: true
68
+ autofix: false # rewrite every minority casing to the most-common form
69
+
66
70
  variable_value_out_of_known_range:
67
71
  enabled: true
68
72
  csv_paths: # empty list = rule is a no-op
@@ -135,6 +139,7 @@ findings = linter.lint_file("path/to/source.sas")
135
139
  | `malformed_if_condition` | Empty conditions, missing operators, orphan `then`, unbalanced parens, etc. |
136
140
  | `missing_assignment_semicolon` | Assignment statements followed by an inline `**` comment but no terminating `;`. |
137
141
  | `variable_value_out_of_known_range` | `if VAR = N` / `if VAR in (...)` literals fall outside the variable's documented acceptable values. Loads the catalog from one or more CSVs with configurable column names and column separator (`,`, `;`, tab). |
142
+ | `inconsistent_variable_case` | Identifier appears with more than one casing in the same file (`myVar` vs `MyVar`). SAS treats both as the same variable; autofix rewrites every minority spelling to the most-common form. Skips proc-format definitions and `format.` / `lib.member` references. |
138
143
 
139
144
  `bin/sas_lint --list-rules` prints the same set with autofix capability.
140
145
 
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../sas_linter"
4
+ require "sas_lexer"
5
+
6
class SasLinter
  module Rules
    # Reports identifiers that show up with more than one letter casing in
    # a single file (for example `myVar` next to `MyVar`).
    #
    # Identifier tokens are grouped by their lowercased text. Within each
    # group the spelling used most often becomes the canonical one; when
    # several spellings share the top count, the one that appears first in
    # the token stream wins. Every occurrence spelled differently is
    # reported by #check and rewritten by #autofix.
    #
    # Identifiers deliberately left out of the grouping:
    #   * an identifier whose end offset touches a following `.` token
    #     (shapes such as `agecat.` or the `work` in `work.foo`);
    #   * an identifier whose start offset touches a preceding `.` token
    #     (the `foo` in `work.foo`);
    #   * the words `value`, `invalue` and `picture` (compared
    #     case-insensitively), plus the identifier that directly follows
    #     one of them. This is a purely local check on neighbouring
    #     tokens; no `proc format ... run;` block tracking is attempted.
    class InconsistentVariableCase < Rule
      rule_id :inconsistent_variable_case
      description "Variable identifiers must use one consistent letter case " \
                  "across the file; mixing `myVar` and `MyVar` is sloppy."
      severity :warning

      # Shorthand for the lexer's token-type namespace.
      TT = SasLexer::Lexer::TokenType

      # Lowercase words that open a format / informat / picture definition.
      # They are matched by text against IDENTIFIER tokens.
      FORMAT_DEF_KEYWORDS = %w[value invalue picture].freeze

      # This rule can rewrite the source, so it advertises autofix support.
      def self.supports_autofix?
        true
      end

      # Builds one finding per identifier occurrence whose spelling differs
      # from the canonical spelling of its group.
      #
      # tokens     - token hashes to inspect (each is read via :type, :text,
      #              :start, :end, :start_line and :start_column).
      # path       - passed through to every finding.
      # all_tokens - accepted for interface compatibility; unused here.
      # source     - accepted for interface compatibility; unused here.
      #
      # Returns an Array of findings (empty when nothing is inconsistent).
      def check(tokens, path:, all_tokens: nil, source: nil) # rubocop:disable Lint/UnusedMethodArgument
        [].tap do |findings|
          each_inconsistent_use(tokens) do |use, preferred|
            findings << finding(
              line: use[:start_line],
              # :start_column is bumped by one before being reported.
              column: use[:start_column] + 1,
              message: "variable `#{use[:text]}` is spelled `#{preferred}` " \
                       "elsewhere in this file — pick one case and stick with it.",
              path: path
            )
          end
        end
      end

      # Rewrites every non-canonical spelling to its group's canonical one.
      #
      # source - the file contents as a String (nil / empty is returned
      #          untouched).
      #
      # Returns a new String with the replacements applied; the input is
      # never mutated.
      def autofix(source)
        return source if source.nil? || source.empty?

        # Anything not already labelled UTF-8 is re-labelled on a copy.
        # The token :start/:end values are used below as String#[]=
        # indices, which count characters on a UTF-8 string but bytes on a
        # binary one, so the label has to be settled before slicing.
        text = source.encoding == Encoding::UTF_8 ? source : source.dup.force_encoding("UTF-8")

        lexer = SasLexer::Lexer.new
        lexed =
          begin
            lexer.tokenize(text)
          ensure
            # Release the lexer even when tokenizing raises.
            lexer.free
          end

        # Keep only tokens that are on neither the HIDDEN nor the COMMENT
        # channel.
        significant = lexed.select do |tok|
          tok[:channel] != SasLexer::Lexer::TokenChannel::HIDDEN &&
            tok[:channel] != SasLexer::Lexer::TokenChannel::COMMENT
        end

        replacements = []
        each_inconsistent_use(significant) do |use, preferred|
          replacements << [use[:start], use[:end], preferred]
        end

        # Highest start offset first, so an edit never shifts the offsets
        # of the edits that are still pending.
        replacements
          .sort_by { |from, _, _| -from }
          .each_with_object(text.dup) { |(from, to, preferred), out| out[from...to] = preferred }
      end

      private

      # Yields `token, canonical_spelling` for each identifier occurrence
      # whose text is not the canonical spelling of its group. Groups with
      # a single spelling are skipped entirely.
      def each_inconsistent_use(tokens)
        collect_variable_uses(tokens).each_value do |occurrences|
          spellings = occurrences.map { |tok| tok[:text] }.tally
          next unless spellings.size > 1

          preferred = canonical_form(spellings, occurrences)
          occurrences.each do |tok|
            next if tok[:text] == preferred

            yield tok, preferred
          end
        end
      end

      # Buckets the eligible IDENTIFIER tokens by lowercased text,
      # preserving stream order inside each bucket. Eligibility is decided
      # by `variable_use?`.
      #
      # Returns a Hash of lowercase name => Array of tokens.
      def collect_variable_uses(tokens)
        buckets = Hash.new { |hash, key| hash[key] = [] }
        tokens.each_with_index.each_with_object(buckets) do |(tok, idx), acc|
          next unless tok[:type] == TT::IDENTIFIER && variable_use?(tokens, idx)

          acc[tok[:text].downcase] << tok
        end
      end

      # Decides whether the token at index `i` counts as a variable use.
      #
      # A neighbouring DOT token only disqualifies the identifier when the
      # two touch (one token's :end equals the other's :start); a dot
      # separated by whitespace does not. The format-definition keywords
      # and the identifier right after one of them are rejected too.
      #
      # Returns true or false.
      def variable_use?(tokens, i)
        tok = tokens[i]
        after = tokens[i + 1]
        before = i.positive? ? tokens[i - 1] : nil

        excluded =
          (after && after[:type] == TT::DOT && after[:start] == tok[:end]) ||
          (before && before[:type] == TT::DOT && before[:end] == tok[:start]) ||
          FORMAT_DEF_KEYWORDS.include?(tok[:text].downcase) ||
          (before && before[:type] == TT::IDENTIFIER &&
            FORMAT_DEF_KEYWORDS.include?(before[:text].downcase))

        !excluded
      end

      # Picks the canonical spelling for one group.
      #
      # forms - Hash of spelling => occurrence count.
      # uses  - the group's tokens in stream order.
      #
      # The spelling with the highest count wins. On a tie, the tied
      # spelling that occurs earliest in `uses` is chosen, which keeps the
      # result stable from run to run.
      def canonical_form(forms, uses)
        top_count = forms.values.max
        leaders = forms.select { |_, count| count == top_count }.keys
        return leaders.first if leaders.size == 1

        earliest = uses.find { |tok| leaders.include?(tok[:text]) }
        earliest ? earliest[:text] : leaders.first
      end
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class SasLinter
4
- VERSION = "0.2.1"
4
+ VERSION = "0.2.3"
5
5
  end
data/lib/sas_linter.rb CHANGED
@@ -309,3 +309,4 @@ require_relative "sas_linter/rules/malformed_if_condition"
309
309
  require_relative "sas_linter/rules/missing_assignment_semicolon"
310
310
  require_relative "sas_linter/rules/variable_value_out_of_known_range"
311
311
  require_relative "sas_linter/rules/invalid_numeric_literal"
312
+ require_relative "sas_linter/rules/inconsistent_variable_case"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sas-linter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Craig McNamara
@@ -64,6 +64,7 @@ files:
64
64
  - lib/sas_linter/rules/commented_out_guard.rb
65
65
  - lib/sas_linter/rules/encoding_issues.rb
66
66
  - lib/sas_linter/rules/identical_if_else_branches.rb
67
+ - lib/sas_linter/rules/inconsistent_variable_case.rb
67
68
  - lib/sas_linter/rules/invalid_numeric_literal.rb
68
69
  - lib/sas_linter/rules/line_endings.rb
69
70
  - lib/sas_linter/rules/malformed_if_condition.rb