pureshellcheck 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ """pureshellcheck: a pure Python reimplementation of ShellCheck's most
2
+ common checks.
3
+
4
+ >>> import pureshellcheck
5
+ >>> for finding in pureshellcheck.check('echo $foo'):
6
+ ... print(finding.line, finding.column, finding.code, finding.message)
7
+ """
8
+
9
+ __version__ = "0.1.0"
10
+
11
+ from .analyzer import Finding, run_checks # noqa: F401
12
+ from .parser import ParseError, parse # noqa: F401
13
+ from . import checks # noqa: F401 (registers all checks)
14
+
15
+
16
def check(source, shell=None, include_optional=False):
    """Run the analysis over *source* and return the list of Finding objects.

    `shell` overrides shebang detection ("bash", "sh", "dash", "ksh").
    `include_optional` is forwarded to `run_checks` unchanged.

    Findings have: code (int), severity, message, line, column, end_line,
    end_column.  The second element returned by `run_checks` is discarded
    here; only the findings list is handed back.
    """
    outcome = run_checks(
        source,
        shell=shell,
        include_optional=include_optional,
    )
    return outcome[0]
26
+
27
+
28
def implemented_codes():
    """Return the set of SC codes this version can emit.

    The result is a fresh copy of the hand-maintained `_IMPLEMENTED`
    registry, so callers may mutate it without affecting the package.
    """
    return set(_IMPLEMENTED)


# maintained by hand; verified by tests/test_implemented.py
_IMPLEMENTED = set()


def _register_codes(*codes):
    """Add every code in *codes* to the `_IMPLEMENTED` registry."""
    _IMPLEMENTED.update(codes)


_register_codes(
    1073,  # parse errors
    2006, 2016, 2026, 2027, 2041, 2042, 2043, 2046, 2048, 2066, 2068,
    2086, 2089, 2090, 2140, 2145, 2206, 2207, 2223, 2248, 2250, 2258,
)
_register_codes(
    2002, 2003, 2005, 2009, 2010, 2011, 2012, 2015, 2038, 2050, 2059,
    2064, 2065, 2114, 2115, 2116, 2126, 2148, 2162, 2164, 2174, 2181,
    2182, 2183, 2187, 2188, 2189, 2239, 2246, 2304, 2305,
    2306, 2307, 2308,
)
_register_codes(
    2004, 2034, 2128, 2153, 2154, 2155, 2178, 2179,
)
_register_codes(
    2007, 2028, 2035, 2093, 2094, 2103,
)
@@ -0,0 +1,403 @@
1
+ """Analysis framework: finding model, AST helpers, check registry, driver."""
2
+
3
+ import re
4
+
5
+ from .shast import Positions, ancestors, iter_children, set_parents, walk
6
+ from .parser import (ParseError, Parser, literal_text, quoted_literal_text)
7
+
8
# Severity labels attached to findings; Context.err/warn/info/style emit
# exactly these four strings, in this order.
SEVERITIES = ("error", "warning", "info", "style")
9
+
10
+
11
class Finding:
    """One diagnostic produced by a check.

    `pos` and `end` are stored exactly as supplied by the reporter.  The
    line/column fields stay at 0 until `locate` is called with an object
    providing `line_col(offset)`.
    """

    __slots__ = ("code", "severity", "message", "pos", "end",
                 "line", "column", "end_line", "end_column")

    def __init__(self, code, severity, message, pos, end):
        self.code, self.severity, self.message = code, severity, message
        self.pos, self.end = pos, end
        # Not yet resolved; locate() fills these in.
        self.line = 0
        self.column = 0
        self.end_line = 0
        self.end_column = 0

    def locate(self, positions):
        """Resolve `pos`/`end` into line and column via `positions.line_col`."""
        start = positions.line_col(self.pos)
        self.line, self.column = start
        stop = positions.line_col(self.end)
        self.end_line, self.end_column = stop

    def __repr__(self):
        fields = (self.code, self.line, self.column, self.message)
        return "SC%d:%d:%d %s" % fields
30
+
31
+
32
# Registries filled in by the decorators below.
NODE_CHECKS = {}   # node kind -> list of check functions
TREE_CHECKS = []   # check functions invoked once with the tree root
# Checks that are opt-in in shellcheck 0.11 (quote-safe-variables,
# require-variable-braces, useless-use-of-cat, ...)
OPTIONAL_CODES = frozenset({2002, 2248, 2250, 2312})


def node_check(*kinds):
    """Decorator factory: register the function under each kind in *kinds*.

    The decorated function is returned unchanged.
    """
    def register(check):
        for kind in kinds:
            if kind not in NODE_CHECKS:
                NODE_CHECKS[kind] = []
            NODE_CHECKS[kind].append(check)
        return check
    return register


def tree_check(fn):
    """Decorator: append *fn* to TREE_CHECKS and return it unchanged."""
    TREE_CHECKS.append(fn)
    return fn
50
+
51
+
52
SHEBANG_RE = re.compile(r"#!\s*(\S+)(\s+(\S+))?")
KNOWN_SHELLS = {"bash", "sh", "dash", "ash", "ksh", "ksh93", "mksh",
                "busybox", "bats", "zsh"}

# `env` options that consume the following word as their argument.
_ENV_OPTIONS_WITH_ARG = frozenset({"-u", "--unset", "-C", "--chdir"})
_ENV_SPLIT_STRING = "--split-string="


def _env_target(args):
    """Return the program word `env` would run for *args*, or None.

    Options (`-S`, `-i`, ...), options with a separate argument
    (`-u NAME`, `-C DIR`) and `VAR=value` assignments are skipped.
    """
    skip_next = False
    for arg in args:
        if skip_next:
            skip_next = False
            continue
        if arg.startswith(_ENV_SPLIT_STRING):
            # `--split-string=bash` carries the program inside the option.
            arg = arg[len(_ENV_SPLIT_STRING):]
            if arg:
                return arg
            continue
        if arg in _ENV_OPTIONS_WITH_ARG:
            skip_next = True
            continue
        if arg.startswith("-") or "=" in arg:
            continue
        return arg
    return None


def shell_from_shebang(shebang):
    """Map a shebang string to a shell name, or None if unrecognised.

    The interpreter's basename is looked up in KNOWN_SHELLS.  For
    `env`-style shebangs the program is the first argument that is neither
    an option nor a `VAR=value` assignment, so `#!/usr/bin/env -S bash -e`
    resolves to "bash".  "busybox" is reported as "ash" and "ksh93" as
    "ksh"; every other known shell is returned as written.

    Returns None for an empty/None shebang, a string that does not start
    with `#!`, or an interpreter that is not a known shell.
    """
    if not shebang:
        return None
    m = SHEBANG_RE.match(shebang)
    if not m:
        return None
    base = m.group(1).rsplit("/", 1)[-1]
    if base == "env":
        target = _env_target(shebang[m.end(1):].split())
        if target is not None:
            base = target.rsplit("/", 1)[-1]
    if base == "busybox":
        return "ash"
    if base in KNOWN_SHELLS:
        return "ksh" if base == "ksh93" else base
    return None
71
+
72
+
73
class Context:
    """Shared state and helpers available to every check."""

    def __init__(self, source, root, shell, include_optional=False):
        self.source = source
        self.root = root
        self.shell = shell
        self.include_optional = include_optional
        self.positions = Positions(source)
        self.findings = []
        self.cache = {}
        # run_checks() assigns these after construction; they are given
        # defaults here so a directly constructed Context still has them.
        self.detected_shell = None
        self.explicit_shell = None
        self.directives = []

    # -- emission ------------------------------------------------------

    def report(self, node, code, severity, message, pos=None, end=None):
        """Record a Finding spanning `node` (or the explicit pos/end).

        Codes in OPTIONAL_CODES are dropped unless `include_optional` is set.
        """
        if code in OPTIONAL_CODES and not self.include_optional:
            return
        f = Finding(code, severity, message,
                    node.pos if pos is None else pos,
                    node.end if end is None else end)
        self.findings.append(f)

    def err(self, node, code, message):
        """Report with severity "error"."""
        self.report(node, code, "error", message)

    def warn(self, node, code, message):
        """Report with severity "warning"."""
        self.report(node, code, "warning", message)

    def info(self, node, code, message):
        """Report with severity "info"."""
        self.report(node, code, "info", message)

    def style(self, node, code, message):
        """Report with severity "style"."""
        self.report(node, code, "style", message)

    # -- shell flavor ----------------------------------------------------

    @property
    def is_bashlike(self):
        """True when the effective shell is bash, ksh, bats or zsh."""
        # NOTE(review): "mksh" is in KNOWN_SHELLS but is not listed here --
        # confirm that is intended.
        return self.shell in ("bash", "ksh", "bats", "zsh")

    # -- command helpers -------------------------------------------------

    # Commands that run another command given as their arguments.
    WRAPPER_COMMANDS = frozenset({
        "sudo", "nice", "nohup", "time", "timeout", "env", "doas",
        "command", "builtin", "exec", "stdbuf", "busybox", "run",
    })
    # Per wrapper: single-letter options that take a separate argument.
    WRAPPER_ARG_FLAGS = {
        "exec": {"a"},
        "stdbuf": {"o", "e", "i"},
        "timeout": {"k", "s"},
        "env": {"u", "C", "S"},
    }

    def command_resolution(self, cmd):
        """(name_word, index, wrapper_names) after skipping wrappers.

        Returns (None, -1, []) when `cmd` is not a T_SimpleCommand or has no
        words, and (None, -1, wrappers) when only wrappers and their options
        were present.  A word without literal text ends the search and is
        returned as the name word.
        """
        if cmd.kind != "T_SimpleCommand" or not cmd.words:
            return None, -1, []
        words = cmd.words
        wrappers = []
        idx = 0
        while idx < len(words):
            name = literal_text(words[idx])
            if name is None:
                return words[idx], idx, wrappers
            base = name.rsplit("/", 1)[-1]
            if base in self.WRAPPER_COMMANDS:
                wrappers.append(base)
                arg_flags = self.WRAPPER_ARG_FLAGS.get(base, set())
                idx += 1
                # Skip the wrapper's own options (and env's VAR=value words).
                while idx < len(words):
                    text = literal_text(words[idx])
                    if text is None:
                        break
                    if text.startswith("-"):
                        idx += 1
                        # An option listed in arg_flags also consumes the
                        # next word as its argument.
                        if text[1:] in arg_flags and idx < len(words):
                            idx += 1
                    elif base == "env" and "=" in text:
                        idx += 1
                    else:
                        break
                if base == "timeout" and idx < len(words):
                    idx += 1  # the duration argument
                # Re-examine the next word: wrappers may be chained.
                continue
            return words[idx], idx, wrappers
        return None, -1, wrappers

    def command_name_word(self, cmd):
        """The word holding the command name, skipping wrapper commands."""
        return self.command_resolution(cmd)[0]

    def command_basename(self, cmd):
        """Literal command name with any directory prefix removed, or None."""
        word = self.command_name_word(cmd)
        if word is None:
            return None
        name = literal_text(word)
        if name is None:
            return None
        return name.rsplit("/", 1)[-1]

    def is_command(self, cmd, name):
        """True if the wrapper-skipped command basename equals `name`."""
        return self.command_basename(cmd) == name

    def argument_words(self, cmd):
        """Argument words after the (wrapper-skipped) command name."""
        word, idx, _ = self.command_resolution(cmd)
        if word is None:
            return []
        # command_resolution already located the name word; slice after it.
        return cmd.words[idx + 1:]

    def flags(self, cmd):
        """[(flagname, word)] for '-x'/'--foo' arguments; '' for others.

        Long options are cut at the first '='; a bundle such as '-abc'
        yields one entry per letter.  Scanning stops at a literal '--'.
        """
        out = []
        args = self.argument_words(cmd)
        for w in args:
            text = literal_text(w)
            if text == "--":
                break
            if text and text.startswith("--"):
                out.append((text[2:].split("=", 1)[0], w))
            elif text and text.startswith("-") and len(text) > 1:
                for ch in text[1:]:
                    out.append((ch, w))
            else:
                out.append(("", w))
        return out

    # -- quoting / context helpers ----------------------------------------

    def is_quote_free(self, node):
        """True if node is in an unquoted context (command subs reset)."""
        for a in ancestors(node):
            k = a.kind
            if k in ("T_DoubleQuoted", "T_DollarDoubleQuoted"):
                return False
            # These start a fresh quoting context, so outer quotes are moot.
            if k in ("T_DollarExpansion", "T_Backticked", "T_ProcSub",
                     "T_DollarBraceCommandExpansion", "T_Script"):
                return True
            if k == "T_HereDoc":
                return False
        return True

    def parent_word(self, node):
        """The outermost T_NormalWord containing node, within this context."""
        word = node if node.kind == "T_NormalWord" else None
        for a in ancestors(node):
            if a.kind == "T_NormalWord":
                word = a
            elif a.kind in ("T_DollarExpansion", "T_Backticked",
                            "T_ProcSub", "T_Script"):
                # Do not climb out of the enclosing substitution/script.
                break
        return word

    def word_role(self, node):
        """How the word containing node is used: (role, holder).

        Roles: 'command-word', 'argument', 'assign-value', 'condition',
        'arith', 'case-word', 'for-words', 'redirect-target', 'herestring',
        'heredoc', 'case-pattern', 'array-element', 'braced-arg', 'other'.

        `holder` is the ancestor that determined the role, or None when no
        ancestor matched.
        """
        # `prev` is the child through which the current ancestor was reached.
        prev = node
        for a in ancestors(node):
            k = a.kind
            if k == "T_SimpleCommand":
                words = a.words
                if words and (prev is words[0]):
                    return "command-word", a
                return "argument", a
            if k == "T_Assignment":
                if prev is a.get("value"):
                    return "assign-value", a
                return "other", a
            if k == "T_IndexedElement":
                return "assign-value", a
            if k == "T_Array":
                return "array-element", a
            if k == "T_Condition":
                return "condition", a
            if k in ("T_DollarArithmetic", "T_Arithmetic", "TA_Expansion"):
                return "arith", a
            if k == "T_CaseExpression":
                if prev is a.word:
                    return "case-word", a
                return "case-pattern", a
            if k == "T_CaseItem":
                if prev in a.patterns:
                    return "case-pattern", a
            if k in ("T_ForIn", "T_SelectIn"):
                if prev in a.words:
                    return "for-words", a
            if k == "T_IoFile":
                return "redirect-target", a
            if k == "T_HereString":
                return "herestring", a
            if k == "T_HereDoc":
                return "heredoc", a
            if k == "T_DollarBraced":
                return "braced-arg", a
            if k in ("T_DollarExpansion", "T_Backticked", "T_Script",
                     "T_ProcSub"):
                return "other", a
            prev = a
        return "other", None
280
+
281
+
282
def statement_lists(root):
    """Yield every list of statement nodes in the tree.

    For each node reached by `walk(root)`, yields the non-empty list values
    stored under the fields "commands", "body", "condition", "else_body"
    and "init".  For a truthy "branches" field, yields the condition and
    body of every (cond, body) pair, whether or not they are empty.
    """
    for node in walk(root):
        fields = node.fields
        for key in ("commands", "body", "condition", "else_body", "init"):
            value = fields.get(key)
            # Non-list values (e.g. a single node) and empty lists are skipped.
            if isinstance(value, list) and value:
                yield value
        branches = fields.get("branches")
        if branches:
            for cond, body in branches:
                yield cond
                yield body
295
+
296
+
297
# Accepted forms of a `disable=` value: "SC2086"/"2086" and "SC1000-SC1010".
_DISABLE_SINGLE_RE = re.compile(r"^(?:SC)?(\d+)$")
_DISABLE_RANGE_RE = re.compile(r"^(?:SC)?(\d+)-(?:SC)?(\d+)$")


def _disabled_codes(values):
    """Translate the values of one disable directive into a set of codes.

    The value "all" is represented by the sentinel -1; values that match
    none of the accepted forms are ignored.
    """
    codes = set()
    for value in values:
        single = _DISABLE_SINGLE_RE.match(value)
        if single:
            codes.add(int(single.group(1)))
        elif value == "all":
            codes.add(-1)
        else:
            span = _DISABLE_RANGE_RE.match(value)
            if span:
                codes.update(range(int(span.group(1)),
                                   int(span.group(2)) + 1))
    return codes


def apply_directives(findings, directives, root, source, positions):
    """Filter findings according to `# shellcheck disable=` directives.

    Scope of a disable directive:
      * at or before the first statement: the whole file;
      * on a line that also holds code (`line_has_code`): the last
        statement, in position order, that starts at or before the
        directive and ends no earlier than one character before it;
      * otherwise: the first statement starting at or after the directive.

    A finding is dropped when its `pos` lies in [start, end) of such a scope
    and its code is listed (or "all" was given).  When nothing is disabled
    the `findings` list is returned as-is; otherwise a new list is returned.
    `positions` is accepted for interface compatibility and is not used.
    """
    if not directives:
        return findings

    # Parse the directives first so the tree walk below is skipped entirely
    # when no disable directive carries a usable code.
    disables = []
    for d in directives:
        if d.kind != "disable":
            continue
        codes = _disabled_codes(d.values)
        if codes:
            disables.append((d, codes))
    if not disables:
        return findings

    # statements eligible as directive targets, sorted by position
    statements = []
    seen = set()
    for lst in statement_lists(root):
        for node in lst:
            if id(node) not in seen:
                seen.add(id(node))
                statements.append(node)
    statements.sort(key=lambda n: n.pos)
    first_cmd_pos = statements[0].pos if statements else len(source)

    disabled_ranges = []  # (start, end, set_of_codes)
    for d, codes in disables:
        if d.pos <= first_cmd_pos:
            disabled_ranges.append((0, len(source) + 1, codes))
            continue
        if d.line_has_code:
            # trailing directive: applies to the statement on this line;
            # the last match in position order wins.
            target = None
            for s in statements:
                if s.pos <= d.pos and s.end >= d.pos - 1:
                    target = s
        else:
            target = next((s for s in statements if s.pos >= d.pos), None)
        if target is not None:
            disabled_ranges.append((target.pos, target.end, codes))
    if not disabled_ranges:
        return findings

    def suppressed(f):
        return any(start <= f.pos < end and (f.code in codes or -1 in codes)
                   for start, end, codes in disabled_ranges)

    return [f for f in findings if not suppressed(f)]
360
+
361
+
362
def run_checks(source, shell=None, include_optional=False,
               filename="<stdin>"):
    """Parse and analyze a script.  Returns (findings, parse_error|None).

    On a ParseError the result is a single "error" Finding built from the
    exception (offsets clamped to the bounds of `source`) together with the
    exception itself.

    The effective shell is the explicit `shell` argument if given, else the
    shell detected from the script, else "bash".  Detection starts from the
    shebang and is overridden by any `shell` directive that has values (the
    last such directive wins).

    Findings are filtered through `apply_directives`, sorted by
    (pos, code) and located before being returned.  `filename` is currently
    unused.
    """
    parser = Parser(source)
    try:
        root = parser.parse()
    except ParseError as e:
        # Slice rather than index so an empty message cannot raise
        # IndexError.
        message = e.message[:1].upper() + e.message[1:] + "."
        f = Finding(e.code, "error", message,
                    min(e.pos, max(len(source) - 1, 0)),
                    min(e.pos + 1, len(source)))
        f.locate(Positions(source))
        return [f], e
    set_parents(root)

    detected = shell_from_shebang(root.get("shebang"))
    directives = parser.directives
    for d in directives:
        if d.kind == "shell" and d.values:
            detected = d.values[0]
    effective_shell = shell or detected or "bash"

    ctx = Context(source, root, effective_shell,
                  include_optional=include_optional)
    ctx.detected_shell = detected
    ctx.explicit_shell = shell
    ctx.directives = directives

    # Per-node checks first, dispatched on node kind, then whole-tree checks.
    for node in walk(root):
        fns = NODE_CHECKS.get(node.kind)
        if fns:
            for fn in fns:
                fn(ctx, node)
    for fn in TREE_CHECKS:
        fn(ctx, root)

    findings = apply_directives(ctx.findings, directives, root, source,
                                ctx.positions)
    findings.sort(key=lambda f: (f.pos, f.code))
    for f in findings:
        f.locate(ctx.positions)
    return findings, None