scitex-linter 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """SciTeX Linter — enforce reproducible research patterns via AST analysis."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """MCP integration for scitex-linter."""
@@ -0,0 +1,8 @@
1
+ """MCP tool registration for scitex-linter."""
2
+
3
+
4
def register_all_tools(mcp) -> None:
    """Attach every scitex-linter MCP tool group to the given server.

    Args:
        mcp: Server object handed on to each tool-group registrar.
    """
    # Imported lazily so that importing this package does not pull in the
    # tool implementations until registration is actually requested.
    from . import lint as _lint_tools

    _lint_tools.register_lint_tools(mcp)
@@ -0,0 +1,67 @@
1
+ """Lint MCP tools for scitex-linter."""
2
+
3
+ from typing import Optional
4
+
5
+
6
def _parse_categories(category: Optional[str]) -> Optional[set]:
    """Turn a comma-separated category filter into a set of names.

    Whitespace around each name is ignored and empty entries are dropped, so
    ``"io, plot"`` and ``"io,plot,"`` both yield ``{"io", "plot"}``.

    Returns:
        ``None`` when no filter was given (``None`` or an empty string),
        otherwise the set of requested category names.
    """
    if not category:
        return None
    return {name.strip() for name in category.split(",") if name.strip()}


def register_lint_tools(mcp) -> None:
    """Register lint-related MCP tools.

    Three tools are registered through ``mcp.tool()``: ``linter_lint``,
    ``linter_list_rules`` and ``linter_check_source``.

    Args:
        mcp: Server object exposing a ``tool()`` decorator factory.
    """

    @mcp.tool()
    def linter_lint(
        path: str, severity: str = "info", category: Optional[str] = None
    ) -> dict:
        """[linter] Lint a Python file for SciTeX pattern compliance."""
        from ...checker import lint_file
        from ...formatter import to_json
        from ...rules import SEVERITY_ORDER

        issues = lint_file(path)
        # An unrecognised severity name falls back to 0 (no minimum).
        min_sev = SEVERITY_ORDER.get(severity, 0)
        categories = _parse_categories(category)

        issues = [
            i
            for i in issues
            if SEVERITY_ORDER[i.rule.severity] >= min_sev
            and (categories is None or i.rule.category in categories)
        ]

        return to_json(issues, path)

    @mcp.tool()
    def linter_list_rules(
        category: Optional[str] = None, severity: Optional[str] = None
    ) -> dict:
        """[linter] List all available lint rules."""
        from ...rules import ALL_RULES

        rules_list = list(ALL_RULES.values())

        cats = _parse_categories(category)
        if cats is not None:
            rules_list = [r for r in rules_list if r.category in cats]
        if severity:
            # Exact match here (unlike linter_lint, which treats severity as
            # a minimum threshold).
            rules_list = [r for r in rules_list if r.severity == severity]

        return {
            "rules": [
                {
                    "id": r.id,
                    "severity": r.severity,
                    "category": r.category,
                    "message": r.message,
                    "suggestion": r.suggestion,
                }
                for r in rules_list
            ],
            "count": len(rules_list),
        }

    @mcp.tool()
    def linter_check_source(source: str, filepath: str = "<stdin>") -> dict:
        """[linter] Lint Python source code string for SciTeX pattern compliance."""
        from ...checker import lint_source
        from ...formatter import to_json

        issues = lint_source(source, filepath=filepath)
        return to_json(issues, filepath)
@@ -0,0 +1,23 @@
1
+ """MCP server for scitex-linter."""
2
+
3
+ from fastmcp import FastMCP
4
+
5
+ from ._mcp.tools import register_all_tools
6
+
7
+ _INSTRUCTIONS = """\
8
+ SciTeX Linter: AST-based linter enforcing reproducible research patterns.
9
+
10
+ Tools:
11
+ - linter_lint: Lint a Python file
12
+ - linter_list_rules: List all lint rules
13
+ - linter_check_source: Lint source code string
14
+ """
15
+
16
+ mcp = FastMCP(name="scitex-linter", instructions=_INSTRUCTIONS)
17
+
18
+ register_all_tools(mcp)
19
+
20
+
21
def run_server(transport: str = "stdio") -> None:
    """Run the MCP server.

    Args:
        transport: Transport name passed straight through to ``mcp.run``;
            defaults to ``"stdio"``.
    """
    mcp.run(transport=transport)
@@ -0,0 +1,469 @@
1
+ """AST-based checker that detects SciTeX anti-patterns."""
2
+
3
+ import ast
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ from . import rules
8
+ from .rules import Rule
9
+
10
# Shortcuts for Phase 1 rules, bound at module level so the checker can refer
# to them without the ``rules.`` prefix.
S001, S002, S003, S004, S005 = (
    rules.S001,
    rules.S002,
    rules.S003,
    rules.S004,
    rules.S005,
)
I001, I002, I003 = rules.I001, rules.I002, rules.I003
I006, I007 = rules.I006, rules.I007

# Phase 2: Call-level rule lookup table {(module_alias, func_name): Rule}
# module_alias=None means match func_name on any object
# Both the conventional alias (e.g. "np") and the full module name
# (e.g. "numpy") are listed so a call matches however the module was imported.
_CALL_RULES: dict = {
    # IO rules
    ("np", "save"): rules.IO001,
    ("numpy", "save"): rules.IO001,
    ("np", "load"): rules.IO002,
    ("numpy", "load"): rules.IO002,
    ("pd", "read_csv"): rules.IO003,
    ("pandas", "read_csv"): rules.IO003,
    (None, "to_csv"): rules.IO004,
    ("pickle", "dump"): rules.IO005,
    ("pickle", "dumps"): rules.IO005,
    ("json", "dump"): rules.IO006,
    ("plt", "savefig"): rules.IO007,
    # Plot rules
    (None, "show"): rules.P004,  # plt.show()
    # Stats rules — scipy.stats.X()
    ("stats", "ttest_ind"): rules.ST001,
    ("stats", "mannwhitneyu"): rules.ST002,
    ("stats", "pearsonr"): rules.ST003,
    ("stats", "f_oneway"): rules.ST004,
    ("stats", "wilcoxon"): rules.ST005,
    ("stats", "kruskal"): rules.ST006,
    # Path rules
    ("os", "makedirs"): rules.PA003,
    ("os", "mkdir"): rules.PA003,
    ("os", "chdir"): rules.PA004,
}

# Axes method suggestions {func_name: Rule}
# Only applied when the receiver's variable name looks like an axes object.
_AXES_HINTS: dict = {
    "plot": rules.P001,
    "scatter": rules.P002,
    "bar": rules.P003,
}

# print() inside session
_PRINT_RULE = rules.P005
60
+
61
+
62
@dataclass
class Issue:
    """One rule violation found in a linted source."""

    # The rule that was violated.
    rule: Rule
    # 1-based line number the issue is reported at.
    line: int
    # Column offset the issue is reported at (taken from the AST node).
    col: int
    # Text of the offending line with trailing whitespace removed; empty for
    # file-level issues that are not tied to a specific line.
    source_line: str = ""
68
+
69
+
70
def is_script(filepath: str) -> bool:
    """Tell whether *filepath* names a script rather than a library module.

    Only the final path component is inspected. Package initialisers, pytest
    files (``test_*`` and ``conftest.py``), ``setup.py`` and ``manage.py``
    are treated as non-scripts; everything else counts as a script.
    """
    basename = Path(filepath).name
    non_script_names = ("__init__.py", "conftest.py", "setup.py", "manage.py")
    return not (basename in non_script_names or basename.startswith("test_"))
80
+
81
+
82
class SciTeXChecker(ast.NodeVisitor):
    """AST visitor detecting non-SciTeX patterns.

    Typical use: build the checker with the file's source lines, call
    ``visit(tree)`` on the parsed module, then call ``get_issues()`` to obtain
    the collected ``Issue`` objects together with the script-level structural
    checks.
    """

    def __init__(self, source_lines: list, filepath: str = "<stdin>"):
        """Create a checker.

        Args:
            source_lines: Lines of the source being checked; used to attach
                the offending line's text to each issue.
            filepath: Name of the file being checked; decides whether the
                script-only structural rules apply (see ``is_script``).
        """
        self.source_lines = source_lines
        self.filepath = filepath
        self.issues: list = []

        # Tracking state
        self._has_stx_import = False
        self._has_main_guard = False
        self._has_session_decorator = False
        self._session_func_returns_int = False
        self._imports: dict = {}  # alias -> full module path
        self._is_script = is_script(filepath)
        # Set once get_issues() has appended the structural issues, so that
        # repeated calls do not append them again.
        self._finalized = False

    # -----------------------------------------------------------------
    # Import visitors
    # -----------------------------------------------------------------

    def visit_Import(self, node: ast.Import) -> None:
        """Record ``import X [as Y]`` aliases and apply import rules."""
        for alias in node.names:
            name = alias.asname or alias.name
            self._imports[name] = alias.name

            if alias.name == "scitex":
                self._has_stx_import = True

            self._check_import(alias.name, node)

        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Record ``from X import Y [as Z]`` aliases and apply import rules."""
        module = node.module or ""
        for alias in node.names:
            name = alias.asname or alias.name
            full = f"{module}.{alias.name}"
            self._imports[name] = full

        self._check_import_from(module, node)
        self.generic_visit(node)

    def _check_import(self, module_name: str, node: ast.Import) -> None:
        """Check bare `import X` statements."""
        line = self._get_source(node.lineno)

        # import matplotlib.pyplot as plt
        if "matplotlib.pyplot" in module_name:
            self._add(I001, node.lineno, node.col_offset, line)

        if module_name == "argparse":
            self._add(S003, node.lineno, node.col_offset, line)

        if module_name == "pickle":
            self._add(I003, node.lineno, node.col_offset, line)

        if module_name == "random":
            self._add(I006, node.lineno, node.col_offset, line)

        if module_name == "logging":
            self._add(I007, node.lineno, node.col_offset, line)

    def _check_import_from(self, module: str, node: ast.ImportFrom) -> None:
        """Check `from X import Y` statements."""
        line = self._get_source(node.lineno)

        # from matplotlib import pyplot / from matplotlib.pyplot import *
        if module == "matplotlib":
            for alias in node.names:
                if alias.name == "pyplot":
                    self._add(I001, node.lineno, node.col_offset, line)
                    break
        elif module and "matplotlib.pyplot" in module:
            self._add(I001, node.lineno, node.col_offset, line)

        # from scipy import stats / from scipy.stats import *
        if module in ("scipy", "scipy.stats"):
            if module == "scipy":
                for alias in node.names:
                    if alias.name == "stats":
                        self._add(I002, node.lineno, node.col_offset, line)
                        break
            else:
                self._add(I002, node.lineno, node.col_offset, line)

        # from argparse import *
        if module == "argparse":
            self._add(S003, node.lineno, node.col_offset, line)

    # -----------------------------------------------------------------
    # Call visitors (Phase 2)
    # -----------------------------------------------------------------

    def visit_Call(self, node: ast.Call) -> None:
        """Apply the call-level rules, then descend into the call's children."""
        self._check_call(node)
        self.generic_visit(node)

    def _resolve_scitex(self, name) -> str:
        """Return the dotted scitex path that *name* refers to, or ``""``.

        The import table is consulted first; the conventional names ``stx``
        and ``scitex`` are treated as the scitex package even when no import
        for them was recorded.
        """
        if name is None:
            return ""
        target = self._imports.get(name, "")
        if target == "scitex" or target.startswith("scitex."):
            return target
        if name in ("stx", "scitex"):
            return "scitex"
        return ""

    def _check_call(self, node: ast.Call) -> None:
        """Check function calls against Phase 2 rules."""
        func = node.func

        # module.func() pattern — e.g., np.save(), stats.ttest_ind()
        if isinstance(func, ast.Attribute):
            func_name = func.attr
            mod_name = None  # name directly left of the function name
            root_name = None  # leftmost name of the receiver expression

            if isinstance(func.value, ast.Name):
                mod_name = root_name = func.value.id
            elif isinstance(func.value, ast.Attribute):
                # module.sub.func() — e.g., scipy.stats.ttest_ind()
                if isinstance(func.value.value, ast.Name):
                    mod_name = func.value.attr  # use "stats" from scipy.stats
                    root_name = func.value.value.id

            # Calls into scitex itself are never flagged by the generic
            # rules; only their path arguments are checked. The receiver is
            # resolved through the import table so that aliases such as
            # `from scitex import io` or `import scitex as sx` are routed
            # here as well, not only the literal name `stx`.
            if mod_name == "stx" or self._resolve_scitex(root_name):
                self._check_stx_io_path(node)
                return

            # Resolve alias: if user did `import numpy as np`, resolve np -> numpy
            resolved = self._imports.get(mod_name, mod_name)

            # Check (module, func) against rule table
            rule = _CALL_RULES.get((mod_name, func_name))
            if rule is None and resolved != mod_name:
                rule = _CALL_RULES.get((resolved, func_name))
            if rule is None:
                rule = _CALL_RULES.get((None, func_name))

            # Special cases
            if rule is not None:
                # plt.show() — only flag if mod resolves to matplotlib
                if rule is rules.P004:
                    if mod_name not in ("plt", "pyplot") and resolved not in (
                        "matplotlib.pyplot",
                    ):
                        return

                # to_csv — only flag on DataFrame-like objects (not stx)
                if rule is rules.IO004:
                    if mod_name in ("stx", "os", "sys", "Path"):
                        return

                line = self._get_source(node.lineno)
                self._add(rule, node.lineno, node.col_offset, line)
                return

            # Axes hints: ax.plot(), ax.scatter(), ax.bar()
            if func_name in _AXES_HINTS and mod_name not in (
                "stx",
                "os",
                "sys",
                "Path",
                "math",
                "np",
                "numpy",
                "pd",
                "pandas",
            ):
                # Heuristic: if variable name looks like axes
                if mod_name and (
                    mod_name.startswith("ax") or mod_name in ("axes", "subplot")
                ):
                    line = self._get_source(node.lineno)
                    self._add(
                        _AXES_HINTS[func_name], node.lineno, node.col_offset, line
                    )
                    return

            # Path(...).mkdir() pattern
            if func_name == "mkdir" and mod_name not in (
                "os",
                "stx",
                "sys",
            ):
                # Heuristic: the call is assumed to be on a Path-like object
                # when the source line mentions "path" in any letter case.
                line = self._get_source(node.lineno)
                if "path" in line.lower():
                    self._add(rules.PA003, node.lineno, node.col_offset, line)

        # bare func() pattern — e.g., print(), open()
        # NOTE(review): these fire for any call visited after a session
        # function has been seen, not only for calls inside its body —
        # confirm whether that is intended.
        elif isinstance(func, ast.Name):
            if func.id == "print" and self._has_session_decorator:
                line = self._get_source(node.lineno)
                self._add(_PRINT_RULE, node.lineno, node.col_offset, line)
            elif func.id == "open" and self._has_session_decorator:
                line = self._get_source(node.lineno)
                self._add(rules.PA002, node.lineno, node.col_offset, line)

    # -----------------------------------------------------------------
    # stx.io path checking
    # -----------------------------------------------------------------

    def _check_stx_io_path(self, node: ast.Call) -> None:
        """Check path arguments in stx.io.save() / stx.io.load() calls.

        Only string-literal paths are inspected. An absolute path (leading
        ``/``) yields PA001; a relative path that does not start with ``./``
        or ``../`` yields PA005.
        """
        func = node.func
        if not isinstance(func, ast.Attribute):
            return

        func_name = func.attr

        # Determine which positional arg holds the path
        #   stx.io.save(obj, path, ...) -> index 1
        #   stx.io.load(path, ...)      -> index 0
        if func_name == "save":
            path_idx = 1
        elif func_name == "load":
            path_idx = 0
        else:
            return

        # Check if this is stx.io.save/load (not stx.plt.save, etc.)
        receiver = func.value
        is_stx_io = False
        if isinstance(receiver, ast.Attribute):
            # <scitex package>.io.save(...)
            is_stx_io = (
                isinstance(receiver.value, ast.Name)
                and self._resolve_scitex(receiver.value.id) == "scitex"
                and receiver.attr == "io"
            )
        elif isinstance(receiver, ast.Name):
            # io.save(...) where the name was imported as scitex.io
            is_stx_io = self._resolve_scitex(receiver.id) == "scitex.io"

        if not is_stx_io:
            return

        # Extract path string from positional args or 'path' kwarg
        path_str = None
        if len(node.args) > path_idx:
            arg = node.args[path_idx]
            if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
                path_str = arg.value
        else:
            for kw in node.keywords:
                if kw.arg == "path":
                    if isinstance(kw.value, ast.Constant) and isinstance(
                        kw.value.value, str
                    ):
                        path_str = kw.value.value
                    break

        if path_str is None:
            return

        line = self._get_source(node.lineno)

        # PA001: absolute path
        if path_str.startswith("/"):
            self._add(rules.PA001, node.lineno, node.col_offset, line)
        # PA005: missing ./ prefix (bare relative path)
        elif not path_str.startswith(("./", "../")):
            self._add(rules.PA005, node.lineno, node.col_offset, line)

    # -----------------------------------------------------------------
    # Function/decorator visitors
    # -----------------------------------------------------------------

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Note session-decorated functions and check their return value."""
        if self._has_session_deco(node):
            self._has_session_decorator = True
            self._check_session_return(node)
        self.generic_visit(node)

    visit_AsyncFunctionDef = visit_FunctionDef

    def _has_session_deco(self, node: ast.FunctionDef) -> bool:
        """Check if function has @stx.session or @session decorator.

        The called form (``@stx.session(...)``) is recognised too, and the
        package name is resolved through the import table so that
        ``import scitex as sx`` / ``@sx.session`` also counts.
        """
        for deco in node.decorator_list:
            # @stx.session(...) -> look at the callee expression
            target = deco.func if isinstance(deco, ast.Call) else deco

            # @stx.session
            if isinstance(target, ast.Attribute):
                if (
                    isinstance(target.value, ast.Name)
                    and self._resolve_scitex(target.value.id) == "scitex"
                    and target.attr == "session"
                ):
                    return True
            # @session (bare)
            if isinstance(target, ast.Name) and target.id == "session":
                return True
        return False

    def _check_session_return(self, node: ast.FunctionDef) -> None:
        """Check that session function returns an int.

        Adds S004 when no ``return <int literal>`` is found anywhere within
        the function (nested definitions are walked as well).
        """
        for child in ast.walk(node):
            if isinstance(child, ast.Return) and child.value is not None:
                if isinstance(child.value, ast.Constant) and isinstance(
                    child.value.value, int
                ):
                    self._session_func_returns_int = True
                    return
        # No int return found
        line = self._get_source(node.lineno)
        self._add(S004, node.lineno, node.col_offset, line)

    # -----------------------------------------------------------------
    # Module-level checks (run after visiting entire tree)
    # -----------------------------------------------------------------

    def visit_If(self, node: ast.If) -> None:
        """Detect if __name__ == '__main__' guard."""
        if self._is_main_guard(node):
            self._has_main_guard = True
        self.generic_visit(node)

    def _is_main_guard(self, node: ast.If) -> bool:
        """Return True when *node* tests ``__name__ == "__main__"``.

        Exactly one ``==`` comparison is required (so ``!=`` is not a guard);
        the operands may appear in either order.
        """
        test = node.test
        if not isinstance(test, ast.Compare):
            return False
        if len(test.ops) != 1 or not isinstance(test.ops[0], ast.Eq):
            return False

        def is_dunder_name(expr) -> bool:
            return isinstance(expr, ast.Name) and expr.id == "__name__"

        def is_main_literal(expr) -> bool:
            return isinstance(expr, ast.Constant) and expr.value == "__main__"

        left, right = test.left, test.comparators[0]
        return (is_dunder_name(left) and is_main_literal(right)) or (
            is_main_literal(left) and is_dunder_name(right)
        )

    # -----------------------------------------------------------------
    # Finalization
    # -----------------------------------------------------------------

    def get_issues(self) -> list:
        """Return all issues, including post-visit structural checks.

        For non-script files the issues are returned as collected. For
        scripts, the structural rules are added once (S002 when there is no
        main guard; S001 / S005 when there is a main guard but no session
        decorator / no scitex import) and the list is sorted by descending
        severity, then by line. Calling this method again returns the same
        list without adding the structural issues a second time.
        """
        if not self._is_script:
            return self.issues

        if not self._finalized:
            self._finalized = True

            if not self._has_main_guard:
                self._add(S002, 1, 0, "")

            if self._has_main_guard and not self._has_session_decorator:
                self._add(S001, 1, 0, "")

            if self._has_main_guard and not self._has_stx_import:
                self._add(S005, 1, 0, "")

        # Sort: errors first, then by line
        severity_order = rules.SEVERITY_ORDER
        self.issues.sort(key=lambda i: (-severity_order[i.rule.severity], i.line))
        return self.issues

    # -----------------------------------------------------------------
    # Helpers
    # -----------------------------------------------------------------

    def _add(self, rule: Rule, line: int, col: int, source_line: str) -> None:
        """Append a new ``Issue`` to ``self.issues``."""
        self.issues.append(
            Issue(rule=rule, line=line, col=col, source_line=source_line)
        )

    def _get_source(self, lineno: int) -> str:
        """Return source line *lineno* (1-based) without trailing whitespace.

        Returns an empty string when *lineno* is out of range.
        """
        if 1 <= lineno <= len(self.source_lines):
            return self.source_lines[lineno - 1].rstrip()
        return ""
443
+
444
+
445
+ # =============================================================================
446
+ # Public API
447
+ # =============================================================================
448
+
449
+
450
def lint_source(source: str, filepath: str = "<stdin>") -> list:
    """Lint Python source code and return list of Issues.

    Args:
        source: Python source text to analyse.
        filepath: Name used for parsing and for deciding whether the
            script-only rules apply.

    Returns:
        The issues found, or an empty list when *source* cannot be parsed.
    """
    try:
        tree = ast.parse(source, filename=filepath)
    except (SyntaxError, ValueError):
        # Unparsable input is treated as "nothing to report". ValueError is
        # what ast.parse raises for NUL bytes on Python versions before 3.12.
        return []

    checker = SciTeXChecker(source.splitlines(), filepath=filepath)
    checker.visit(tree)
    return checker.get_issues()
461
+
462
+
463
def lint_file(filepath: str) -> list:
    """Lint a Python file and return list of Issues.

    Args:
        filepath: Path of the file to lint; it is read as UTF-8.

    Returns:
        The issues found. An empty list is returned when the path is not a
        regular file, cannot be read, or is not valid UTF-8.
    """
    path = Path(filepath)
    # is_file() is already False for a missing path, so no exists() check.
    if not path.is_file():
        return []
    try:
        source = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Stay best-effort, like the missing-file case above: one unreadable
        # or non-UTF-8 file should not abort the caller.
        return []
    return lint_source(source, filepath=str(path))