codexray-analyser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexray/__init__.py +15 -0
- codexray/analyser.py +125 -0
- codexray/cli.py +71 -0
- codexray/config.py +13 -0
- codexray/graph.py +101 -0
- codexray/limits.py +25 -0
- codexray/models.py +42 -0
- codexray/offline.py +12 -0
- codexray/parsers.py +94 -0
- codexray/rules.py +127 -0
- codexray_analyser-0.1.0.dist-info/METADATA +177 -0
- codexray_analyser-0.1.0.dist-info/RECORD +15 -0
- codexray_analyser-0.1.0.dist-info/WHEEL +4 -0
- codexray_analyser-0.1.0.dist-info/entry_points.txt +2 -0
- codexray_analyser-0.1.0.dist-info/licenses/LICENSE +21 -0
codexray/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Public package API for codexray."""
|
|
2
|
+
|
|
3
|
+
from .analyser import analyse_file_snippet, analyse_path, analyse_snippet
|
|
4
|
+
from .config import AnalyserConfig
|
|
5
|
+
from .models import AnalysisResult, Finding, GraphData
|
|
6
|
+
|
|
7
|
+
# Names re-exported as the public package API: the config object first,
# then the result models, then the analysis entry points.
__all__ = [
    "AnalyserConfig",
    "AnalysisResult",
    "Finding",
    "GraphData",
    "analyse_path",
    "analyse_snippet",
    "analyse_file_snippet",
]
|
codexray/analyser.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from .config import AnalyserConfig
|
|
6
|
+
from .graph import build_graph, merge_graphs
|
|
7
|
+
from .limits import AnalyserLimitError
|
|
8
|
+
from .models import AnalysisResult, Finding
|
|
9
|
+
from .parsers import (
|
|
10
|
+
discover_input_files,
|
|
11
|
+
parse_notebook,
|
|
12
|
+
parse_python_source,
|
|
13
|
+
parse_requirements_text,
|
|
14
|
+
read_file_text,
|
|
15
|
+
select_line_range,
|
|
16
|
+
)
|
|
17
|
+
from .rules import RuleContext, run_requirements_rules, run_rules
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def analyse_path(path: str | Path, config: AnalyserConfig | None = None) -> AnalysisResult:
    """Analyse one file, or every supported file found beneath a directory.

    Requirements files are checked with the dependency rules. ``.py`` and
    ``.ipynb`` files are parsed, checked with the source rules and added to
    the graph. A file that cannot be read, parsed or analysed produces a
    ``PARSE001`` finding instead of aborting the run.

    Args:
        path: File or directory to analyse.
        config: Limits and feature flags; a default ``AnalyserConfig`` is
            used when omitted.

    Returns:
        An ``AnalysisResult`` whose findings are sorted by file path, rule id
        and line, together with the merged graph and run metadata.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    cfg = config or AnalyserConfig()
    root = Path(path)
    if not root.exists():
        raise FileNotFoundError(f"Path does not exist: {root}")

    files = [root] if root.is_file() else list(discover_input_files(root))

    all_findings = []
    graphs = []

    for file_path in files:
        if _is_requirements_file(file_path):
            try:
                source = read_file_text(file_path, cfg)
                requirements = parse_requirements_text(source)
                all_findings.extend(run_requirements_rules(requirements, file_path))
            except (ValueError, OSError) as exc:
                all_findings.append(_parsing_finding(file_path, str(exc)))
            continue

        if file_path.suffix.lower() not in {".py", ".ipynb"}:
            continue

        try:
            source = _load_source(file_path, cfg)
            tree = parse_python_source(source, cfg)
            all_findings.extend(
                run_rules(RuleContext(file_path=file_path, source=source, tree=tree))
            )
            graphs.append(build_graph(file_path, tree))
        except (AnalyserLimitError, SyntaxError, OSError, ValueError, RecursionError) as exc:
            # RecursionError: the rule visitors recurse once per AST nesting
            # level, so deeply nested but valid source can exhaust the stack.
            # One such file must not abort the whole scan.
            all_findings.append(_parsing_finding(file_path, str(exc)))

    return AnalysisResult(
        findings=sorted(
            all_findings, key=lambda item: (item.file_path, item.rule_id, item.line or 0)
        ),
        graph=merge_graphs(graphs),
        metadata={
            "analysed_path": str(root),
            "offline_mode": not cfg.allow_network,
            "files_analysed": len(files),
        },
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def analyse_snippet(
    source: str,
    file_path: str = "snippet.py",
    config: AnalyserConfig | None = None,
) -> AnalysisResult:
    """Analyse a piece of Python source held in memory.

    Args:
        source: Python source text to parse and check.
        file_path: Path label attached to findings and graph nodes.
        config: Limits and feature flags; defaults to ``AnalyserConfig()``.

    Returns:
        An ``AnalysisResult`` with the rule findings, the graph built from
        the snippet, and metadata marking the run as a snippet analysis.
    """
    active_config = config or AnalyserConfig()
    parsed = parse_python_source(source, active_config)
    location = Path(file_path)
    context = RuleContext(file_path=location, source=source, tree=parsed)
    return AnalysisResult(
        findings=run_rules(context),
        graph=build_graph(location, parsed),
        metadata={"snippet": True, "offline_mode": not active_config.allow_network},
    )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def analyse_file_snippet(
    path: str | Path,
    start_line: int,
    end_line: int,
    config: AnalyserConfig | None = None,
) -> AnalysisResult:
    """Analyse an inclusive, 1-based line range taken from a file.

    Args:
        path: File to read.
        start_line: First line of the range (1-based).
        end_line: Last line of the range (inclusive).
        config: Limits and feature flags; defaults to ``AnalyserConfig()``.

    Returns:
        The ``AnalysisResult`` for the selected lines. Finding line numbers
        refer to positions in the original file, not to the extracted range.
    """
    cfg = config or AnalyserConfig()
    target = Path(path)
    source = read_file_text(target, cfg)
    snippet = select_line_range(source, start_line, end_line)
    result = analyse_snippet(snippet, file_path=str(target), config=cfg)

    # The snippet was parsed on its own, so reported lines start at 1.
    # Findings carry the real file path, so shift them back to the file's
    # numbering to keep both fields consistent.
    offset = start_line - 1
    if offset:
        for finding in result.findings:
            if finding.line is not None:
                finding.line += offset
    return result
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _load_source(path: Path, config: AnalyserConfig) -> str:
    """Return the Python source held in a ``.py`` file or a notebook.

    Raises:
        ValueError: If the file suffix is neither ``.py`` nor ``.ipynb``.
    """
    extension = path.suffix.lower()
    if extension == ".py":
        return read_file_text(path, config)
    if extension == ".ipynb":
        return parse_notebook(path, config)
    raise ValueError(f"Unsupported source type: {path.suffix}")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _is_requirements_file(path: Path) -> bool:
    """Tell whether *path* is named like a pip requirements file.

    The file name must start with ``requirements`` (case-insensitive) and
    end in ``.txt``, ``.in`` or ``.cfg``.
    """
    if not path.name.lower().startswith("requirements"):
        return False
    return path.suffix.lower() in (".txt", ".in", ".cfg")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _parsing_finding(path: Path, error_message: str) -> Finding:
    """Build the ``PARSE001`` finding recorded when a file cannot be processed."""
    details = {
        "rule_id": "PARSE001",
        "title": "File parsing failed",
        "severity": "medium",
        "message": error_message,
        "file_path": str(path),
    }
    return Finding(**details)
|
codexray/cli.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import asdict
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .analyser import analyse_file_snippet, analyse_path, analyse_snippet
|
|
9
|
+
from .config import AnalyserConfig
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main() -> int:
|
|
13
|
+
parser = argparse.ArgumentParser(
|
|
14
|
+
prog="codexray",
|
|
15
|
+
description="Offline-first static analysis for Python files and notebooks.",
|
|
16
|
+
)
|
|
17
|
+
parser.add_argument(
|
|
18
|
+
"target", nargs="?", help="Path to file or directory to analyse"
|
|
19
|
+
)
|
|
20
|
+
parser.add_argument("--snippet", help="Inline Python snippet to analyse")
|
|
21
|
+
parser.add_argument(
|
|
22
|
+
"--start-line", type=int, help="Start line for target file snippet analysis"
|
|
23
|
+
)
|
|
24
|
+
parser.add_argument(
|
|
25
|
+
"--end-line", type=int, help="End line for target file snippet analysis"
|
|
26
|
+
)
|
|
27
|
+
parser.add_argument("--output", help="Write JSON result to this path")
|
|
28
|
+
parser.add_argument(
|
|
29
|
+
"--allow-network", action="store_true", help="Enable network-dependent features"
|
|
30
|
+
)
|
|
31
|
+
parser.add_argument("--max-file-bytes", type=int, default=2 * 1024 * 1024)
|
|
32
|
+
parser.add_argument("--max-snippet-chars", type=int, default=50_000)
|
|
33
|
+
|
|
34
|
+
args = parser.parse_args()
|
|
35
|
+
|
|
36
|
+
if not args.target and not args.snippet:
|
|
37
|
+
parser.error("Provide a target path or use --snippet")
|
|
38
|
+
|
|
39
|
+
config = AnalyserConfig(
|
|
40
|
+
allow_network=args.allow_network,
|
|
41
|
+
max_file_size_bytes=args.max_file_bytes,
|
|
42
|
+
max_snippet_chars=args.max_snippet_chars,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
if args.snippet:
|
|
46
|
+
result = analyse_snippet(args.snippet, config=config)
|
|
47
|
+
elif args.start_line is not None and args.end_line is not None:
|
|
48
|
+
if not args.target:
|
|
49
|
+
parser.error("Line-range analysis requires a file target")
|
|
50
|
+
result = analyse_file_snippet(
|
|
51
|
+
Path(args.target),
|
|
52
|
+
start_line=args.start_line,
|
|
53
|
+
end_line=args.end_line,
|
|
54
|
+
config=config,
|
|
55
|
+
)
|
|
56
|
+
else:
|
|
57
|
+
result = analyse_path(Path(args.target), config=config)
|
|
58
|
+
|
|
59
|
+
payload = asdict(result)
|
|
60
|
+
rendered = json.dumps(payload, indent=2)
|
|
61
|
+
|
|
62
|
+
if args.output:
|
|
63
|
+
Path(args.output).write_text(rendered, encoding="utf-8")
|
|
64
|
+
else:
|
|
65
|
+
print(rendered)
|
|
66
|
+
|
|
67
|
+
return 0
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# Script entry point: the integer returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
codexray/config.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@dataclass(slots=True)
|
|
5
|
+
class AnalyserConfig:
|
|
6
|
+
"""Runtime safety controls and feature flags."""
|
|
7
|
+
|
|
8
|
+
allow_network: bool = False
|
|
9
|
+
max_file_size_bytes: int = 2 * 1024 * 1024
|
|
10
|
+
max_notebook_cells: int = 500
|
|
11
|
+
max_notebook_json_bytes: int = 5 * 1024 * 1024
|
|
12
|
+
max_ast_nodes: int = 200_000
|
|
13
|
+
max_snippet_chars: int = 50_000
|
codexray/graph.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .models import GraphData, GraphEdge, GraphNode
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def build_graph(file_path: Path, tree: ast.AST) -> GraphData:
    """Build the import/function/call graph for one parsed source file.

    The graph holds one ``file`` node, one ``import`` node per imported
    module, one ``function`` node per function name (sync or async), and
    one ``import``-kind node per named call target. Every node id and every
    ``(source, target, relation)`` edge appears at most once.

    Args:
        file_path: Path used to label the file node and to qualify function ids.
        tree: Parsed AST of the file.

    Returns:
        The populated ``GraphData``.
    """
    graph = GraphData()
    seen_nodes: set[str] = set()
    seen_edges: set[tuple[str, str, str]] = set()

    def add_node(node_id: str, kind: str, label: str) -> None:
        # A single id registry: a call target and an imported module can
        # share the same "import:<name>" id and must not be emitted twice.
        if node_id not in seen_nodes:
            seen_nodes.add(node_id)
            graph.nodes.append(GraphNode(node_id=node_id, kind=kind, label=label))

    def add_edge(source: str, target: str, relation: str) -> None:
        key = (source, target, relation)
        if key not in seen_edges:
            seen_edges.add(key)
            graph.edges.append(GraphEdge(source=source, target=target, relation=relation))

    file_node = _file_node_id(file_path)
    add_node(file_node, "file", str(file_path))

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            modules = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom) and node.module:
            # "from . import x" has no module name and is skipped.
            modules = [node.module]
        else:
            modules = []
        for module in modules:
            import_id = f"import:{module}"
            add_node(import_id, "import", module)
            add_edge(file_node, import_id, "imports")

        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            fn_id = f"function:{file_path}:{node.name}"
            add_node(fn_id, "function", node.name)
            add_edge(file_node, fn_id, "contains")

            # ast.walk also descends into nested functions, so their calls
            # are attributed to the enclosing function as well.
            for child in ast.walk(node):
                if not isinstance(child, ast.Call):
                    continue
                callee = _get_call_name(child)
                if not callee:
                    continue
                call_id = f"import:{callee}"
                add_node(call_id, "import", callee)
                add_edge(fn_id, call_id, "calls")

    return graph
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def merge_graphs(graphs: list[GraphData]) -> GraphData:
    """Combine several graphs into one, keeping first occurrences only.

    Nodes are de-duplicated by ``node_id`` and edges by their
    ``(source, target, relation)`` triple; input order is preserved.
    """
    combined = GraphData()
    known_ids: set[str] = set()
    known_links: set[tuple[str, str, str]] = set()

    for part in graphs:
        for item in part.nodes:
            if item.node_id not in known_ids:
                combined.nodes.append(item)
                known_ids.add(item.node_id)

        for link in part.edges:
            signature = (link.source, link.target, link.relation)
            if signature not in known_links:
                combined.edges.append(link)
                known_links.add(signature)

    return combined
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _file_node_id(path: Path) -> str:
    """Return the graph node id used for the file at *path*."""
    return "file:" + str(path)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _get_call_name(node: ast.Call) -> str:
    """Return the dotted name of the callable invoked by *node*.

    A bare name gives ``"name"`` and an attribute chain rooted at a name
    gives ``"a.b.c"``. When the chain is rooted at something else (a call, a
    subscript, ...) only the attribute parts are returned. Any other callee
    expression gives ``""``.
    """
    target = node.func
    if isinstance(target, ast.Name):
        return target.id
    if not isinstance(target, ast.Attribute):
        return ""

    # Walk inwards from the outermost attribute, prepending as we go so the
    # parts end up in source order.
    chain: list[str] = []
    while isinstance(target, ast.Attribute):
        chain.insert(0, target.attr)
        target = target.value
    if isinstance(target, ast.Name):
        chain.insert(0, target.id)
    return ".".join(chain)
|
codexray/limits.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
|
|
5
|
+
from .config import AnalyserConfig
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AnalyserLimitError(ValueError):
    """Signal that an input went beyond one of the configured safety limits.

    It derives from ``ValueError``, so handlers written for ``ValueError``
    catch it as well.
    """
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def enforce_text_limit(text: str, max_chars: int, context: str) -> None:
    """Reject *text* when it is longer than *max_chars* characters.

    Raises:
        AnalyserLimitError: If the limit is exceeded; *context* names the
            offending input in the message.
    """
    if len(text) <= max_chars:
        return
    raise AnalyserLimitError(f"{context} exceeds {max_chars} characters")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def enforce_bytes_limit(size_bytes: int, max_bytes: int, context: str) -> None:
    """Reject a payload whose size in bytes is greater than *max_bytes*.

    Raises:
        AnalyserLimitError: If the limit is exceeded; *context* names the
            offending input in the message.
    """
    if size_bytes <= max_bytes:
        return
    raise AnalyserLimitError(f"{context} exceeds {max_bytes} bytes")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def enforce_ast_limit(tree: ast.AST, config: AnalyserConfig) -> None:
    """Reject *tree* when it contains more nodes than ``config.max_ast_nodes``.

    Raises:
        AnalyserLimitError: If the node count is over the configured limit.
    """
    total = 0
    for _ in ast.walk(tree):
        total += 1
    if total <= config.max_ast_nodes:
        return
    raise AnalyserLimitError(f"AST exceeds configured node limit ({config.max_ast_nodes})")
|
codexray/models.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
# Closed set of severity labels a finding may carry, from least to most severe.
Severity = Literal["low", "medium", "high", "critical"]
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(slots=True)
class Finding:
    """One issue reported by a rule."""

    # Identifier of the rule that produced the finding, e.g. "SEC001".
    rule_id: str
    # Short human-readable headline.
    title: str
    # One of the labels allowed by ``Severity``.
    severity: Severity
    # Longer explanation of the issue.
    message: str
    # Path of the file the finding refers to, as a string.
    file_path: str
    # Source position; None when the rule reports no location.
    line: int | None = None
    column: int | None = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(slots=True)
|
|
19
|
+
class GraphNode:
|
|
20
|
+
node_id: str
|
|
21
|
+
kind: Literal["file", "import", "function"]
|
|
22
|
+
label: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(slots=True)
|
|
26
|
+
class GraphEdge:
|
|
27
|
+
source: str
|
|
28
|
+
target: str
|
|
29
|
+
relation: Literal["imports", "calls", "contains"]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(slots=True)
class GraphData:
    """Container for the nodes and edges of a graph."""

    # Each instance gets its own fresh lists (never shared between instances).
    nodes: list[GraphNode] = field(default_factory=list)
    edges: list[GraphEdge] = field(default_factory=list)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(slots=True)
class AnalysisResult:
    """Everything produced by one analysis run."""

    # Issues reported by the rules.
    findings: list[Finding] = field(default_factory=list)
    # Graph built from the analysed sources.
    graph: GraphData = field(default_factory=GraphData)
    # Run information keyed by name (string, integer or boolean values).
    metadata: dict[str, str | int | bool] = field(default_factory=dict)
|
codexray/offline.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from .config import AnalyserConfig
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class NetworkAccessError(RuntimeError):
    """Signal that a feature needing the network was requested while offline."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def require_network(config: AnalyserConfig) -> None:
    """Guard for network-dependent features.

    Raises:
        NetworkAccessError: If ``config.allow_network`` is false.
    """
    if config.allow_network:
        return
    raise NetworkAccessError(
        "Network features are disabled. Set allow_network=True to enable outbound calls."
    )
|
codexray/parsers.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import json
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from .config import AnalyserConfig
|
|
9
|
+
from .limits import (
|
|
10
|
+
AnalyserLimitError,
|
|
11
|
+
enforce_ast_limit,
|
|
12
|
+
enforce_bytes_limit,
|
|
13
|
+
enforce_text_limit,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Lower-case file suffixes that directory discovery yields.
SUPPORTED_SUFFIXES = {".py", ".ipynb", ".txt", ".in", ".cfg"}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def read_file_text(path: Path, config: AnalyserConfig) -> str:
    """Read *path* as strict UTF-8 text after checking its size on disk.

    Raises:
        AnalyserLimitError: If the file is larger than
            ``config.max_file_size_bytes``.
    """
    enforce_bytes_limit(path.stat().st_size, config.max_file_size_bytes, f"File {path}")
    with path.open(mode="r", encoding="utf-8", errors="strict") as handle:
        return handle.read()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_python_source(source: str, config: AnalyserConfig) -> ast.AST:
    """Parse Python source into an AST while enforcing the configured limits.

    Args:
        source: Python source text.
        config: Supplies ``max_snippet_chars`` (checked before parsing) and
            ``max_ast_nodes`` (checked after parsing).

    Returns:
        The parsed module AST.

    Raises:
        AnalyserLimitError: If the text is too long, the AST has too many
            nodes, or the source is nested too deeply for the parser.
        SyntaxError: If the source is not valid Python.
    """
    enforce_text_limit(source, config.max_snippet_chars, "Python source")
    try:
        tree = ast.parse(source)
    except (RecursionError, MemoryError) as exc:
        # Pathologically nested input can overflow the parser's stack.
        # Report it as a limit violation so callers that already handle
        # AnalyserLimitError degrade gracefully instead of crashing.
        raise AnalyserLimitError("Python source is nested too deeply to parse") from exc
    enforce_ast_limit(tree, config)
    return tree
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def parse_notebook(path: Path, config: AnalyserConfig) -> str:
    """Extract the code cells of a notebook as one Python source string.

    Non-empty code cells are joined with a blank line between them;
    non-code cells are ignored.

    Args:
        path: Notebook file to read (UTF-8 JSON).
        config: Supplies the notebook size, cell-count and text-length limits.

    Returns:
        The concatenated source of all non-empty code cells.

    Raises:
        AnalyserLimitError: If a limit is exceeded, the JSON is invalid, or
            the document does not have the expected object/list structure.
    """
    size = path.stat().st_size
    enforce_bytes_limit(size, config.max_notebook_json_bytes, f"Notebook {path}")
    raw = path.read_text(encoding="utf-8")
    try:
        notebook = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise AnalyserLimitError(f"Notebook JSON is invalid: {exc}") from exc
    except RecursionError as exc:
        raise AnalyserLimitError("Notebook JSON is nested too deeply") from exc

    # Valid JSON is not necessarily an object. Without this check a list or
    # scalar document would fail with AttributeError on .get() below.
    if not isinstance(notebook, dict):
        raise AnalyserLimitError("Notebook top-level JSON value is not an object")

    cells = notebook.get("cells", [])
    if not isinstance(cells, list):
        raise AnalyserLimitError("Notebook cells payload is invalid")
    if len(cells) > config.max_notebook_cells:
        raise AnalyserLimitError(
            f"Notebook has {len(cells)} cells, over max {config.max_notebook_cells}"
        )

    lines: list[str] = []
    for cell in cells:
        if not isinstance(cell, dict):
            raise AnalyserLimitError("Notebook cell payload is invalid")
        if cell.get("cell_type") != "code":
            continue
        # A cell's source is either a list of fragments or a single value.
        source = cell.get("source", [])
        if isinstance(source, list):
            joined = "".join(str(part) for part in source)
        else:
            joined = str(source)
        if joined.strip():
            lines.append(joined)
    source = "\n\n".join(lines)
    enforce_text_limit(source, config.max_snippet_chars, "Notebook extracted code")
    return source
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def parse_requirements_text(text: str) -> list[str]:
    """Return the requirement specifiers listed in a requirements file.

    Blank lines, comment lines and pip option lines are dropped. An option
    line is any line starting with ``-``, which covers ``-r``, ``-c`` and
    ``-e`` as well as the long ``--option`` forms. Inline comments (``#``
    at the start of a line or preceded by whitespace) are removed, so a
    ``#fragment`` inside a URL is left intact.

    Args:
        text: Full content of the requirements file.

    Returns:
        The remaining requirement lines, stripped, in file order.
    """
    import re

    comment = re.compile(r"(^|\s+)#.*$")
    dependencies: list[str] = []
    for raw_line in text.splitlines():
        line = comment.sub("", raw_line).strip()
        # Option lines are instructions to pip, not dependencies. Passing
        # them on would get them reported as unpinned packages.
        if not line or line.startswith("-"):
            continue
        dependencies.append(line)
    return dependencies
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def select_line_range(source: str, start_line: int, end_line: int) -> str:
    """Return lines ``start_line``..``end_line`` (1-based, inclusive) of *source*.

    The selected lines are joined with ``"\\n"`` and carry no trailing newline.

    Raises:
        ValueError: If the range is empty or starts before line 1, or if it
            ends beyond the last line of *source*.
    """
    if not (1 <= start_line <= end_line):
        raise ValueError("Invalid line range")
    all_lines = source.splitlines()
    if len(all_lines) < end_line:
        raise ValueError("Requested line range exceeds file length")
    selected = all_lines[start_line - 1 : end_line]
    return "\n".join(selected)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def discover_input_files(root: Path) -> Iterable[Path]:
    """Yield every supported file beneath *root*, skipping tooling directories.

    A file is skipped when any path component *below* ``root`` starts with
    a dot or is one of ``venv``, ``.venv``, ``__pycache__`` or
    ``node_modules``. Only files whose lower-cased suffix is in
    ``SUPPORTED_SUFFIXES`` are yielded.

    Args:
        root: Directory to search recursively.

    Yields:
        Paths of the matching files, as produced by ``root.rglob``.
    """
    skipped_dirs = {"venv", ".venv", "__pycache__", "node_modules"}
    for path in root.rglob("*"):
        if not path.is_file():
            continue
        # Judge only the part of the path below root. Otherwise a root such
        # as "../project" or "/home/user/.cache/project" would make every
        # file look hidden and the scan would silently find nothing.
        relative_parts = path.relative_to(root).parts
        if any(part.startswith(".") or part in skipped_dirs for part in relative_parts):
            continue
        if path.suffix.lower() in SUPPORTED_SUFFIXES:
            yield path
|
codexray/rules.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .models import Finding
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(slots=True)
|
|
11
|
+
class RuleContext:
|
|
12
|
+
file_path: Path
|
|
13
|
+
source: str
|
|
14
|
+
tree: ast.AST
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class _ShellCallVisitor(ast.NodeVisitor):
    """Collect the positions of calls that launch external commands.

    After ``visit(tree)``, ``hits`` holds one ``(lineno, col_offset)`` pair
    per matching call, in visiting order.
    """

    # Dotted call names treated as command execution. The check_* and
    # get*output helpers and os.popen start a process just like
    # subprocess.call/run, so they are matched too.
    _COMMAND_CALLS = frozenset(
        {
            "os.system",
            "os.popen",
            "subprocess.call",
            "subprocess.run",
            "subprocess.Popen",
            "subprocess.check_call",
            "subprocess.check_output",
            "subprocess.getoutput",
            "subprocess.getstatusoutput",
        }
    )

    def __init__(self) -> None:
        self.hits: list[tuple[int, int]] = []

    def visit_Call(self, node: ast.Call) -> None:
        if _get_call_name(node) in self._COMMAND_CALLS:
            self.hits.append((node.lineno, node.col_offset))
        # Keep descending so calls nested in arguments are inspected too.
        self.generic_visit(node)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class _DangerousBuiltinsVisitor(ast.NodeVisitor):
    """Collect calls to ``eval`` and ``exec``.

    After ``visit(tree)``, ``hits`` holds one
    ``(name, lineno, col_offset)`` triple per matching call.
    """

    def __init__(self) -> None:
        self.hits: list[tuple[str, int, int]] = []

    def visit_Call(self, node: ast.Call) -> None:
        called = _get_call_name(node)
        if called == "eval" or called == "exec":
            position = (called, node.lineno, node.col_offset)
            self.hits.append(position)
        # Continue into the arguments so nested calls are seen as well.
        self.generic_visit(node)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _get_call_name(node: ast.Call) -> str:
    """Return the dotted name of the function called by *node*.

    ``f()`` gives ``"f"`` and ``a.b.c()`` gives ``"a.b.c"``. If the
    attribute chain does not start at a plain name, only the attribute names
    are joined. A callee that is neither a name nor an attribute gives ``""``.
    """
    callee = node.func
    if isinstance(callee, ast.Name):
        return callee.id
    if isinstance(callee, ast.Attribute):
        names: list[str] = []
        cursor: ast.AST = callee
        # Attributes are met outermost-first; collect, then flip the order.
        while isinstance(cursor, ast.Attribute):
            names.append(cursor.attr)
            cursor = cursor.value
        if isinstance(cursor, ast.Name):
            names.append(cursor.id)
        names.reverse()
        return ".".join(names)
    return ""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def run_rules(ctx: RuleContext) -> list[Finding]:
    """Run every source rule against one parsed file.

    Findings are returned in rule order: SEC001 (command execution calls),
    SEC002 (``eval``/``exec`` calls), then at most one SEC003 and one QLT001.

    Args:
        ctx: File path, source text and AST of the file under analysis.

    Returns:
        The list of findings; empty when nothing matched.
    """
    reported_path = str(ctx.file_path)

    shell_calls = _ShellCallVisitor()
    shell_calls.visit(ctx.tree)
    findings: list[Finding] = [
        Finding(
            rule_id="SEC001",
            title="Shell command execution",
            severity="high",
            message="Shell execution can be dangerous with untrusted input.",
            file_path=reported_path,
            line=line,
            column=col,
        )
        for line, col in shell_calls.hits
    ]

    risky_builtins = _DangerousBuiltinsVisitor()
    risky_builtins.visit(ctx.tree)
    findings.extend(
        Finding(
            rule_id="SEC002",
            title=f"Dangerous builtin {func_name}",
            severity="critical",
            message=f"Avoid {func_name} on untrusted content.",
            file_path=reported_path,
            line=line,
            column=col,
        )
        for func_name, line, col in risky_builtins.hits
    )

    # The two checks below are plain substring searches over the raw text:
    # they fire once per file, carry no position, and also match inside
    # comments and string literals.
    text = ctx.source
    if "verify=False" in text:
        findings.append(
            Finding(
                rule_id="SEC003",
                title="TLS verification disabled",
                severity="high",
                message="requests verify=False weakens TLS protection.",
                file_path=reported_path,
            )
        )
    if "TODO" in text:
        findings.append(
            Finding(
                rule_id="QLT001",
                title="Unresolved TODO",
                severity="low",
                message="TODO comments should be tracked before release.",
                file_path=reported_path,
            )
        )

    return findings
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def run_requirements_rules(requirements: list[str], file_path: Path) -> list[Finding]:
    """Report every requirement that is not pinned with ``==``.

    Pinned dependencies are preferred for reproducibility and easier CVE
    response.

    Args:
        requirements: Requirement lines taken from the file.
        file_path: Requirements file the lines came from.

    Returns:
        One ``DEP001`` finding per line that does not contain ``==``.
    """
    reported_path = str(file_path)
    return [
        Finding(
            rule_id="DEP001",
            title="Unpinned dependency",
            severity="medium",
            message=f"Dependency '{dep}' is not pinned with ==.",
            file_path=reported_path,
        )
        for dep in requirements
        if "==" not in dep
    ]
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codexray-analyser
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Offline-first static analyser for Python files and notebooks.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Merlins-Sanctum/codexray-analyser
|
|
6
|
+
Project-URL: Repository, https://github.com/Merlins-Sanctum/codexray-analyser
|
|
7
|
+
Project-URL: Issues, https://github.com/Merlins-Sanctum/codexray-analyser/issues
|
|
8
|
+
Author: Mohammad Hamad
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: notebook,offline,python,security,static-analysis
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Security
|
|
19
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# Codexray
|
|
24
|
+
|
|
25
|
+
Codexray is an offline-first Python static analyser for `.py` and `.ipynb` files.
|
|
26
|
+
It helps teams inspect security risks, code quality problems, and dependency patterns
|
|
27
|
+
without sending source code outside the local machine.
|
|
28
|
+
|
|
29
|
+
## What Codexray does
|
|
30
|
+
|
|
31
|
+
- Scans Python files, notebooks, and `requirements*` dependency files (`.txt`, `.in`, `.cfg`).
|
|
32
|
+
- Supports project-level scans, full file scans, and targeted line-range checks.
|
|
33
|
+
- Produces a structured JSON report.
|
|
34
|
+
- Builds a dependency graph view with nodes and edges.
|
|
35
|
+
- Flags common security risks such as shell execution and dangerous builtins.
|
|
36
|
+
|
|
37
|
+
## Privacy and security behavior
|
|
38
|
+
|
|
39
|
+
- No telemetry.
|
|
40
|
+
- No source upload.
|
|
41
|
+
- Network features are disabled by default.
|
|
42
|
+
- Strict input limits for file size, notebook size, snippet length, and AST node count.
|
|
43
|
+
|
|
44
|
+
This tool is designed for local analysis workflows where proprietary code must stay on the client system.
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install codexray-analyser
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Quick start with CLI
|
|
53
|
+
|
|
54
|
+
Analyse a folder:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
codexray ./my_project
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Analyse a single file:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
codexray ./my_project/app.py
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Analyse a code snippet:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
codexray --snippet "import os; os.system('whoami')"
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Analyse only specific lines from one file:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
codexray ./my_project/app.py --start-line 40 --end-line 80
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Save output to JSON:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
codexray ./my_project --output codexray-report.json
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Python API usage
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from codexray import analyse_file_snippet, analyse_path, analyse_snippet
|
|
88
|
+
|
|
89
|
+
project_result = analyse_path("./my_project")
|
|
90
|
+
snippet_result = analyse_snippet("import os\nos.system('whoami')")
|
|
91
|
+
range_result = analyse_file_snippet("./my_project/app.py", 20, 50)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Understanding the report
|
|
95
|
+
|
|
96
|
+
Each result returns:
|
|
97
|
+
|
|
98
|
+
- `findings`: list of detected issues.
|
|
99
|
+
- `graph`: nodes and edges representing imports, files, and function relationships.
|
|
100
|
+
- `metadata`: run information such as analysed path and offline mode state.
|
|
101
|
+
|
|
102
|
+
Example finding shape:
|
|
103
|
+
|
|
104
|
+
```json
|
|
105
|
+
{
|
|
106
|
+
"rule_id": "SEC002",
|
|
107
|
+
"title": "Dangerous builtin eval",
|
|
108
|
+
"severity": "critical",
|
|
109
|
+
"message": "Avoid eval on untrusted content.",
|
|
110
|
+
"file_path": "src/app.py",
|
|
111
|
+
"line": 18,
|
|
112
|
+
"column": 4
|
|
113
|
+
}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Reading graph output
|
|
117
|
+
|
|
118
|
+
Graph output contains:
|
|
119
|
+
|
|
120
|
+
- `nodes`: entities such as files, imports, and functions
|
|
121
|
+
- `edges`: relationships such as `imports`, `contains`, and `calls`
|
|
122
|
+
|
|
123
|
+
Typical use:
|
|
124
|
+
|
|
125
|
+
1. Run Codexray and save JSON output.
|
|
126
|
+
2. Load `graph.nodes` and `graph.edges` into your graph viewer.
|
|
127
|
+
3. Track dependency hotspots and risky call paths.
|
|
128
|
+
|
|
129
|
+
## How to use findings to make code changes
|
|
130
|
+
|
|
131
|
+
Recommended workflow:
|
|
132
|
+
|
|
133
|
+
1. Sort findings by `severity`.
|
|
134
|
+
2. Fix `critical` and `high` findings first.
|
|
135
|
+
3. Re-run Codexray after each fix batch.
|
|
136
|
+
4. Keep evidence by committing report diffs in your internal workflow.
|
|
137
|
+
|
|
138
|
+
Examples:
|
|
139
|
+
|
|
140
|
+
- `SEC001` shell execution:
|
|
141
|
+
- Replace dynamic shell calls with safe Python APIs.
|
|
142
|
+
- Avoid passing untrusted input to command execution.
|
|
143
|
+
- `SEC002` dangerous builtin:
|
|
144
|
+
- Replace `eval` or `exec` with safe parsing and strict allow-lists.
|
|
145
|
+
- `DEP001` unpinned dependency:
|
|
146
|
+
- Pin versions in requirements files with `==` where practical.
|
|
147
|
+
|
|
148
|
+
## Troubleshooting
|
|
149
|
+
|
|
150
|
+
- `File parsing failed`:
|
|
151
|
+
- Check syntax errors or unsupported file encoding.
|
|
152
|
+
- `exceeds ... bytes/chars`:
|
|
153
|
+
- Increase limits in config for controlled internal usage.
|
|
154
|
+
- Empty findings:
|
|
155
|
+
- Confirm the target path includes `.py` or `.ipynb` sources.
|
|
156
|
+
|
|
157
|
+
## Local development
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
python -m pip install -e .
|
|
161
|
+
python -m pip install pytest ruff bandit pip-audit build twine
|
|
162
|
+
python -m ruff check .
|
|
163
|
+
python -m pytest
|
|
164
|
+
python -m bandit -q -r src
|
|
165
|
+
python -m pip_audit
|
|
166
|
+
python -m build
|
|
167
|
+
python -m twine check dist/*
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Contributing
|
|
171
|
+
|
|
172
|
+
Read `CONTRIBUTING.md` before opening a pull request.
|
|
173
|
+
Security reports should follow `SECURITY.md`.
|
|
174
|
+
|
|
175
|
+
## License
|
|
176
|
+
|
|
177
|
+
MIT. See `LICENSE`.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
codexray/__init__.py,sha256=iaaRmrDgxz3PcnF4VymgisoUI7zVwMwAWECxXdpeK0s,366
|
|
2
|
+
codexray/analyser.py,sha256=cgMiBpcxOqPIBHYSmGWstkELU64LfjZ4HEnbofTdXrA,3866
|
|
3
|
+
codexray/cli.py,sha256=29_B-ecLTnqVy1SIPI0wKszhOfuIZzoMJm6rv8wsW_w,2255
|
|
4
|
+
codexray/config.py,sha256=q0aKeM8a0kzISuAj0uIGA7O97ExQCk2ScaYc7aKm5v8,368
|
|
5
|
+
codexray/graph.py,sha256=yRiUFTrlk6fucD7z0d3SSM5Hd0CqPRjuYXhM7Hfv6NE,3567
|
|
6
|
+
codexray/limits.py,sha256=ReJpYF2pOzn3W0G6s_gQg3nWzpfrPYyaeNEJbJ8rJVM,817
|
|
7
|
+
codexray/models.py,sha256=S2btOixsxzUsBM_Nyo9D1zRZgH0HdeZBJrFqpwKIApA,953
|
|
8
|
+
codexray/offline.py,sha256=xUonP9SvDIF1ycPPZ0ZdeoeYNrHsHftVtHM1cbROoZU,382
|
|
9
|
+
codexray/parsers.py,sha256=TWfrnZ3FWUPi3dZMVs_Ofgl_LnMCi8CHHC60abZ9s_8,3112
|
|
10
|
+
codexray/rules.py,sha256=fOVEvULv_SitfAWiuQof2QgKzzaHeG-zwOvcpV1g8p8,3884
|
|
11
|
+
codexray_analyser-0.1.0.dist-info/METADATA,sha256=ZKV18qou99Yy8j7o3WivzGqvXeMwm3D9MwrDekcStR4,4686
|
|
12
|
+
codexray_analyser-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
13
|
+
codexray_analyser-0.1.0.dist-info/entry_points.txt,sha256=r9LzXd27RixDqAQCQzlViYrslbdW-_--N0SGQ0yMnXE,47
|
|
14
|
+
codexray_analyser-0.1.0.dist-info/licenses/LICENSE,sha256=Xrr6RxAF66F3QmjiZOcEX8erMms2GYH6XVgYenOhV48,1071
|
|
15
|
+
codexray_analyser-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mohammad Hamad
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|