codexray-analyser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codexray/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """Public package API for codexray."""
2
+
3
+ from .analyser import analyse_file_snippet, analyse_path, analyse_snippet
4
+ from .config import AnalyserConfig
5
+ from .models import AnalysisResult, Finding, GraphData
6
+
7
+ __all__ = [
8
+ "AnalyserConfig",
9
+ "AnalysisResult",
10
+ "Finding",
11
+ "GraphData",
12
+ "analyse_path",
13
+ "analyse_snippet",
14
+ "analyse_file_snippet",
15
+ ]
codexray/analyser.py ADDED
@@ -0,0 +1,125 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from .config import AnalyserConfig
6
+ from .graph import build_graph, merge_graphs
7
+ from .limits import AnalyserLimitError
8
+ from .models import AnalysisResult, Finding
9
+ from .parsers import (
10
+ discover_input_files,
11
+ parse_notebook,
12
+ parse_python_source,
13
+ parse_requirements_text,
14
+ read_file_text,
15
+ select_line_range,
16
+ )
17
+ from .rules import RuleContext, run_requirements_rules, run_rules
18
+
19
+
20
def analyse_path(path: str | Path, config: AnalyserConfig | None = None) -> AnalysisResult:
    """Analyse one file, or every discovered input file beneath a directory.

    Requirements files are checked with the requirements rules; ``.py`` and
    ``.ipynb`` files are parsed, checked with the code rules and added to the
    graph. Any other discovered file is ignored. A file that cannot be read
    or parsed yields a ``PARSE001`` finding instead of aborting the run.

    Args:
        path: File or directory to analyse.
        config: Limits and feature flags; defaults to ``AnalyserConfig()``.

    Returns:
        An ``AnalysisResult`` with findings sorted by file path, rule id and
        line, the merged per-file graph, and run metadata.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    cfg = config or AnalyserConfig()
    root = Path(path)
    if not root.exists():
        raise FileNotFoundError(f"Path does not exist: {root}")

    files = [root] if root.is_file() else list(discover_input_files(root))

    all_findings: list[Finding] = []
    graphs = []

    for file_path in files:
        if _is_requirements_file(file_path):
            try:
                source = read_file_text(file_path, cfg)
                requirements = parse_requirements_text(source)
                all_findings.extend(run_requirements_rules(requirements, file_path))
            except (ValueError, OSError) as exc:
                all_findings.append(_parsing_finding(file_path, str(exc)))
            continue

        if file_path.suffix.lower() not in {".py", ".ipynb"}:
            continue

        try:
            source = _load_source(file_path, cfg)
            tree = parse_python_source(source, cfg)
            all_findings.extend(
                run_rules(RuleContext(file_path=file_path, source=source, tree=tree))
            )
            graphs.append(build_graph(file_path, tree))
        except (
            AnalyserLimitError,
            SyntaxError,
            OSError,
            ValueError,
            # Deeply nested input can exhaust the recursion limit while
            # parsing or while the recursive rule visitors walk the tree;
            # report it for this file rather than crashing the whole scan.
            RecursionError,
        ) as exc:
            all_findings.append(_parsing_finding(file_path, str(exc)))

    return AnalysisResult(
        findings=sorted(
            all_findings, key=lambda item: (item.file_path, item.rule_id, item.line or 0)
        ),
        graph=merge_graphs(graphs),
        metadata={
            "analysed_path": str(root),
            "offline_mode": not cfg.allow_network,
            "files_analysed": len(files),
        },
    )
69
+
70
+
71
def analyse_snippet(
    source: str,
    file_path: str = "snippet.py",
    config: AnalyserConfig | None = None,
) -> AnalysisResult:
    """Analyse Python source held in memory.

    ``file_path`` is passed on as the label for findings and graph nodes;
    this function does not read it from disk. Parsing errors and limit
    violations propagate to the caller.
    """
    settings = AnalyserConfig() if not config else config
    parsed = parse_python_source(source, settings)
    label = Path(file_path)
    context = RuleContext(file_path=label, source=source, tree=parsed)
    issues = run_rules(context)
    snippet_graph = build_graph(label, parsed)
    run_info = {"snippet": True, "offline_mode": not settings.allow_network}
    return AnalysisResult(findings=issues, graph=snippet_graph, metadata=run_info)
86
+
87
+
88
def analyse_file_snippet(
    path: str | Path,
    start_line: int,
    end_line: int,
    config: AnalyserConfig | None = None,
) -> AnalysisResult:
    """Analyse only lines ``start_line`` to ``end_line`` (1-based, inclusive) of a file.

    The file is read under the configured size limit, the requested lines
    are cut out, and the excerpt is analysed as a snippet labelled with the
    file's path.
    """
    settings = AnalyserConfig() if not config else config
    file_on_disk = Path(path)
    full_text = read_file_text(file_on_disk, settings)
    excerpt = select_line_range(full_text, start_line, end_line)
    return analyse_snippet(excerpt, file_path=str(file_on_disk), config=settings)
99
+
100
+
101
+ def _load_source(path: Path, config: AnalyserConfig) -> str:
102
+ if path.suffix.lower() == ".ipynb":
103
+ return parse_notebook(path, config)
104
+ if path.suffix.lower() == ".py":
105
+ return read_file_text(path, config)
106
+ raise ValueError(f"Unsupported source type: {path.suffix}")
107
+
108
+
109
+ def _is_requirements_file(path: Path) -> bool:
110
+ lower_name = path.name.lower()
111
+ return lower_name.startswith("requirements") and path.suffix.lower() in {
112
+ ".txt",
113
+ ".in",
114
+ ".cfg",
115
+ }
116
+
117
+
118
def _parsing_finding(path: Path, error_message: str) -> Finding:
    """Wrap a read or parse failure for ``path`` in a medium-severity PARSE001 finding."""
    details = {
        "rule_id": "PARSE001",
        "title": "File parsing failed",
        "severity": "medium",
        "message": error_message,
        "file_path": str(path),
    }
    return Finding(**details)
codexray/cli.py ADDED
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from dataclasses import asdict
6
+ from pathlib import Path
7
+
8
+ from .analyser import analyse_file_snippet, analyse_path, analyse_snippet
9
+ from .config import AnalyserConfig
10
+
11
+
12
def main() -> int:
    """Run the ``codexray`` command-line interface.

    Parses the command line, runs snippet, line-range or path analysis, and
    writes the result as indented JSON to ``--output`` or to stdout.

    Returns:
        ``0`` on success, ``1`` when the analysis or the output write fails
        with an expected error (missing path, limit exceeded, syntax error
        in a snippet, invalid line range, I/O failure). Usage errors exit
        through ``argparse`` as usual.
    """
    # Local import: the module's import block does not bring in ``sys``.
    import sys

    parser = argparse.ArgumentParser(
        prog="codexray",
        description="Offline-first static analysis for Python files and notebooks.",
    )
    parser.add_argument(
        "target", nargs="?", help="Path to file or directory to analyse"
    )
    parser.add_argument("--snippet", help="Inline Python snippet to analyse")
    parser.add_argument(
        "--start-line", type=int, help="Start line for target file snippet analysis"
    )
    parser.add_argument(
        "--end-line", type=int, help="End line for target file snippet analysis"
    )
    parser.add_argument("--output", help="Write JSON result to this path")
    parser.add_argument(
        "--allow-network", action="store_true", help="Enable network-dependent features"
    )
    parser.add_argument("--max-file-bytes", type=int, default=2 * 1024 * 1024)
    parser.add_argument("--max-snippet-chars", type=int, default=50_000)

    args = parser.parse_args()

    if not args.target and not args.snippet:
        parser.error("Provide a target path or use --snippet")

    # A line range only makes sense as a pair. Previously a lone
    # --start-line or --end-line was silently ignored and the whole target
    # was analysed instead.
    if (args.start_line is None) != (args.end_line is None):
        parser.error("--start-line and --end-line must be used together")

    config = AnalyserConfig(
        allow_network=args.allow_network,
        max_file_size_bytes=args.max_file_bytes,
        max_snippet_chars=args.max_snippet_chars,
    )

    try:
        if args.snippet:
            result = analyse_snippet(args.snippet, config=config)
        elif args.start_line is not None:
            # A target is guaranteed here: no snippet was given and the
            # "target or snippet" check above has already passed.
            result = analyse_file_snippet(
                Path(args.target),
                start_line=args.start_line,
                end_line=args.end_line,
                config=config,
            )
        else:
            result = analyse_path(Path(args.target), config=config)

        rendered = json.dumps(asdict(result), indent=2)

        if args.output:
            Path(args.output).write_text(rendered, encoding="utf-8")
        else:
            print(rendered)
    except (OSError, SyntaxError, ValueError) as exc:
        # Expected input problems get a one-line message, not a traceback.
        print(f"codexray: error: {exc}", file=sys.stderr)
        return 1

    return 0
68
+
69
+
70
+ if __name__ == "__main__":
71
+ raise SystemExit(main())
codexray/config.py ADDED
@@ -0,0 +1,13 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
+ @dataclass(slots=True)
5
+ class AnalyserConfig:
6
+ """Runtime safety controls and feature flags."""
7
+
8
+ allow_network: bool = False
9
+ max_file_size_bytes: int = 2 * 1024 * 1024
10
+ max_notebook_cells: int = 500
11
+ max_notebook_json_bytes: int = 5 * 1024 * 1024
12
+ max_ast_nodes: int = 200_000
13
+ max_snippet_chars: int = 50_000
codexray/graph.py ADDED
@@ -0,0 +1,101 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from pathlib import Path
5
+
6
+ from .models import GraphData, GraphEdge, GraphNode
7
+
8
+
9
def build_graph(file_path: Path, tree: ast.AST) -> GraphData:
    """Build the import/function/call graph for one parsed file.

    The graph holds one ``file`` node, one ``import`` node per imported
    module, one ``function`` node per (sync or async) function name, and one
    node per distinct call target reached from inside a function. Node ids
    and (source, target, relation) edges are unique within the result.

    Args:
        file_path: Path used for the file node and as part of function ids.
        tree: Parsed module to walk.

    Returns:
        The populated ``GraphData``.
    """
    graph = GraphData()
    file_node = _file_node_id(file_path)
    graph.nodes.append(GraphNode(node_id=file_node, kind="file", label=str(file_path)))

    # One shared registry for every node kind: call targets use the same
    # "import:" id prefix as imports, so separate registries allowed the
    # same id to be appended twice.
    seen_nodes: set[str] = {file_node}
    seen_edges: set[tuple[str, str, str]] = set()

    def add_node(node_id: str, kind: str, label: str) -> None:
        if node_id not in seen_nodes:
            seen_nodes.add(node_id)
            graph.nodes.append(GraphNode(node_id=node_id, kind=kind, label=label))

    def add_edge(source: str, target: str, relation: str) -> None:
        key = (source, target, relation)
        if key not in seen_edges:
            seen_edges.add(key)
            graph.edges.append(GraphEdge(source=source, target=target, relation=relation))

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                import_id = f"import:{alias.name}"
                add_node(import_id, "import", alias.name)
                add_edge(file_node, import_id, "imports")

        elif isinstance(node, ast.ImportFrom):
            # ``from . import x`` has no module name and is skipped.
            if node.module:
                import_id = f"import:{node.module}"
                add_node(import_id, "import", node.module)
                add_edge(file_node, import_id, "imports")

        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # ``async def`` is a separate AST node type and was previously
            # left out of the graph entirely.
            fn_id = f"function:{file_path}:{node.name}"
            add_node(fn_id, "function", node.name)
            add_edge(file_node, fn_id, "contains")

            # ast.walk also descends into nested functions, so their calls
            # are attributed to the enclosing function as well.
            for child in ast.walk(node):
                if not isinstance(child, ast.Call):
                    continue
                callee = _get_call_name(child)
                if not callee:
                    continue
                # NOTE(review): call targets are recorded as "import" nodes,
                # presumably because GraphNode.kind has no dedicated value.
                call_id = f"import:{callee}"
                add_node(call_id, "import", callee)
                add_edge(fn_id, call_id, "calls")

    return graph
61
+
62
+
63
def merge_graphs(graphs: list[GraphData]) -> GraphData:
    """Combine several graphs into one, keeping first occurrences only.

    Nodes are de-duplicated by ``node_id`` and edges by their
    ``(source, target, relation)`` triple; input order is preserved.
    """
    combined = GraphData()
    known_node_ids: set[str] = set()
    known_edge_keys: set[tuple[str, str, str]] = set()

    for part in graphs:
        for candidate in part.nodes:
            if candidate.node_id not in known_node_ids:
                known_node_ids.add(candidate.node_id)
                combined.nodes.append(candidate)

        for link in part.edges:
            triple = (link.source, link.target, link.relation)
            if triple not in known_edge_keys:
                known_edge_keys.add(triple)
                combined.edges.append(link)

    return combined
83
+
84
+
85
+ def _file_node_id(path: Path) -> str:
86
+ return f"file:{path}"
87
+
88
+
89
+ def _get_call_name(node: ast.Call) -> str:
90
+ if isinstance(node.func, ast.Name):
91
+ return node.func.id
92
+ if isinstance(node.func, ast.Attribute):
93
+ segments: list[str] = []
94
+ current: ast.AST = node.func
95
+ while isinstance(current, ast.Attribute):
96
+ segments.append(current.attr)
97
+ current = current.value
98
+ if isinstance(current, ast.Name):
99
+ segments.append(current.id)
100
+ return ".".join(reversed(segments))
101
+ return ""
codexray/limits.py ADDED
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+
5
+ from .config import AnalyserConfig
6
+
7
+
8
class AnalyserLimitError(ValueError):
    """Signals that input is larger than a configured safety limit.

    Subclasses ``ValueError`` so handlers for ``ValueError`` also catch it.
    """
10
+
11
+
12
def enforce_text_limit(text: str, max_chars: int, context: str) -> None:
    """Raise ``AnalyserLimitError`` when ``text`` has more than ``max_chars`` characters.

    ``context`` names the offending input in the error message.
    """
    within_limit = len(text) <= max_chars
    if within_limit:
        return
    raise AnalyserLimitError(f"{context} exceeds {max_chars} characters")
15
+
16
+
17
def enforce_bytes_limit(size_bytes: int, max_bytes: int, context: str) -> None:
    """Raise ``AnalyserLimitError`` when ``size_bytes`` is greater than ``max_bytes``.

    ``context`` names the offending input in the error message.
    """
    within_limit = size_bytes <= max_bytes
    if within_limit:
        return
    raise AnalyserLimitError(f"{context} exceeds {max_bytes} bytes")
20
+
21
+
22
def enforce_ast_limit(tree: ast.AST, config: AnalyserConfig) -> None:
    """Raise ``AnalyserLimitError`` when ``tree`` has more nodes than ``config.max_ast_nodes``."""
    total_nodes = 0
    for _ in ast.walk(tree):
        total_nodes += 1
    if total_nodes <= config.max_ast_nodes:
        return
    raise AnalyserLimitError(f"AST exceeds configured node limit ({config.max_ast_nodes})")
codexray/models.py ADDED
@@ -0,0 +1,42 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Literal
3
+
4
+ Severity = Literal["low", "medium", "high", "critical"]
5
+
6
+
7
+ @dataclass(slots=True)
8
+ class Finding:
9
+ rule_id: str
10
+ title: str
11
+ severity: Severity
12
+ message: str
13
+ file_path: str
14
+ line: int | None = None
15
+ column: int | None = None
16
+
17
+
18
+ @dataclass(slots=True)
19
+ class GraphNode:
20
+ node_id: str
21
+ kind: Literal["file", "import", "function"]
22
+ label: str
23
+
24
+
25
+ @dataclass(slots=True)
26
+ class GraphEdge:
27
+ source: str
28
+ target: str
29
+ relation: Literal["imports", "calls", "contains"]
30
+
31
+
32
+ @dataclass(slots=True)
33
+ class GraphData:
34
+ nodes: list[GraphNode] = field(default_factory=list)
35
+ edges: list[GraphEdge] = field(default_factory=list)
36
+
37
+
38
@dataclass(slots=True)
class AnalysisResult:
    """Everything an analysis run produces: findings, graph and run metadata."""

    # Each field defaults to a fresh empty container per instance.
    findings: list[Finding] = field(default_factory=list)
    graph: GraphData = field(default_factory=GraphData)
    # Free-form run information keyed by name.
    metadata: dict[str, str | int | bool] = field(default_factory=dict)
codexray/offline.py ADDED
@@ -0,0 +1,12 @@
1
+ from .config import AnalyserConfig
2
+
3
+
4
class NetworkAccessError(RuntimeError):
    """Signals that a feature needing the network was requested while offline."""
6
+
7
+
8
def require_network(config: AnalyserConfig) -> None:
    """Raise ``NetworkAccessError`` unless ``config.allow_network`` is set."""
    if config.allow_network:
        return
    raise NetworkAccessError(
        "Network features are disabled. Set allow_network=True to enable outbound calls."
    )
codexray/parsers.py ADDED
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import json
5
+ from collections.abc import Iterable
6
+ from pathlib import Path
7
+
8
+ from .config import AnalyserConfig
9
+ from .limits import (
10
+ AnalyserLimitError,
11
+ enforce_ast_limit,
12
+ enforce_bytes_limit,
13
+ enforce_text_limit,
14
+ )
15
+
16
+ SUPPORTED_SUFFIXES = {".py", ".ipynb", ".txt", ".in", ".cfg"}
17
+
18
+
19
def read_file_text(path: Path, config: AnalyserConfig) -> str:
    """Read ``path`` as strict UTF-8 after checking its size on disk.

    The size is compared with ``config.max_file_size_bytes`` before any
    content is read; undecodable bytes raise rather than being replaced.
    """
    enforce_bytes_limit(path.stat().st_size, config.max_file_size_bytes, f"File {path}")
    return path.read_text(encoding="utf-8", errors="strict")
23
+
24
+
25
def parse_python_source(source: str, config: AnalyserConfig) -> ast.AST:
    """Parse ``source`` into an AST, enforcing the length and node-count limits.

    The character limit is checked before parsing and the node limit after.
    """
    # NOTE(review): whole files go through this check too, so they are held
    # to max_snippet_chars, whose default is far below the default
    # max_file_size_bytes. Confirm that is intended.
    enforce_text_limit(source, config.max_snippet_chars, "Python source")
    module = ast.parse(source)
    enforce_ast_limit(module, config)
    return module
30
+
31
+
32
def parse_notebook(path: Path, config: AnalyserConfig) -> str:
    """Extract the code cells of a notebook as one Python source string.

    Non-empty code cells are joined with a blank line between them.

    Args:
        path: Location of the ``.ipynb`` file.
        config: Supplies the notebook size, cell-count and text limits.

    Returns:
        The concatenated source of all non-empty code cells.

    Raises:
        AnalyserLimitError: If the file is too large, is not valid JSON, does
            not have the expected object/list structure, has too many cells,
            or the extracted code exceeds the text limit.
    """
    size = path.stat().st_size
    enforce_bytes_limit(size, config.max_notebook_json_bytes, f"Notebook {path}")
    raw = path.read_text(encoding="utf-8")
    try:
        notebook = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise AnalyserLimitError(f"Notebook JSON is invalid: {exc}") from exc

    # Valid JSON need not be an object; without this check ``.get`` below
    # raises AttributeError, which callers of this module do not handle.
    if not isinstance(notebook, dict):
        raise AnalyserLimitError("Notebook root payload is invalid")

    cells = notebook.get("cells", [])
    if not isinstance(cells, list):
        raise AnalyserLimitError("Notebook cells payload is invalid")
    if len(cells) > config.max_notebook_cells:
        raise AnalyserLimitError(
            f"Notebook has {len(cells)} cells, over max {config.max_notebook_cells}"
        )

    code_cells: list[str] = []
    for cell in cells:
        if not isinstance(cell, dict):
            raise AnalyserLimitError("Notebook cells payload is invalid")
        if cell.get("cell_type") != "code":
            continue
        cell_source = cell.get("source", [])
        # "source" may be a list of line fragments or a single string.
        if isinstance(cell_source, list):
            joined = "".join(str(part) for part in cell_source)
        else:
            joined = str(cell_source)
        if joined.strip():
            code_cells.append(joined)

    source = "\n\n".join(code_cells)
    enforce_text_limit(source, config.max_snippet_chars, "Notebook extracted code")
    return source
62
+
63
+
64
def _strip_requirement_comment(line: str) -> str:
    """Drop a ``#`` comment that starts the line or follows whitespace."""
    for index, char in enumerate(line):
        # A "#" glued to other text (e.g. a URL fragment) is not a comment.
        if char == "#" and (index == 0 or line[index - 1].isspace()):
            return line[:index]
    return line


def parse_requirements_text(text: str) -> list[str]:
    """Return the dependency specifiers listed in requirements-file text.

    Blank lines and comments (whole-line or trailing) are dropped, as are
    option lines. Options are any line starting with ``-``, which covers
    both long options such as ``--index-url`` and short ones such as
    ``-r other.txt``, ``-c constraints.txt`` and ``-e .``; none of these is
    a version specifier.

    Args:
        text: Full content of a requirements file.

    Returns:
        The remaining lines, stripped, in file order.
    """
    dependencies: list[str] = []
    for raw_line in text.splitlines():
        line = _strip_requirement_comment(raw_line).strip()
        if not line or line.startswith("-"):
            continue
        dependencies.append(line)
    return dependencies
74
+
75
+
76
def select_line_range(source: str, start_line: int, end_line: int) -> str:
    """Return lines ``start_line`` to ``end_line`` (1-based, inclusive) of ``source``.

    The selected lines are joined with ``"\\n"`` and carry no trailing newline.

    Raises:
        ValueError: If the range is empty or reversed, starts below 1, or
            ends beyond the last line.
    """
    if not (start_line >= 1 and end_line >= start_line):
        raise ValueError("Invalid line range")
    all_lines = source.splitlines()
    if len(all_lines) < end_line:
        raise ValueError("Requested line range exceeds file length")
    wanted = all_lines[start_line - 1 : end_line]
    return "\n".join(wanted)
83
+
84
+
85
def discover_input_files(
    root: Path, suffixes: Iterable[str] | None = None
) -> Iterable[Path]:
    """Yield the analysable files found beneath ``root``.

    Files in hidden directories (or hidden themselves) and files under
    ``venv``, ``.venv``, ``__pycache__`` or ``node_modules`` are skipped.
    Only the path *below* ``root`` is inspected for these names, so a root
    that is itself reached through ``..`` or lives under a hidden or
    ``venv`` directory is still scanned.

    Args:
        root: Directory to search recursively.
        suffixes: File suffixes to accept (compared case-insensitively).
            Defaults to the module's ``SUPPORTED_SUFFIXES``.

    Yields:
        Paths of matching regular files, in ``rglob`` order.
    """
    allowed = SUPPORTED_SUFFIXES if suffixes is None else {s.lower() for s in suffixes}
    skipped_dirs = {"venv", ".venv", "__pycache__", "node_modules"}

    for path in root.rglob("*"):
        if not path.is_file():
            continue
        # Judge only the components beneath root; judging the full path
        # rejected every file when root contained "..", a dot-directory or
        # a directory called "venv".
        relative_parts = path.relative_to(root).parts
        if any(part.startswith(".") or part in skipped_dirs for part in relative_parts):
            continue
        if path.suffix.lower() in allowed:
            yield path
codexray/rules.py ADDED
@@ -0,0 +1,127 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ from .models import Finding
8
+
9
+
10
+ @dataclass(slots=True)
11
+ class RuleContext:
12
+ file_path: Path
13
+ source: str
14
+ tree: ast.AST
15
+
16
+
17
class _ShellCallVisitor(ast.NodeVisitor):
    """Record where ``os.system`` and ``subprocess.call/run/Popen`` are called."""

    def __init__(self) -> None:
        # (line, column) of every matching call, in visiting order.
        self.hits: list[tuple[int, int]] = []

    def visit_Call(self, node: ast.Call) -> None:
        is_command_call = _get_call_name(node) in {
            "os.system",
            "subprocess.call",
            "subprocess.run",
            "subprocess.Popen",
        }
        if is_command_call:
            position = (node.lineno, node.col_offset)
            self.hits.append(position)
        # Keep descending so calls nested in arguments are inspected too.
        self.generic_visit(node)
26
+
27
+
28
class _DangerousBuiltinsVisitor(ast.NodeVisitor):
    """Record where ``eval`` and ``exec`` are called."""

    def __init__(self) -> None:
        # (function name, line, column) of every matching call.
        self.hits: list[tuple[str, int, int]] = []

    def visit_Call(self, node: ast.Call) -> None:
        called = _get_call_name(node)
        if called == "eval" or called == "exec":
            record = (called, node.lineno, node.col_offset)
            self.hits.append(record)
        # Keep descending so calls nested in arguments are inspected too.
        self.generic_visit(node)
37
+
38
+
39
+ def _get_call_name(node: ast.Call) -> str:
40
+ func = node.func
41
+ if isinstance(func, ast.Name):
42
+ return func.id
43
+ if isinstance(func, ast.Attribute):
44
+ parts: list[str] = []
45
+ current: ast.AST = func
46
+ while isinstance(current, ast.Attribute):
47
+ parts.append(current.attr)
48
+ current = current.value
49
+ if isinstance(current, ast.Name):
50
+ parts.append(current.id)
51
+ return ".".join(reversed(parts))
52
+ return ""
53
+
54
+
55
def _find_disabled_tls_verification(tree: ast.AST) -> list[tuple[int, int]]:
    """Return sorted (line, column) positions where ``verify`` is set to ``False``.

    Covers ``verify=False`` passed as a keyword argument and assignments to
    a ``.verify`` attribute, independent of spacing around ``=``.
    """

    def is_false(value: ast.AST) -> bool:
        return isinstance(value, ast.Constant) and value.value is False

    hits: set[tuple[int, int]] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Call):
            for keyword in node.keywords:
                if keyword.arg == "verify" and is_false(keyword.value):
                    hits.add((keyword.lineno, keyword.col_offset))
        elif isinstance(node, ast.Assign) and is_false(node.value):
            if any(
                isinstance(target, ast.Attribute) and target.attr == "verify"
                for target in node.targets
            ):
                hits.add((node.lineno, node.col_offset))
    return sorted(hits)


def run_rules(ctx: RuleContext) -> list[Finding]:
    """Run every code rule against one parsed source unit.

    Rules, in reporting order:

    * ``SEC001``: ``os.system`` / ``subprocess`` calls (one finding per call).
    * ``SEC002``: ``eval`` / ``exec`` calls (one finding per call).
    * ``SEC003``: ``verify`` explicitly set to ``False`` (one finding per
      occurrence, located through the AST so comments and string contents
      are not matched and spacing does not matter).
    * ``QLT001``: the text ``TODO`` appears in the source (a single finding,
      located at the first line containing it).

    Args:
        ctx: File path, source text and AST to inspect.

    Returns:
        The findings produced, possibly empty.
    """
    findings: list[Finding] = []

    shell_visitor = _ShellCallVisitor()
    shell_visitor.visit(ctx.tree)
    for line, col in shell_visitor.hits:
        findings.append(
            Finding(
                rule_id="SEC001",
                title="Shell command execution",
                severity="high",
                message="Shell execution can be dangerous with untrusted input.",
                file_path=str(ctx.file_path),
                line=line,
                column=col,
            )
        )

    builtin_visitor = _DangerousBuiltinsVisitor()
    builtin_visitor.visit(ctx.tree)
    for func_name, line, col in builtin_visitor.hits:
        findings.append(
            Finding(
                rule_id="SEC002",
                title=f"Dangerous builtin {func_name}",
                severity="critical",
                message=f"Avoid {func_name} on untrusted content.",
                file_path=str(ctx.file_path),
                line=line,
                column=col,
            )
        )

    for line, col in _find_disabled_tls_verification(ctx.tree):
        findings.append(
            Finding(
                rule_id="SEC003",
                title="TLS verification disabled",
                severity="high",
                message="requests verify=False weakens TLS protection.",
                file_path=str(ctx.file_path),
                line=line,
                column=col,
            )
        )

    # Still a plain text match (so it also fires on "TODO" inside strings),
    # but the first matching line is now reported.
    todo_line = next(
        (
            number
            for number, text in enumerate(ctx.source.splitlines(), start=1)
            if "TODO" in text
        ),
        None,
    )
    if todo_line is not None:
        findings.append(
            Finding(
                rule_id="QLT001",
                title="Unresolved TODO",
                severity="low",
                message="TODO comments should be tracked before release.",
                file_path=str(ctx.file_path),
                line=todo_line,
            )
        )

    return findings
111
+
112
+
113
def run_requirements_rules(requirements: list[str], file_path: Path) -> list[Finding]:
    """Report every requirement line that has no ``==`` pin.

    Pinned versions make installs reproducible and simplify responding to
    CVEs, so each entry without ``==`` becomes a medium-severity ``DEP001``
    finding attributed to ``file_path``.
    """
    location = str(file_path)
    return [
        Finding(
            rule_id="DEP001",
            title="Unpinned dependency",
            severity="medium",
            message=f"Dependency '{dep}' is not pinned with ==.",
            file_path=location,
        )
        for dep in requirements
        if "==" not in dep
    ]
@@ -0,0 +1,177 @@
1
+ Metadata-Version: 2.4
2
+ Name: codexray-analyser
3
+ Version: 0.1.0
4
+ Summary: Offline-first static analyser for Python files and notebooks.
5
+ Project-URL: Homepage, https://github.com/Merlins-Sanctum/codexray-analyser
6
+ Project-URL: Repository, https://github.com/Merlins-Sanctum/codexray-analyser
7
+ Project-URL: Issues, https://github.com/Merlins-Sanctum/codexray-analyser/issues
8
+ Author: Mohammad Hamad
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: notebook,offline,python,security,static-analysis
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Security
19
+ Classifier: Topic :: Software Development :: Quality Assurance
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+
23
+ # Codexray
24
+
25
+ Codexray is an offline-first Python static analyser for `.py` and `.ipynb` files.
26
+ It helps teams inspect security risks, code quality problems, and dependency patterns
27
+ without sending source code outside the local machine.
28
+
29
+ ## What Codexray does
30
+
31
+ - Scans Python files and notebooks.
32
+ - Supports project-level scans, full file scans, and targeted line-range checks.
33
+ - Produces a structured JSON report.
34
+ - Builds a dependency graph view with nodes and edges.
35
+ - Flags common security risks such as shell execution and dangerous builtins.
36
+
37
+ ## Privacy and security behavior
38
+
39
+ - No telemetry.
40
+ - No source upload.
41
+ - Network features are disabled by default.
42
+ - Strict input limits for file size, notebook size and cell count, snippet length, and AST node count.
43
+
44
+ This tool is designed for local analysis workflows where proprietary code must stay on the client system.
45
+
46
+ ## Installation
47
+
48
+ ```bash
49
+ pip install codexray-analyser
50
+ ```
51
+
52
+ ## Quick start with CLI
53
+
54
+ Analyse a folder:
55
+
56
+ ```bash
57
+ codexray ./my_project
58
+ ```
59
+
60
+ Analyse a single file:
61
+
62
+ ```bash
63
+ codexray ./my_project/app.py
64
+ ```
65
+
66
+ Analyse a code snippet:
67
+
68
+ ```bash
69
+ codexray --snippet "import os; os.system('whoami')"
70
+ ```
71
+
72
+ Analyse only specific lines from one file:
73
+
74
+ ```bash
75
+ codexray ./my_project/app.py --start-line 40 --end-line 80
76
+ ```
77
+
78
+ Save output to JSON:
79
+
80
+ ```bash
81
+ codexray ./my_project --output codexray-report.json
82
+ ```
83
+
84
+ ## Python API usage
85
+
86
+ ```python
87
+ from codexray import analyse_file_snippet, analyse_path, analyse_snippet
88
+
89
+ project_result = analyse_path("./my_project")
90
+ snippet_result = analyse_snippet("import os\nos.system('whoami')")
91
+ range_result = analyse_file_snippet("./my_project/app.py", 20, 50)
92
+ ```
93
+
94
+ ## Understanding the report
95
+
96
+ Each result returns:
97
+
98
+ - `findings`: list of detected issues.
99
+ - `graph`: nodes and edges representing imports, files, and function relationships.
100
+ - `metadata`: run information such as analysed path and offline mode state.
101
+
102
+ Example finding shape:
103
+
104
+ ```json
105
+ {
106
+ "rule_id": "SEC002",
107
+ "title": "Dangerous builtin eval",
108
+ "severity": "critical",
109
+ "message": "Avoid eval on untrusted content.",
110
+ "file_path": "src/app.py",
111
+ "line": 18,
112
+ "column": 4
113
+ }
114
+ ```
115
+
116
+ ## Reading graph output
117
+
118
+ Graph output contains:
119
+
120
+ - `nodes`: entities such as files, imports, and functions
121
+ - `edges`: relationships such as `imports`, `contains`, and `calls`
122
+
123
+ Typical use:
124
+
125
+ 1. Run Codexray and save JSON output.
126
+ 2. Load `graph.nodes` and `graph.edges` into your graph viewer.
127
+ 3. Track dependency hotspots and risky call paths.
128
+
129
+ ## How to use findings to make code changes
130
+
131
+ Recommended workflow:
132
+
133
+ 1. Sort findings by `severity`.
134
+ 2. Fix `critical` and `high` findings first.
135
+ 3. Re-run Codexray after each fix batch.
136
+ 4. Keep evidence by committing report diffs in your internal workflow.
137
+
138
+ Examples:
139
+
140
+ - `SEC001` shell execution:
141
+ - Replace dynamic shell calls with safe Python APIs.
142
+ - Avoid passing untrusted input to command execution.
143
+ - `SEC002` dangerous builtin:
144
+ - Replace `eval` or `exec` with safe parsing and strict allow-lists.
145
+ - `DEP001` unpinned dependency:
146
+ - Pin versions in requirements files with `==` where practical.
147
+
148
+ ## Troubleshooting
149
+
150
+ - `File parsing failed`:
151
+ - Check syntax errors or unsupported file encoding.
152
+ - `exceeds ... bytes/chars`:
153
+ - Increase limits in config for controlled internal usage.
154
+ - Empty findings:
155
+ - Confirm the target path includes `.py` or `.ipynb` sources.
156
+
157
+ ## Local development
158
+
159
+ ```bash
160
+ python -m pip install -e .
161
+ python -m pip install pytest ruff bandit pip-audit build twine
162
+ python -m ruff check .
163
+ python -m pytest
164
+ python -m bandit -q -r src
165
+ python -m pip-audit
166
+ python -m build
167
+ python -m twine check dist/*
168
+ ```
169
+
170
+ ## Contributing
171
+
172
+ Read `CONTRIBUTING.md` before opening a pull request.
173
+ Security reports should follow `SECURITY.md`.
174
+
175
+ ## License
176
+
177
+ MIT. See `LICENSE`.
@@ -0,0 +1,15 @@
1
+ codexray/__init__.py,sha256=iaaRmrDgxz3PcnF4VymgisoUI7zVwMwAWECxXdpeK0s,366
2
+ codexray/analyser.py,sha256=cgMiBpcxOqPIBHYSmGWstkELU64LfjZ4HEnbofTdXrA,3866
3
+ codexray/cli.py,sha256=29_B-ecLTnqVy1SIPI0wKszhOfuIZzoMJm6rv8wsW_w,2255
4
+ codexray/config.py,sha256=q0aKeM8a0kzISuAj0uIGA7O97ExQCk2ScaYc7aKm5v8,368
5
+ codexray/graph.py,sha256=yRiUFTrlk6fucD7z0d3SSM5Hd0CqPRjuYXhM7Hfv6NE,3567
6
+ codexray/limits.py,sha256=ReJpYF2pOzn3W0G6s_gQg3nWzpfrPYyaeNEJbJ8rJVM,817
7
+ codexray/models.py,sha256=S2btOixsxzUsBM_Nyo9D1zRZgH0HdeZBJrFqpwKIApA,953
8
+ codexray/offline.py,sha256=xUonP9SvDIF1ycPPZ0ZdeoeYNrHsHftVtHM1cbROoZU,382
9
+ codexray/parsers.py,sha256=TWfrnZ3FWUPi3dZMVs_Ofgl_LnMCi8CHHC60abZ9s_8,3112
10
+ codexray/rules.py,sha256=fOVEvULv_SitfAWiuQof2QgKzzaHeG-zwOvcpV1g8p8,3884
11
+ codexray_analyser-0.1.0.dist-info/METADATA,sha256=ZKV18qou99Yy8j7o3WivzGqvXeMwm3D9MwrDekcStR4,4686
12
+ codexray_analyser-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
13
+ codexray_analyser-0.1.0.dist-info/entry_points.txt,sha256=r9LzXd27RixDqAQCQzlViYrslbdW-_--N0SGQ0yMnXE,47
14
+ codexray_analyser-0.1.0.dist-info/licenses/LICENSE,sha256=Xrr6RxAF66F3QmjiZOcEX8erMms2GYH6XVgYenOhV48,1071
15
+ codexray_analyser-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ codexray = codexray.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mohammad Hamad
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.