shannon-codebase-insight 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
- shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
- shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
- shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
- shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
- shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
- shannon_insight/__init__.py +25 -0
- shannon_insight/analyzers/__init__.py +8 -0
- shannon_insight/analyzers/base.py +215 -0
- shannon_insight/analyzers/go_analyzer.py +150 -0
- shannon_insight/analyzers/python_analyzer.py +169 -0
- shannon_insight/analyzers/typescript_analyzer.py +162 -0
- shannon_insight/cache.py +214 -0
- shannon_insight/cli.py +333 -0
- shannon_insight/config.py +235 -0
- shannon_insight/core.py +546 -0
- shannon_insight/exceptions/__init__.py +31 -0
- shannon_insight/exceptions/analysis.py +78 -0
- shannon_insight/exceptions/base.py +18 -0
- shannon_insight/exceptions/config.py +48 -0
- shannon_insight/file_ops.py +218 -0
- shannon_insight/logging_config.py +98 -0
- shannon_insight/math/__init__.py +15 -0
- shannon_insight/math/entropy.py +133 -0
- shannon_insight/math/fusion.py +109 -0
- shannon_insight/math/graph.py +209 -0
- shannon_insight/math/robust.py +106 -0
- shannon_insight/math/statistics.py +159 -0
- shannon_insight/models.py +48 -0
- shannon_insight/primitives/__init__.py +13 -0
- shannon_insight/primitives/detector.py +318 -0
- shannon_insight/primitives/extractor.py +278 -0
- shannon_insight/primitives/fusion.py +373 -0
- shannon_insight/primitives/recommendations.py +158 -0
- shannon_insight/py.typed +2 -0
- shannon_insight/security.py +284 -0
- shannon_insight/utils/__init__.py +1 -0
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Security utilities for Shannon Insight.
|
|
3
|
+
|
|
4
|
+
Provides path validation, resource limits, and safe file operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional, Pattern
|
|
11
|
+
|
|
12
|
+
from .exceptions import SecurityError, InvalidPathError
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Directories owned by the operating system; analysis must never run in them.
SYSTEM_DIRECTORIES = {
    # POSIX locations
    "/etc",
    "/sys",
    "/proc",
    "/dev",
    "/boot",
    "/bin",
    "/sbin",
    "/usr/bin",
    "/usr/sbin",
    # Windows locations
    "C:\\Windows",
    "C:\\Program Files",
    "C:\\Program Files (x86)",
}

# Default ceiling on the size of a single file, in bytes (10 MiB).
DEFAULT_MAX_FILE_SIZE = 10 * 1024 ** 2

# Default ceiling on how many files one scan may process.
DEFAULT_MAX_FILES = 10_000
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PathValidator:
    """
    Validates file paths for security issues.

    Prevents:
    - Directory traversal attacks
    - Symlink escape attacks
    - Access to system directories
    - Access to hidden sensitive files
    """

    def __init__(
        self,
        root_dir: Path,
        allow_hidden: bool = False,
        block_system_dirs: bool = True
    ):
        """
        Initialize path validator.

        Args:
            root_dir: Root directory that paths must be within
            allow_hidden: Allow hidden files/directories (starting with .)
            block_system_dirs: Block access to system directories
        """
        # Stored resolved so that containment checks compare like with like.
        self.root_dir = root_dir.resolve()
        self.allow_hidden = allow_hidden
        self.block_system_dirs = block_system_dirs

    @staticmethod
    def _system_dir_containing(resolved_path: Path) -> Optional[str]:
        """
        Find the system directory that a resolved path lies in.

        The comparison is made on whole path components, so "/etcetera"
        is not mistaken for something inside "/etc".

        Args:
            resolved_path: Absolute, already resolved path

        Returns:
            The matching SYSTEM_DIRECTORIES entry, or None if there is none
        """
        for sys_dir in SYSTEM_DIRECTORIES:
            sys_path = Path(sys_dir)
            if resolved_path == sys_path or sys_path in resolved_path.parents:
                return sys_dir
        return None

    def validate_path(self, path: Path) -> Path:
        """
        Validate that a path is safe to access.

        Args:
            path: Path to validate

        Returns:
            Resolved absolute path

        Raises:
            SecurityError: If path fails security checks
            InvalidPathError: If path doesn't exist or isn't accessible
        """
        # Resolve to absolute path (this also follows symlinks)
        try:
            resolved_path = path.resolve()
        except (OSError, RuntimeError) as e:
            raise InvalidPathError(path, f"Cannot resolve path: {e}") from e

        # Check if path exists
        if not resolved_path.exists():
            raise InvalidPathError(resolved_path, "Path does not exist")

        # Check if the resolved target is within the root directory
        try:
            relative_path = resolved_path.relative_to(self.root_dir)
        except ValueError:
            # The resolved path is never itself a symlink, so the link test
            # has to be made on the path exactly as the caller supplied it.
            if path.is_symlink():
                raise SecurityError(
                    "Symlink escape detected: target is outside root directory",
                    filepath=resolved_path
                )
            raise SecurityError(
                "Path traversal detected: path is outside root directory",
                filepath=resolved_path
            )

        # Block system directories
        if self.block_system_dirs:
            sys_dir = self._system_dir_containing(resolved_path)
            if sys_dir is not None:
                raise SecurityError(
                    f"Access to system directory blocked: {sys_dir}",
                    filepath=resolved_path
                )

        # Check for hidden files. Only components below the root are
        # inspected: a dot-directory above the root is outside the
        # validator's scope and must not cause every file to be rejected.
        if not self.allow_hidden:
            if any(part.startswith('.') for part in relative_path.parts):
                raise SecurityError(
                    "Access to hidden file/directory blocked",
                    filepath=resolved_path
                )

        return resolved_path

    def is_safe_path(self, path: Path) -> bool:
        """
        Check if path is safe without raising exceptions.

        Args:
            path: Path to check

        Returns:
            True if path is safe, False otherwise
        """
        try:
            self.validate_path(path)
        except (SecurityError, InvalidPathError):
            return False
        return True
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class ResourceLimiter:
    """
    Enforces resource limits during analysis.

    Tracks how many files have been processed and rejects files that are
    larger than the configured size limit.
    """

    def __init__(
        self,
        max_file_size: int = DEFAULT_MAX_FILE_SIZE,
        max_files: int = DEFAULT_MAX_FILES
    ):
        """
        Initialize resource limiter.

        Args:
            max_file_size: Maximum file size in bytes
            max_files: Maximum number of files to process
        """
        self.max_file_size = max_file_size
        self.max_files = max_files
        self.files_processed = 0

    def _file_count_error(self) -> SecurityError:
        """Build the error reported when the file-count limit is hit."""
        return SecurityError(
            f"File count ({self.files_processed}) exceeds limit ({self.max_files})"
        )

    def check_file_size(self, filepath: Path) -> None:
        """
        Check if file size is within limits.

        Args:
            filepath: File to check

        Raises:
            InvalidPathError: If the file cannot be stat'ed
            SecurityError: If file exceeds size limit
        """
        try:
            size = filepath.stat().st_size
        except OSError as e:
            raise InvalidPathError(filepath, f"Cannot stat file: {e}") from e

        if size > self.max_file_size:
            size_mb = size / (1024 * 1024)
            limit_mb = self.max_file_size / (1024 * 1024)
            raise SecurityError(
                f"File size ({size_mb:.2f}MB) exceeds limit ({limit_mb:.2f}MB)",
                filepath=filepath
            )

    def check_file_count(self) -> None:
        """
        Check whether another file may still be processed.

        Raises:
            SecurityError: If the number of files already processed has
                reached the limit
        """
        if self.files_processed >= self.max_files:
            raise self._file_count_error()

    def increment_file_count(self) -> None:
        """
        Increment the count of processed files.

        Raises:
            SecurityError: If the new count is greater than the limit
        """
        self.files_processed += 1
        # Strictly greater: processing exactly max_files files is allowed.
        # Reusing the ">=" test here would reject the max_files-th file.
        if self.files_processed > self.max_files:
            raise self._file_count_error()

    def reset(self) -> None:
        """Reset counters."""
        self.files_processed = 0
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def safe_compile_regex(
    pattern: str,
    flags: int = 0,
    max_length: int = 1000
) -> Optional[Pattern]:
    """
    Compile a regex pattern, returning None instead of raising on failure.

    Args:
        pattern: Regex pattern to compile
        flags: Regex flags
        max_length: Longest pattern, in characters, that will be compiled

    Returns:
        Compiled regex pattern, or None if the pattern is longer than
        max_length or compilation fails

    Note:
        Python's re module doesn't have built-in timeout protection, and
        this function does not analyse the pattern for catastrophic
        backtracking. The length cap is the only complexity guard, so
        untrusted patterns can still be slow to match.
    """
    # Reject overly long patterns
    if len(pattern) > max_length:
        return None

    try:
        return re.compile(pattern, flags)
    except (re.error, ValueError, OverflowError, RecursionError):
        # re.error: malformed pattern
        # ValueError: incompatible flag combination
        # OverflowError: repetition count too large
        # RecursionError: nesting too deep for the parser
        return None
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def validate_root_directory(path: Path) -> Path:
    """
    Validate that a root directory is safe to analyze.

    Args:
        path: Directory path to validate

    Returns:
        Resolved absolute path

    Raises:
        InvalidPathError: If path cannot be resolved, does not exist, is not
            a directory, or is not readable
        SecurityError: If path is a system directory or lies inside one
    """
    # Resolve to absolute path
    try:
        resolved = path.resolve()
    except (OSError, RuntimeError) as e:
        raise InvalidPathError(path, f"Cannot resolve path: {e}") from e

    # Check existence
    if not resolved.exists():
        raise InvalidPathError(resolved, "Directory does not exist")

    # Check it's a directory
    if not resolved.is_dir():
        raise InvalidPathError(resolved, "Path is not a directory")

    # Check readability
    if not os.access(resolved, os.R_OK):
        raise InvalidPathError(resolved, "Directory is not readable")

    # Block system directories. Compare whole path components rather than
    # string prefixes, so that e.g. "/etcetera" or "/development" is not
    # mistaken for "/etc" or "/dev".
    for sys_dir in SYSTEM_DIRECTORIES:
        sys_path = Path(sys_dir)
        if resolved == sys_path or sys_path in resolved.parents:
            raise SecurityError(
                f"Cannot analyze system directory: {sys_dir}",
                filepath=resolved
            )

    return resolved
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utility functions"""
|