shannon-codebase-insight 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
  2. shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
  3. shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
  4. shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
  5. shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
  7. shannon_insight/__init__.py +25 -0
  8. shannon_insight/analyzers/__init__.py +8 -0
  9. shannon_insight/analyzers/base.py +215 -0
  10. shannon_insight/analyzers/go_analyzer.py +150 -0
  11. shannon_insight/analyzers/python_analyzer.py +169 -0
  12. shannon_insight/analyzers/typescript_analyzer.py +162 -0
  13. shannon_insight/cache.py +214 -0
  14. shannon_insight/cli.py +333 -0
  15. shannon_insight/config.py +235 -0
  16. shannon_insight/core.py +546 -0
  17. shannon_insight/exceptions/__init__.py +31 -0
  18. shannon_insight/exceptions/analysis.py +78 -0
  19. shannon_insight/exceptions/base.py +18 -0
  20. shannon_insight/exceptions/config.py +48 -0
  21. shannon_insight/file_ops.py +218 -0
  22. shannon_insight/logging_config.py +98 -0
  23. shannon_insight/math/__init__.py +15 -0
  24. shannon_insight/math/entropy.py +133 -0
  25. shannon_insight/math/fusion.py +109 -0
  26. shannon_insight/math/graph.py +209 -0
  27. shannon_insight/math/robust.py +106 -0
  28. shannon_insight/math/statistics.py +159 -0
  29. shannon_insight/models.py +48 -0
  30. shannon_insight/primitives/__init__.py +13 -0
  31. shannon_insight/primitives/detector.py +318 -0
  32. shannon_insight/primitives/extractor.py +278 -0
  33. shannon_insight/primitives/fusion.py +373 -0
  34. shannon_insight/primitives/recommendations.py +158 -0
  35. shannon_insight/py.typed +2 -0
  36. shannon_insight/security.py +284 -0
  37. shannon_insight/utils/__init__.py +1 -0
@@ -0,0 +1,284 @@
1
+ """
2
+ Security utilities for Shannon Insight.
3
+
4
+ Provides path validation, resource limits, and safe file operations.
5
+ """
6
+
7
+ import os
8
+ import re
9
+ from pathlib import Path
10
+ from typing import Optional, Pattern
11
+
12
+ from .exceptions import SecurityError, InvalidPathError
13
+
14
+
15
# Operating-system locations that must never be analyzed.
SYSTEM_DIRECTORIES = {
    # POSIX
    "/etc",
    "/sys",
    "/proc",
    "/dev",
    "/boot",
    "/bin",
    "/sbin",
    "/usr/bin",
    "/usr/sbin",
    # Windows
    "C:\\Windows",
    "C:\\Program Files",
    "C:\\Program Files (x86)",
}

# Largest file, in bytes, accepted by default (10 MiB).
DEFAULT_MAX_FILE_SIZE = 10 * 1024 * 1024

# Default cap on the number of files scanned in one run.
DEFAULT_MAX_FILES = 10000
27
+
28
+
29
class PathValidator:
    """
    Validates file paths for security issues.

    Prevents:
    - Directory traversal attacks
    - Symlink escape attacks
    - Access to system directories
    - Access to hidden sensitive files
    """

    def __init__(
        self,
        root_dir: Path,
        allow_hidden: bool = False,
        block_system_dirs: bool = True
    ):
        """
        Initialize path validator.

        Args:
            root_dir: Root directory that paths must be within
            allow_hidden: Allow hidden files/directories (starting with .)
            block_system_dirs: Block access to system directories
        """
        self.root_dir = root_dir.resolve()
        self.allow_hidden = allow_hidden
        self.block_system_dirs = block_system_dirs

    def validate_path(self, path: Path) -> Path:
        """
        Validate that a path is safe to access.

        Args:
            path: Path to validate

        Returns:
            Resolved absolute path

        Raises:
            SecurityError: If path fails security checks
            InvalidPathError: If path doesn't exist or isn't accessible
        """
        # Resolve to absolute path; this also follows every symlink.
        try:
            resolved_path = path.resolve()
        except (OSError, RuntimeError) as e:
            raise InvalidPathError(path, f"Cannot resolve path: {e}") from e

        # Check if path exists
        if not resolved_path.exists():
            raise InvalidPathError(resolved_path, "Path does not exist")

        # Check if the fully resolved path is within the root directory.
        # Because resolve() has already followed symlinks, this single
        # containment test covers both ".." traversal and symlink escape;
        # calling is_symlink() on the resolved path would never be true.
        try:
            relative_path = resolved_path.relative_to(self.root_dir)
        except ValueError:
            # Only the final component of the caller's path is inspected
            # here; it merely selects the more specific error message.
            if path.is_symlink():
                reason = (
                    "Symlink escape detected: target is outside root directory"
                )
            else:
                reason = (
                    "Path traversal detected: path is outside root directory"
                )
            raise SecurityError(reason, filepath=resolved_path) from None

        # Block system directories. Compare whole path components rather
        # than string prefixes so "/etcetera" is not mistaken for "/etc".
        if self.block_system_dirs:
            ancestors = resolved_path.parents
            for sys_dir in SYSTEM_DIRECTORIES:
                sys_path = Path(sys_dir)
                if resolved_path == sys_path or sys_path in ancestors:
                    raise SecurityError(
                        f"Access to system directory blocked: {sys_dir}",
                        filepath=resolved_path
                    )

        # Check for hidden files. Only components below the root are
        # inspected: the root was chosen by the caller, so a root that
        # itself lives under a dot-directory must not block every file.
        if not self.allow_hidden:
            if any(part.startswith('.') for part in relative_path.parts):
                raise SecurityError(
                    "Access to hidden file/directory blocked",
                    filepath=resolved_path
                )

        return resolved_path

    def is_safe_path(self, path: Path) -> bool:
        """
        Check if path is safe without raising exceptions.

        Args:
            path: Path to check

        Returns:
            True if path is safe, False otherwise
        """
        try:
            self.validate_path(path)
        except (SecurityError, InvalidPathError):
            return False
        return True
138
+
139
+
140
class ResourceLimiter:
    """
    Enforces resource limits during analysis.

    Tracks how many files have been processed and rejects files that are
    larger than the configured size limit.
    """

    def __init__(
        self,
        max_file_size: int = DEFAULT_MAX_FILE_SIZE,
        max_files: int = DEFAULT_MAX_FILES
    ):
        """
        Initialize resource limiter.

        Args:
            max_file_size: Maximum file size in bytes
            max_files: Maximum number of files to process
        """
        self.max_file_size = max_file_size
        self.max_files = max_files
        self.files_processed = 0

    def check_file_size(self, filepath: Path) -> None:
        """
        Check if file size is within limits.

        Args:
            filepath: File to check

        Raises:
            InvalidPathError: If the file cannot be stat'ed
            SecurityError: If file exceeds size limit
        """
        try:
            size = filepath.stat().st_size
        except OSError as e:
            raise InvalidPathError(filepath, f"Cannot stat file: {e}") from e

        if size > self.max_file_size:
            size_mb = size / (1024 * 1024)
            limit_mb = self.max_file_size / (1024 * 1024)
            raise SecurityError(
                f"File size ({size_mb:.2f}MB) exceeds limit ({limit_mb:.2f}MB)",
                filepath=filepath
            )

    def check_file_count(self) -> None:
        """
        Check whether another file may still be processed.

        Intended as a pre-check before handling a file: it fails once
        ``max_files`` files have already been counted.

        Raises:
            SecurityError: If the file count has reached the limit
        """
        if self.files_processed >= self.max_files:
            raise SecurityError(
                f"File count ({self.files_processed}) exceeds limit ({self.max_files})"
            )

    def increment_file_count(self) -> None:
        """
        Increment the count of processed files.

        Raises:
            SecurityError: If the new count is greater than ``max_files``
        """
        self.files_processed += 1
        # Compare with ">" after counting: exactly ``max_files`` files are
        # allowed. Reusing the ">=" pre-check here would reject the
        # max_files-th file, an off-by-one.
        if self.files_processed > self.max_files:
            raise SecurityError(
                f"File count ({self.files_processed}) exceeds limit ({self.max_files})"
            )

    def reset(self) -> None:
        """Reset counters."""
        self.files_processed = 0
204
+
205
+
206
def safe_compile_regex(
    pattern: str,
    flags: int = 0,
    max_length: int = 1000
) -> Optional[Pattern]:
    """
    Compile a regex pattern, returning None instead of raising on failure.

    Args:
        pattern: Regex pattern to compile
        flags: Regex flags
        max_length: Longest pattern, in characters, that is accepted

    Returns:
        Compiled regex pattern, or None if the pattern is too long or
        compilation fails

    Note:
        Python's re module has no built-in timeout, and this function does
        not detect catastrophic-backtracking patterns: the only complexity
        guard is the length cap. Callers matching untrusted patterns
        against large inputs need their own safeguards.
    """
    # Reject overly long patterns
    if len(pattern) > max_length:
        return None

    try:
        return re.compile(pattern, flags)
    except (re.error, OverflowError, RecursionError):
        # re.error: malformed pattern. OverflowError: repetition count too
        # large (e.g. "a{99999999999999999999}"). RecursionError: pattern
        # nested too deeply for the parser.
        return None
241
+
242
+
243
def validate_root_directory(path: Path) -> Path:
    """
    Validate that a root directory is safe to analyze.

    Args:
        path: Directory path to validate

    Returns:
        Resolved absolute path

    Raises:
        InvalidPathError: If path is invalid
        SecurityError: If path is unsafe
    """
    # Resolve to absolute path
    try:
        resolved = path.resolve()
    except (OSError, RuntimeError) as e:
        raise InvalidPathError(path, f"Cannot resolve path: {e}") from e

    # Check existence
    if not resolved.exists():
        raise InvalidPathError(resolved, "Directory does not exist")

    # Check it's a directory
    if not resolved.is_dir():
        raise InvalidPathError(resolved, "Path is not a directory")

    # Check readability
    if not os.access(resolved, os.R_OK):
        raise InvalidPathError(resolved, "Directory is not readable")

    # Block system directories. Compare whole path components rather than
    # string prefixes so that e.g. "/devel/project" or "/binaries" are not
    # mistaken for "/dev" or "/bin".
    ancestors = resolved.parents
    for sys_dir in SYSTEM_DIRECTORIES:
        sys_path = Path(sys_dir)
        if resolved == sys_path or sys_path in ancestors:
            raise SecurityError(
                f"Cannot analyze system directory: {sys_dir}",
                filepath=resolved
            )

    return resolved
@@ -0,0 +1 @@
1
+ """Utility functions"""