shannon-codebase-insight 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shannon_codebase_insight-0.4.0.dist-info/METADATA +209 -0
- shannon_codebase_insight-0.4.0.dist-info/RECORD +37 -0
- shannon_codebase_insight-0.4.0.dist-info/WHEEL +5 -0
- shannon_codebase_insight-0.4.0.dist-info/entry_points.txt +7 -0
- shannon_codebase_insight-0.4.0.dist-info/licenses/LICENSE +21 -0
- shannon_codebase_insight-0.4.0.dist-info/top_level.txt +1 -0
- shannon_insight/__init__.py +25 -0
- shannon_insight/analyzers/__init__.py +8 -0
- shannon_insight/analyzers/base.py +215 -0
- shannon_insight/analyzers/go_analyzer.py +150 -0
- shannon_insight/analyzers/python_analyzer.py +169 -0
- shannon_insight/analyzers/typescript_analyzer.py +162 -0
- shannon_insight/cache.py +214 -0
- shannon_insight/cli.py +333 -0
- shannon_insight/config.py +235 -0
- shannon_insight/core.py +546 -0
- shannon_insight/exceptions/__init__.py +31 -0
- shannon_insight/exceptions/analysis.py +78 -0
- shannon_insight/exceptions/base.py +18 -0
- shannon_insight/exceptions/config.py +48 -0
- shannon_insight/file_ops.py +218 -0
- shannon_insight/logging_config.py +98 -0
- shannon_insight/math/__init__.py +15 -0
- shannon_insight/math/entropy.py +133 -0
- shannon_insight/math/fusion.py +109 -0
- shannon_insight/math/graph.py +209 -0
- shannon_insight/math/robust.py +106 -0
- shannon_insight/math/statistics.py +159 -0
- shannon_insight/models.py +48 -0
- shannon_insight/primitives/__init__.py +13 -0
- shannon_insight/primitives/detector.py +318 -0
- shannon_insight/primitives/extractor.py +278 -0
- shannon_insight/primitives/fusion.py +373 -0
- shannon_insight/primitives/recommendations.py +158 -0
- shannon_insight/py.typed +2 -0
- shannon_insight/security.py +284 -0
- shannon_insight/utils/__init__.py +1 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Configuration and security exceptions: paths, settings, access control."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Any
|
|
5
|
+
|
|
6
|
+
from .base import ShannonInsightError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ConfigurationError(ShannonInsightError):
    """Common base for all configuration-related errors raised by this package."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class InvalidPathError(ConfigurationError):
    """Raised when a provided path is invalid.

    Attributes:
        path: The offending path.
        reason: Human-readable explanation of why the path was rejected.
    """

    def __init__(self, path: Path, reason: str):
        message = f"Invalid path: {path}"
        context = {"path": str(path), "reason": reason}
        super().__init__(message, details=context)
        self.path = path
        self.reason = reason
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class InvalidConfigError(ConfigurationError):
    """Raised when configuration values are invalid.

    Attributes:
        key: The configuration key that failed validation.
        value: The rejected value.
        reason: Human-readable explanation of the failure.
    """

    def __init__(self, key: str, value: Any, reason: str):
        message = f"Invalid configuration for {key}: {value}"
        context = {"key": key, "value": str(value), "reason": reason}
        super().__init__(message, details=context)
        self.key = key
        self.value = value
        self.reason = reason
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class SecurityError(ConfigurationError):
    """Raised when a security violation is detected.

    Attributes:
        reason: Description of the violation.
        filepath: The file involved, when known.
    """

    def __init__(self, reason: str, filepath: Optional[Path] = None):
        # Path instances are always truthy, so this guard effectively
        # tests for None while matching the original truthiness check.
        context = {"reason": reason}
        if filepath:
            context["filepath"] = str(filepath)

        super().__init__(f"Security violation: {reason}", details=context)
        self.reason = reason
        self.filepath = filepath
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Safe file operations for Shannon Insight.
|
|
3
|
+
|
|
4
|
+
Provides timeout-protected and size-limited file operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import signal
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Generator, List, Optional
|
|
11
|
+
|
|
12
|
+
from .exceptions import FileAccessError, SecurityError
|
|
13
|
+
from .security import PathValidator, ResourceLimiter
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TimeoutError(Exception):
    """Raised when an operation times out.

    NOTE(review): this shadows the builtin ``TimeoutError`` within this
    module — confirm that is intentional.
    """
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _timeout_handler(signum, frame):
    """SIGALRM handler: convert the alarm signal into a TimeoutError."""
    raise TimeoutError("Operation timed out")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@contextmanager
def timeout(seconds: int):
    """
    Limit the enclosed block to ``seconds`` seconds using SIGALRM.

    Args:
        seconds: Timeout in seconds

    Raises:
        TimeoutError: If the protected block exceeds the timeout

    Note:
        Relies on ``signal.alarm``/SIGALRM, which is only available on
        Unix and only usable from the main thread.
    """
    previous = signal.signal(signal.SIGALRM, _timeout_handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        # Cancel any pending alarm first, then reinstate the prior handler.
        signal.alarm(0)
        signal.signal(signal.SIGALRM, previous)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def safe_read_file(
    filepath: Path,
    validator: Optional[PathValidator] = None,
    limiter: Optional[ResourceLimiter] = None,
    timeout_seconds: int = 10,
    encoding: str = 'utf-8',
    errors: str = 'replace'
) -> str:
    """
    Safely read a text file with security checks and timeout protection.

    Args:
        filepath: File to read
        validator: Path validator (path validation skipped when None)
        limiter: Resource limiter (size check skipped when None)
        timeout_seconds: Maximum seconds allowed for the read
        encoding: Text encoding
        errors: Encoding-error handling strategy passed to open()

    Returns:
        File contents as string

    Raises:
        FileAccessError: If the file cannot be read (timeouts, encoding
            failures, OS errors and unexpected errors are all wrapped)
        SecurityError: If security checks fail
        TimeoutError: If read operation times out
    """
    if validator:
        filepath = validator.validate_path(filepath)

    if limiter:
        limiter.check_file_size(filepath)

    # Perform the actual read under alarm-based timeout protection,
    # translating every failure mode into FileAccessError.
    try:
        with timeout(timeout_seconds):
            with open(filepath, 'r', encoding=encoding, errors=errors) as handle:
                return handle.read()
    except TimeoutError:
        raise FileAccessError(filepath, f"Read operation timed out after {timeout_seconds}s")
    except UnicodeDecodeError as exc:
        raise FileAccessError(filepath, f"Encoding error: {exc}")
    except OSError as exc:
        raise FileAccessError(filepath, f"OS error: {exc}")
    except Exception as exc:
        raise FileAccessError(filepath, f"Unexpected error: {exc}")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def safe_scan_directory(
    root_dir: Path,
    pattern: str = "**/*",
    validator: Optional[PathValidator] = None,
    limiter: Optional[ResourceLimiter] = None,
    follow_symlinks: bool = False
) -> Generator[Path, None, None]:
    """
    Safely scan a directory with security checks.

    Args:
        root_dir: Directory to scan
        pattern: Glob pattern applied relative to root_dir
        validator: Path validator (validation skipped when None)
        limiter: Resource limiter (count/size checks skipped when None)
        follow_symlinks: Whether to follow symbolic links

    Yields:
        Safe file paths (regular files passing all checks)

    Raises:
        FileAccessError: If the directory cannot be scanned
        SecurityError: If resource limits are exceeded
    """
    # Validate root directory
    if validator:
        root_dir = validator.validate_path(root_dir)

    try:
        for path in root_dir.glob(pattern):
            # Skip symlinks unless explicitly requested
            if path.is_symlink() and not follow_symlinks:
                continue

            # Skip directories
            if path.is_dir():
                continue

            # NOTE: counted before validation/size checks, so files later
            # skipped still consume file-count budget (original behavior).
            if limiter:
                limiter.increment_file_count()

            # Validate path
            if validator:
                try:
                    path = validator.validate_path(path)
                except Exception:
                    # Was `except (SecurityError, Exception)`: the tuple was
                    # redundant since Exception already covers SecurityError.
                    # Skip files that fail validation.
                    continue

            # Check file size
            if limiter:
                try:
                    limiter.check_file_size(path)
                except SecurityError:
                    # Skip files that exceed size limit
                    continue

            yield path

    except OSError as e:
        raise FileAccessError(root_dir, f"Directory scan failed: {e}")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def safe_write_file(
    filepath: Path,
    content: str,
    validator: Optional[PathValidator] = None,
    encoding: str = 'utf-8'
) -> None:
    """
    Safely write text to a file, creating parent directories as needed.

    Args:
        filepath: File to write
        content: Content to write
        validator: Path validator (applied to the parent directory)
        encoding: Text encoding

    Raises:
        FileAccessError: If file cannot be written
        SecurityError: If security checks fail
    """
    # Only an already-existing parent can be validated; a missing parent
    # is created below.
    if validator:
        parent_dir = filepath.parent
        if parent_dir.exists():
            validator.validate_path(parent_dir)

    try:
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with open(filepath, 'w', encoding=encoding) as sink:
            sink.write(content)
    except OSError as exc:
        raise FileAccessError(filepath, f"Write failed: {exc}")
    except Exception as exc:
        raise FileAccessError(filepath, f"Unexpected error: {exc}")
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def should_skip_file(
    filepath: Path,
    exclude_patterns: List[str]
) -> bool:
    """
    Decide whether a file matches any exclusion pattern.

    Args:
        filepath: File to check
        exclude_patterns: List of glob patterns to exclude

    Returns:
        True if the file matches at least one pattern
    """
    return any(filepath.match(pattern) for pattern in exclude_patterns)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Logging configuration for Shannon Insight.
|
|
3
|
+
|
|
4
|
+
Provides structured logging with rich formatting for beautiful terminal output.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import sys
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from rich.logging import RichHandler
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def setup_logging(
    verbose: bool = False,
    quiet: bool = False,
    log_file: Optional[str] = None
) -> logging.Logger:
    """
    Configure logging with a rich handler for colored terminal output.

    Args:
        verbose: Enable DEBUG level logging
        quiet: Suppress all but ERROR level logging (takes precedence
            over verbose)
        log_file: Optional file path to append logs to

    Returns:
        Configured logger instance for shannon_insight
    """
    # quiet wins over verbose.
    if quiet:
        level = logging.ERROR
    elif verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO

    # Rich handler writes to stderr so normal output stays clean.
    rich_handler = RichHandler(
        console=Console(stderr=True),
        rich_tracebacks=True,
        tracebacks_show_locals=verbose,
        markup=True,
        show_time=True,
        show_path=verbose,
    )
    handlers = [rich_handler]

    # Optional plain-text file handler in append mode.
    if log_file:
        file_handler = logging.FileHandler(log_file, mode='a')
        file_handler.setFormatter(
            logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                datefmt='%Y-%m-%d %H:%M:%S'
            )
        )
        handlers.append(file_handler)

    # NOTE(review): basicConfig is a no-op when the root logger already has
    # handlers, so calling setup_logging twice will not reconfigure — confirm
    # this is acceptable for the CLI's usage.
    logging.basicConfig(
        level=level,
        format="%(message)s",
        datefmt="[%X]",
        handlers=handlers
    )

    package_logger = logging.getLogger("shannon_insight")
    package_logger.setLevel(level)
    return package_logger
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_logger(name: Optional[str] = None) -> logging.Logger:
    """
    Get a logger instance for a specific module.

    Args:
        name: Module name (e.g., 'shannon_insight.core'); names lacking the
            'shannon_insight' prefix are qualified with it.
            If None, returns the root shannon_insight logger.

    Returns:
        Logger instance
    """
    if name is None:
        return logging.getLogger("shannon_insight")

    # Qualify bare module names under the package namespace.
    qualified = name if name.startswith("shannon_insight") else f"shannon_insight.{name}"
    return logging.getLogger(qualified)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Mathematical utilities for codebase analysis."""
|
|
2
|
+
|
|
3
|
+
from .entropy import Entropy
|
|
4
|
+
from .graph import GraphMetrics
|
|
5
|
+
from .statistics import Statistics
|
|
6
|
+
from .robust import RobustStatistics
|
|
7
|
+
from .fusion import SignalFusion
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"Entropy",
|
|
11
|
+
"GraphMetrics",
|
|
12
|
+
"Statistics",
|
|
13
|
+
"SignalFusion",
|
|
14
|
+
"RobustStatistics",
|
|
15
|
+
]
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Information theory: Shannon entropy, KL divergence, joint entropy."""
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from typing import List, Mapping, Union
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Entropy:
    """Information entropy calculations."""

    @staticmethod
    def shannon(distribution: Mapping[str, Union[int, float]]) -> float:
        """
        Compute Shannon entropy H(X) = -Σ p(x) log₂ p(x).

        Args:
            distribution: Dictionary with event -> count mapping

        Returns:
            Entropy in bits (0.0 for an empty or zero-total distribution)
        """
        total = sum(distribution.values())
        if total == 0:
            return 0.0

        terms = [
            (count / total) * math.log2(count / total)
            for count in distribution.values()
            if count / total > 0
        ]
        return 0.0 - sum(terms)

    @staticmethod
    def normalized(distribution: Mapping[str, Union[int, float]]) -> float:
        """
        Normalize entropy by the maximum possible entropy.

        H_norm = H / log₂(N) where N is the number of unique events.

        Returns:
            Normalized entropy in [0, 1]; 0.0 with fewer than two events
        """
        n_events = len(distribution)
        if n_events <= 1:
            return 0.0
        upper = math.log2(n_events)
        return Entropy.shannon(distribution) / upper if upper > 0 else 0.0

    @staticmethod
    def kl_divergence(
        p: Mapping[str, Union[int, float]], q: Mapping[str, Union[int, float]]
    ) -> float:
        """
        Compute Kullback-Leibler divergence D_KL(P || Q).

        D_KL(P || Q) = Σ P(x) log₂(P(x) / Q(x))

        Args:
            p: Observed distribution
            q: Expected distribution

        Returns:
            KL divergence in bits (lower = more similar); inf when P has
            support where Q has none; 0.0 when either total is zero
        """
        total_p = sum(p.values())
        total_q = sum(q.values())
        if total_p == 0 or total_q == 0:
            return 0.0

        divergence = 0.0
        for event, weight in p.items():
            prob_p = weight / total_p
            if prob_p <= 0:
                continue
            prob_q = q.get(event, 0) / total_q
            if prob_q == 0:
                # D_KL is undefined (infinite) when P(x)>0 but Q(x)=0
                return float("inf")
            if prob_q > 0:
                divergence += prob_p * math.log2(prob_p / prob_q)
        return divergence

    @staticmethod
    def joint_entropy(
        joint_distribution: Mapping[tuple, Union[int, float]]
    ) -> float:
        """
        Compute joint entropy H(X, Y, ...) from a joint distribution.

        H(X,Y) = -Σ_x Σ_y p(x,y) log₂ p(x,y)

        The joint distribution must be keyed by tuples representing joint
        outcomes, e.g. {("a", "b"): 5, ("a", "c"): 3, ...}. The formula is
        identical to shannon(); only the sample space changes from
        singleton events to joint events.

        Args:
            joint_distribution: Dictionary mapping outcome tuples to counts

        Returns:
            Joint entropy in bits

        Reference:
            Cover & Thomas, *Elements of Information Theory*, 2nd ed.,
            Chapter 2 (Theorem 2.6.6).
        """
        return Entropy.shannon(joint_distribution)

    @staticmethod
    def pooled_entropy(*distributions: Mapping[str, Union[int, float]]) -> float:
        """
        Compute entropy of the pooled (merged) sample from multiple distributions.

        This is NOT joint entropy: all counts are merged into a single
        mixture distribution and H of that mixture is returned.

        H_pooled = H(merge(X₁, X₂, ...))

        Args:
            *distributions: Multiple count distributions to pool

        Returns:
            Entropy of the pooled distribution in bits
        """
        pooled: dict = {}
        for distribution in distributions:
            for event, weight in distribution.items():
                pooled[event] = pooled.get(event, 0) + weight
        return Entropy.shannon(pooled)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Evidence fusion: Bayesian combination, Dempster-Shafer theory."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List, Tuple
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SignalFusion:
    """Evidence-theoretic signal fusion methods."""

    @staticmethod
    def bayesian_fusion(
        priors: List[float], likelihoods: List[float]
    ) -> Tuple[float, float]:
        """
        Bayesian evidence combination: P(H|E) = P(E|H) * P(H) / P(E).

        Normalizes the per-hypothesis posteriors by the total evidence and
        returns the maximum posterior together with an entropy-based
        confidence score.

        Args:
            priors: Prior probabilities for each hypothesis (should sum to 1)
            likelihoods: Likelihoods P(E|H_i) for each hypothesis

        Returns:
            Tuple of (max_posterior, confidence)
            confidence is 1 - normalized_entropy of the posterior
            distribution, bounded in [0, 1].

        Raises:
            ValueError: If priors and likelihoods differ in length

        Reference:
            Bayes' theorem; Bishop, "Pattern Recognition and Machine Learning"
            (2006), Chapter 1.2.
        """
        import math as _math

        if len(priors) != len(likelihoods):
            raise ValueError("priors and likelihoods must have the same length")

        # Unnormalized posteriors P(E|H_i) * P(H_i) and total evidence P(E).
        joint = [prior * likelihood for prior, likelihood in zip(priors, likelihoods)]
        evidence = sum(joint)

        # Degenerate case: no evidence mass -> uniform posterior, zero confidence.
        if evidence <= 0:
            count = len(priors)
            return (1.0 / count, 0.0) if count > 0 else (0.0, 0.0)

        posteriors = [mass / evidence for mass in joint]
        best = max(posteriors)

        # Confidence = 1 - normalized entropy: all mass on one hypothesis
        # gives entropy 0 -> confidence 1; uniform posteriors give entropy
        # log2(n) -> confidence 0.
        count = len(posteriors)
        if count <= 1:
            certainty = 1.0
        else:
            h = -sum(prob * _math.log2(prob) for prob in posteriors if prob > 0)
            ceiling = _math.log2(count)
            certainty = 1.0 - (h / ceiling) if ceiling > 0 else 1.0

        return float(best), float(certainty)

    @staticmethod
    def dempster_shafer_combination(
        mass_functions: List[Dict[frozenset, float]]
    ) -> Dict[frozenset, float]:
        """
        Combine evidence using Dempster-Shafer theory.

        m(A) = Σ(B∩C=A) m1(B) * m2(C) / (1 - K)

        Where K is the conflict coefficient (mass assigned to disjoint
        hypothesis pairs). Keys must be frozensets representing
        hypothesis sets.

        Args:
            mass_functions: List of mass functions {frozenset(hypotheses): mass}

        Returns:
            Combined mass function (copy of the single input when only one
            is given; empty dict for empty input)
        """
        if not mass_functions:
            return {}

        accumulated = mass_functions[0].copy()

        for other in mass_functions[1:]:
            fused: Dict[frozenset, float] = {}
            conflict = 0.0

            for set_a, mass_a in accumulated.items():
                for set_b, mass_b in other.items():
                    common = set_a & set_b  # proper set intersection
                    product = mass_a * mass_b
                    if common:
                        fused[common] = fused.get(common, 0.0) + product
                    else:
                        conflict += product

            # Renormalize unless evidence is totally conflicting (K = 1).
            scale = 1.0 - conflict
            if scale > 0:
                fused = {hypothesis: mass / scale for hypothesis, mass in fused.items()}

            accumulated = fused

        return accumulated
|