gitgalaxy 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitgalaxy/__init__.py +0 -0
- gitgalaxy/aperture.py +315 -0
- gitgalaxy/audit_recorder.py +277 -0
- gitgalaxy/chronometer.py +335 -0
- gitgalaxy/detector.py +1531 -0
- gitgalaxy/galaxyscope.py +987 -0
- gitgalaxy/gitgalaxy_standards_v011.py +10590 -0
- gitgalaxy/gpu_recorder.py +316 -0
- gitgalaxy/guidestar_lens.py +300 -0
- gitgalaxy/language_lens.py +819 -0
- gitgalaxy/llm_recorder.py +538 -0
- gitgalaxy/prism.py +484 -0
- gitgalaxy/record_keeper.py +409 -0
- gitgalaxy/signal_processor.py +797 -0
- gitgalaxy/spectral_auditor.py +392 -0
- gitgalaxy-0.1.0.dist-info/METADATA +85 -0
- gitgalaxy-0.1.0.dist-info/RECORD +21 -0
- gitgalaxy-0.1.0.dist-info/WHEEL +5 -0
- gitgalaxy-0.1.0.dist-info/entry_points.txt +2 -0
- gitgalaxy-0.1.0.dist-info/licenses/LICENSE +73 -0
- gitgalaxy-0.1.0.dist-info/top_level.txt +1 -0
gitgalaxy/__init__.py
ADDED
|
File without changes
|
gitgalaxy/aperture.py
ADDED
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
import fnmatch
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, Any, Set, Optional, TypedDict, Union, List, Tuple
|
|
6
|
+
|
|
7
|
+
# ==============================================================================
|
|
8
|
+
# GitGalaxy Phase 0.1: Ingestion & Filtering (The Solar Shield)
|
|
9
|
+
# Strategy: v6.2.0 (Bayesian Optics, Intent Overrides & Stateful Caching)
|
|
10
|
+
# Architecture: Lead Shield -> Path Gate -> Intent Gate -> Content Gate
|
|
11
|
+
# ==============================================================================
|
|
12
|
+
|
|
13
|
+
# --- CUSTOM EXCEPTION HIERARCHY (The Lead Shield) ---
|
|
14
|
+
|
|
15
|
+
class ApertureError(Exception):
    """Root of the Solar Shield exception hierarchy: every failure raised
    by the filtering process derives from this type so callers can catch
    the whole family with a single handler."""
|
|
18
|
+
|
|
19
|
+
class InaccessibleArtifactError(ApertureError):
    """Signals that an artifact could not be read at all -- typically an
    OS permission failure or a missing/corrupted path."""
|
|
22
|
+
|
|
23
|
+
class SaturationError(ApertureError):
    """Signals a content buffer too dense or minified for the detector
    to refract safely."""
|
|
26
|
+
|
|
27
|
+
class FilterResult(TypedDict):
    """Structured telemetry returned by the Solar Shield for the Pipeline Orchestrator."""
    # True only when the artifact passed every gate and should reach the detector.
    is_in_scope: bool
    # Spectral classification, e.g. "source_code", "radio_noise", "binary_debris", "saturated".
    band: str
    # Human-readable pass/block explanation; None until a verdict is reached.
    reason: Optional[str]
    # POSIX-normalized path of the inspected artifact.
    path: str
    # On-disk size in bytes; 0 when the file is missing or unreadable.
    size_bytes: int
    # Line count of the content buffer; 0 if the content gate never ran.
    total_loc: int
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ApertureFilter:
    """
    Primary solar shield for the telescope. Performs perimeter gating to ensure
    only maintainable source code matter reaches the detector. Integrates with
    GuideStar's Bayesian 'Intent Locks' to dynamically adjust suppression
    thresholds for known, high-priority artifacts.
    """

    def __init__(
        self,
        root_dir: Union[str, Path],
        language_definitions: Dict[str, Any],
        aperture_config: Dict[str, Any],
        parent_logger: Optional[logging.Logger] = None
    ):
        # --- TELEMETRY SYNC ---
        # Inherit the orchestrator's logger (and its level) when provided so all
        # shield telemetry lands in the same log tree; otherwise run standalone.
        if parent_logger:
            self.logger = parent_logger.getChild("filter")
            self.logger.setLevel(parent_logger.level)
        else:
            self.logger = logging.getLogger("filter")
            self.logger.setLevel(logging.INFO)
        self.root = Path(root_dir).resolve()
        self.registry = language_definitions

        # 1. Safely bind the config passed down from the Orchestrator
        # (an explicit None collapses to {} so every .get() below is safe).
        self.config = aperture_config or {}

        # 2. Extract specific variables from the config payload
        self.bands = self.config.get("BANDS", {
            "RADIO": "radio_noise", "MICROWAVE": "binary_debris",
            "DARK_MATTER": "unknown_ext", "INFRARED": "saturated",
            "VISIBLE": "source_code"
        })
        self.black_holes = set(self.config.get("EXCLUDED_DIRECTORIES", set()))
        self.black_hole_exts = set(self.config.get("BLACKLISTED_EXTENSIONS", set()))

        # --- STATE CACHE ---
        # Paths that were ever intent-locked; consulted by both phase entry
        # points so Phase 0 and Phase 1 calls agree on intent status.
        self._intent_cache: Set[str] = set()

        self.logger.debug(f"Initializing Solar Shield for sector: '{self.root.name}'...")

        # Optimized lookup sets from Language Definitions
        self.whitelisted_extensions: Set[str] = set()
        self.ecosystem_anchors: Set[str] = set()

        # Flatten every language definition into O(1) membership sets.
        # NOTE(review): extensions are assumed to be stored lowercase with a
        # leading dot (they are compared against ext.lower()) -- confirm
        # against the language registry's producer.
        for lang_id, data in self.registry.items():
            self.whitelisted_extensions.update(data.get('extensions', []))
            self.ecosystem_anchors.update(data.get('exact_matches', []))

        self.ignore_patterns = self._load_gitignore_patterns()

        self.logger.info(
            f"Dispatching Survey Probe to Sector '{self.root.name}' | "
            f"Tracking {len(self.whitelisted_extensions)} spectral bands."
        )

    def evaluate_path_integrity(self, file_path: Union[str, Path], has_intent: bool = False) -> Tuple[bool, int, str]:
        """
        [PHASE 0 ENTRY POINT]
        Fast path-only check to build the CensusArray (Radar Walk).
        Now safely fetches file size to prevent the 0-Byte Dark Matter telemetry bug.

        Args:
            file_path: The artifact's path.
            has_intent: True if the GuideStar mapped this file in a manifest or priority list.

        Returns:
            Tuple: (is_valid: bool, size_bytes: int, reason: str)
        """
        path_obj = Path(file_path)
        normalized_path = path_obj.as_posix()
        _, ext = os.path.splitext(path_obj.name)

        # Prefer a root-relative path for logging; fall back to the raw path
        # when the artifact lives outside the survey root.
        try:
            relative_path = path_obj.relative_to(self.root).as_posix()
        except ValueError:
            relative_path = normalized_path

        # Safely fetch size before making drop decisions to ensure accurate Dark Matter telemetry
        try:
            size_bytes = path_obj.stat().st_size if path_obj.exists() else 0
        except OSError:
            size_bytes = 0

        # 0. TIER 0.5: THE ABSOLUTE EXTENSION SHIELD (Impervious to Intent)
        # Drops SVGs, 3D Models, PDFs, and PGP keys before disk I/O ever happens.
        # Runs BEFORE the intent check on purpose: a GuideStar lock cannot
        # resurrect a blacklisted extension.
        if ext.lower() in self.black_hole_exts:
            reason = f"Blocked (Explicitly Blacklisted Media/Binary Extension: '{ext}')"
            self.logger.debug(f"{reason}: {relative_path}")
            return False, size_bytes, reason

        # Resolve intent status against cache
        # (monotonic: once a path is intent-locked it stays locked for the session).
        active_intent = has_intent or (normalized_path in self._intent_cache)
        if active_intent:
            self._intent_cache.add(normalized_path)

        # 1. TIER 1: THE SOLAR SHIELD (Radio Noise)
        if not self._check_solar_shield(relative_path, has_intent=active_intent):
            reason = "Blocked (Path Excluded by System Rules, Hidden Directory, or .gitignore)"
            self.logger.debug(f"{reason}: {relative_path}")
            return False, size_bytes, reason

        # --- INTENT BYPASS ---
        # If the file has a GuideStar lock, it bypasses linguistic whitelisting entirely.
        if active_intent:
            reason = "Passed (GuideStar Intent Lock Bypassed Tier 2)"
            self.logger.debug(f"{reason}: {relative_path}")
            return True, size_bytes, reason

        # 2. TIER 2: THE VISIBLE SPECTRUM (Linguistic Whitelisting)
        # Rule 2.1: Deep Space Remnants (Spec 2.3.3.B)
        # We allow extensionless files through without intent to be evaluated by the Shebang Scanner
        if not ext:
            reason = "Passed (Extensionless -> Shebang scan required)"
            self.logger.debug(f"{reason}: {relative_path}")
            return True, size_bytes, reason

        # Rule 2.2: Known Ecosystem Anchor or Whitelisted Extension
        if path_obj.name in self.ecosystem_anchors or ext.lower() in self.whitelisted_extensions:
            reason = "Passed (Ecosystem Anchor or Whitelisted Ext)"
            return True, size_bytes, reason

        reason = f"Blocked (Unsupported or Unrecognized Extension: '{ext}')"
        self.logger.debug(f"{reason}: {relative_path}")
        return False, size_bytes, reason

    def is_in_scope(self, file_path: Union[str, Path], content: Optional[str] = None, has_intent: bool = False) -> FilterResult:
        """Runs the 5-tier perimeter gate to validate maintainable code matter.

        Args:
            file_path: The artifact's path (must exist on disk for Tier 0).
            content: The already-decoded file content; required for the
                Tier 3/4 content gates.
            has_intent: GuideStar intent flag; merged with the session cache.

        Returns:
            A FilterResult verdict; ``is_in_scope`` is True only when every
            tier passed. All I/O and internal failures are converted into
            blocked verdicts rather than propagated.
        """
        path_obj = Path(file_path)
        normalized_path = path_obj.as_posix()

        try:
            relative_path = path_obj.relative_to(self.root).as_posix()
        except ValueError:
            relative_path = normalized_path

        # Pull from state to guarantee consistency between Phase 0 and Phase 1 calls
        active_intent = has_intent or (normalized_path in self._intent_cache)

        # Pessimistic default: out of scope until every gate passes.
        result: FilterResult = {
            "is_in_scope": False,
            "band": self.bands.get("VISIBLE", "source_code"),
            "reason": None,
            "path": normalized_path,
            "size_bytes": 0,
            "total_loc": 0
        }

        try:
            # --- TIER 0: THE LEAD SHIELD (Resource Guarding) ---
            # NOTE: Intent cannot override file size or existence checks. Physics are absolute.
            if not path_obj.exists():
                raise InaccessibleArtifactError("Artifact missing or path invalid")

            stats = path_obj.stat()
            result["size_bytes"] = stats.st_size

            max_mb = self.config.get("MAX_FILE_SIZE_MB", 10)
            if stats.st_size > (max_mb * 1024 * 1024):
                self.logger.warning(f"Resource Guard: Rejected massive file '{relative_path}' ({stats.st_size / (1024 * 1024):.1f} MB)")
                result["band"] = self.bands.get("INFRARED", "saturated")
                result["reason"] = f"Blocked (File Size Exceeds Configured Limit: > {max_mb}MB)"
                return result

            # --- TIER 1 & 2: PATH VALIDATION ---
            is_valid, size_bytes, reason = self.evaluate_path_integrity(path_obj, has_intent=active_intent)
            if not is_valid:
                result["band"] = self.bands.get("RADIO", "radio_noise")
                result["reason"] = reason
                result["size_bytes"] = size_bytes
                return result

            # --- TIER 3 & 4: ARTIFACT INTEGRITY (Content Gate) ---
            # The orchestrator is expected to supply the decoded buffer; a
            # missing buffer is a caller bug and blocks the artifact.
            if content is None:
                self.logger.warning(f"Protocol Violation: Missing content buffer for '{relative_path}'")
                result["reason"] = "Protocol Violation: Missing content buffer"
                return result

            integrity = self._check_artifact_integrity(content, relative_path)
            result["total_loc"] = integrity["loc"]

            if not integrity["valid"]:
                result["band"] = integrity["band"]
                result["reason"] = integrity["reason"]
                return result

            # --- SUCCESS ---
            result["is_in_scope"] = True
            self.logger.debug(f"Aperture Lock: '{relative_path}' safely in scope (LOC: {result['total_loc']}).")
            return result

        # Expected I/O failures: classify as radio noise, keep going.
        except (InaccessibleArtifactError, PermissionError) as e:
            self.logger.warning(f"Inaccessible Artifact: '{relative_path}' | {str(e)}")
            result["band"] = self.bands.get("RADIO", "radio_noise")
            result["reason"] = f"I/O Exception: {str(e)}"
            return result
        # Catch-all boundary: never let a shield failure crash the pipeline.
        except Exception as e:
            self.logger.error(f"Critical Solar Shield Failure on '{relative_path}': {str(e)}", exc_info=True)
            result["reason"] = f"Internal Exception: {str(e)}"
            return result

    def _check_artifact_integrity(self, content: str, rel_path: str) -> Dict[str, Any]:
        """Inspects the Photon Buffer for corruption or saturation.

        Returns a dict with keys: valid (bool), band (str), reason
        (str | None), loc (int, total line count of the buffer).
        """
        report = {"valid": True, "band": self.bands.get("VISIBLE", "source_code"), "reason": None, "loc": 0}

        # --- TIER 3: THE LEAD SHIELD (Binary Detection) ---
        # FIX: Removed the [:1024] slice. We must check the entire buffer
        # so binary files don't sneak into Visible Stars.
        if '\x00' in content:
            self.logger.debug(f"Integrity check failed (Opaque Binary / Null Bytes): {rel_path}")
            report.update({
                "valid": False,
                "band": self.bands.get("MICROWAVE", "binary_debris"),
                "reason": "Blocked (Binary Format: Null bytes detected during read)"
            })
            return report

        # --- TIER 4: INFRARED GATE (Minification & Saturation) ---
        max_line = self.config.get("MAX_LINE_LENGTH", 500)
        scan_limit = self.config.get("MINIFICATION_SCAN_LIMIT", 50)

        lines = content.splitlines()
        report["loc"] = len(lines)

        # Only the first `scan_limit` lines are inspected; an over-long line
        # beyond that window is deliberately tolerated.
        for i, line in enumerate(lines):
            if i < scan_limit:
                if len(line) > max_line:
                    self.logger.debug(f"Integrity check failed: '{rel_path}' saturated > {max_line} chars on line {i+1}")
                    report.update({
                        "valid": False,
                        "band": self.bands.get("INFRARED", "saturated"),
                        "reason": f"Blocked (Minified or Dense Data: Line {i+1} exceeds {max_line} characters)"
                    })
                    return report
            else:
                break

        return report

    def _load_gitignore_patterns(self) -> List[str]:
        """Reads local .gitignore files to identify Radio Noise.

        Only the root-level .gitignore is consulted; nested ignore files
        are not loaded. Comments and blank lines are skipped. Failures
        degrade to an empty pattern list with a warning.
        """
        patterns = []
        ignore_file = self.root / ".gitignore"
        if ignore_file.exists():
            try:
                with ignore_file.open('r', encoding='utf-8') as f:
                    for line in f:
                        line = line.strip()
                        if line and not line.startswith('#'):
                            patterns.append(line)
            except (IOError, OSError) as e:
                self.logger.warning(f"Failed to load .gitignore rules: {e}")
        return patterns

    def _check_solar_shield(self, rel_path: str, has_intent: bool = False) -> bool:
        """Determines if the path sits in a blocked or ignored sector.

        Returns True when the path is clear; False when any component is a
        configured black hole, a non-anchored hidden directory, or a
        .gitignore match.
        """
        parts = rel_path.split('/')

        for part in parts:
            if part in self.black_holes:
                return False

            # Hidden Path Suppression (e.g., .github/, .vscode/)
            if part.startswith('.') and part not in self.ecosystem_anchors:
                # If GuideStar explicitly mapped a dotfile (e.g., .hooks/pre-commit), lift the blockade.
                if has_intent:
                    self.logger.debug(f"Solar Shield VIP Pass: Bypassing hidden-path block for intent-locked artifact '{part}'")
                    continue
                self.logger.debug(f"Blocked by Hidden Path Component '{part}': {rel_path}")
                return False

        # Simplified gitignore semantics via fnmatch: directory patterns
        # ("build/") are tested against every component, other patterns
        # against the full relative path and the basename.
        # NOTE(review): the filename component is also tested against
        # directory patterns here -- confirm that is intended.
        for pattern in self.ignore_patterns:
            if pattern.endswith('/') and any(fnmatch.fnmatch(p + '/', pattern) for p in parts):
                return False
            if fnmatch.fnmatch(rel_path, pattern) or fnmatch.fnmatch(parts[-1], pattern):
                return False

        return True
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import argparse
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
from . import gitgalaxy_standards_v011 as config
|
|
9
|
+
|
|
10
|
+
# ==============================================================================
|
|
11
|
+
# GitGalaxy Phase 8 & 9: Astrograph Auditor (The Forensic Record)
|
|
12
|
+
# Strategy v6.2.0 Protocol: Raw-Matter Preservation & Columnar Decoding
|
|
13
|
+
# Stage 2.5: Total Feature Parity (Descriptive Descriptors + Performance)
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
|
|
16
|
+
class AuditRecorder:
    """
    The GitGalaxy Audit Recorder.

    PURPOSE: Generates a verbose, human-readable forensic log from live RAM data.
    Designed for compliance, debugging, and deep-dive analysis.
    """

    def __init__(self, parent_logger=None):
        # Local import preserved from the original build (logging is not a
        # module-level import in this file).
        import logging
        self.logger = parent_logger.getChild("audit_recorder") if parent_logger else logging.getLogger("audit_recorder")

        # --- DYNAMIC SCHEMA FETCH ---
        # Missing schema keys degrade to empty vectors so partial standards
        # configs still produce an audit instead of raising.
        schemas = getattr(config, "RECORDING_SCHEMAS", {})
        self.RISK_SCHEMA = schemas.get("RISK_SCHEMA", [])
        # Note: The pipeline calls it SIGNAL_SCHEMA, but the Auditor references it as HIT_SCHEMA
        self.HIT_SCHEMA = schemas.get("SIGNAL_SCHEMA", [])

        # Performance optimization: Pre-cache all labels to avoid regex on the hot path
        self._label_cache = {}
        self._friendly_map = schemas.get("FRIENDLY_MAP", {})

    def format_label(self, key: str) -> str:
        """Translates raw keys into descriptive labels using a fast-lookup cache.

        Strips scaling suffixes ("_x10", "_x1000") before consulting the
        friendly map; falls back to Title-Casing the snake_case key.
        """
        if key in self._label_cache:
            return self._label_cache[key]

        clean_key = re.sub(r'_x\d+$', '', key)
        label = self._friendly_map.get(clean_key)

        if not label:
            label = " ".join(word.capitalize() for word in clean_key.split('_'))

        self._label_cache[key] = label
        return label

    def descale(self, key: str, value: Any, default_scalar: float = 1.0) -> Any:
        """Dynamically scales integers back to floats using a fixed-string check.

        Non-numeric values pass through untouched; bools are explicitly
        excluded because bool is a subclass of int.
        """
        if not isinstance(value, (int, float)) or isinstance(value, bool):
            return value

        if key.endswith("_x1000"):
            return round(value / 1000.0, 3)
        if key.endswith("_x10"):
            return round(value / 10.0, 3)

        if default_scalar != 1.0:
            return round(value / default_scalar, 3)
        return value

    def generate_report(self, stars, singularity, summary, forensic_report, session_meta, output_path):
        """
        Subphase 2.3: Transforms raw pipeline data into a verbose forensic manifest.
        Optimized to handle projects with 10,000+ files efficiently.

        Args:
            stars: Per-file analysis dicts ("Visible Matter").
            singularity: Excluded-artifact dicts ("Dark Matter").
            summary: Global synthesis summary (constellation metadata etc.).
            forensic_report: Pre-built high-value findings, embedded verbatim.
            session_meta: Session / engine / git metadata for traceability.
            output_path: Destination path for the JSON manifest. FIX: this
                path is now honored; the previous build routed every report
                into a hardcoded developer directory.
        """
        # 1. Forensic Traceability Anchor
        git_audit = session_meta.get("git_audit", {})
        forensic_trail = {
            "Analysis Context": {
                "Engine Identity": session_meta.get("engine", "GitGalaxy Scope v6.2.0"),
                "Target Root Name": session_meta.get("target", "Unknown"),
                "Absolute Project Path": session_meta.get("target_directory", "Unknown"),
                "Analysis ISO Timestamp": session_meta.get("timestamp"),
                "Total Scan Duration": f"{session_meta.get('duration_seconds', 0.0)} seconds"
            },
            "Source Control Footprint (Immutable Anchor)": {
                "Active Branch": git_audit.get("branch", "N/A"),
                "Commit Hash (SHA-1)": git_audit.get("commit_hash", "N/A"),
                "Remote Origin URL": git_audit.get("remote_url", "Local/Disconnected"),
                "Last Code Integration Date": git_audit.get("latest_commit_date", "Unknown")
            }
        }

        # --- DYNAMIC TRANSLATION FETCH ---
        schemas = getattr(config, "RECORDING_SCHEMAS", {})
        exposure_labels = schemas.get("EXPOSURE_LABELS", {})

        # Pre-calculate labels for vectors to avoid repeating work in the loop
        risk_labels = [exposure_labels.get(k, self.format_label(k)) for k in self.RISK_SCHEMA]
        hit_labels = [self.format_label(k) for k in self.HIT_SCHEMA]

        # --- NEW CONSTELLATION SORTING & HIERARCHY ---
        pretty_constellations = {}
        constellations_meta = summary.get("constellations", {})

        # Sort folders by mass descending
        sorted_constellations = sorted(
            constellations_meta.items(),
            key=lambda x: x[1].get("total_mass", 0.0),
            reverse=True
        )

        # Initialize the ordered dictionary with constellation-level metrics
        for c_name, c_data in sorted_constellations:
            pretty_constellations[c_name] = {
                "Constellation Mass": c_data.get("total_mass", 0.0),
                "File Count": c_data.get("file_count", 0),
                "Average Risk Exposures": {
                    exposure_labels.get(k, self.format_label(k)): f"{v}%"
                    for k, v in c_data.get("avg_exposures", {}).items()
                },
                "Stars / Files": {}
            }

        # Hoisted loop invariant: the documentation-language set was being
        # rebuilt once per star in the original build.
        doc_languages = {"markdown", "plaintext", "rst", "text", "md"}

        # 2. Row Reconstruction (Visible Stars) mapped into Constellations
        for star in stars:
            path = star.get("path", "Unknown")
            telemetry = star.get("telemetry", {})
            lang_raw = str(star.get("lang_id", "Unknown")).lower()
            c_name = star.get("constellation", "__monolith__")

            # --- THE ULTIMATE UPSTREAM BYPASS FIX ---
            # Documentation files may arrive with a truncated risk vector;
            # pad them with a synthetic profile so the manifest stays uniform.
            if lang_raw in doc_languages and len(star.get("risk_vector", [])) < len(self.RISK_SCHEMA):
                # Inject 13-point synthetic Risk Blanket
                star["risk_vector"] = [0.0, 100.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 100.0, 100.0, 0.0, 100.0, 0.0]
                telemetry["control_flow_ratio"] = 0.0
                if not star.get("file_impact"):
                    star["file_impact"] = round(max(star.get("total_loc", 1) / 50.0, 1.0), 2)

            # --- SYSTEM LEVEL FIX: Dynamic Identity Block ---
            identity_block = {
                "Filename": star.get("name", Path(path).name),
                "Path": path,
                "Language": str(star.get("lang_id", "Unknown")).title(),
                "Architect": telemetry.get("ownership", "Unknown Architect")
            }

            # Fold custom domain metadata into the identity block.
            domain_data = telemetry.get("domain_context", {})
            for custom_key, custom_val in domain_data.items():
                if custom_key != "ownership":
                    display_key = custom_key.replace('_', ' ').title()
                    if display_key == "Purpose":
                        display_key = "Museum Entry"
                    identity_block[display_key] = custom_val

            identity_block["Lock Tier"] = star.get("lock_tier", telemetry.get("identity_lock_tier", 4))
            identity_block["Identity Proof"] = telemetry.get("identity_source_proof", star.get("source_proof", "Discovery"))

            # --- THE FACTION INTERCEPTOR ---
            # The Civil War metric encodes tabs-vs-spaces as a percentage;
            # translate its sentinel values into readable faction names.
            exposures_dict = {}
            for label, v in zip(risk_labels, star.get("risk_vector") or [0.0] * len(risk_labels)):
                if label == "Civil War Exposure":
                    if v == 0.0:
                        exposures_dict[label] = "Team Tabs"
                    elif v == 100.0:
                        exposures_dict[label] = "Team Spaces"
                    elif v == 50.0:
                        exposures_dict[label] = "Neutral / Deadlocked"
                    else:
                        exposures_dict[label] = f"Mixed ({100-v:.1f}% Tabs / {v:.1f}% Spaces)"
                else:
                    exposures_dict[label] = f"{round(v, 2)}%"

            # Assemble the star profile
            star_profile = {
                "1. Identity": identity_block,
                "2. Spatial Coordinates": {
                    "X": star.get("pos_x", 0.0),
                    "Y": star.get("pos_y", 0.0),
                    "Z": star.get("pos_z", 0.0)
                },
                "3. Galactic Profile": {
                    "Total LOC": star.get("total_loc", 0),
                    "coding LOC": star.get("coding_loc", 0),
                    "Documentation LOC": star.get("doc_loc", 0),
                    "Structural Mass": round(star.get("file_impact", 0.0), 3),
                    "Control Flow Ratio": f"{round(telemetry.get('control_flow_ratio', 0.0) * 100, 1)}%",
                    "Popularity Rank": telemetry.get("popularity", 0)
                },
                "4. Risk Exposures": exposures_dict,
                "5. Function Analysis (Satellites)": [
                    {
                        "Function Name": sat.get("name", "Unknown"),
                        "Structural Impact": sat.get("impact", sat.get("magnitude", 0.0)),
                        "Lines of Code (LOC)": sat.get("loc", 0),
                        "Control Flow Branches": sat.get("branch", sat.get("branch_count", 0)),
                        "Input Parameters": sat.get("args", sat.get("args_count", 0)),
                        "Control Flow Ratio": f"{round(sat.get('control_flow_ratio', sat.get('cf_ratio', 0.0)) * 100, 1)}%",
                        "Start Line": sat.get("start_line", 0),
                        "End Line": sat.get("end_line", 0)
                    }
                    for sat in star.get("satellites", []) if isinstance(sat, dict)
                ],
                "6. Structural DNA (Raw Hits)": {
                    label: v for label, v in zip(hit_labels, star.get("hit_vector") or [0] * len(hit_labels))
                },
                "7. Extracted Dependencies": sorted(list(star.get("raw_imports", [])))
            }

            # Map the star into its parent constellation (create a minimal
            # bucket for constellations the summary didn't pre-register).
            if c_name not in pretty_constellations:
                pretty_constellations[c_name] = {"Constellation Mass": 0.0, "Stars / Files": {}}
            pretty_constellations[c_name]["Stars / Files"][path] = star_profile

        # 3. Format Dark Matter (Excluded Artifacts)
        pretty_singularity = []
        target_dir = Path(session_meta.get("target_directory", ""))

        # 3.1 Format standard excluded items for the JSON output
        for dark in singularity:
            rel_path = dark.get("path", "Unknown")
            abs_path = target_dir / rel_path

            # Physically weighs the file on disk if the pipeline dropped the byte count
            try:
                actual_size = abs_path.stat().st_size if abs_path.exists() else dark.get('size_bytes', 0)
            except Exception:
                actual_size = dark.get('size_bytes', 0)

            pretty_singularity.append({
                "Path": rel_path,
                "Forensic Category": "Dark Matter (Excluded Artifact)",
                "Diagnostic Reason": dark.get("reason", "Engine Shielding (Format Excluded)"),
                "Size": f"{actual_size} bytes",
                "Identity Confidence": f"{round(dark.get('identity_confidence', 0.0) * 100, 1)}%",
                "Discovery Proof": dark.get("identity_source_proof", "Radar Scan")
            })

        # 3.2 Append optically bypassed artifacts to the local output list
        for anon_path in summary.get("singularity", {}).get("unparsable_artifacts", []):
            pretty_singularity.append({
                "Path": anon_path,
                "Forensic Category": "Dark Matter (Optical Bypass)",
                "Diagnostic Reason": "Engine Bypass (Dense Structure or Unrecognized Syntax)",
                "Size": "Unknown (Prism Bypass)",
                "Identity Confidence": "0.0% (Scan Yielded No Data)",
                "Discovery Proof": "Logic Splicer Shielding"
            })

        # 4. Final Mission Archive Packaging
        mission_audit = {
            "Audit Protocol": "GitGalaxy v6.2.0-Audit",
            "1. Forensic Trail (Traceability)": forensic_trail,
            "2. Global Synthesis Summary": summary,
            "3. High-Value Forensic Report": forensic_report,
            "4. Dark Matter (Excluded Artifacts)": pretty_singularity,
            "5. Visible Matter (Scanned Artifacts)": pretty_constellations
        }

        # --- OUTPUT ROUTING ---
        # FIX: Honor the caller-supplied output_path. The previous build
        # discarded its directory component and wrote every manifest into a
        # hardcoded developer-machine path (/srv/storage_16tb/...), which
        # silently broke report output on any other host.
        target_path = Path(output_path)
        target_path.parent.mkdir(parents=True, exist_ok=True)

        try:
            with open(target_path, 'w', encoding='utf-8') as f:
                json.dump(mission_audit, f, indent=4, ensure_ascii=False)
            self.logger.info(f"Audit Success: Forensic manifest sealed -> {target_path}")
        except Exception as e:
            self.logger.error(f"Audit Write Error: {e}")
|
|
268
|
+
|
|
269
|
+
def decode_galaxy(input_path, output_path=None):
    """Standalone decoding logic preserved for CLI compatibility."""
    # NOTE(review): this is a no-op placeholder -- the symbol appears to be
    # kept only so external CLI callers don't break; confirm whether the
    # columnar decoder is meant to return here in a later release.
    pass
|
|
272
|
+
|
|
273
|
+
if __name__ == "__main__":
    # CLI entry point. NOTE(review): after parsing, no further action is
    # visible here and decode_galaxy is a stub, so the CLI currently only
    # validates its arguments -- confirm whether a decode call is missing.
    parser = argparse.ArgumentParser(description="GitGalaxy v6.2.0 Astrograph Auditor CLI")
    parser.add_argument("input", help="Path to columnar galaxy.json")
    parser.add_argument("--out", help="Optional output path")
    args = parser.parse_args()
|