gitgalaxy 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitgalaxy-0.1.0/LICENSE +73 -0
- gitgalaxy-0.1.0/PKG-INFO +85 -0
- gitgalaxy-0.1.0/README.md +68 -0
- gitgalaxy-0.1.0/gitgalaxy/__init__.py +0 -0
- gitgalaxy-0.1.0/gitgalaxy/aperture.py +315 -0
- gitgalaxy-0.1.0/gitgalaxy/audit_recorder.py +277 -0
- gitgalaxy-0.1.0/gitgalaxy/chronometer.py +335 -0
- gitgalaxy-0.1.0/gitgalaxy/detector.py +1531 -0
- gitgalaxy-0.1.0/gitgalaxy/galaxyscope.py +987 -0
- gitgalaxy-0.1.0/gitgalaxy/gitgalaxy_standards_v011.py +10590 -0
- gitgalaxy-0.1.0/gitgalaxy/gpu_recorder.py +316 -0
- gitgalaxy-0.1.0/gitgalaxy/guidestar_lens.py +300 -0
- gitgalaxy-0.1.0/gitgalaxy/language_lens.py +819 -0
- gitgalaxy-0.1.0/gitgalaxy/llm_recorder.py +538 -0
- gitgalaxy-0.1.0/gitgalaxy/prism.py +484 -0
- gitgalaxy-0.1.0/gitgalaxy/record_keeper.py +409 -0
- gitgalaxy-0.1.0/gitgalaxy/signal_processor.py +797 -0
- gitgalaxy-0.1.0/gitgalaxy/spectral_auditor.py +392 -0
- gitgalaxy-0.1.0/gitgalaxy.egg-info/PKG-INFO +85 -0
- gitgalaxy-0.1.0/gitgalaxy.egg-info/SOURCES.txt +23 -0
- gitgalaxy-0.1.0/gitgalaxy.egg-info/dependency_links.txt +1 -0
- gitgalaxy-0.1.0/gitgalaxy.egg-info/entry_points.txt +2 -0
- gitgalaxy-0.1.0/gitgalaxy.egg-info/top_level.txt +1 -0
- gitgalaxy-0.1.0/pyproject.toml +33 -0
- gitgalaxy-0.1.0/setup.cfg +4 -0
gitgalaxy-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
PolyForm Noncommercial License 1.0.0
|
|
2
|
+
|
|
3
|
+
<https://polyformproject.org/licenses/noncommercial/1.0.0>
|
|
4
|
+
|
|
5
|
+
## Acceptance
|
|
6
|
+
|
|
7
|
+
In order to get any license under these terms, you must agree to them as both strict obligations and conditions to all your licenses.
|
|
8
|
+
|
|
9
|
+
## Copyright License
|
|
10
|
+
|
|
11
|
+
The licensor grants you a copyright license for the software to do everything you might do with the software that would otherwise infringe the licensor's copyright in it for any permitted purpose. However, you may only distribute the software according to [Distribution License](#distribution-license) and make changes or new works based on the software according to [Changes and New Works License](#changes-and-new-works-license).
|
|
12
|
+
|
|
13
|
+
## Distribution License
|
|
14
|
+
|
|
15
|
+
The licensor grants you an additional copyright license to distribute copies of the software. Your license to distribute covers distributing the software with changes and new works permitted by [Changes and New Works License](#changes-and-new-works-license).
|
|
16
|
+
|
|
17
|
+
## Notices
|
|
18
|
+
|
|
19
|
+
You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms or the URL for them above, as well as copies of any plain-text lines beginning with `Required Notice:` that the licensor provided with the software. For example:
|
|
20
|
+
|
|
21
|
+
> Required Notice: Copyright Joe Esquibel (https://github.com/squid-protocol/gitgalaxy)
|
|
22
|
+
|
|
23
|
+
## Changes and New Works License
|
|
24
|
+
|
|
25
|
+
The licensor grants you an additional copyright license to make changes and new works based on the software for any permitted purpose.
|
|
26
|
+
|
|
27
|
+
## Patent License
|
|
28
|
+
|
|
29
|
+
The licensor grants you a patent license for the software that covers patent claims the licensor can license, or becomes able to license, that you would infringe by using the software.
|
|
30
|
+
|
|
31
|
+
## Noncommercial Purposes
|
|
32
|
+
|
|
33
|
+
Any noncommercial purpose is a permitted purpose.
|
|
34
|
+
|
|
35
|
+
## Personal Uses
|
|
36
|
+
|
|
37
|
+
Personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, amateur pursuits, or religious observance, without any anticipated commercial application, is use for a permitted purpose.
|
|
38
|
+
|
|
39
|
+
## Noncommercial Organizations
|
|
40
|
+
|
|
41
|
+
Use by any charitable organization, educational institution, public research organization, public safety or health organization, environmental protection organization, or government institution is use for a permitted purpose regardless of the source of funding or obligations resulting from the funding.
|
|
42
|
+
|
|
43
|
+
## Fair Use
|
|
44
|
+
|
|
45
|
+
You may have "fair use" rights for the software under the law. These terms do not limit them.
|
|
46
|
+
|
|
47
|
+
## No Other Rights
|
|
48
|
+
|
|
49
|
+
These terms do not allow you to sublicense or transfer any of your licenses to anyone else, or prevent the licensor from granting licenses to anyone else. These terms do not imply any other licenses.
|
|
50
|
+
|
|
51
|
+
## Patent Defense
|
|
52
|
+
|
|
53
|
+
If you make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
|
|
54
|
+
|
|
55
|
+
## Violations
|
|
56
|
+
|
|
57
|
+
The first time you are notified in writing that you have violated any of these terms, or done anything with the software not covered by your licenses, your licenses can nonetheless continue if you come into full compliance with these terms, and take practical steps to correct past violations, within 32 days of receiving notice. Otherwise, all your licenses end immediately.
|
|
58
|
+
|
|
59
|
+
## No Liability
|
|
60
|
+
|
|
61
|
+
***As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.***
|
|
62
|
+
|
|
63
|
+
## Definitions
|
|
64
|
+
|
|
65
|
+
The **licensor** is the individual or entity offering these terms, and the **software** is the software the licensor makes available under these terms.
|
|
66
|
+
|
|
67
|
+
**You** refers to the individual or entity agreeing to these terms.
|
|
68
|
+
|
|
69
|
+
**Your company** is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. **Control** means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
|
|
70
|
+
|
|
71
|
+
**Your licenses** are all the licenses granted to you for the software under these terms.
|
|
72
|
+
|
|
73
|
+
**Use** means anything you do with the software requiring one of your licenses.
|
gitgalaxy-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gitgalaxy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Visualizes codebase complexity as explorable 3D galaxies.
|
|
5
|
+
Author: Joseph Michael Esquibel
|
|
6
|
+
License: PolyForm Noncommercial License 1.0.0
|
|
7
|
+
Project-URL: Homepage, https://gitgalaxy.io
|
|
8
|
+
Project-URL: Source, https://github.com/squid-protocol/gitgalaxy
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: Other/Proprietary License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Dynamic: license-file
|
|
17
|
+
|
|
18
|
+
# GitGalaxy
|
|
19
|
+
|
|
20
|
+
Code is art. Logic is art. Systems engineering is art.
|
|
21
|
+
|
|
22
|
+
GitGalaxy reveals the complexity of codebases as explorable 3D galaxies by using source code as a seed for procedural generative art. It acts as a Rosetta Stone for code complexity, allowing you to visually compare the scale and risk exposure of different projects—from Apollo 11 to the Linux Kernel—under the same set of rules.
|
|
23
|
+
|
|
24
|
+
> **Note:** This is a condensed version of the full documentation. For the 200-page Architectural Master Blueprint, please visit: [https://github.com/squid-protocol/gitgalaxy](https://github.com/squid-protocol/gitgalaxy)
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quickstart
|
|
29
|
+
|
|
30
|
+
### 1. Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install gitgalaxy
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### 2. Scan a Repository
|
|
37
|
+
|
|
38
|
+
Point the GalaxyScope at any local repository or ZIP archive. The engine runs entirely on your local machine—zero data is transmitted.
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
gitgalaxy /path/to/your/repo
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### 3. View the Galaxy
|
|
45
|
+
|
|
46
|
+
GitGalaxy offers two ways to visualize your 3D architecture, both built on a strict Zero-Trust Privacy Model where your code never leaves your machine.
|
|
47
|
+
|
|
48
|
+
**Option A: The Web Viewer (Frictionless)**
|
|
49
|
+
Simply drag and drop your generated "your_repo_galaxy.json" file (or a .zip of your raw repository) directly into GitGalaxy.io. All rendering and scanning happen entirely in your browser's local memory.
|
|
50
|
+
|
|
51
|
+
**Option B: The Local Server (Enterprise & Offline)**
|
|
52
|
+
For teams working behind strict corporate firewalls, you can host the 3D viewer locally. Clone the repository and spin up the included Flask server:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
git clone https://github.com/squid-protocol/gitgalaxy.git
|
|
56
|
+
cd gitgalaxy
|
|
57
|
+
python app.py
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Then open http://localhost:5000 in your browser and drop your JSON file in securely.
|
|
61
|
+
|
|
62
|
+
## What Does it Measure?
|
|
63
|
+
|
|
64
|
+
Traditional code analysis tools act like strict linguists. GitGalaxy functions as a sensor grid. We do not assess "Bad Code"; we measure Risk Exposure. By analyzing 9 independent metrics, we procedurally generate a 3D universe where:
|
|
65
|
+
|
|
66
|
+
* **Stars (Files):** Mass indicates Lines of Code (LOC).
|
|
67
|
+
* **Bioluminescence (Pulse):** Indicates inbound references and popularity.
|
|
68
|
+
* **Satellites (Functions):** Moons orbit their parent stars; orbital reach dictates function length.
|
|
69
|
+
* **Color Overlays:** Instantly highlight Cognitive Load (Purple), Tech Debt (Red), Churn (Orange), and API Exposure (Electric Rose).
|
|
70
|
+
|
|
71
|
+
## Zero-Trust Architecture
|
|
72
|
+
|
|
73
|
+
Your code never leaves your machine. GitGalaxy performs 100% of its scanning and vectorization locally.
|
|
74
|
+
|
|
75
|
+
* **No Data Transmission:** Source code is never transmitted to any API, cloud database, or third-party service.
|
|
76
|
+
* **Ephemeral Memory Processing:** Repositories are unpacked into a volatile memory buffer (RAM) and are automatically purged when the browser tab is closed.
|
|
77
|
+
* **Privacy-by-Design:** Even when using the web-based viewer, the data remains behind the user's firewall at all times.
|
|
78
|
+
|
|
79
|
+
## License & Copyright
|
|
80
|
+
|
|
81
|
+
Copyright (c) 2026 Joseph Michael Esquibel
|
|
82
|
+
|
|
83
|
+
GitGalaxy is released under the PolyForm Noncommercial License 1.0.0. It is completely free for personal use, research, experiment, testing, and hobby projects. Use by educational or charitable organizations is also permitted.
|
|
84
|
+
|
|
85
|
+
Any commercial use or integration into commercial SaaS products or corporate CI/CD pipelines requires a separate commercial license. Please reach out via gitgalaxy.io to discuss commercial integration.
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# GitGalaxy
|
|
2
|
+
|
|
3
|
+
Code is art. Logic is art. Systems engineering is art.
|
|
4
|
+
|
|
5
|
+
GitGalaxy reveals the complexity of codebases as explorable 3D galaxies by using source code as a seed for procedural generative art. It acts as a Rosetta Stone for code complexity, allowing you to visually compare the scale and risk exposure of different projects—from Apollo 11 to the Linux Kernel—under the same set of rules.
|
|
6
|
+
|
|
7
|
+
> **Note:** This is a condensed version of the full documentation. For the 200-page Architectural Master Blueprint, please visit: [https://github.com/squid-protocol/gitgalaxy](https://github.com/squid-protocol/gitgalaxy)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Quickstart
|
|
12
|
+
|
|
13
|
+
### 1. Install
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install gitgalaxy
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
### 2. Scan a Repository
|
|
20
|
+
|
|
21
|
+
Point the GalaxyScope at any local repository or ZIP archive. The engine runs entirely on your local machine—zero data is transmitted.
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
gitgalaxy /path/to/your/repo
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### 3. View the Galaxy
|
|
28
|
+
|
|
29
|
+
GitGalaxy offers two ways to visualize your 3D architecture, both built on a strict Zero-Trust Privacy Model where your code never leaves your machine.
|
|
30
|
+
|
|
31
|
+
**Option A: The Web Viewer (Frictionless)**
|
|
32
|
+
Simply drag and drop your generated "your_repo_galaxy.json" file (or a .zip of your raw repository) directly into GitGalaxy.io. All rendering and scanning happen entirely in your browser's local memory.
|
|
33
|
+
|
|
34
|
+
**Option B: The Local Server (Enterprise & Offline)**
|
|
35
|
+
For teams working behind strict corporate firewalls, you can host the 3D viewer locally. Clone the repository and spin up the included Flask server:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
git clone https://github.com/squid-protocol/gitgalaxy.git
|
|
39
|
+
cd gitgalaxy
|
|
40
|
+
python app.py
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Then open http://localhost:5000 in your browser and drop your JSON file in securely.
|
|
44
|
+
|
|
45
|
+
## What Does it Measure?
|
|
46
|
+
|
|
47
|
+
Traditional code analysis tools act like strict linguists. GitGalaxy functions as a sensor grid. We do not assess "Bad Code"; we measure Risk Exposure. By analyzing 9 independent metrics, we procedurally generate a 3D universe where:
|
|
48
|
+
|
|
49
|
+
* **Stars (Files):** Mass indicates Lines of Code (LOC).
|
|
50
|
+
* **Bioluminescence (Pulse):** Indicates inbound references and popularity.
|
|
51
|
+
* **Satellites (Functions):** Moons orbit their parent stars; orbital reach dictates function length.
|
|
52
|
+
* **Color Overlays:** Instantly highlight Cognitive Load (Purple), Tech Debt (Red), Churn (Orange), and API Exposure (Electric Rose).
|
|
53
|
+
|
|
54
|
+
## Zero-Trust Architecture
|
|
55
|
+
|
|
56
|
+
Your code never leaves your machine. GitGalaxy performs 100% of its scanning and vectorization locally.
|
|
57
|
+
|
|
58
|
+
* **No Data Transmission:** Source code is never transmitted to any API, cloud database, or third-party service.
|
|
59
|
+
* **Ephemeral Memory Processing:** Repositories are unpacked into a volatile memory buffer (RAM) and are automatically purged when the browser tab is closed.
|
|
60
|
+
* **Privacy-by-Design:** Even when using the web-based viewer, the data remains behind the user's firewall at all times.
|
|
61
|
+
|
|
62
|
+
## License & Copyright
|
|
63
|
+
|
|
64
|
+
Copyright (c) 2026 Joseph Michael Esquibel
|
|
65
|
+
|
|
66
|
+
GitGalaxy is released under the PolyForm Noncommercial License 1.0.0. It is completely free for personal use, research, experiment, testing, and hobby projects. Use by educational or charitable organizations is also permitted.
|
|
67
|
+
|
|
68
|
+
Any commercial use or integration into commercial SaaS products or corporate CI/CD pipelines requires a separate commercial license. Please reach out via gitgalaxy.io to discuss commercial integration.
|
|
File without changes
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
import fnmatch
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, Any, Set, Optional, TypedDict, Union, List, Tuple
|
|
6
|
+
|
|
7
|
+
# ==============================================================================
|
|
8
|
+
# GitGalaxy Phase 0.1: Ingestion & Filtering (The Solar Shield)
|
|
9
|
+
# Strategy: v6.2.0 (Bayesian Optics, Intent Overrides & Stateful Caching)
|
|
10
|
+
# Architecture: Lead Shield -> Path Gate -> Intent Gate -> Content Gate
|
|
11
|
+
# ==============================================================================
|
|
12
|
+
|
|
13
|
+
# --- CUSTOM EXCEPTION HIERARCHY (The Lead Shield) ---
|
|
14
|
+
|
|
15
|
+
class ApertureError(Exception):
    """Base class for all errors generated by the Solar Shield filtering process.

    Catch this type to handle every filter-specific failure in one place.
    """


class InaccessibleArtifactError(ApertureError):
    """Raised when an artifact cannot be accessed due to OS permissions or path corruption.

    ``ApertureFilter.is_in_scope`` raises it internally when the target path
    does not exist and converts it into a blocked ``FilterResult``.
    """


class SaturationError(ApertureError):
    """Raised when a signal is too dense or minified to be safely refracted by the detector."""
|
|
26
|
+
|
|
27
|
+
class FilterResult(TypedDict):
    """Structured telemetry returned by the Solar Shield for the Pipeline Orchestrator.

    This is the dictionary shape produced by ``ApertureFilter.is_in_scope``.
    """

    # True only when the artifact passed every gate.
    is_in_scope: bool
    # Classification label taken from the filter's configured BANDS mapping.
    band: str
    # Human-readable explanation when blocked; None when nothing was recorded.
    reason: Optional[str]
    # POSIX-style form of the path that was evaluated.
    path: str
    # File size in bytes as reported by stat(); 0 when it could not be read.
    size_bytes: int
    # Number of lines counted in the supplied content buffer.
    total_loc: int
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ApertureFilter:
    """
    Primary solar shield for the telescope. Performs perimeter gating to ensure
    only maintainable source code matter reaches the detector. Integrates with
    GuideStar's Bayesian 'Intent Locks' to dynamically adjust suppression
    thresholds for known, high-priority artifacts.

    Gate order (see ``is_in_scope``):
        Tier 0   - existence and maximum file size.
        Tier 0.5 - blacklisted extensions (cannot be overridden by intent).
        Tier 1   - excluded directories, hidden path components, .gitignore.
        Tier 2   - extension / exact-filename whitelisting (skipped by intent).
        Tier 3   - null-byte (binary) detection on the content buffer.
        Tier 4   - over-long line (minification) detection.

    Config keys read from ``aperture_config`` (all optional):
        BANDS, EXCLUDED_DIRECTORIES, BLACKLISTED_EXTENSIONS,
        MAX_FILE_SIZE_MB (default 10), MAX_LINE_LENGTH (default 500),
        MINIFICATION_SCAN_LIMIT (default 50).
    """

    def __init__(
        self,
        root_dir: Union[str, Path],
        language_definitions: Dict[str, Any],
        aperture_config: Dict[str, Any],
        parent_logger: Optional[logging.Logger] = None
    ):
        """Bind configuration, build lookup sets and load the root .gitignore.

        Args:
            root_dir: Directory being scanned; resolved to an absolute path.
            language_definitions: Mapping of language id -> definition dict.
                Each definition may provide 'extensions' and 'exact_matches'.
            aperture_config: Filter configuration; a falsy value means "all defaults".
            parent_logger: When given, a child logger named "filter" is used
                and set to the parent's level.
        """
        # --- TELEMETRY SYNC ---
        if parent_logger is not None:
            self.logger = parent_logger.getChild("filter")
            self.logger.setLevel(parent_logger.level)
        else:
            self.logger = logging.getLogger("filter")
            self.logger.setLevel(logging.INFO)
        self.root = Path(root_dir).resolve()
        self.registry = language_definitions

        # 1. Safely bind the config passed down from the Orchestrator
        self.config = aperture_config or {}

        # 2. Extract specific variables from the config payload
        self.bands = self.config.get("BANDS", {
            "RADIO": "radio_noise", "MICROWAVE": "binary_debris",
            "DARK_MATTER": "unknown_ext", "INFRARED": "saturated",
            "VISIBLE": "source_code"
        })
        self.black_holes = set(self.config.get("EXCLUDED_DIRECTORIES", set()))
        self.black_hole_exts = set(self.config.get("BLACKLISTED_EXTENSIONS", set()))

        # --- STATE CACHE ---
        # Paths (POSIX strings) that were ever evaluated with an intent lock.
        self._intent_cache: Set[str] = set()

        self.logger.debug(f"Initializing Solar Shield for sector: '{self.root.name}'...")

        # Optimized lookup sets from Language Definitions
        self.whitelisted_extensions: Set[str] = set()
        self.ecosystem_anchors: Set[str] = set()

        for data in self.registry.values():
            self.whitelisted_extensions.update(data.get('extensions', []))
            self.ecosystem_anchors.update(data.get('exact_matches', []))

        self.ignore_patterns = self._load_gitignore_patterns()

        self.logger.info(
            f"Dispatching Survey Probe to Sector '{self.root.name}' | "
            f"Tracking {len(self.whitelisted_extensions)} spectral bands."
        )

    def _relative_posix(self, path_obj: Path) -> str:
        """Return *path_obj* relative to the scan root as a POSIX string.

        Falls back to the path's own POSIX form when it is not located under
        the root (``relative_to`` raises ``ValueError``).
        """
        try:
            return path_obj.relative_to(self.root).as_posix()
        except ValueError:
            return path_obj.as_posix()

    def evaluate_path_integrity(self, file_path: Union[str, Path], has_intent: bool = False) -> Tuple[bool, int, str]:
        """
        [PHASE 0 ENTRY POINT]
        Fast path-only check to build the CensusArray (Radar Walk).
        Fetches the file size up front so that blocked artifacts still report
        their real size instead of 0 bytes.

        Args:
            file_path: The artifact's path.
            has_intent: True if the GuideStar mapped this file in a manifest or priority list.
                Once a path has been seen with intent it is cached and treated
                as intent-locked on later calls as well.

        Returns:
            Tuple: (is_valid: bool, size_bytes: int, reason: str)
        """
        path_obj = Path(file_path)
        normalized_path = path_obj.as_posix()
        _, ext = os.path.splitext(path_obj.name)
        relative_path = self._relative_posix(path_obj)

        # Safely fetch size before making drop decisions to ensure accurate Dark Matter telemetry.
        # A missing/unreadable file (OSError) or an unrepresentable path
        # (ValueError, e.g. embedded NUL) simply reports 0 bytes.
        try:
            size_bytes = path_obj.stat().st_size
        except (OSError, ValueError):
            size_bytes = 0

        # 0. TIER 0.5: THE ABSOLUTE EXTENSION SHIELD (Impervious to Intent)
        # Drops blacklisted extensions before any file content is read
        # (only the stat() above has touched the disk at this point).
        if ext.lower() in self.black_hole_exts:
            reason = f"Blocked (Explicitly Blacklisted Media/Binary Extension: '{ext}')"
            self.logger.debug(f"{reason}: {relative_path}")
            return False, size_bytes, reason

        # Resolve intent status against cache
        active_intent = has_intent or (normalized_path in self._intent_cache)
        if active_intent:
            self._intent_cache.add(normalized_path)

        # 1. TIER 1: THE SOLAR SHIELD (Radio Noise)
        if not self._check_solar_shield(relative_path, has_intent=active_intent):
            reason = "Blocked (Path Excluded by System Rules, Hidden Directory, or .gitignore)"
            self.logger.debug(f"{reason}: {relative_path}")
            return False, size_bytes, reason

        # --- INTENT BYPASS ---
        # If the file has a GuideStar lock, it bypasses linguistic whitelisting entirely.
        if active_intent:
            reason = "Passed (GuideStar Intent Lock Bypassed Tier 2)"
            self.logger.debug(f"{reason}: {relative_path}")
            return True, size_bytes, reason

        # 2. TIER 2: THE VISIBLE SPECTRUM (Linguistic Whitelisting)
        # Rule 2.1: Deep Space Remnants (Spec 2.3.3.B)
        # We allow extensionless files through without intent to be evaluated by the Shebang Scanner
        if not ext:
            reason = "Passed (Extensionless -> Shebang scan required)"
            self.logger.debug(f"{reason}: {relative_path}")
            return True, size_bytes, reason

        # Rule 2.2: Known Ecosystem Anchor or Whitelisted Extension
        if path_obj.name in self.ecosystem_anchors or ext.lower() in self.whitelisted_extensions:
            reason = "Passed (Ecosystem Anchor or Whitelisted Ext)"
            return True, size_bytes, reason

        reason = f"Blocked (Unsupported or Unrecognized Extension: '{ext}')"
        self.logger.debug(f"{reason}: {relative_path}")
        return False, size_bytes, reason

    def is_in_scope(self, file_path: Union[str, Path], content: Optional[str] = None, has_intent: bool = False) -> "FilterResult":
        """Runs the 5-tier perimeter gate to validate maintainable code matter.

        Args:
            file_path: The artifact's path; it must exist on disk.
            content: Decoded text of the artifact. When None, the artifact is
                reported out of scope with a protocol-violation reason.
            has_intent: True if the artifact is intent-locked; paths cached by
                ``evaluate_path_integrity`` are treated as locked too.

        Returns:
            A FilterResult dict. This method does not raise: I/O problems and
            unexpected exceptions are logged and reported via ``reason``.
        """
        path_obj = Path(file_path)
        normalized_path = path_obj.as_posix()
        relative_path = self._relative_posix(path_obj)

        # Pull from state to guarantee consistency between Phase 0 and Phase 1 calls
        active_intent = has_intent or (normalized_path in self._intent_cache)

        result: "FilterResult" = {
            "is_in_scope": False,
            "band": self.bands.get("VISIBLE", "source_code"),
            "reason": None,
            "path": normalized_path,
            "size_bytes": 0,
            "total_loc": 0
        }

        try:
            # --- TIER 0: THE LEAD SHIELD (Resource Guarding) ---
            # NOTE: Intent cannot override file size or existence checks. Physics are absolute.
            if not path_obj.exists():
                raise InaccessibleArtifactError("Artifact missing or path invalid")

            stats = path_obj.stat()
            result["size_bytes"] = stats.st_size

            max_mb = self.config.get("MAX_FILE_SIZE_MB", 10)
            if stats.st_size > (max_mb * 1024 * 1024):
                self.logger.warning(f"Resource Guard: Rejected massive file '{relative_path}' ({stats.st_size / (1024 * 1024):.1f} MB)")
                result["band"] = self.bands.get("INFRARED", "saturated")
                result["reason"] = f"Blocked (File Size Exceeds Configured Limit: > {max_mb}MB)"
                return result

            # --- TIER 1 & 2: PATH VALIDATION ---
            is_valid, size_bytes, reason = self.evaluate_path_integrity(path_obj, has_intent=active_intent)
            if not is_valid:
                result["band"] = self.bands.get("RADIO", "radio_noise")
                result["reason"] = reason
                result["size_bytes"] = size_bytes
                return result

            # --- TIER 3 & 4: ARTIFACT INTEGRITY (Content Gate) ---
            if content is None:
                self.logger.warning(f"Protocol Violation: Missing content buffer for '{relative_path}'")
                result["reason"] = "Protocol Violation: Missing content buffer"
                return result

            integrity = self._check_artifact_integrity(content, relative_path)
            result["total_loc"] = integrity["loc"]

            if not integrity["valid"]:
                result["band"] = integrity["band"]
                result["reason"] = integrity["reason"]
                return result

            # --- SUCCESS ---
            result["is_in_scope"] = True
            self.logger.debug(f"Aperture Lock: '{relative_path}' safely in scope (LOC: {result['total_loc']}).")
            return result

        except (InaccessibleArtifactError, OSError) as e:
            # OSError (which includes PermissionError) also covers the file
            # vanishing between exists() and stat(); that is an inaccessible
            # artifact, not an internal failure.
            self.logger.warning(f"Inaccessible Artifact: '{relative_path}' | {str(e)}")
            result["band"] = self.bands.get("RADIO", "radio_noise")
            result["reason"] = f"I/O Exception: {str(e)}"
            return result
        except Exception as e:
            # Last-resort boundary: one bad artifact must not abort the scan.
            self.logger.error(f"Critical Solar Shield Failure on '{relative_path}': {str(e)}", exc_info=True)
            result["reason"] = f"Internal Exception: {str(e)}"
            return result

    def _check_artifact_integrity(self, content: str, rel_path: str) -> Dict[str, Any]:
        """Inspects the Photon Buffer for corruption or saturation.

        Args:
            content: Decoded text of the artifact.
            rel_path: Path used in log messages only.

        Returns:
            Dict with keys 'valid' (bool), 'band' (str), 'reason' (str or None)
            and 'loc' (line count; stays 0 when null bytes are found because
            the check returns before lines are counted).
        """
        report = {"valid": True, "band": self.bands.get("VISIBLE", "source_code"), "reason": None, "loc": 0}

        # --- TIER 3: THE LEAD SHIELD (Binary Detection) ---
        # The entire buffer is checked (not just a prefix) so binary files
        # don't sneak into Visible Stars.
        if '\x00' in content:
            self.logger.debug(f"Integrity check failed (Opaque Binary / Null Bytes): {rel_path}")
            report.update({
                "valid": False,
                "band": self.bands.get("MICROWAVE", "binary_debris"),
                "reason": "Blocked (Binary Format: Null bytes detected during read)"
            })
            return report

        # --- TIER 4: INFRARED GATE (Minification & Saturation) ---
        max_line = self.config.get("MAX_LINE_LENGTH", 500)
        scan_limit = self.config.get("MINIFICATION_SCAN_LIMIT", 50)

        lines = content.splitlines()
        report["loc"] = len(lines)

        # Only the first `scan_limit` lines are inspected for over-long lines.
        for i, line in enumerate(lines):
            if i >= scan_limit:
                break
            if len(line) > max_line:
                self.logger.debug(f"Integrity check failed: '{rel_path}' saturated > {max_line} chars on line {i+1}")
                report.update({
                    "valid": False,
                    "band": self.bands.get("INFRARED", "saturated"),
                    "reason": f"Blocked (Minified or Dense Data: Line {i+1} exceeds {max_line} characters)"
                })
                return report

        return report

    def _load_gitignore_patterns(self) -> List[str]:
        """Reads the root .gitignore to identify Radio Noise.

        Only ``<root>/.gitignore`` is read. Blank lines and lines starting
        with '#' are skipped; every other line is kept verbatim (stripped).

        Returns:
            The collected patterns; empty when the file is absent. If reading
            fails part-way, the patterns gathered so far are returned.
        """
        patterns: List[str] = []
        ignore_file = self.root / ".gitignore"
        try:
            # errors='replace': stray non-UTF-8 bytes must not abort filter
            # construction; the valid patterns are still loaded.
            with ignore_file.open('r', encoding='utf-8', errors='replace') as f:
                for raw in f:
                    entry = raw.strip()
                    if entry and not entry.startswith('#'):
                        patterns.append(entry)
        except (FileNotFoundError, NotADirectoryError):
            # No .gitignore at the root: nothing to load, nothing to report.
            pass
        except OSError as e:
            self.logger.warning(f"Failed to load .gitignore rules: {e}")
        return patterns

    @staticmethod
    def _matches_ignore_pattern(rel_path: str, parts: List[str], pattern: str) -> bool:
        """Return True when *pattern* (one .gitignore line) excludes *rel_path*.

        This is an fnmatch-based approximation of gitignore matching:
          * 'name/'  matches any path component named like 'name'.
          * The whole relative path or the file name may match the pattern.
          * A pattern without any '/' also matches ancestor directories, so
            'node_modules' excludes everything beneath such a directory.
          * A pattern with a leading or inner '/' is anchored at the root and
            compared component by component against the start of the path.
        Negation ('!pattern') is not interpreted; such lines are matched
        literally, exactly like any other pattern text.
        """
        is_dir_pattern = pattern.endswith('/')

        if is_dir_pattern and any(fnmatch.fnmatch(p + '/', pattern) for p in parts):
            return True
        if fnmatch.fnmatch(rel_path, pattern) or fnmatch.fnmatch(parts[-1], pattern):
            return True

        core = pattern.strip('/')
        if not core:
            return False

        if '/' not in pattern.rstrip('/'):
            # Unanchored pattern. Directory-only patterns were fully handled
            # above; a bare name may additionally match an ancestor directory.
            if is_dir_pattern:
                return False
            return any(fnmatch.fnmatch(p, pattern) for p in parts[:-1])

        # Anchored pattern: match against the leading path components.
        pattern_parts = core.split('/')
        # A directory-only pattern needs at least one component below it.
        required = len(pattern_parts) + (1 if is_dir_pattern else 0)
        if len(parts) < required:
            return False
        return all(fnmatch.fnmatch(p, q) for p, q in zip(parts, pattern_parts))

    def _check_solar_shield(self, rel_path: str, has_intent: bool = False) -> bool:
        """Determines if the path sits in a blocked or ignored sector.

        Args:
            rel_path: POSIX-style path, normally relative to the scan root.
            has_intent: When True, hidden ('.'-prefixed) components are
                tolerated. Excluded directories and .gitignore rules still apply.

        Returns:
            True when the path may proceed, False when it is blocked.
        """
        parts = rel_path.split('/')

        for part in parts:
            if part in self.black_holes:
                return False

            # Hidden Path Suppression (e.g., .github/, .vscode/)
            if part.startswith('.') and part not in self.ecosystem_anchors:
                # If GuideStar explicitly mapped a dotfile (e.g., .hooks/pre-commit), lift the blockade.
                if has_intent:
                    self.logger.debug(f"Solar Shield VIP Pass: Bypassing hidden-path block for intent-locked artifact '{part}'")
                    continue
                self.logger.debug(f"Blocked by Hidden Path Component '{part}': {rel_path}")
                return False

        for pattern in self.ignore_patterns:
            if self._matches_ignore_pattern(rel_path, parts, pattern):
                return False

        return True
|