PyPI - netgreener - Versions diffs - 0.1.0__py3-none-any.whl - Mend

netgreener 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

netgreener/__init__.py +51 -0
netgreener/_metrics_server.py +59 -0
netgreener/analyzer.py +295 -0
netgreener/api_client.py +123 -0
netgreener/cli.py +433 -0
netgreener/config.py +66 -0
netgreener/constants.py +3 -0
netgreener/executor.py +132 -0
netgreener/nlp_analyzer.py +123 -0
netgreener/reporter.py +79 -0
netgreener-0.1.0.dist-info/METADATA +15 -0
netgreener-0.1.0.dist-info/RECORD +15 -0
netgreener-0.1.0.dist-info/WHEEL +5 -0
netgreener-0.1.0.dist-info/entry_points.txt +2 -0
netgreener-0.1.0.dist-info/top_level.txt +1 -0

netgreener/__init__.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""
+netgreener — public API for use inside ML training scripts.
+Usage inside a user's train.py:
+    import netgreener as ng
+    ng.log_metrics(accuracy=0.95, f1=0.88, precision=0.90, recall=0.87, auc=0.96)
+"""
+import json
+import os
+import socket
+def log_metrics(
+    accuracy: float | None = None,
+    f1: float | None = None,
+    precision: float | None = None,
+    recall: float | None = None,
+    auc: float | None = None,
+    **extra: float,
+) -> None:
+    """
+    Report accuracy metrics back to the NetGreener CLI runner.
+    Call this anywhere in your script after evaluation is complete.
+    Safe to call even when not running under netgreener (silently no-ops).
+    The metrics are sent as one JSON object over a loopback TCP connection to the
+    port named by the NETGREENER_METRICS_PORT environment variable. Nothing is sent
+    when that variable is unset, empty, or not a valid TCP port, or when no metric
+    was supplied. Connection failures are ignored.
+    Args:
+        accuracy: Sent under the key "model_accuracy".
+        f1: Sent under the key "f1_score".
+        precision: Sent under the key "model_precision".
+        recall: Sent under the key "recall".
+        auc: Sent under the key "auc_score".
+        **extra: Additional metrics, sent under their keyword names. They are merged
+            last, so an extra named like one of the keys above overrides it.
+    Raises:
+        TypeError: If a metric value cannot be serialized to JSON.
+    """
+    port_str = os.environ.get("NETGREENER_METRICS_PORT")
+    if not port_str:
+        return
+    try:
+        port = int(port_str)
+    except ValueError:
+        # A malformed port must never crash the user's training script.
+        return
+    if not 0 < port < 65536:
+        return
+    named = {
+        "model_accuracy": accuracy,
+        "f1_score": f1,
+        "model_precision": precision,
+        "recall": recall,
+        "auc_score": auc,
+    }
+    payload: dict = {key: value for key, value in named.items() if value is not None}
+    payload.update(extra)
+    if not payload:
+        return
+    message = json.dumps(payload).encode()
+    try:
+        # The receiver reads until EOF, so closing the socket marks the end of the payload.
+        with socket.create_connection(("127.0.0.1", port), timeout=2) as s:
+            s.sendall(message)
+    except OSError:
+        # Best effort: a missing or busy runner is not the training script's problem.
+        pass

netgreener/_metrics_server.py ADDED Viewed

@@ -0,0 +1,59 @@
+"""
+Local TCP server that receives ng.log_metrics() calls from the user's running script.
+The CLI starts this server before launching the script and injects the port via
+NETGREENER_METRICS_PORT. The user-facing netgreener.log_metrics() connects to it.
+"""
+import json
+import socket
+import threading
+from typing import Callable
+class MetricsServer:
+    """
+    Listens on a loopback TCP port for metric payloads from the child process.
+    Each connection is expected to carry one JSON object; the payload ends when the
+    client closes its side of the connection. Connections are handled one at a time
+    on a daemon thread. Anything that is not a JSON object is dropped silently.
+    """
+    def __init__(self, on_metrics: Callable[[dict], None]):
+        """
+        Bind the listening socket; the accept loop only runs after start().
+        Args:
+            on_metrics: Called on the listener thread with every decoded payload.
+        Raises:
+            OSError: If the socket cannot be configured, bound, or put into listen mode.
+        """
+        self._on_metrics = on_metrics
+        self._server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        try:
+            self._server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+            # Port 0 lets the OS choose a free port; the choice is published as self.port.
+            self._server.bind(("127.0.0.1", 0))
+            self._server.listen(5)
+            # Short accept timeout so the loop re-checks the running flag regularly.
+            self._server.settimeout(0.5)
+        except OSError:
+            # Do not leak the descriptor when setup fails half-way.
+            self._server.close()
+            raise
+        self.port: int = self._server.getsockname()[1]
+        self._running = False
+        self._thread: threading.Thread | None = None
+    def start(self) -> None:
+        """Start the accept loop on a daemon thread."""
+        self._running = True
+        self._thread = threading.Thread(target=self._serve, daemon=True)
+        self._thread.start()
+    def stop(self) -> None:
+        """Ask the accept loop to exit and close the listening socket."""
+        self._running = False
+        try:
+            self._server.close()
+        except OSError:
+            pass
+    def _serve(self) -> None:
+        """Accept connections until stop() is called, forwarding each valid payload."""
+        while self._running:
+            try:
+                conn, _ = self._server.accept()
+            except OSError:
+                # Covers the accept timeout (socket.timeout is an OSError) and the
+                # socket being closed by stop(); the loop condition decides what is next.
+                continue
+            with conn:
+                payload = self._read_payload(conn)
+            if payload is None:
+                continue
+            try:
+                self._on_metrics(payload)
+            except Exception:
+                # A failing callback must not take the listener thread down.
+                pass
+    @staticmethod
+    def _read_payload(conn: socket.socket) -> dict | None:
+        """Read one connection to EOF and decode it; return None unless it is a JSON object."""
+        data = bytearray()
+        try:
+            conn.settimeout(5.0)
+            while chunk := conn.recv(4096):
+                data += chunk
+        except socket.timeout:
+            # Stalled client: still try to decode whatever arrived.
+            pass
+        except OSError:
+            return None
+        try:
+            payload = json.loads(bytes(data).decode())
+        except ValueError:
+            # Covers both invalid UTF-8 and invalid JSON.
+            return None
+        return payload if isinstance(payload, dict) else None

netgreener/analyzer.py ADDED Viewed

@@ -0,0 +1,295 @@
+"""
+Headless code analysis — wraps CodeAnalyzer without any PyQt5 dependency.
+Extracts AST features, Halstead metrics, raw Radon metrics, library domains,
+and derives CodePattern records for the supervisor dashboard.
+"""
+import sys
+from pathlib import Path
+from typing import Any
+# Optional source checkout of the analyzer: <four levels above this file>/NetGreenerCodeAnalyzer/Src.
+# When that directory exists it is put at the front of sys.path so that analyze_file()
+# can import the Libs.CodeAnalyzer package from it.
+# Chained .parent is used instead of parents[3]: .parent stops at the filesystem root,
+# whereas parents[3] raises IndexError at import time for a module installed fewer than
+# four levels deep.
+_CODEANALYZER_SRC = (
+    Path(__file__).resolve().parent.parent.parent.parent / "NetGreenerCodeAnalyzer" / "Src"
+)
+if _CODEANALYZER_SRC.exists() and str(_CODEANALYZER_SRC) not in sys.path:
+    sys.path.insert(0, str(_CODEANALYZER_SRC))
+# Maps the first dotted component of an imported module name to a coarse library
+# domain. Looked up by _detect_domains(). Written grouped by domain and flattened
+# into a {library: domain} dict.
+_LIBRARY_DOMAINS: dict[str, str] = {
+    library: domain
+    for domain, libraries in (
+        (
+            "computer_vision",
+            ("cv2", "PIL", "Pillow", "skimage", "torchvision", "imageio", "albumentations"),
+        ),
+        (
+            "deep_learning",
+            ("tensorflow", "tf", "keras", "torch", "jax", "flax", "paddle", "mxnet",
+             "fastai", "lightning", "pytorch_lightning"),
+        ),
+        (
+            "classical_ml",
+            ("sklearn", "xgboost", "lightgbm", "catboost", "statsmodels"),
+        ),
+        (
+            "nlp",
+            ("transformers", "spacy", "nltk", "gensim", "sentence_transformers"),
+        ),
+        (
+            "data",
+            ("pandas", "numpy", "scipy", "polars", "pyarrow"),
+        ),
+        (
+            "visualization",
+            ("matplotlib", "seaborn", "plotly"),
+        ),
+    )
+    for library in libraries
+}
+def _detect_domains(imports_detail: list[tuple]) -> list[str]:
+    """
+    Return the sorted, de-duplicated library domains found among the imports.
+    Each entry of imports_detail is unpacked as a (module, <ignored>) pair. Only the
+    part of module before the first dot is looked up in _LIBRARY_DOMAINS. Entries with
+    an empty module, an unknown root, or an empty domain are skipped.
+    """
+    roots = (module.split(".")[0] for module, _ in imports_detail if module)
+    found = {_LIBRARY_DOMAINS[root] for root in roots if _LIBRARY_DOMAINS.get(root)}
+    return sorted(found)
+def _safe_halstead(h) -> dict[str, float]:
+    """
+    Flatten a Halstead report into scalar ``halstead_*`` fields.
+    The figures are read from ``h.total`` when h has such an attribute, otherwise from
+    h itself (radon's return type varies by version). Missing attributes count as zero.
+    If anything goes wrong while reading or converting, every field is reported as 0.
+    """
+    # (attribute, converter, decimals to round to; None means leave unrounded)
+    layout = (
+        ("vocabulary", int, None),
+        ("length", int, None),
+        ("volume", float, 2),
+        ("difficulty", float, 2),
+        ("effort", float, 2),
+        ("bugs", float, 4),
+    )
+    try:
+        report = getattr(h, "total", h)
+        flat = {}
+        for attribute, convert, decimals in layout:
+            # convert() with no argument gives the type's zero (0 or 0.0) as the default.
+            value = convert(getattr(report, attribute, convert()))
+            flat[f"halstead_{attribute}"] = value if decimals is None else round(value, decimals)
+        return flat
+    except Exception:
+        return {f"halstead_{attribute}": 0 for attribute, _, _ in layout}
+def detect_code_patterns(features: dict[str, Any], file_path: str, project_id: int) -> list[dict]:
+    """
+    Derive CodePattern records for the supervisor dashboard from one file's features.
+    The checks run in a fixed order: cyclomatic complexity, maintainability index,
+    comment ratio (only for files longer than 20 lines), exception handling, AST depth.
+    A missing feature falls back to a neutral default, so an empty dict yields no records.
+    Every record carries project_id and file_path unchanged.
+    """
+    found: list[dict] = []
+    def emit(kind: str, title: str, level: str, text: str, score: float = 0.9) -> None:
+        found.append({
+            "project_id": project_id,
+            "file_path": file_path,
+            "pattern_type": kind,
+            "pattern_name": title,
+            "severity": level,
+            "description": text,
+            "confidence": score,
+        })
+    # Cyclomatic complexity: the two tiers are exclusive, the worse one wins.
+    worst_cc = features.get("max_cyclomatic_complexity", 0)
+    if worst_cc > 20:
+        emit("code_smell", "High Cyclomatic Complexity", "critical",
+             f"Max function complexity {worst_cc} (>20). Very hard to test and maintain.")
+    elif worst_cc > 10:
+        emit("code_smell", "High Cyclomatic Complexity", "high",
+             f"Max function complexity {worst_cc} (>10). Consider splitting functions.", 0.85)
+    # Maintainability index: values from 20 to 65 produce no record.
+    index = features.get("maintainability_index", 50.0)
+    if index < 20:
+        emit("anti_pattern", "Low Maintainability", "high",
+             f"Maintainability index {index:.1f} (<20). Code is difficult to maintain.")
+    elif index > 65:
+        emit("design_pattern", "High Maintainability", "low",
+             f"Maintainability index {index:.1f} (>65). Code is clean and well structured.", 0.85)
+    # Documentation: the line-count guard also rules out a division by zero.
+    line_count = features.get("lines_of_code", 0)
+    if line_count > 20:
+        ratio = features.get("comments_count", 0) / line_count
+        if ratio > 0.2:
+            emit("design_pattern", "Well Documented Code", "low",
+                 f"Comment ratio {ratio:.0%}. Good inline documentation.", 0.8)
+        elif ratio < 0.02:
+            emit("code_smell", "Insufficient Documentation", "low",
+                 f"Comment ratio {ratio:.0%}. Consider adding docstrings.", 0.75)
+    # Error handling
+    handlers = features.get("count_exception_handling_nodes", 0)
+    if handlers > 0:
+        emit("design_pattern", "Error Handling Present", "low",
+             f"{handlers} exception handling block(s) found.", 0.8)
+    # Deep nesting
+    nesting = features.get("depth_of_ast", 0)
+    if nesting > 25:
+        emit("code_smell", "Deep Nesting", "medium",
+             f"AST depth {nesting} (>25). Deeply nested code is hard to follow.", 0.8)
+    return found
+def analyze_file(source_code: str) -> dict[str, Any]:
+    """
+    Run full CodeAnalyzer pipeline on a source string.
+    Returns a flat dict of 30+ features including Halstead, Radon raw, and library domains.
+    Returns a dict with an 'error' key on failure.
+    This function does not raise for analysis problems: a missing CodeAnalyzer package,
+    a SyntaxError, or any other exception during analysis is reported as
+    ``{"error": <message>}``, so callers should test for the 'error' key.
+    """
+    # Imported lazily so that a missing CodeAnalyzer package surfaces as an error
+    # dict from this call instead of an ImportError when this module is imported.
+    try:
+        from Libs.CodeAnalyzer.FeatureExtraction.ASTFeatures.ast_base_analysis import AstBaseAnalyzer
+        from Libs.CodeAnalyzer.FeatureExtraction.ASTFeatures.node_counter import NodeCounter
+        from Libs.CodeAnalyzer.FeatureExtraction.ASTFeatures.node_detail_extractor import NodeDetailExtractor
+        from Libs.CodeAnalyzer.FeatureExtraction.ASTFeatures.metrics_calculator import MetricsCalculator
+    except ImportError as e:
+        return {"error": f"CodeAnalyzer not available: {e}"}
+    try:
+        # All three helpers are built on the same AstBaseAnalyzer instance.
+        base = AstBaseAnalyzer(source_code)
+        counter = NodeCounter(base)
+        detail = NodeDetailExtractor(base)
+        metrics = MetricsCalculator(base)
+        imports_detail = detail.import_nodes_detail()
+        # Only the second element (the comment count) of the result is used.
+        _, comment_count = base.extract_comments()
+        node_count = counter.count_node()
+        # Indexed below as [0] = index value and [1] = rank.
+        mi_result = metrics.calculate_maintainability_index_rank()
+        # Cyclomatic complexity — returns list of (name, cc, rank) per function
+        cc_list = metrics.calculate_cyclomatic_complexity()
+        cc_values = [c for _, c, _ in cc_list] if cc_list else []
+        # A file without functions reports 0 for both average and maximum.
+        avg_cc = sum(cc_values) / len(cc_values) if cc_values else 0.0
+        max_cc = max(cc_values, default=0)
+        # Halstead metrics
+        halstead = _safe_halstead(metrics.calculate_halstead_metrics())
+        # Raw Radon metrics (LLOC, SLOC, blank, multi-line strings)
+        raw = metrics.calculate_raw_metrics()
+        features: dict[str, Any] = {
+            # AST node counts
+            "lines_of_code": counter.count_lines_of_code(),
+            "number_of_tokens": counter.count_tokens(),
+            "depth_of_ast": counter.depth_of_ast(),
+            "number_of_nodes": node_count,
+            # NOTE(review): presumably "a tree with n nodes has n - 1 edges"; this gives -1
+            # if count_node() can return 0 — confirm against NodeCounter.
+            "number_of_edges": node_count - 1,
+            "number_of_imports": counter.count_imports(),
+            "imports_detail": imports_detail,
+            "count_class_nodes": counter.count_class_nodes(),
+            "count_function_nodes": counter.count_function_nodes(),
+            "count_call_function_nodes": counter.count_call_function_nodes(),
+            "count_control_flow_nodes": counter.count_control_flow_nodes(),
+            "count_exception_handling_nodes": counter.count_exception_handling_nodes(),
+            "count_expression_nodes": counter.count_expression_nodes(),
+            "count_assignment_nodes": counter.count_assignment_nodes(),
+            "count_binary_operations": counter.count_binary_operation_nodes(),
+            "count_unary_operations": counter.count_unary_operation_nodes(),
+            "count_lambda_function_nodes": counter.count_lambda_function_nodes(),
+            "comments_count": comment_count,
+            # Complexity / quality
+            "avg_cyclomatic_complexity": round(avg_cc, 2),
+            "max_cyclomatic_complexity": max_cc,
+            "maintainability_index": mi_result[0],
+            "maintainability_index_rank": mi_result[1],
+            # Radon raw
+            "lloc": raw.lloc,
+            "sloc": raw.sloc,
+            "blank_lines": raw.blank,
+            "multi_line_strings": raw.multi,
+            # Halstead
+            **halstead,
+            # Library domains
+            "library_domains": _detect_domains(imports_detail),
+        }
+        return features
+    # SyntaxError is handled separately only to prefix the message; the generic
+    # handler below turns every other failure into an error dict as well.
+    except SyntaxError as e:
+        return {"error": f"SyntaxError: {e}"}
+    except Exception as e:
+        return {"error": str(e)}
+def analyze_project(project_dir: str) -> dict[str, Any]:
+    """
+    Analyze all .py files in a project directory.
+    Returns aggregated features, per-file feature cache, and errors.
+    The caller should call detect_code_patterns() per file once project_id is known.
+    Files are visited in sorted path order, so per_file and errors are reproducible.
+    Files that cannot be read or analyzed are left out of the aggregates and are
+    listed in "errors" as "<path relative to project_dir>: <reason>".
+    Args:
+        project_dir: Directory that is searched recursively for ``*.py`` files.
+    Returns:
+        A dict with the keys file_count, total_lines_of_code, total_imports,
+        avg_cyclomatic_complexity, avg_maintainability_index, avg_halstead_volume,
+        avg_halstead_difficulty, total_halstead_bugs, library_domains, per_file
+        (a list of {"rel_path", "features"}) and errors. Averages are taken over the
+        successfully analyzed files and are 0.0 when there are none.
+    """
+    root = Path(project_dir)
+    # rglob order depends on the filesystem; sort for stable output.
+    py_files = sorted(root.rglob("*.py"))
+    all_domains: set[str] = set()
+    total_loc = 0
+    total_imports = 0
+    total_avg_cc = 0.0
+    total_mi = 0.0
+    total_halstead_volume = 0.0
+    total_halstead_difficulty = 0.0
+    total_halstead_bugs = 0.0
+    file_count = 0
+    errors: list[str] = []
+    per_file: list[dict] = []  # {"rel_path": str, "features": dict}
+    for path in py_files:
+        if path.is_dir():
+            # A directory that merely happens to be named "*.py" is not a source file.
+            continue
+        # The relative path, not the bare name, keeps entries such as several
+        # __init__.py files distinguishable in the error list.
+        rel_path = str(path.relative_to(root))
+        try:
+            source = path.read_text(encoding="utf-8", errors="replace")
+        except OSError as exc:
+            errors.append(f"{rel_path}: cannot read file: {exc}")
+            continue
+        result = analyze_file(source)
+        if "error" in result:
+            errors.append(f"{rel_path}: {result['error']}")
+            continue
+        file_count += 1
+        total_loc += result.get("lines_of_code", 0)
+        total_imports += result.get("number_of_imports", 0)
+        total_avg_cc += result.get("avg_cyclomatic_complexity", 0.0)
+        total_mi += result.get("maintainability_index", 0.0)
+        total_halstead_volume += result.get("halstead_volume", 0.0)
+        total_halstead_difficulty += result.get("halstead_difficulty", 0.0)
+        total_halstead_bugs += result.get("halstead_bugs", 0.0)
+        all_domains.update(result.get("library_domains", []))
+        per_file.append({"rel_path": rel_path, "features": result})
+    def _avg(total: float) -> float:
+        # Unweighted mean over analyzed files; guards the empty-project case.
+        return round(total / file_count, 2) if file_count else 0.0
+    return {
+        "file_count": file_count,
+        "total_lines_of_code": total_loc,
+        "total_imports": total_imports,
+        "avg_cyclomatic_complexity": _avg(total_avg_cc),
+        "avg_maintainability_index": _avg(total_mi),
+        "avg_halstead_volume": _avg(total_halstead_volume),
+        "avg_halstead_difficulty": _avg(total_halstead_difficulty),
+        "total_halstead_bugs": round(total_halstead_bugs, 3),
+        "library_domains": sorted(all_domains),
+        "per_file": per_file,
+        "errors": errors,
+    }
+def build_patterns(per_file: list[dict], project_id: int) -> list[dict]:
+    """
+    Turn cached per-file features into CodePattern records for a known project_id.
+    Each entry must provide "features" and "rel_path"; the records of all files are
+    concatenated in input order.
+    """
+    return [
+        pattern
+        for entry in per_file
+        for pattern in detect_code_patterns(entry["features"], entry["rel_path"], project_id)
+    ]

netgreener/api_client.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""
+Lightweight API client for NetGreener CLI — wraps the REST endpoints needed by the CLI.
+"""
+import requests
+from typing import Any
+from .config import get_api_url, load_credentials
+class APIError(Exception):
+    """Error carrying an HTTP status code and a detail text; str() gives "HTTP <code>: <detail>"."""
+    def __init__(self, status_code: int, detail: str):
+        message = f"HTTP {status_code}: {detail}"
+        super().__init__(message)
+        # Kept as attributes so callers can branch on the status without parsing the message.
+        self.status_code = status_code
+        self.detail = detail
+class NetGreenerClient:
+    """
+    Client for the NetGreener REST endpoints used by the CLI (all under ``/api/v1``).
+    Every request method raises APIError when the server answers with a non-success
+    status and otherwise returns the decoded JSON body.
+    """
+    def __init__(self, token: str | None = None, base_url: str | None = None):
+        """
+        Args:
+            token: Bearer token sent in the Authorization header, if any.
+            base_url: Server root without the ``/api/v1`` suffix; defaults to get_api_url().
+        """
+        # Strip trailing slashes so "http://host/" does not become "http://host//api/v1".
+        self.base_url = (base_url or get_api_url()).rstrip("/") + "/api/v1"
+        self._token = token
+    @classmethod
+    def from_credentials(cls) -> "NetGreenerClient":
+        """
+        Build a client from the credentials returned by load_credentials().
+        Raises:
+            APIError: With status 401 when there are no stored credentials or they
+                contain no access token.
+        """
+        creds = load_credentials()
+        try:
+            token = creds["access_token"] if creds else None
+        except (KeyError, TypeError):
+            # Incomplete or malformed credentials are treated like missing ones.
+            token = None
+        if not token:
+            raise APIError(401, "Not logged in. Run: netgreener login")
+        return cls(token=token)
+    def _headers(self) -> dict:
+        """Return the JSON content-type header plus the bearer token when one is set."""
+        h = {"Content-Type": "application/json"}
+        if self._token:
+            h["Authorization"] = f"Bearer {self._token}"
+        return h
+    def _raise(self, resp: "requests.Response") -> None:
+        """Raise APIError for a non-success response, preferring the JSON "detail" field."""
+        if not resp.ok:
+            try:
+                detail = resp.json().get("detail", resp.text)
+            except Exception:
+                # Body is not a JSON object: fall back to the raw text.
+                detail = resp.text
+            raise APIError(resp.status_code, detail)
+    def _request(
+        self,
+        method: str,
+        path: str,
+        *,
+        json_body: Any = None,
+        timeout: float = 15,
+        authenticated: bool = True,
+    ) -> Any:
+        """Send one request to base_url + path, raise APIError on failure, return the JSON body."""
+        resp = requests.request(
+            method,
+            f"{self.base_url}{path}",
+            headers=self._headers() if authenticated else None,
+            json=json_body,
+            timeout=timeout,
+        )
+        self._raise(resp)
+        return resp.json()
+    def login(self, email: str, password: str) -> dict:
+        """Log in via POST /auth/login and remember the returned access token."""
+        # Sent without the client's own headers, so a previous token is not attached.
+        data = self._request(
+            "POST",
+            "/auth/login",
+            json_body={"email": email, "password": password},
+            authenticated=False,
+        )
+        self._token = data["access_token"]
+        return data
+    def get_or_create_project(self, name: str, directory: str) -> dict:
+        """Return the project whose project_name equals name, creating it when absent."""
+        for p in self._request("GET", "/projects"):
+            if p.get("project_name") == name:
+                return p
+        return self._request(
+            "POST",
+            "/projects",
+            json_body={"project_name": name, "project_directory": directory, "language": "Python"},
+        )
+    def create_run_session(self, payload: dict) -> dict:
+        """POST payload to /runsessions."""
+        return self._request("POST", "/runsessions", json_body=payload)
+    def update_run_accuracy(self, run_id: int, metrics: dict) -> dict:
+        """PUT metrics to /runsessions/<run_id>/accuracy."""
+        return self._request("PUT", f"/runsessions/{run_id}/accuracy", json_body=metrics)
+    def post_code_patterns_batch(self, patterns: list) -> dict:
+        """POST {"patterns": patterns} to /code-patterns/batch (30 s timeout)."""
+        return self._request(
+            "POST", "/code-patterns/batch", json_body={"patterns": patterns}, timeout=30
+        )
+    def post_surrogate_training(self, payload: dict) -> dict:
+        """POST payload to /surrogate-training."""
+        return self._request("POST", "/surrogate-training", json_body=payload)
+    def post_environmental_impact(self, payload: dict) -> dict:
+        """POST payload to /environmental-impact."""
+        return self._request("POST", "/environmental-impact", json_body=payload)