PyPI - pegasource - Versions diffs - 0.2.0__tar.gz → 0.2.2__tar.gz - Mend

pegasource 0.2.0tar.gz → 0.2.2tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (85) hide show

{pegasource-0.2.0 → pegasource-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pegasource
-Version: 0.2.0
+Version: 0.2.2
 Summary: Offline-capable toolkit: PCAP, geo, time-series, clustering, and path estimation
 Author: Josef Berman
 License: MIT
@@ -240,13 +240,10 @@ python -m pegasource.dataset_clustering.prepare_viz_data
 ### Path estimation (trajectories)
-Evaluate reconstruction methods on observation CSVs vs a 1 Hz ground-truth path (requires **`[path_estimation]`** for full method set):
+Evaluate reconstruction methods on observation CSVs vs a 1 Hz ground-truth path (requires **`[path_estimation]`** for the full estimator stack):
 ```bash
 pip install -e ".[path_estimation]"
-pegasource-path-estimation --observations run_1_observations.csv --true-path run_1_true_path.csv --output-dir out/
-# or
-python -m pegasource.path_estimation --observations ... --true-path ... --output-dir out/
 ```
 ```python
@@ -283,12 +280,9 @@ results = estimate_paths_only(
 # results["kf"] is an EstimationResult with times_s, east_m, north_m (or {"error": "..."})
 ```
-Synthetic data generation and batch method comparison:
-```bash
-python -m pegasource.path_estimation.generate_synthetic_datasets --help
-python -m pegasource.path_estimation.run_method_evaluation --help   # writes ./method_eval/
-```
+Synthetic data (``generate_dataset`` in ``pegasource.path_estimation.generate_synthetic_datasets``)
+and batch method comparison (``main`` in ``pegasource.path_estimation.run_method_evaluation``) are
+invoked from your own Python code, not as installed console commands.
 ---

{pegasource-0.2.0 → pegasource-0.2.2}/README.md RENAMED Viewed

@@ -189,13 +189,10 @@ python -m pegasource.dataset_clustering.prepare_viz_data
 ### Path estimation (trajectories)
-Evaluate reconstruction methods on observation CSVs vs a 1 Hz ground-truth path (requires **`[path_estimation]`** for full method set):
+Evaluate reconstruction methods on observation CSVs vs a 1 Hz ground-truth path (requires **`[path_estimation]`** for the full estimator stack):
 ```bash
 pip install -e ".[path_estimation]"
-pegasource-path-estimation --observations run_1_observations.csv --true-path run_1_true_path.csv --output-dir out/
-# or
-python -m pegasource.path_estimation --observations ... --true-path ... --output-dir out/
 ```
 ```python
@@ -232,12 +229,9 @@ results = estimate_paths_only(
 # results["kf"] is an EstimationResult with times_s, east_m, north_m (or {"error": "..."})
 ```
-Synthetic data generation and batch method comparison:
-```bash
-python -m pegasource.path_estimation.generate_synthetic_datasets --help
-python -m pegasource.path_estimation.run_method_evaluation --help   # writes ./method_eval/
-```
+Synthetic data (``generate_dataset`` in ``pegasource.path_estimation.generate_synthetic_datasets``)
+and batch method comparison (``main`` in ``pegasource.path_estimation.run_method_evaluation``) are
+invoked from your own Python code, not as installed console commands.
 ---

{pegasource-0.2.0 → pegasource-0.2.2}/pegasource/__init__.py RENAMED Viewed

@@ -10,5 +10,5 @@ pegasource.dataset_clustering    Hardware inventory embedding + clustering (opti
 pegasource.path_estimation       GPS/cellular path reconstruction (filters, graph, NN; optional torch stack)
 """
-__version__ = "0.2.0"
+__version__ = "0.2.2"
 __all__ = ["pcap", "geo", "timeseries", "dataset_clustering", "path_estimation"]

pegasource-0.2.2/pegasource/dataset_clustering/__init__.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""
+Hardware inventory clustering: text embeddings + hierarchical clustering.
+Reads a CSV of string cells, concatenates each row into one text line, encodes with a
+`sentence-transformers <https://www.sbert.net/>`_ model (L2-normalized vectors, suited to cosine distance),
+then clusters with scikit-learn: **agglomerative** clustering for smaller tables, or a
+**two-phase** MiniBatchKMeans + agglomerative pipeline when row count exceeds
+`DIRECT_CLUSTERING_LIMIT`.
+**Public API** (import from ``pegasource.dataset_clustering``):
+- `load_data` — load CSV as strings
+- `build_text_representations` — one string per row
+- `generate_embeddings` — requires optional ``[clustering]`` (sentence-transformers)
+- `cluster_direct` / `cluster_twophase` / `cluster_embeddings` — label vectors
+- `print_summary` — CLI-style cluster size table
+**Optional tooling**: FastAPI server (``server``), iFixit catalog fetch, Excel export,
+bundled ``cluster_viz`` UI. Install extras with ``pip install -e ".[clustering]"``.
+Examples
+--------
+>>> # After: pip install -e ".[clustering]"
+>>> from pegasource.dataset_clustering import (
+...     load_data,
+...     build_text_representations,
+...     generate_embeddings,
+...     cluster_embeddings,
+... )
+>>> # df = load_data("data.csv")
+>>> # emb = generate_embeddings(build_text_representations(df), "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", 256)
+>>> # labels = cluster_embeddings(emb, 0.3)
+CLI and server::
+    python -m pegasource.dataset_clustering.cluster_hardware --help
+    python -m pegasource.dataset_clustering.server
+    pegasource-cluster-viz --port 8001
+"""
+from .cluster_hardware import (
+    DIRECT_CLUSTERING_LIMIT,
+    build_text_representations,
+    cluster_direct,
+    cluster_embeddings,
+    cluster_twophase,
+    generate_embeddings,
+    load_data,
+    print_summary,
+)
+__all__ = [
+    "DIRECT_CLUSTERING_LIMIT",
+    "build_text_representations",
+    "cluster_direct",
+    "cluster_embeddings",
+    "cluster_twophase",
+    "generate_embeddings",
+    "load_data",
+    "print_summary",
+]

{pegasource-0.2.0 → pegasource-0.2.2}/pegasource/dataset_clustering/cluster_hardware.py RENAMED Viewed

@@ -1,14 +1,15 @@
 #!/usr/bin/env python3
-"""
-Cluster dirty hardware records using text embeddings + agglomerative hierarchical clustering.
+r"""Cluster dirty hardware records using embeddings + agglomerative clustering.
+This module is the core library behind ``python -m pegasource.dataset_clustering.cluster_hardware``
+and the symbols re-exported from :mod:`pegasource.dataset_clustering`.
-For large datasets (>10K rows), uses a two-phase approach:
-  Phase 1: Pre-group rows with MiniBatchKMeans into manageable chunks
-  Phase 2: Run agglomerative clustering within each chunk, then merge labels
+**Scaling:** For inputs with more than ``DIRECT_CLUSTERING_LIMIT`` rows, use :func:`cluster_twophase`
+(via :func:`cluster_embeddings`) so memory stays bounded.
-Usage:
-    python cluster_hardware.py --input data/dirty_hardware_data_40k.csv --threshold 0.3
-    python cluster_hardware.py --input data/dirty_hardware_data_40k.csv --threshold 0.2 --sample-size 5000
+See Also
+--------
+pegasource.dataset_clustering.server : interactive FastAPI + static UI
 """
 import argparse
@@ -23,7 +24,8 @@ import numpy as np
 from sklearn.cluster import AgglomerativeClustering, MiniBatchKMeans
-# Maximum rows for direct agglomerative clustering (above this, use two-phase approach)
+#: Maximum row count for single-pass agglomerative clustering. Above this,
+#: :func:`cluster_embeddings` calls :func:`cluster_twophase` automatically.
 DIRECT_CLUSTERING_LIMIT = 15_000
 _DEFAULT_INPUT = PACKAGE_DIR / "data" / "dirty_hardware_data_40k.csv"
@@ -71,7 +73,24 @@ def parse_args():
 def load_data(path, sample_size=None):
-    """Load CSV and optionally sample rows."""
+    """Load a CSV with all columns as strings (missing values become empty strings).
+    Parameters
+    ----------
+    path : str or os.PathLike
+        Input ``.csv`` path.
+    sample_size : int, optional
+        If set, only the first ``sample_size`` rows are kept (for quick tests).
+    Returns
+    -------
+    pandas.DataFrame
+        All columns have ``dtype`` str; ``NaN`` replaced with ``""``.
+    Notes
+    -----
+    Prints progress to stdout (row × column counts).
+    """
     print(f"📂 Loading data from {path}...")
     df = pd.read_csv(path, dtype=str).fillna("")
     if sample_size is not None:
@@ -81,7 +100,24 @@ def load_data(path, sample_size=None):
 def build_text_representations(df):
-    """Concatenate all columns into a single text string per row."""
+    """Join every column of each row into one text line for embedding.
+    Columns are separated by ``" | "`` (space-pipe-space), preserving column order.
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Input table (typically all strings).
+    Returns
+    -------
+    list of str
+        Length ``len(df)``; one concatenated string per row.
+    Notes
+    -----
+    Prints the first row’s text to stdout as a sanity check.
+    """
     print("📝 Building text representations...")
     texts = df.apply(lambda row: " | ".join(row.values), axis=1).tolist()
     print(f"   Example: {texts[0]!r}")
@@ -89,7 +125,32 @@ def build_text_representations(df):
 def generate_embeddings(texts, model_name, batch_size, device="cpu"):
-    """Generate embeddings using a sentence-transformer model."""
+    """Encode texts with SentenceTransformer (L2-normalized for cosine distance).
+    Requires the optional **sentence-transformers** dependency
+    (``pip install -e ".[clustering]"``).
+    Parameters
+    ----------
+    texts : list of str
+        One sentence/line per inventory row.
+    model_name : str
+        Hugging Face model id (e.g. ``sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2``)
+        or path to a local model directory.
+    batch_size : int
+        ``model.encode`` batch size (GPU memory vs throughput).
+    device : str, default ``"cpu"``
+        ``"cpu"``, ``"cuda"``, or another torch device string.
+    Returns
+    -------
+    numpy.ndarray
+        Shape ``(n_rows, dim)``; rows are unit vectors if the backend normalizes.
+    Notes
+    -----
+    Local paths are resolved with :func:`os.path.abspath` when the path exists on disk.
+    """
     from sentence_transformers import SentenceTransformer
     # Resolve local paths to absolute (avoids cwd issues)
@@ -115,7 +176,22 @@ def generate_embeddings(texts, model_name, batch_size, device="cpu"):
 def cluster_direct(embeddings, threshold):
-    """Run agglomerative clustering directly (for smaller datasets)."""
+    """Agglomerative clustering with cosine distance and average linkage.
+    Parameters
+    ----------
+    embeddings : numpy.ndarray
+        Shape ``(n, dim)``; rows are typically L2-normalized.
+    threshold : float
+        ``distance_threshold`` in ``AgglomerativeClustering`` (cosine metric).  Smaller
+        values yield **more** clusters (tighter merges); larger values yield **fewer**
+        clusters. Typical range ~0.2–0.5 for normalized vectors.
+    Returns
+    -------
+    numpy.ndarray
+        Integer labels, shape ``(n,)``, not necessarily starting at 0 or contiguous.
+    """
     print(f"🔗 Clustering {len(embeddings):,} rows with distance_threshold={threshold}...")
     t0 = time.time()
     clustering = AgglomerativeClustering(
@@ -132,10 +208,24 @@ def cluster_direct(embeddings, threshold):
 def cluster_twophase(embeddings, threshold, n_pre_clusters=None):
-    """
-    Two-phase clustering for large datasets:
-      Phase 1: MiniBatchKMeans to create manageable pre-groups
-      Phase 2: Agglomerative clustering within each pre-group
+    """Scale to large *n*: KMeans pre-groups, then agglomerative within each group.
+    Global cluster ids are formed by offsetting each group’s local labels so they do not
+    collide across groups.
+    Parameters
+    ----------
+    embeddings : numpy.ndarray
+        Shape ``(n, dim)``.
+    threshold : float
+        Same meaning as in :func:`cluster_direct` (cosine / average linkage).
+    n_pre_clusters : int, optional
+        Number of KMeans buckets. If omitted, uses ``max(10, n // 5000)`` capped at ``n``.
+    Returns
+    -------
+    numpy.ndarray
+        Integer labels, shape ``(n,)``.
     """
     n = len(embeddings)
     if n_pre_clusters is None:
@@ -190,7 +280,25 @@ def cluster_twophase(embeddings, threshold, n_pre_clusters=None):
 def cluster_embeddings(embeddings, threshold, n_pre_clusters=None):
-    """Choose clustering strategy based on dataset size."""
+    """Dispatch to :func:`cluster_direct` or :func:`cluster_twophase` by row count.
+    If ``len(embeddings) <= DIRECT_CLUSTERING_LIMIT`` uses direct agglomerative
+    clustering; otherwise uses the two-phase pipeline.
+    Parameters
+    ----------
+    embeddings : numpy.ndarray
+        Row embedding matrix.
+    threshold : float
+        Cosine distance threshold for agglomerative steps.
+    n_pre_clusters : int, optional
+        Forwarded to :func:`cluster_twophase` when the two-phase path is used.
+    Returns
+    -------
+    numpy.ndarray
+        Cluster label per row.
+    """
     n = len(embeddings)
     if n <= DIRECT_CLUSTERING_LIMIT:
         return cluster_direct(embeddings, threshold)
@@ -200,7 +308,21 @@ def cluster_embeddings(embeddings, threshold, n_pre_clusters=None):
 def print_summary(df, max_clusters=30, samples_per_cluster=3):
-    """Print a summary of the clustering results."""
+    """Print cluster sizes and sample rows to stdout (requires ``cluster_id`` column).
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Must include a ``cluster_id`` column (e.g. after assigning labels).
+    max_clusters : int, default 30
+        Maximum number of largest clusters to list.
+    samples_per_cluster : int, default 3
+        Rows shown per cluster (excluding ``cluster_id`` in the display).
+    Returns
+    -------
+    None
+    """
     cluster_sizes = df.groupby("cluster_id").size().sort_values(ascending=False)
     n_clusters = len(cluster_sizes)

{pegasource-0.2.0 → pegasource-0.2.2}/pegasource/dataset_clustering/server.py RENAMED Viewed

@@ -1,3 +1,13 @@
+"""Interactive clustering UI: FastAPI backend + static ``cluster_viz`` frontend.
+Endpoints (JSON under ``/api``) load or upload a CSV, track embedding status, re-cluster
+by threshold, and export Excel. Static files are served from the package
+``cluster_viz/`` directory. Run with ``python -m pegasource.dataset_clustering.server``
+or the ``pegasource-cluster-viz`` console script (requires ``[clustering]`` extras).
+The module exposes ``app`` (FastAPI instance) for ASGI servers and ``run()`` for CLI.
+"""
 import argparse
 import asyncio
 import json

pegasource-0.2.2/pegasource/path_estimation/__init__.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""Trajectory reconstruction from asynchronous observations (local ENU meters).
+Re-exports :func:`run_evaluation`, :func:`evaluate_path_estimation`, and
+:func:`estimate_paths_only` from :mod:`pegasource.path_estimation.evaluate`, same pattern
+as :mod:`pegasource.pcap` / :mod:`pegasource.geo`.  Requires the optional **path_estimation**
+dependencies (filterpy, torch, …); install with ``pip install -e ".[path_estimation]"``.
+Because Python loads this file when importing any submodule (e.g.
+``import pegasource.path_estimation.metrics``), loading the package also pulls in
+``evaluate`` and those dependencies.  Previously, lazy :func:`__getattr__` deferred that
+until you accessed ``run_evaluation``; explicit imports match the rest of pegasource at
+the cost of a heavier first import.
+Quick start::
+    from pegasource.path_estimation import run_evaluation
+    from pathlib import Path
+    run_evaluation(
+        Path("obs.csv"),
+        Path("true.csv"),
+        Path("out"),
+        methods=["kf", "dijkstra"],
+    )
+Examples
+--------
+>>> from pegasource.path_estimation.metrics import compute_all_metrics
+>>> import numpy as np
+>>> t = np.zeros((10, 2))
+>>> out = compute_all_metrics(t, t + 0.1)
+>>> assert "rmse_m" in out
+"""
+from .evaluate import (
+    estimate_paths_only,
+    evaluate_path_estimation,
+    run_evaluation,
+)
+__all__ = [
+    "estimate_paths_only",
+    "evaluate_path_estimation",
+    "run_evaluation",
+]

{pegasource-0.2.0 → pegasource-0.2.2}/pegasource/path_estimation/evaluate.py RENAMED Viewed

@@ -1,4 +1,21 @@
-"""Run estimators, compute metrics, save JSON summary and figures."""
+"""High-level evaluation API: run estimators, compare to ground truth, save artifacts.
+**Method registry** (``METHOD_REGISTRY``): graph stitchers ``dijkstra`` / ``astar``,
+``hmm``, Kalman-family filters ``kf`` / ``ekf`` / ``ukf`` / ``particle``, ``gnn``, plus
+``lstm`` / ``transformer`` implemented in this file.  Supervised keys ``lstm``,
+``transformer``, ``gnn`` require a real ``*_true_path.csv`` (see ``METHODS_REQUIRING_GROUND_TRUTH``).
+**Typical entry points**
+- :func:`run_evaluation` — loads CSVs and the default projected OSM graph from
+  :func:`~pegasource.path_estimation.graph_utils.get_projected_graph`, writes
+  ``metrics.json`` and figures.
+- :func:`evaluate_path_estimation` — same pipeline but you pass the ``road_graph``.
+- :func:`estimate_paths_only` — no true path file; cannot run supervised methods.
+Output dicts map method name → either metric scores (see :func:`pegasource.path_estimation.metrics.compute_all_metrics`)
+plus ``meta``, or ``{"error": "..."}`` if the estimator raised.
+"""
 from __future__ import annotations
@@ -85,6 +102,18 @@ def _estimate_transformer(
 def torch_device(name: Optional[str] = None):
+    """Return a ``torch.device``, defaulting to CUDA when available.
+    Parameters
+    ----------
+    name : str, optional
+        If given (e.g. ``"cuda"``, ``"cpu"``), that device is returned. If ``None``,
+        uses CUDA when :func:`torch.cuda.is_available` else CPU.
+    Returns
+    -------
+    torch.device
+    """
     import torch as _torch
     if name:
@@ -199,23 +228,43 @@ def estimate_paths_only(
 ) -> Dict[str, Any]:
     """Estimate paths from observations only (no ground-truth CSV).
-    Builds an internal time grid from the observation span at ``output_hz`` (default 1 Hz).
-    Methods that require supervised training (**lstm**, **transformer**, **gnn**) are not
-    supported here; use :func:`evaluate_path_estimation` with a true path file.
-    Args:
-        observations_csv: Path to ``*_observations.csv``.
-        road_graph: Road network graph for map-based methods (ignored by pure filters).
-        methods: e.g. ``["kf", "ukf", "dijkstra"]``.
-        output_hz: Sample rate for the output trajectory timeline (Hz).
-        output_dir: If set and ``plot`` is True, writes ``figures/<method>_path_enu.png``.
-        plot: ENU figure with **estimate + observations only** (no true path line).
-        plot_map: Not supported without real lon/lat ground truth; raises if True.
-        device: Torch device (unused unless extended).
-        seed: RNG seed.
-    Returns:
-        Per method: :class:`EstimationResult`, or ``{"error": "..."}`` on failure.
+    Builds a **stub** ground-truth frame with :func:`pegasource.path_estimation.io.stub_true_path_from_observations`
+    so filter/graph code paths see a consistent time axis; **no real positions** exist,
+    so metrics against ``true_x``/``true_y`` are meaningless.  Do **not** use for
+    ``lstm``, ``transformer``, or ``gnn``.
+    Parameters
+    ----------
+    observations_csv : pathlib.Path
+        ``*_observations.csv`` with ``timestamp_s`` and source columns (see IO loaders).
+    road_graph : networkx.MultiDiGraph or compatible
+        Projected street graph for map-matching methods; ignored by pure filters.
+    methods : list of str
+        Method names, normalized with ``strip().lower()`` (e.g. ``["kf", "dijkstra", "hmm"]``).  Raises if any of
+        ``lstm``, ``transformer``, ``gnn`` appear.
+    output_hz : float, default 1.0
+        Stub timeline sampling rate (Hz).
+    output_dir : pathlib.Path, optional
+        If set and ``plot`` is True, creates ``output_dir/figures/<method>_path_enu.png``.
+    plot : bool, default False
+        If True and ``output_dir`` is set, writes ENU overlays **without** a true path polyline.
+    plot_map : bool, default False
+        Must stay False (no real lon/lat); raises ``ValueError`` otherwise.
+    device : str, optional
+        Passed to neural estimators (unused for filter-only methods).
+    seed : int, default 0
+        RNG seed for stochastic methods.
+    Returns
+    -------
+    dict
+        Keys are method names. Values are :class:`~pegasource.path_estimation.types.EstimationResult`
+        or ``{"error": "<message>"}``.
+    Raises
+    ------
+    ValueError
+        If ``plot_map`` is True, or if supervised method names are requested.
     """
     names = [m.strip().lower() for m in methods]
     forbidden = sorted({m for m in names if m in METHODS_REQUIRING_GROUND_TRUTH})
@@ -261,26 +310,39 @@ def evaluate_path_estimation(
     device: Optional[str] = None,
     seed: int = 0,
 ) -> Dict[str, Dict]:
-    """Run path estimation for CSV paths and a caller-supplied road graph.
-    Loads observations and ground truth from disk, then runs each method in
-    ``methods`` using ``road_graph`` for graph-based estimators (Dijkstra, A*,
-    HMM, GNN, …). Filter methods (KF, EKF, …) ignore the graph.
-    Args:
-        observations_csv: Path to ``*_observations.csv`` (mixed GPS / circle / cell).
-        true_path_csv: Path to ``*_true_path.csv`` (1 Hz ground truth).
-        road_graph: Projected road network (e.g. OSM ``MultiDiGraph`` with ``x``, ``y``,
-            ``crs`` as expected by ``graph_stitch`` / GNN).
-        methods: Method names (e.g. ``["kf", "dijkstra", "gnn"]``). Unknown names raise.
-        output_dir: If set, writes ``metrics.json`` and, when plotting, ``figures/``.
-        plot: Write ENU overlay PNGs under ``output_dir/figures`` when ``output_dir`` is set.
-        plot_map: Write Web Mercator map PNGs when ``output_dir`` is set.
-        device: Torch device string for LSTM / Transformer / GNN.
-        seed: RNG seed for stochastic estimators.
-    Returns:
-        Per-method dicts: metric scores and ``meta``, or ``{"error": "..."}`` on failure.
+    """Load CSVs and run estimators with a **caller-supplied** projected road graph.
+    Graph-based methods (``dijkstra``, ``astar``, ``hmm``, ``gnn``) use ``road_graph``;
+    filters (``kf``, ``ekf``, ``ukf``, ``particle``) ignore it.  Neural sequence models
+    train against ``true_path_csv``.
+    Parameters
+    ----------
+    observations_csv : pathlib.Path
+        Event table: must include ``timestamp_s`` and columns per ``source_type``.
+    true_path_csv : pathlib.Path
+        1 Hz (or regular) ground truth: ``timestamp_s``, ``true_x``, ``true_y``, etc.
+    road_graph : networkx.MultiDiGraph
+        Projected OSM-style graph (``x``, ``y`` node attrs, ``crs`` on ``G.graph``).
+    methods : list of str
+        Subset of registered method names; unknown names cause errors inside :func:`_run_methods`.
+    output_dir : pathlib.Path, optional
+        If set, writes ``metrics.json`` and, when ``plot`` or ``plot_map`` is True, PNGs under ``figures/``.
+    plot : bool, default False
+        Save ENU matplotlib overlays (estimate vs true + observations).
+    plot_map : bool, default False
+        Save Web-Mercator basemap figures (may download tiles; slower).
+    device : str, optional
+        Torch device for ``lstm``, ``transformer``, ``gnn``.
+    seed : int, default 0
+        Base seed for RNGs inside stochastic estimators.
+    Returns
+    -------
+    dict[str, dict]
+        Each key is a method name.  On success, value is a flat dict of metrics from
+        :func:`pegasource.path_estimation.metrics.compute_all_metrics` plus a ``meta`` field;
+        on failure, ``{"error": "<message>"}``.
     """
     obs_df = load_observations_csv(observations_csv)
     true_df = load_true_path_csv(true_path_csv)
@@ -309,7 +371,40 @@ def run_evaluation(
     device: Optional[str] = None,
     seed: int = 0,
 ) -> Dict[str, Dict]:
-    """Run selected methods; write ``metrics.json`` and figures under ``output_dir``."""
+    """Convenience wrapper: load CSVs, obtain default graph, evaluate, write ``output_dir``.
+    Calls :func:`pegasource.path_estimation.graph_utils.get_projected_graph` for the same
+    walkable OSM graph used by synthetic data generation (cached under the package).
+    Parameters
+    ----------
+    observations_csv : pathlib.Path
+        See :func:`evaluate_path_estimation`.
+    true_path_csv : pathlib.Path
+        See :func:`evaluate_path_estimation`.
+    output_dir : pathlib.Path
+        Created if needed; receives ``metrics.json`` and ``figures/`` when plotting.
+    methods : list of str, optional
+        If ``None``, uses ``list(METHOD_REGISTRY.keys())`` (dijkstra, astar, hmm, kf, ekf,
+        ukf, particle, gnn).  Pass explicit names to add ``lstm`` or ``transformer``.
+    plot : bool, default True
+        Write ENU figures to ``output_dir/figures``.
+    plot_map : bool, default False
+        If True, also writes map-tile figures (contextily requires network access).
+    device : str, optional
+        Torch device for neural estimators.
+    seed : int, default 0
+        RNG seed forwarded to estimators.
+    Returns
+    -------
+    dict[str, dict]
+        Same structure as :func:`evaluate_path_estimation`.
+    See Also
+    --------
+    evaluate_path_estimation : supply your own ``road_graph``.
+    """
     obs_df = load_observations_csv(observations_csv)
     true_df = load_true_path_csv(true_path_csv)
     G = get_projected_graph()
@@ -330,6 +425,19 @@ def run_evaluation(
 def print_summary(summary: Dict[str, Dict]) -> None:
+    """Print one line per method: RMSE/MAE or an error string (CLI helper).
+    Parameters
+    ----------
+    summary : dict[str, dict]
+        Mapping produced by :func:`run_evaluation` / :func:`evaluate_path_estimation`
+        (or compatible metric dicts).
+    Returns
+    -------
+    None
+        Writes to stdout only.
+    """
     for k, v in summary.items():
         if "error" in v:
             print(f"{k}: ERROR — {v['error']}")

{pegasource-0.2.0 → pegasource-0.2.2}/pegasource/path_estimation/generate_synthetic_datasets.py RENAMED Viewed

@@ -1005,5 +1005,3 @@ def main() -> None:
     print(f"Output folder: {args.output_dir.resolve()}")
-if __name__ == "__main__":
-    main()

pegasource 0.2.0__tar.gz → 0.2.2__tar.gz

pegasource 0.2.0tar.gz → 0.2.2tar.gz