pyvegh 0.8.0__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {pyvegh-0.8.0 → pyvegh-0.9.0}/Cargo.lock +1 -1
  2. {pyvegh-0.8.0 → pyvegh-0.9.0}/Cargo.toml +1 -1
  3. {pyvegh-0.8.0 → pyvegh-0.9.0}/PKG-INFO +23 -1
  4. {pyvegh-0.8.0 → pyvegh-0.9.0}/README.md +22 -0
  5. {pyvegh-0.8.0 → pyvegh-0.9.0}/pyproject.toml +1 -1
  6. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/analytics.py +2 -2
  7. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli.py +2 -1
  8. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_commands.py +112 -28
  9. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_helpers.py +2 -2
  10. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_hooks.py +3 -2
  11. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_main.py +4 -31
  12. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/jsonc.py +1 -1
  13. {pyvegh-0.8.0 → pyvegh-0.9.0}/src/core.rs +5 -3
  14. {pyvegh-0.8.0 → pyvegh-0.9.0}/src/lib.rs +20 -3
  15. {pyvegh-0.8.0 → pyvegh-0.9.0}/.github/workflows/ci.yml +0 -0
  16. {pyvegh-0.8.0 → pyvegh-0.9.0}/.github/workflows/release.yml +0 -0
  17. {pyvegh-0.8.0 → pyvegh-0.9.0}/.github/workflows/rust-clippy.yml +0 -0
  18. {pyvegh-0.8.0 → pyvegh-0.9.0}/.gitignore +0 -0
  19. {pyvegh-0.8.0 → pyvegh-0.9.0}/LICENSE +0 -0
  20. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/__init__.py +0 -0
  21. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_config.py +0 -0
  22. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_repo.py +0 -0
  23. {pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/config.jsonc +0 -0
  24. {pyvegh-0.8.0 → pyvegh-0.9.0}/src/hash.rs +0 -0
  25. {pyvegh-0.8.0 → pyvegh-0.9.0}/src/storage.rs +0 -0
  26. {pyvegh-0.8.0 → pyvegh-0.9.0}/tests/integration_test.sh +0 -0
  27. {pyvegh-0.8.0 → pyvegh-0.9.0}/tests/test_smoke.py +0 -0
  28. {pyvegh-0.8.0 → pyvegh-0.9.0}/uv.lock +0 -0
{pyvegh-0.8.0 → pyvegh-0.9.0}/Cargo.lock
@@ -540,7 +540,7 @@ dependencies = [
 
 [[package]]
 name = "pyvegh"
-version = "0.8.0"
+version = "0.9.0"
 dependencies = [
  "anyhow",
  "bincode",
{pyvegh-0.8.0 → pyvegh-0.9.0}/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "pyvegh"
-version = "0.8.0"
+version = "0.9.0"
 edition = "2024"
 authors = ["CodeTease"]
 readme = "README.md"
{pyvegh-0.8.0 → pyvegh-0.9.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyvegh
-Version: 0.8.0
+Version: 0.9.0
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -82,6 +82,16 @@ vegh config list
 vegh config reset
 ```
 
+**Advanced:** You can also configure custom `audit` patterns in `~/.vegh/config.json`:
+```json
+{
+  "audit": {
+    "patterns": ["custom_secret\\.key", ".*\\.private"],
+    "keywords": ["MY_API_KEY", "INTERNAL_TOKEN"]
+  }
+}
+```
+
 ### 2\. Create Snapshot
 
 Pack a directory into a highly compressed snapshot.
@@ -132,6 +142,9 @@ Clean up old snapshots to free disk space.
 # Keep only the 5 most recent snapshots in the current directory
 vegh prune --keep 5
 
+# Delete snapshots older than 30 days (but always keep the 5 most recent)
+vegh prune --older-than 30 --keep 5
+
 # Force clean without confirmation (useful for CI/CD)
 vegh prune --keep 1 --force
 ```
@@ -171,6 +184,7 @@ vegh cat backup.vegh src/main.rs
 vegh cat backup.vegh image.png --raw > extracted_image.png
 
 # Compare snapshot with a directory
+# (Automatically performs Blake3 Hash comparison if file sizes match)
 vegh diff backup.vegh ./current-project
 ```
 
@@ -202,6 +216,14 @@ Create a `.veghhooks.json` in your workspace.
 }
 ```
 
+### 12\. Audit
+
+Scan a snapshot for sensitive filenames and secrets.
+
+```shell
+vegh audit backup.vegh
+```
+
 ## Library Usage
 
 You can also use PyVegh as a library in your own Python scripts:
{pyvegh-0.8.0 → pyvegh-0.9.0}/README.md
@@ -54,6 +54,16 @@ vegh config list
 vegh config reset
 ```
 
+**Advanced:** You can also configure custom `audit` patterns in `~/.vegh/config.json`:
+```json
+{
+  "audit": {
+    "patterns": ["custom_secret\\.key", ".*\\.private"],
+    "keywords": ["MY_API_KEY", "INTERNAL_TOKEN"]
+  }
+}
+```
+
 ### 2\. Create Snapshot
 
 Pack a directory into a highly compressed snapshot.
@@ -104,6 +114,9 @@ Clean up old snapshots to free disk space.
 # Keep only the 5 most recent snapshots in the current directory
 vegh prune --keep 5
 
+# Delete snapshots older than 30 days (but always keep the 5 most recent)
+vegh prune --older-than 30 --keep 5
+
 # Force clean without confirmation (useful for CI/CD)
 vegh prune --keep 1 --force
 ```
@@ -143,6 +156,7 @@ vegh cat backup.vegh src/main.rs
 vegh cat backup.vegh image.png --raw > extracted_image.png
 
 # Compare snapshot with a directory
+# (Automatically performs Blake3 Hash comparison if file sizes match)
 vegh diff backup.vegh ./current-project
 ```
 
@@ -174,6 +188,14 @@ Create a `.veghhooks.json` in your workspace.
 }
 ```
 
+### 12\. Audit
+
+Scan a snapshot for sensitive filenames and secrets.
+
+```shell
+vegh audit backup.vegh
+```
+
 ## Library Usage
 
 You can also use PyVegh as a library in your own Python scripts:
{pyvegh-0.8.0 → pyvegh-0.9.0}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "pyvegh"
-version = "0.8.0"
+version = "0.9.0"
 description = "Python bindings for Vegh - The Snapshot Tool."
 authors = [{name = "CodeTease"}]
 readme = "README.md"
{pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/analytics.py
@@ -120,9 +120,9 @@ def calculate_sloc(file_path: str) -> int:
     # Check if file is binary
     with open(file_path, "rb") as f:
         chunk = f.read(512)
-        if b'\x00' in chunk:
+        if b"\x00" in chunk:
             return 0
-
+
     # Read file with error handling
     with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
         content = f.read()
{pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli.py
@@ -1,6 +1,7 @@
 from .cli_main import app
+
 # noqa: F401 to expose app at package level
-from . import cli_commands
+from . import cli_commands  # noqa: F401
 
 if __name__ == "__main__":
     app()
{pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_commands.py
@@ -14,7 +14,8 @@ from rich.table import Table
 from rich.panel import Panel
 from rich.prompt import Prompt, Confirm
 
-from .cli_main import app, create_snap, dry_run_snap
+from .cli_main import app
+from ._core import create_snap, dry_run_snap
 from .cli_helpers import (
     console,
     format_bytes,
@@ -44,6 +45,7 @@ from ._core import (
     get_context_xml,
     search_snap,
     read_snapshot_text,
+    hash_file,
 )
 from .analytics import render_dashboard, scan_sloc, calculate_sloc, count_sloc_from_text
 
@@ -56,6 +58,9 @@ def prune(
     keep: int = typer.Option(
         5, "--keep", "-k", help="Number of recent snapshots to keep"
     ),
+    older_than: Optional[int] = typer.Option(
+        None, "--older-than", help="Delete snapshots older than X days"
+    ),
     force: bool = typer.Option(False, "--force", "-f", help="Skip confirmation"),
 ):
     """Clean up old snapshots, keeping only the most recent ones."""
@@ -67,18 +72,36 @@
         target_dir.glob("*.vegh"), key=lambda f: f.stat().st_mtime, reverse=True
     )
 
-    if len(snapshots) <= keep:
+    delete_list = []
+
+    if older_than is not None:
+        cutoff = time.time() - (older_than * 86400)
+        # Identify files older than cutoff
+        time_candidates = [s for s in snapshots if s.stat().st_mtime < cutoff]
+
+        # Ensure we keep at least 'keep' snapshots (the most recent ones)
+        safe_set = set(snapshots[:keep])
+        delete_list = [s for s in time_candidates if s not in safe_set]
+
         console.print(
-            f"[green]No cleanup needed. Found {len(snapshots)} snapshots (Keep: {keep}).[/green]"
+            f"[cyan]Policy: Delete older than {older_than} days (except top {keep}).[/cyan]"
         )
-        return
+    else:
+        if len(snapshots) <= keep:
+            console.print(
+                f"[green]No cleanup needed. Found {len(snapshots)} snapshots (Keep: {keep}).[/green]"
+            )
+            return
 
-    keep_list = snapshots[:keep]
-    delete_list = snapshots[keep:]
+        delete_list = snapshots[keep:]
 
-    console.print(
-        f"[bold cyan]Found {len(snapshots)} snapshots. Keeping {len(keep_list)} most recent.[/bold cyan]"
-    )
+        console.print(
+            f"[bold cyan]Found {len(snapshots)} snapshots. Keeping {keep} most recent.[/bold cyan]"
+        )
+
+    if not delete_list:
+        console.print("[green]No snapshots match deletion criteria.[/green]")
+        return
 
     table = Table(title="Snapshots to Delete")
     table.add_column("File", style="red")
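A minimal sketch of the retention policy implemented by the prune hunk above, assuming snapshots are sorted newest-first (the helper name `select_prunable` is illustrative, not part of pyvegh): the age cutoff and the `--keep` floor combine, so the newest `keep` snapshots survive even when they are older than the cutoff.

```python
import time
from pathlib import Path
from typing import List, Optional


def select_prunable(
    snapshots: List[Path], keep: int, older_than_days: Optional[int]
) -> List[Path]:
    """Given snapshots sorted newest-first, return those prune would delete."""
    if older_than_days is None:
        # Count-based policy only: everything beyond the newest `keep` files.
        return snapshots[keep:]
    cutoff = time.time() - older_than_days * 86400
    safe = set(snapshots[:keep])  # the newest `keep` files are never deleted
    return [s for s in snapshots if s.stat().st_mtime < cutoff and s not in safe]
```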
@@ -336,15 +359,20 @@
             repo_path, source_name = ensure_repo(repo, branch, offline)
             source_name = f"Repo: {source_name}"
             snap_list = dry_run_snap(str(repo_path))
-            snap_map = {Path(p).as_posix(): s for p, s in snap_list}
+            snap_map = {
+                Path(p).as_posix(): {"size": s, "hash": None} for p, s in snap_list
+            }
         elif file:
             if not file.exists():
                 console.print(f"[red]File '{file}' not found.[/red]")
                 raise typer.Exit(1)
             source_name = f"Snap: {file.name}"
             snap_files = list_files_details(str(file))
+            # list_files_details now returns (path, size, hash)
             snap_map = {
-                Path(p).as_posix(): s for p, s in snap_files if p != ".vegh.json"
+                Path(p).as_posix(): {"size": s, "hash": h}
+                for p, s, h in snap_files
+                if p != ".vegh.json"
             }
         else:
             console.print(
@@ -355,11 +383,15 @@
         if target_is_snap:
             target_files = list_files_details(str(target))
             local_files = {
-                Path(p).as_posix(): s for p, s in target_files if p != ".vegh.json"
+                Path(p).as_posix(): {"size": s, "hash": h}
+                for p, s, h in target_files
+                if p != ".vegh.json"
             }
         else:
             local_list = dry_run_snap(str(target))
-            local_files = {Path(p).as_posix(): s for p, s in local_list}
+            local_files = {
+                Path(p).as_posix(): {"size": s, "hash": None} for p, s in local_list
+            }
     except Exception as e:
         console.print(f"[red]Error:[/red] {e}")
         raise typer.Exit(1)
@@ -376,11 +408,48 @@
         in_loc = path in local_files
 
         if in_src and in_loc:
-            if snap_map[path] != local_files[path]:
+            src_info = snap_map[path]
+            loc_info = local_files[path]
+            src_size = src_info["size"]
+            loc_size = loc_info["size"]
+
+            modified = False
+            details = ""
+
+            if src_size != loc_size:
+                modified = True
+                details = f"Size: {format_bytes(src_size)} -> {format_bytes(loc_size)}"
+            else:
+                # Same size, check content via Hash
+                src_hash = src_info.get("hash")
+
+                if src_hash:  # Only if source is snapshot (or has hash)
+                    loc_hash = loc_info.get("hash")
+
+                    if loc_hash:
+                        # Target is also snapshot, compare hashes directly
+                        if src_hash != loc_hash:
+                            modified = True
+                            details = "Content Changed (Hash mismatch)"
+                    elif not target_is_snap:
+                        # Target is local directory, compute hash on demand
+                        try:
+                            full_local_path = target / path
+                            if full_local_path.exists():
+                                computed = hash_file(str(full_local_path))
+                                if computed != src_hash:
+                                    modified = True
+                                    details = "Content Changed (Hash mismatch)"
+                        except Exception:
+                            # If hashing fails (permissions etc), assume unmodified or warn?
+                            # For now, if we can't read it, we rely on size (which matched).
+                            pass
+
+            if modified:
                 table.add_row(
                     path,
                     "[yellow]MODIFIED[/yellow]",
-                    f"Size: {format_bytes(snap_map[path])} -> {format_bytes(local_files[path])}",
+                    details,
                 )
                 changes = True
         elif in_src and not in_loc:
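The comparison added above reduces to three steps: report a size mismatch immediately; when sizes match and the source entry carries a Blake3 hash, compare hashes; when the target is a plain directory, hash the local file on demand via the Rust `hash_file` binding. A condensed, illustrative sketch of that decision, mirroring the `{"size": ..., "hash": ...}` dict shape used in the hunk (not the shipped function):

```python
from pathlib import Path
from typing import Callable, Optional


def is_modified(
    src: dict,
    loc: dict,
    local_path: Optional[Path],
    hash_file: Callable[[str], str],
) -> bool:
    """Return True if the entry should be reported as MODIFIED."""
    if src["size"] != loc["size"]:
        return True  # cheap size check first
    src_hash = src.get("hash")
    if not src_hash:
        return False  # no hash available; sizes matched, assume unchanged
    if loc.get("hash"):
        return src_hash != loc["hash"]  # snapshot vs snapshot
    if local_path is not None and local_path.exists():
        try:
            return hash_file(str(local_path)) != src_hash  # snapshot vs directory
        except Exception:
            pass  # unreadable file: fall back to the size match
    return False
```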
@@ -417,6 +486,24 @@ def audit(
 
     console.print(f"[bold cyan]Auditing {file.name}...[/bold cyan]")
 
+    # Load custom audit config
+    cfg = load_config()
+    audit_cfg = cfg.get("audit", {})
+    custom_patterns = audit_cfg.get("patterns", [])
+    custom_keywords = audit_cfg.get("keywords", [])
+
+    final_patterns = SENSITIVE_PATTERNS + custom_patterns
+
+    secret_keywords = [
+        "PASSWORD",
+        "SECRET_KEY",
+        "TOKEN",
+        "API_KEY",
+        "ACCESS_KEY",
+        "PRIVATE_KEY",
+    ]
+    final_keywords = secret_keywords + custom_keywords
+
     risks = []
 
     try:
@@ -424,9 +511,14 @@
 
         # 1. Filename Scan
        for path in files:
-            for pattern in SENSITIVE_PATTERNS:
-                if re.search(pattern, path, re.IGNORECASE):
-                    risks.append((path, "Filename Match", f"Pattern: {pattern}"))
+            for pattern in final_patterns:
+                try:
+                    if re.search(pattern, path, re.IGNORECASE):
+                        risks.append((path, "Filename Match", f"Pattern: {pattern}"))
+                except re.error:
+                    console.print(
+                        f"[yellow]Warning: Invalid regex pattern '{pattern}' in config[/yellow]"
+                    )
 
         # 2. Content Scan (Config files only)
         # Scan for common secrets inside textual config files
@@ -440,14 +532,6 @@
             ".ini",
             ".xml",
         }
-        secret_keywords = [
-            "PASSWORD",
-            "SECRET_KEY",
-            "TOKEN",
-            "API_KEY",
-            "ACCESS_KEY",
-            "PRIVATE_KEY",
-        ]
 
         for path in files:
             p = Path(path)
@@ -457,7 +541,7 @@
                 content_bytes = cat_file(str(file), path)
                 try:
                     content = bytes(content_bytes).decode("utf-8")
-                    for keyword in secret_keywords:
+                    for keyword in final_keywords:
                         if keyword in content:
                             risks.append(
                                 (path, "Content Match", f"Found keyword: {keyword}")
@@ -504,7 +588,7 @@ def doctor(
     console.print("Config: [dim]Not configured[/dim]")
 
     try:
-        from . import _core
+        from . import _core  # noqa: F401
 
         console.print("Rust Core: [green]Loaded[/green]")
     except ImportError:
{pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_helpers.py
@@ -35,7 +35,7 @@ def load_config() -> Dict:
     try:
         with open(CONFIG_FILE, "r") as f:
             return json.load(f)
-    except:
+    except Exception:
         pass
     return {}
 
@@ -65,7 +65,7 @@ def get_dir_size(path: Path) -> int:
         for entry in path.rglob("*"):
             if entry.is_file():
                 total += entry.stat().st_size
-    except:
+    except Exception:
         pass
     return total
 
{pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_hooks.py
@@ -22,8 +22,9 @@ def execute_hooks(commands: List[str], hook_name: str) -> bool:
         return True
 
     # Just a friendly warning for the unsuspecting user
-    console.print(f"[bold yellow]⚠ Running {hook_name} hooks from project config...[/bold yellow]")
-
+    console.print(
+        f"[bold yellow]⚠ Running {hook_name} hooks from project config...[/bold yellow]"
+    )
 
     console.print(f"[bold magenta]>>> HOOK: {hook_name}[/bold magenta]")
     env = os.environ.copy()
{pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/cli_main.py
@@ -3,6 +3,9 @@ from rich.console import Console
 
 import typer
 
+# Add sub-apps
+from .cli_config import config_app
+
 # Try to import package version metadata (Modern Pythonic way)
 try:
     from importlib.metadata import version as get_package_version, PackageNotFoundError
@@ -13,38 +16,11 @@ except ImportError:
 
 # Import core functionality
 try:
-    from ._core import (
-        create_snap,
-        dry_run_snap,
-        restore_snap,
-        check_integrity,
-        list_files,
-        get_metadata,
-        count_locs,
-        scan_locs_dir,
-        cat_file,
-        list_files_details,
-        get_context_xml,
-        search_snap,
-        read_snapshot_text,
-    )
+    from . import _core  # noqa: F401
 except ImportError:
     print("Error: Rust core missing. Run 'maturin develop'!")
     exit(1)
 
-# Import Analytics module
-try:
-    from .analytics import (
-        render_dashboard,
-        scan_sloc,
-        calculate_sloc,
-        count_sloc_from_text,
-    )
-except ImportError:
-    render_dashboard = None
-    scan_sloc = None
-    calculate_sloc = None
-
 # Define context settings to enable '-h' alongside '--help'
 CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
 
@@ -92,7 +68,4 @@ def main(
     pass
 
 
-# Add sub-apps
-from .cli_config import config_app
-
 app.add_typer(config_app, name="config")
{pyvegh-0.8.0 → pyvegh-0.9.0}/python/vegh/jsonc.py
@@ -1,6 +1,6 @@
 # A simple JSONC parser, written to avoid extra dependencies.
 # It removes comments from JSONC strings and parses the result as JSON.
-# Use internally only, so we probably don't need advance parsing features.
+# Use internally only, so we probably don't need advance parsing features.
 
 import json
 import re
{pyvegh-0.8.0 → pyvegh-0.9.0}/src/core.rs
@@ -39,7 +39,7 @@ pub struct VeghMetadata {
 
 // Pipeline Messages
 enum WorkerResult {
-    Processed(ProcessedMessage),
+    Processed(Box<ProcessedMessage>),
     Error(String),
 }
 
@@ -66,6 +66,7 @@ enum DataAction {
 
 // --- Main Packing Logic ---
 
+#[allow(clippy::too_many_arguments)]
 pub fn create_snap_logic(
     source: &Path,
     output: &Path,
@@ -355,7 +356,7 @@
 
             match process_res {
                 Ok(msg) => {
-                    let _ = tx.send(WorkerResult::Processed(msg));
+                    let _ = tx.send(WorkerResult::Processed(Box::new(msg)));
                 }
                 Err(e) => {
                     let _ = tx.send(WorkerResult::Error(e.to_string()));
@@ -384,7 +385,8 @@
                     eprintln!("Error: {}", e);
                 }
             }
-            WorkerResult::Processed(pm) => {
+            WorkerResult::Processed(pm_box) => {
+                let pm = *pm_box;
                 if pm.is_cached_hit {
                     cache_hit_count += 1;
                 }
{pyvegh-0.8.0 → pyvegh-0.9.0}/src/lib.rs
@@ -119,6 +119,7 @@ fn read_snapshot_text(file_path: String) -> PyResult<Vec<(String, String)>> {
 
 #[pyfunction]
 #[pyo3(signature = (source, output, level=3, comment=None, include=None, exclude=None, no_cache=false, verbose=true))]
+#[allow(clippy::too_many_arguments)]
 fn create_snap(
     source: String,
     output: String,
@@ -528,7 +529,7 @@ fn scan_locs_dir(source: String, exclude: Option<Vec<String>>) -> PyResult<Vec<(
 }
 
 #[pyfunction]
-fn list_files_details(file_path: String) -> PyResult<Vec<(String, u64)>> {
+fn list_files_details(file_path: String) -> PyResult<Vec<(String, u64, String)>> {
     let file = File::open(&file_path).map_err(|e| PyIOError::new_err(e.to_string()))?;
     let decoder = zstd::stream::read::Decoder::new(file).unwrap();
     let mut archive = tar::Archive::new(decoder);
@@ -548,7 +549,7 @@ fn list_files_details(file_path: String) -> PyResult<Vec<(String, u64)>> {
             return Ok(manifest
                 .entries
                 .into_iter()
-                .map(|en| (en.path, en.size))
+                .map(|en| (en.path, en.size, en.hash))
                 .collect());
         }
     }
@@ -557,13 +558,28 @@ fn list_files_details(file_path: String) -> PyResult<Vec<(String, u64)>> {
             && path_str != ".vegh.json"
             && path_str != "manifest.json"
         {
-            results.push((path_str, size));
+            results.push((path_str, size, String::new()));
         }
     }
     Ok(results)
 }
 
 
+#[pyfunction]
+fn hash_file(file_path: String) -> PyResult<String> {
+    let file = File::open(&file_path).map_err(|e| PyIOError::new_err(e.to_string()))?;
+    let mut hasher = blake3::Hasher::new();
+
+    if let Ok(mmap) = unsafe { memmap2::MmapOptions::new().map(&file) } {
+        hasher.update_rayon(&mmap);
+    } else {
+        let mut f = File::open(&file_path).map_err(|e| PyIOError::new_err(e.to_string()))?;
+        std::io::copy(&mut f, &mut hasher).map_err(|e| PyIOError::new_err(e.to_string()))?;
+    }
+
+    Ok(hasher.finalize().to_hex().to_string())
+}
+
 #[pymodule]
 #[pyo3(name = "_core")]
 fn pyvegh_core(m: &Bound<'_, PyModule>) -> PyResult<()> {
@@ -580,5 +596,6 @@ fn pyvegh_core(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(search_snap, m)?)?;
     m.add_function(wrap_pyfunction!(count_locs, m)?)?;
     m.add_function(wrap_pyfunction!(read_snapshot_text, m)?)?;
+    m.add_function(wrap_pyfunction!(hash_file, m)?)?;
     Ok(())
 }
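For reference, the `_core` changes above are exactly what the CLI hunks consume: `list_files_details` now yields `(path, size, blake3_hex)` triples, and the new `hash_file` returns the Blake3 hex digest of a local file. A hedged usage sketch, assuming a snapshot named `backup.vegh` exists and importing the private `_core` module the same way the CLI code does:

```python
from vegh._core import hash_file, list_files_details

# Entries inside the snapshot: (path, size, blake3_hex); the hash is an empty
# string on the tar-scan fallback path, when the snapshot carries no manifest.
for path, size, digest in list_files_details("backup.vegh"):
    print(f"{path}  {size} B  {digest or '<no hash>'}")

# Blake3 digest of a local file, as used by `vegh diff` when sizes match.
print(hash_file("pyproject.toml"))
```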