scitex 2.16.2__py3-none-any.whl → 2.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- scitex/_mcp_resources/_cheatsheet.py +1 -1
- scitex/_mcp_resources/_modules.py +1 -1
- scitex/_mcp_tools/__init__.py +2 -0
- scitex/_mcp_tools/verify.py +256 -0
- scitex/cli/main.py +2 -0
- scitex/cli/verify.py +476 -0
- scitex/dev/plt/__init__.py +1 -1
- scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +90 -0
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +1571 -0
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +6262 -0
- scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +1274 -0
- scitex/dev/plt/data/mpl/dir_ax.txt +459 -0
- scitex/dev/plt/mpl/get_dir_ax.py +1 -1
- scitex/dev/plt/mpl/get_signatures.py +1 -1
- scitex/dev/plt/mpl/get_signatures_details.py +1 -1
- scitex/io/_load.py +8 -1
- scitex/io/_save.py +12 -0
- scitex/scholar/data/.gitkeep +0 -0
- scitex/scholar/data/README.md +44 -0
- scitex/scholar/data/bib_files/bibliography.bib +1952 -0
- scitex/scholar/data/bib_files/neurovista.bib +277 -0
- scitex/scholar/data/bib_files/neurovista_enriched.bib +441 -0
- scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +441 -0
- scitex/scholar/data/bib_files/neurovista_processed.bib +338 -0
- scitex/scholar/data/bib_files/openaccess.bib +89 -0
- scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +2178 -0
- scitex/scholar/data/bib_files/pac.bib +698 -0
- scitex/scholar/data/bib_files/pac_enriched.bib +1061 -0
- scitex/scholar/data/bib_files/pac_processed.bib +0 -0
- scitex/scholar/data/bib_files/pac_titles.txt +75 -0
- scitex/scholar/data/bib_files/paywalled.bib +98 -0
- scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +58 -0
- scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +87 -0
- scitex/scholar/data/bib_files/seizure_prediction.bib +694 -0
- scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
- scitex/scholar/data/bib_files/test_complete_enriched.bib +437 -0
- scitex/scholar/data/bib_files/test_final_enriched.bib +437 -0
- scitex/scholar/data/bib_files/test_seizure.bib +46 -0
- scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
- scitex/scholar/data/impact_factor.db +0 -0
- scitex/session/README.md +2 -2
- scitex/session/__init__.py +1 -0
- scitex/session/_decorator.py +57 -33
- scitex/session/_lifecycle/__init__.py +23 -0
- scitex/session/_lifecycle/_close.py +225 -0
- scitex/session/_lifecycle/_config.py +112 -0
- scitex/session/_lifecycle/_matplotlib.py +83 -0
- scitex/session/_lifecycle/_start.py +246 -0
- scitex/session/_lifecycle/_utils.py +186 -0
- scitex/session/_manager.py +40 -3
- scitex/session/template.py +1 -1
- scitex/template/_templates/plt.py +1 -1
- scitex/template/_templates/session.py +1 -1
- scitex/verify/README.md +312 -0
- scitex/verify/__init__.py +212 -0
- scitex/verify/_chain.py +369 -0
- scitex/verify/_db.py +600 -0
- scitex/verify/_hash.py +187 -0
- scitex/verify/_integration.py +127 -0
- scitex/verify/_rerun.py +253 -0
- scitex/verify/_tracker.py +330 -0
- scitex/verify/_visualize.py +48 -0
- scitex/verify/_viz/__init__.py +56 -0
- scitex/verify/_viz/_colors.py +84 -0
- scitex/verify/_viz/_format.py +302 -0
- scitex/verify/_viz/_json.py +192 -0
- scitex/verify/_viz/_mermaid.py +440 -0
- scitex/verify/_viz/_plotly.py +193 -0
- scitex/verify/_viz/_templates.py +246 -0
- scitex/verify/_viz/_utils.py +56 -0
- {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/METADATA +1 -1
- {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/RECORD +78 -29
- scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +0 -462
- scitex/scholar/url_finder/.tmp/open_url/README.md +0 -223
- scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +0 -694
- scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +0 -1160
- scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +0 -344
- scitex/scholar/url_finder/.tmp/open_url/__init__.py +0 -24
- scitex/session/_lifecycle.py +0 -827
- {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/WHEEL +0 -0
- {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/entry_points.txt +0 -0
- {scitex-2.16.2.dist-info → scitex-2.17.0.dist-info}/licenses/LICENSE +0 -0
scitex/verify/_hash.py
ADDED
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+# Timestamp: "2026-02-01 (ywatanabe)"
+# File: /home/ywatanabe/proj/scitex-python/src/scitex/verify/_hash.py
+"""File and directory hashing utilities for verification."""
+
+from __future__ import annotations
+
+import hashlib
+from pathlib import Path
+from typing import Dict, Union
+
+
+def hash_file(
+    path: Union[str, Path],
+    algorithm: str = "sha256",
+    chunk_size: int = 8192,
+) -> str:
+    """
+    Compute hash of a file.
+
+    Parameters
+    ----------
+    path : str or Path
+        Path to the file to hash
+    algorithm : str, optional
+        Hash algorithm (default: sha256)
+    chunk_size : int, optional
+        Size of chunks to read (default: 8192)
+
+    Returns
+    -------
+    str
+        Hexadecimal hash string (first 32 characters)
+
+    Examples
+    --------
+    >>> hash_file("data.csv")
+    'a1b2c3d4e5f6...'
+    """
+    path = Path(path)
+    if not path.exists():
+        raise FileNotFoundError(f"File not found: {path}")
+
+    hasher = hashlib.new(algorithm)
+    with open(path, "rb") as f:
+        while chunk := f.read(chunk_size):
+            hasher.update(chunk)
+
+    return hasher.hexdigest()[:32]
+
+
+def hash_directory(
+    path: Union[str, Path],
+    pattern: str = "*",
+    recursive: bool = True,
+    algorithm: str = "sha256",
+) -> Dict[str, str]:
+    """
+    Compute hashes for all files in a directory.
+
+    Parameters
+    ----------
+    path : str or Path
+        Directory path
+    pattern : str, optional
+        Glob pattern for files (default: "*")
+    recursive : bool, optional
+        Whether to search recursively (default: True)
+    algorithm : str, optional
+        Hash algorithm (default: sha256)
+
+    Returns
+    -------
+    dict
+        Mapping of relative paths to hashes
+
+    Examples
+    --------
+    >>> hash_directory("./data/")
+    {'input.csv': 'a1b2...', 'config.yaml': 'c3d4...'}
+    """
+    path = Path(path)
+    if not path.is_dir():
+        raise NotADirectoryError(f"Not a directory: {path}")
+
+    glob_method = path.rglob if recursive else path.glob
+    hashes = {}
+
+    for file_path in glob_method(pattern):
+        if file_path.is_file():
+            rel_path = str(file_path.relative_to(path))
+            hashes[rel_path] = hash_file(file_path, algorithm=algorithm)
+
+    return hashes
+
+
+def hash_files(
+    paths: list[Union[str, Path]],
+    algorithm: str = "sha256",
+) -> Dict[str, str]:
+    """
+    Compute hashes for a list of files.
+
+    Parameters
+    ----------
+    paths : list of str or Path
+        List of file paths
+    algorithm : str, optional
+        Hash algorithm (default: sha256)
+
+    Returns
+    -------
+    dict
+        Mapping of paths to hashes
+    """
+    hashes = {}
+    for path in paths:
+        path = Path(path)
+        if path.exists() and path.is_file():
+            hashes[str(path)] = hash_file(path, algorithm=algorithm)
+    return hashes
+
+
+def combine_hashes(hashes: Dict[str, str], algorithm: str = "sha256") -> str:
+    """
+    Combine multiple hashes into a single hash.
+
+    Creates a deterministic combined hash from a dictionary of hashes.
+
+    Parameters
+    ----------
+    hashes : dict
+        Mapping of names to hashes
+    algorithm : str, optional
+        Hash algorithm (default: sha256)
+
+    Returns
+    -------
+    str
+        Combined hash (first 32 characters)
+
+    Examples
+    --------
+    >>> hashes = {'input.csv': 'a1b2...', 'script.py': 'c3d4...'}
+    >>> combine_hashes(hashes)
+    'e5f6a7b8...'
+    """
+    hasher = hashlib.new(algorithm)
+
+    # Sort by key for deterministic ordering
+    for key in sorted(hashes.keys()):
+        hasher.update(f"{key}:{hashes[key]}".encode())
+
+    return hasher.hexdigest()[:32]
+
+
+def verify_hash(
+    path: Union[str, Path],
+    expected_hash: str,
+    algorithm: str = "sha256",
+) -> bool:
+    """
+    Verify that a file matches an expected hash.
+
+    Parameters
+    ----------
+    path : str or Path
+        Path to the file
+    expected_hash : str
+        Expected hash value
+    algorithm : str, optional
+        Hash algorithm (default: sha256)
+
+    Returns
+    -------
+    bool
+        True if hash matches, False otherwise
+    """
+    try:
+        actual_hash = hash_file(path, algorithm=algorithm)
+        # Compare only the length of expected_hash (may be truncated)
+        return actual_hash[: len(expected_hash)] == expected_hash
+    except FileNotFoundError:
+        return False
+
+
+# EOF
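The helpers above compose into a simple fingerprinting workflow. A minimal usage sketch (the paths are hypothetical, and imports go through the private `_hash` module shown in this diff):

```python
# Sketch only: "results/output.csv" and "results/" are hypothetical paths.
from scitex.verify._hash import (
    combine_hashes,
    hash_directory,
    hash_file,
    verify_hash,
)

digest = hash_file("results/output.csv")  # first 32 hex chars of SHA-256

# verify_hash compares only len(expected_hash) characters,
# so truncated (stored) hashes still verify.
assert verify_hash("results/output.csv", digest[:8])

# Per-file hashes for a directory, folded into one deterministic
# fingerprint (keys are sorted before combining).
per_file = hash_directory("results/", pattern="*.csv")
fingerprint = combine_hashes(per_file)
```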
scitex/verify/_integration.py
ADDED
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+# Timestamp: "2026-02-01 (ywatanabe)"
+# File: /home/ywatanabe/proj/scitex-python/src/scitex/verify/_integration.py
+"""Integration hooks for session and io modules."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Optional, Union
+
+from ._tracker import get_tracker, start_tracking, stop_tracking
+
+
+def on_session_start(
+    session_id: str,
+    script_path: Optional[str] = None,
+    parent_session: Optional[str] = None,
+    verbose: bool = False,
+) -> None:
+    """
+    Hook called when a session starts.
+
+    Parameters
+    ----------
+    session_id : str
+        Unique session identifier
+    script_path : str, optional
+        Path to the script being run
+    parent_session : str, optional
+        Parent session ID for chain tracking
+    verbose : bool, optional
+        Whether to log status messages
+    """
+    try:
+        start_tracking(
+            session_id=session_id,
+            script_path=script_path,
+            parent_session=parent_session,
+        )
+    except Exception as e:
+        if verbose:
+            import logging
+
+            logging.getLogger(__name__).warning(
+                f"Could not start verification tracking: {e}"
+            )
+
+
+def on_session_close(
+    status: str = "success",
+    exit_code: int = 0,
+    verbose: bool = False,
+) -> None:
+    """
+    Hook called when a session closes.
+
+    Parameters
+    ----------
+    status : str, optional
+        Final status (success, failed, error)
+    exit_code : int, optional
+        Exit code of the script
+    verbose : bool, optional
+        Whether to log status messages
+    """
+    try:
+        stop_tracking(status=status, exit_code=exit_code)
+    except Exception as e:
+        if verbose:
+            import logging
+
+            logging.getLogger(__name__).warning(
+                f"Could not stop verification tracking: {e}"
+            )
+
+
+def on_io_load(
+    path: Union[str, Path],
+    track: bool = True,
+) -> None:
+    """
+    Hook called when a file is loaded via stx.io.load().
+
+    Parameters
+    ----------
+    path : str or Path
+        Path to the loaded file
+    track : bool, optional
+        Whether to track this file as an input
+    """
+    if not track:
+        return
+
+    tracker = get_tracker()
+    if tracker is not None:
+        try:
+            tracker.record_input(path, track=track)
+        except Exception:
+            pass  # Silent fail - don't interrupt io operations
+
+
+def on_io_save(
+    path: Union[str, Path],
+    track: bool = True,
+) -> None:
+    """
+    Hook called when a file is saved via stx.io.save().
+
+    Parameters
+    ----------
+    path : str or Path
+        Path to the saved file
+    track : bool, optional
+        Whether to track this file as an output
+    """
+    if not track:
+        return
+
+    tracker = get_tracker()
+    if tracker is not None:
+        try:
+            tracker.record_output(path, track=track)
+        except Exception:
+            pass  # Silent fail - don't interrupt io operations
+
+
+# EOF
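These hooks fail silently by design, so they can wrap any workload without interrupting it. A hypothetical sketch of the call sequence the session and io modules are expected to make internally (the session ID and paths are invented for illustration):

```python
from scitex.verify._integration import (
    on_io_load,
    on_io_save,
    on_session_close,
    on_session_start,
)

# Open a tracked session, record one input and one output, then close.
on_session_start(session_id="2026-02-01-abcd", script_path="analysis.py")
on_io_load("data/input.csv")      # recorded as a session input
on_io_save("results/output.csv")  # recorded as a session output
on_session_close(status="success", exit_code=0)
```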
scitex/verify/_rerun.py
ADDED
@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+# Timestamp: "2026-02-01 (ywatanabe)"
+# File: /home/ywatanabe/proj/scitex-python/src/scitex/verify/_rerun.py
+"""Rerun verification - re-execute scripts and compare outputs."""
+
+from __future__ import annotations
+
+import shutil
+import subprocess
+from pathlib import Path
+from typing import Dict
+
+from ._chain import (
+    FileVerification,
+    RunVerification,
+    VerificationLevel,
+    VerificationStatus,
+)
+from ._db import get_db
+
+
+def verify_by_rerun(
+    target: str | list[str],
+    timeout: int = 300,
+    cleanup: bool = True,
+) -> RunVerification | list[RunVerification]:
+    """
+    Verify session(s) by re-executing scripts and comparing outputs.
+
+    Parameters
+    ----------
+    target : str or list[str]
+        Session ID, script path, or artifact path.
+        - run_id: directly use this run
+        - script path: latest run that executed this script
+        - artifact path: latest run which produced this file
+    timeout : int, optional
+        Maximum execution time in seconds (default: 300)
+    cleanup : bool, optional
+        Whether to remove the new session's output directory after verification
+
+    Returns
+    -------
+    RunVerification or list[RunVerification]
+        Single result if single target, list if multiple targets
+    """
+    if isinstance(target, list):
+        return [_verify_single(t, timeout, cleanup) for t in target]
+    return _verify_single(target, timeout, cleanup)
+
+
+def _verify_single(
+    target: str,
+    timeout: int = 300,
+    cleanup: bool = True,
+) -> RunVerification:
+    """Verify a single target."""
+    db = get_db()
+
+    # Resolve target to session_id
+    session_id = _resolve_to_session_id(db, target)
+    if not session_id:
+        return _unknown_result(target, None)
+
+    # Get original run info
+    run_info = db.get_run(session_id)
+    if not run_info:
+        return _unknown_result(session_id, None)
+
+    script_path = run_info.get("script_path")
+    if not script_path or not Path(script_path).exists():
+        return RunVerification(
+            session_id=session_id,
+            script_path=script_path,
+            status=VerificationStatus.MISSING,
+            files=[],
+            combined_hash_expected=None,
+            combined_hash_current=None,
+            level=VerificationLevel.RERUN,
+        )
+
+    # Get expected output hashes from original session
+    original_hashes = db.get_file_hashes(session_id, role="output")
+    if not original_hashes:
+        return _unknown_result(session_id, script_path)
+
+    # Re-execute the script (creates new session)
+    exec_result = _execute_script(script_path, timeout)
+    if exec_result is not None:
+        return exec_result._replace(session_id=session_id)
+
+    # Find the new session (most recent from this script)
+    new_session_id, new_sdir_run = _find_new_session(db, script_path, session_id)
+    if not new_session_id:
+        return _unknown_result(session_id, script_path)
+
+    # Get new session's output hashes
+    new_hashes = db.get_file_hashes(new_session_id, role="output")
+
+    # Compare hashes by filename
+    file_verifications = _compare_hashes(original_hashes, new_hashes)
+
+    # Cleanup new session's output directory if requested
+    if cleanup and new_sdir_run:
+        _cleanup_session_dir(new_sdir_run)
+
+    # Determine overall status
+    status = _determine_status(file_verifications)
+
+    # Record verification result in database for original session
+    db.record_verification(
+        session_id=session_id,
+        level=VerificationLevel.RERUN.value,
+        status=status.value,
+    )
+
+    return RunVerification(
+        session_id=session_id,
+        script_path=script_path,
+        status=status,
+        files=file_verifications,
+        combined_hash_expected=run_info.get("combined_hash"),
+        combined_hash_current=None,
+        level=VerificationLevel.RERUN,
+    )
+
+
+def _resolve_to_session_id(db, target: str) -> str | None:
+    """Resolve target to session_id.
+
+    Accepts:
+    - run_id: directly use this run
+    - script path: latest run that executed this script
+    - artifact path: latest run which produced this file
+    """
+    # Try as run_id
+    if db.get_run(target):
+        return target
+
+    # Always resolve to absolute path
+    resolved = str(Path(target).resolve())
+
+    # Try as script path
+    for run in db.list_runs(limit=100):
+        if run.get("script_path") == resolved:
+            return run["session_id"]
+
+    # Try as artifact (output) path
+    sessions = db.find_session_by_file(resolved, role="output")
+    return sessions[0] if sessions else None
+
+
+def _unknown_result(session_id: str, script_path: str | None) -> RunVerification:
+    """Create an unknown verification result."""
+    return RunVerification(
+        session_id=session_id,
+        script_path=script_path,
+        status=VerificationStatus.UNKNOWN,
+        files=[],
+        combined_hash_expected=None,
+        combined_hash_current=None,
+        level=VerificationLevel.RERUN,
+    )
+
+
+def _execute_script(script_path: str, timeout: int) -> RunVerification | None:
+    """Execute script and return error result if failed, None if success."""
+    try:
+        result = subprocess.run(
+            ["python", script_path],
+            capture_output=True,
+            timeout=timeout,
+            cwd=Path(script_path).parent,
+        )
+        if result.returncode != 0:
+            return RunVerification(
+                session_id="",
+                script_path=script_path,
+                status=VerificationStatus.MISMATCH,
+                files=[],
+                combined_hash_expected=None,
+                combined_hash_current=None,
+                level=VerificationLevel.RERUN,
+            )
+        return None  # Success
+    except subprocess.TimeoutExpired:
+        return _unknown_result("", script_path)
+    except Exception:
+        return _unknown_result("", script_path)
+
+
+def _find_new_session(db, script_path: str, original_id: str) -> tuple:
+    """Find the new session created by re-running the script."""
+    recent_runs = db.list_runs(limit=5)
+    for run in recent_runs:
+        if run.get("script_path") == script_path and run["session_id"] != original_id:
+            return run["session_id"], run.get("sdir_run")
+    return None, None
+
+
+def _compare_hashes(
+    original_hashes: Dict[str, str], new_hashes: Dict[str, str]
+) -> list:
+    """Compare hashes by filename and return FileVerification list."""
+    original_by_name = {Path(p).name: h for p, h in original_hashes.items()}
+    new_by_name = {Path(p).name: h for p, h in new_hashes.items()}
+
+    verifications = []
+    for filename, expected_hash in original_by_name.items():
+        current_hash = new_by_name.get(filename)
+        if current_hash is None:
+            status = VerificationStatus.MISSING
+        elif current_hash == expected_hash:
+            status = VerificationStatus.VERIFIED
+        else:
+            status = VerificationStatus.MISMATCH
+
+        verifications.append(
+            FileVerification(
+                path=filename,
+                role="output",
+                expected_hash=expected_hash,
+                current_hash=current_hash,
+                status=status,
+            )
+        )
+    return verifications
+
+
+def _cleanup_session_dir(sdir_run: str) -> None:
+    """Remove the session's output directory (best-effort)."""
+    try:
+        path = Path(sdir_run)
+        if path.exists():
+            shutil.rmtree(path)
+    except Exception:
+        pass
+
+
+def _determine_status(file_verifications: list) -> VerificationStatus:
+    """Determine overall verification status from file verifications."""
+    if all(f.is_verified for f in file_verifications):
+        return VerificationStatus.VERIFIED
+    if any(f.status == VerificationStatus.MISMATCH for f in file_verifications):
+        return VerificationStatus.MISMATCH
+    return VerificationStatus.UNKNOWN
+
+
+# Backward compatibility alias
+verify_run_from_scratch = verify_by_rerun
+
+
+# EOF
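A minimal sketch of how rerun verification might be invoked (the script name is hypothetical; the target may equally be a session ID or an output-file path):

```python
from scitex.verify._chain import VerificationStatus
from scitex.verify._rerun import verify_by_rerun

result = verify_by_rerun("analysis.py", timeout=600, cleanup=True)
if result.status == VerificationStatus.VERIFIED:
    print("All outputs reproduced bit-for-bit.")
else:
    # Per-file detail: each entry carries expected vs. current hash.
    for f in result.files:
        print(f.path, f.status)
```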