PyPI - rosetta-sql - Versions diffs - 1.0.0__py3-none-any.whl - Mend

rosetta-sql 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

benchmark/generate_csv_data.py +83 -0
benchmark/import_data.py +168 -0
rosetta/__init__.py +3 -0
rosetta/__main__.py +8 -0
rosetta/benchmark.py +1678 -0
rosetta/buglist.py +108 -0
rosetta/cli/__init__.py +11 -0
rosetta/cli/config_cmd.py +243 -0
rosetta/cli/exec.py +219 -0
rosetta/cli/interactive_cmd.py +124 -0
rosetta/cli/list_cmd.py +215 -0
rosetta/cli/main.py +617 -0
rosetta/cli/output.py +545 -0
rosetta/cli/result.py +61 -0
rosetta/cli/result_cmd.py +247 -0
rosetta/cli/run.py +625 -0
rosetta/cli/status.py +161 -0
rosetta/comparator.py +205 -0
rosetta/config.py +139 -0
rosetta/executor.py +403 -0
rosetta/flamegraph.py +630 -0
rosetta/interactive.py +1790 -0
rosetta/models.py +197 -0
rosetta/parser.py +308 -0
rosetta/reporter/__init__.py +1 -0
rosetta/reporter/bench_html.py +1457 -0
rosetta/reporter/bench_text.py +162 -0
rosetta/reporter/history.py +1686 -0
rosetta/reporter/html.py +644 -0
rosetta/reporter/text.py +110 -0
rosetta/runner.py +3089 -0
rosetta/ui.py +736 -0
rosetta/whitelist.py +161 -0
rosetta_sql-1.0.0.dist-info/LICENSE +21 -0
rosetta_sql-1.0.0.dist-info/METADATA +379 -0
rosetta_sql-1.0.0.dist-info/RECORD +42 -0
rosetta_sql-1.0.0.dist-info/WHEEL +5 -0
rosetta_sql-1.0.0.dist-info/entry_points.txt +2 -0
rosetta_sql-1.0.0.dist-info/top_level.txt +4 -0
skills/rosetta/scripts/install_rosetta.py +469 -0
skills/rosetta/scripts/rosetta_wrapper.py +377 -0
tests/test_cli.py +749 -0

rosetta/buglist.py ADDED Viewed

@@ -0,0 +1,108 @@
+"""Bug-mark management for Rosetta.
+A *bug entry* records the fingerprint of a diff that has been identified as a
+genuine bug.  Unlike whitelisted diffs, bug-marked diffs **still count toward
+the failure rate** — the mark is purely informational so that users can track
+known bugs across test runs.
+The bug list is persisted as a single JSON file (``buglist.json``) in the
+output directory.  The fingerprint algorithm is identical to the whitelist
+(MD5 over normalised SQL + output), so the same ``diff_fingerprint`` helper
+is reused.
+"""
+import json
+import logging
+import os
+import time as _time
+from typing import Dict, Optional
+# Module logger, registered under the fixed name "rosetta" (not ``__name__``).
+log = logging.getLogger("rosetta")
+# File name of the persisted bug list; ``Buglist`` joins it onto the output
+# directory it is constructed with.
+_BUGLIST_FILE = "buglist.json"
+# ---------------------------------------------------------------------------
+# Buglist store
+# ---------------------------------------------------------------------------
+class Buglist:
+    """In-memory bug list backed by a JSON file.
+    Structure of ``buglist.json``::
+        {
+            "<fingerprint>": {
+                "stmt": "SELECT ...",
+                "dbms_a": "tdsql",
+                "dbms_b": "mysql",
+                "block": 42,
+                "reason": "Known bug #123",
+                "added_at": "2026-03-10 18:00:00"
+            },
+            ...
+        }
+    """
+    def __init__(self, output_dir: str):
+        self._path = os.path.join(output_dir, _BUGLIST_FILE)
+        self._data: Dict[str, dict] = {}
+        self.load()
+    # -- persistence --------------------------------------------------------
+    def load(self):
+        if os.path.isfile(self._path):
+            try:
+                with open(self._path, "r", encoding="utf-8") as f:
+                    self._data = json.load(f)
+            except (json.JSONDecodeError, OSError) as e:
+                log.warning("Failed to load buglist: %s", e)
+                self._data = {}
+        else:
+            self._data = {}
+    def save(self):
+        os.makedirs(os.path.dirname(self._path) or ".", exist_ok=True)
+        with open(self._path, "w", encoding="utf-8") as f:
+            json.dump(self._data, f, indent=2, ensure_ascii=False)
+    # -- query / mutate -----------------------------------------------------
+    @property
+    def entries(self) -> Dict[str, dict]:
+        return dict(self._data)
+    def __len__(self) -> int:
+        return len(self._data)
+    def contains(self, fingerprint: str) -> bool:
+        return fingerprint in self._data
+    def add(self, fingerprint: str, stmt: str, dbms_a: str, dbms_b: str,
+            block: int = 0, reason: str = "") -> dict:
+        """Add an entry and persist.  Returns the stored dict."""
+        entry = {
+            "stmt": stmt[:300],
+            "dbms_a": dbms_a,
+            "dbms_b": dbms_b,
+            "block": block,
+            "reason": reason,
+            "added_at": _time.strftime("%Y-%m-%d %H:%M:%S"),
+        }
+        self._data[fingerprint] = entry
+        self.save()
+        return entry
+    def remove(self, fingerprint: str) -> bool:
+        """Remove an entry.  Returns True if it existed."""
+        if fingerprint in self._data:
+            del self._data[fingerprint]
+            self.save()
+            return True
+        return False
+    def clear(self):
+        """Remove all entries."""
+        self._data.clear()
+        self.save()

rosetta/cli/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""
+Rosetta CLI - Modern command-line interface for AI Agents and humans.
+Human-readable output by default; use -j/--json for JSON output.
+"""
+from .main import main
+from .result import CommandResult
+from ..runner import _enter_interactive, parse_args
+__all__ = ["main", "CommandResult", "_enter_interactive", "parse_args"]

rosetta/cli/config_cmd.py ADDED Viewed

@@ -0,0 +1,243 @@
+"""
+Handler for the 'config' subcommand.
+"""
+import json
+import os
+from typing import TYPE_CHECKING
+from .result import CommandResult
+if TYPE_CHECKING:
+    from .output import OutputFormatter
+def handle_config(args, output: "OutputFormatter") -> CommandResult:
+    """
+    Handle the 'config' subcommand.
+    Args:
+        args: Parsed command-line arguments
+        output: Output formatter
+    Returns:
+        CommandResult with config information
+    """
+    if args.action == "show":
+        return _handle_config_show(args, output)
+    elif args.action == "validate":
+        return _handle_config_validate(args, output)
+    elif args.action == "init":
+        return _handle_config_init(args, output)
+    else:
+        return CommandResult.failure(
+            f"Unknown config action: {args.action}",
+        )
+def _handle_config_show(args, output: "OutputFormatter") -> CommandResult:
+    """
+    Show current configuration.
+    Args:
+        args: Parsed arguments
+        output: Output formatter
+    Returns:
+        CommandResult with config details
+    """
+    from ..config import load_config
+    if not os.path.isfile(args.config):
+        return CommandResult.failure(
+            f"Config file not found: {args.config}",
+        )
+    try:
+        configs = load_config(args.config)
+    except Exception as e:
+        return CommandResult.failure(f"Failed to load config: {str(e)}")
+    # Read raw JSON for display
+    with open(args.config, "r", encoding="utf-8") as f:
+        raw_config = json.load(f)
+    return CommandResult.success(
+        "config show",
+        {
+            "config_path": os.path.abspath(args.config),
+            "total_dbms": len(configs),
+            "enabled_dbms": sum(1 for c in configs if c.enabled),
+            "databases": [
+                {
+                    "name": c.name,
+                    "host": c.host,
+                    "port": c.port,
+                    "user": c.user,
+                    "driver": c.driver,
+                    "enabled": c.enabled,
+                    "has_init_sql": bool(c.init_sql),
+                    "skip_patterns_count": len(c.skip_patterns),
+                }
+                for c in configs
+            ],
+            "raw_config": raw_config,
+        },
+    )
+def _handle_config_validate(args, output: "OutputFormatter") -> CommandResult:
+    """
+    Validate configuration file.
+    Args:
+        args: Parsed arguments
+        output: Output formatter
+    Returns:
+        CommandResult with validation results
+    """
+    import socket
+    from ..config import load_config
+    from ..executor import check_port
+    if not os.path.isfile(args.config):
+        return CommandResult.failure(
+            f"Config file not found: {args.config}",
+        )
+    errors = []
+    warnings = []
+    # Validate JSON structure
+    try:
+        with open(args.config, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except json.JSONDecodeError as e:
+        return CommandResult.failure(
+            f"Invalid JSON: {str(e)}",
+        )
+    # Check databases array
+    if "databases" not in data:
+        return CommandResult.failure(
+            "Missing 'databases' key in config",
+        )
+    if not isinstance(data["databases"], list):
+        return CommandResult.failure(
+            "'databases' must be an array",
+        )
+    if len(data["databases"]) == 0:
+        return CommandResult.failure(
+            "No databases configured",
+        )
+    # Validate each database config
+    for i, db in enumerate(data["databases"]):
+        prefix = f"databases[{i}]"
+        # Required fields
+        if "name" not in db:
+            errors.append(f"{prefix}: missing 'name' field")
+        # Optional fields with defaults
+        host = db.get("host", "127.0.0.1")
+        port = db.get("port", 3306)
+        # Validate types
+        if not isinstance(host, str):
+            errors.append(f"{prefix}.host: must be a string")
+        if not isinstance(port, int):
+            errors.append(f"{prefix}.port: must be an integer")
+        # Check if port is valid
+        if isinstance(port, int) and (port < 1 or port > 65535):
+            errors.append(f"{prefix}.port: must be between 1 and 65535")
+    # Try to load config
+    try:
+        configs = load_config(args.config)
+    except Exception as e:
+        errors.append(f"Failed to load config: {str(e)}")
+        configs = []
+    # Check connectivity for enabled databases
+    connectivity = []
+    for config in configs:
+        if not config.enabled:
+            continue
+        reachable = check_port(config.host, config.port, timeout=2)
+        connectivity.append({
+            "name": config.name,
+            "host": config.host,
+            "port": config.port,
+            "reachable": reachable,
+        })
+        if not reachable:
+            warnings.append(
+                f"{config.name} ({config.host}:{config.port}): not reachable"
+            )
+    if errors:
+        return CommandResult.failure(
+            "Config validation failed",
+        )
+    return CommandResult.success(
+        "config validate",
+        {
+            "config_path": os.path.abspath(args.config),
+            "valid": True,
+            "total_dbms": len(configs),
+            "enabled_dbms": sum(1 for c in configs if c.enabled),
+            "errors": errors,
+            "warnings": warnings,
+            "connectivity": connectivity,
+        },
+    )
+def _handle_config_init(args, output: "OutputFormatter") -> CommandResult:
+    """
+    Generate sample configuration file.
+    Args:
+        args: Parsed arguments
+        output: Output formatter
+    Returns:
+        CommandResult with generated config path
+    """
+    from ..config import generate_sample_config
+    # Determine output path
+    output_path = args.output if args.output else "dbms_config.sample.json"
+    # Check if file already exists
+    if os.path.isfile(output_path):
+        return CommandResult.failure(
+            f"File already exists: {output_path}. Use --output to specify a different path",
+            command="config init",
+        )
+    # Generate sample config
+    try:
+        generate_sample_config(output_path)
+    except Exception as e:
+        return CommandResult.failure(
+            f"Failed to generate config: {str(e)}",
+            command="config init",
+        )
+    return CommandResult.success(
+        "config init",
+        {
+            "config_path": os.path.abspath(output_path),
+            "message": f"Sample config written to {output_path}",
+        },
+    )

rosetta/cli/exec.py ADDED Viewed

@@ -0,0 +1,219 @@
+"""
+Handler for the 'exec' subcommand - execute SQL statements.
+"""
+from typing import TYPE_CHECKING
+from .result import CommandResult
+if TYPE_CHECKING:
+    from .output import OutputFormatter
+def handle_exec(args, output: "OutputFormatter") -> CommandResult:
+    """
+    Handle the 'exec' subcommand.
+    Args:
+        args: Parsed command-line arguments
+        output: Output formatter
+    Returns:
+        CommandResult with execution results
+    """
+    import os
+    import concurrent.futures
+    import time as _time
+    from ..config import load_config, filter_configs
+    from ..executor import DBConnection, check_port
+    from ..parser import TestFileParser
+    # Load config
+    if not os.path.isfile(args.config):
+        return CommandResult.failure(
+            f"Config file not found: {args.config}",
+        )
+    all_configs = load_config(args.config)
+    if not all_configs:
+        return CommandResult.failure(
+            f"No databases configured in {args.config}",
+        )
+    # Filter configs
+    if args.dbms:
+        try:
+            configs = filter_configs(all_configs, args.dbms)
+        except ValueError as e:
+            return CommandResult.failure(str(e))
+    else:
+        configs = [c for c in all_configs if c.enabled]
+    if not configs:
+        return CommandResult.failure("No databases selected")
+    # Get SQL statements
+    sql_text = None
+    if args.sql:
+        sql_text = args.sql
+    elif args.file:
+        if not os.path.isfile(args.file):
+            return CommandResult.failure(
+                f"SQL file not found: {args.file}",
+            )
+        with open(args.file, "r", encoding="utf-8") as f:
+            sql_text = f.read()
+    else:
+        return CommandResult.failure(
+            "Either --sql or --file is required",
+        )
+    # Parse SQL statements
+    try:
+        parsed = TestFileParser.parse_text(sql_text)
+        statements = [s.text for s in parsed]
+    except Exception as e:
+        return CommandResult.failure(f"Parse error: {str(e)}")
+    # Determine database (None means connect without selecting a database)
+    database = args.database if args.database else None
+    # Execute on each DBMS
+    def _exec_on_dbms(config):
+        """Execute all statements on one DBMS."""
+        result = {
+            "name": config.name,
+            "statements": [],
+            "error": None,
+        }
+        # Check port first
+        if not check_port(config.host, config.port):
+            result["error"] = f"Cannot reach {config.host}:{config.port}"
+            return result
+        # For exec without --database, connect directly without USE/CREATE
+        if database is None:
+            conn = None
+            cursor = None
+            try:
+                connect_kwargs = dict(
+                    host=config.host,
+                    port=config.port,
+                    user=config.user,
+                    password=config.password,
+                    connect_timeout=10,
+                )
+                if config.driver == "mysql.connector":
+                    import mysql.connector
+                    connect_kwargs["allow_local_infile"] = True
+                    conn = mysql.connector.connect(**connect_kwargs)
+                else:
+                    import pymysql
+                    connect_kwargs["local_infile"] = True
+                    conn = pymysql.connect(**connect_kwargs)
+                conn.autocommit = True
+                cursor = conn.cursor()
+            except Exception as e:
+                result["error"] = f"Connection failed: {str(e)}"
+                return result
+            try:
+                for sql in statements:
+                    stmt_result = _exec_stmt(cursor, sql)
+                    result["statements"].append(stmt_result)
+            finally:
+                if cursor:
+                    try:
+                        cursor.close()
+                    except Exception:
+                        pass
+                if conn:
+                    try:
+                        conn.close()
+                    except Exception:
+                        pass
+            return result
+        # With explicit --database, use DBConnection (creates DB + USE)
+        db = DBConnection(config, database)
+        try:
+            db.connect()
+        except Exception as e:
+            result["error"] = f"Connection failed: {str(e)}"
+            return result
+        try:
+            for sql in statements:
+                stmt_result = _exec_stmt(db.cursor, sql)
+                result["statements"].append(stmt_result)
+        finally:
+            db.close()
+        return result
+    # Execute in parallel
+    results = {}
+    with concurrent.futures.ThreadPoolExecutor(
+            max_workers=len(configs)) as pool:
+        futures = {pool.submit(_exec_on_dbms, c): c for c in configs}
+        for fut in concurrent.futures.as_completed(futures):
+            r = fut.result()
+            results[r["name"]] = r
+    return CommandResult.success(
+        "exec",
+        {
+            "sql": sql_text[:500],  # Truncate for JSON
+            "total_statements": len(statements),
+            "database": database,
+            "dbms_targets": [c.name for c in configs],
+            "results": results,
+        },
+    )
+def _exec_stmt(cursor, sql: str) -> dict:
+    """Execute a single SQL statement and return the result dict."""
+    import time as _time
+    stmt_result = {
+        "sql": sql,
+        "columns": None,
+        "rows": None,
+        "error": None,
+        "affected_rows": 0,
+        "elapsed_ms": 0,
+    }
+    try:
+        t0 = _time.monotonic()
+        cursor.execute(sql)
+        if cursor.description:
+            stmt_result["columns"] = [
+                desc[0] for desc in cursor.description
+            ]
+            rows = cursor.fetchall()
+            stmt_result["rows"] = [
+                [_format_val(c) for c in row]
+                for row in rows
+            ]
+            stmt_result["row_count"] = len(rows)
+        else:
+            stmt_result["affected_rows"] = cursor.rowcount or 0
+        t1 = _time.monotonic()
+        stmt_result["elapsed_ms"] = round((t1 - t0) * 1000, 3)
+    except Exception as e:
+        t1 = _time.monotonic()
+        stmt_result["error"] = str(e)
+        stmt_result["elapsed_ms"] = round((t1 - t0) * 1000, 3)
+    return stmt_result
+def _format_val(value) -> str:
+    """Format a cell value for JSON serialization."""
+    if value is None:
+        return "NULL"
+    if isinstance(value, bytes):
+        return value.decode("utf-8", errors="replace")
+    if isinstance(value, bool):
+        return "1" if value else "0"
+    return str(value)

rosetta/cli/interactive_cmd.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""
+Handler for the 'interactive' subcommand (and aliases 'repl', 'i').
+"""
+import sys
+from typing import TYPE_CHECKING
+from .result import CommandResult
+if TYPE_CHECKING:
+    from .output import OutputFormatter
+def handle_interactive(args, output: "OutputFormatter") -> CommandResult:
+    """
+    Handle the 'interactive' subcommand.
+    Args:
+        args: Parsed command-line arguments
+        output: Output formatter
+    Returns:
+        CommandResult with session summary
+    """
+    import os
+    import logging
+    from ..config import load_config, filter_configs
+    from ..interactive import InteractiveSession, BenchInteractiveSession
+    from ..executor import ensure_service
+    # Load config
+    if not os.path.isfile(args.config):
+        return CommandResult.failure(
+            f"Config file not found: {args.config}",
+        )
+    all_configs = load_config(args.config)
+    if not all_configs:
+        return CommandResult.failure(
+            f"No databases configured in {args.config}",
+        )
+    # Filter configs
+    if args.dbms:
+        try:
+            configs = filter_configs(all_configs, args.dbms)
+        except ValueError as e:
+            return CommandResult.failure(str(e))
+    else:
+        # Auto-detect reachable DBMS
+        reachable_configs = []
+        for config in all_configs:
+            if ensure_service(config):
+                reachable_configs.append(config)
+        if not reachable_configs:
+            return CommandResult.failure(
+                "No reachable DBMS found. Check your dbms_config.json"
+            )
+        configs = reachable_configs
+    if not configs:
+        return CommandResult.failure("No databases selected")
+    # Start interactive session
+    # Note: For JSON output mode, we still launch interactive but inform user
+    if output.format == "json":
+        # In JSON mode, inform user that interactive mode is intended for human use
+        return CommandResult.success(
+            "interactive",
+            {
+                "message": "Interactive mode launched",
+                "note": "Interactive mode is designed for human users. Run without -j/--json for best experience.",
+                "dbms_targets": [c.name for c in configs],
+                "database": args.database,
+                "output_dir": os.path.abspath(args.output_dir),
+                "serve": args.serve,
+                "port": args.port,
+            },
+        )
+    # For human mode, actually launch the interactive session
+    try:
+        # Import the existing interactive logic from old CLI
+        from ..cli import _enter_interactive, parse_args
+        # Build args for legacy interactive mode
+        legacy_args = parse_args([
+            "-i",
+            "--config", args.config,
+            "--database", args.database,
+            "--output-dir", args.output_dir,
+        ])
+        # Use filtered configs (either user-specified or auto-detected reachable)
+        legacy_args.dbms = ",".join(c.name for c in configs)
+        if args.serve:
+            legacy_args.serve = args.serve
+        if args.port:
+            legacy_args.port = args.port
+        # Launch interactive session
+        exit_code = _enter_interactive(legacy_args)
+        return CommandResult.success(
+            "interactive",
+            {
+                "exit_code": exit_code,
+                "message": "Interactive session ended",
+            },
+        )
+    except KeyboardInterrupt:
+        return CommandResult.success(
+            "interactive",
+            {
+                "message": "Interactive session interrupted",
+            },
+        )
+    except Exception as e:
+        return CommandResult.failure(
+            f"Interactive session failed: {str(e)}",
+        )