PyPI - decompressed-cli - Versions diffs - 0.1.0__tar.gz → 0.1.2__tar.gz - Mend

decompressed-cli 0.1.0 (tar.gz) → 0.1.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

{decompressed_cli-0.1.0 → decompressed_cli-0.1.2}/PKG-INFO RENAMED Viewed

@@ -1,12 +1,11 @@
 Metadata-Version: 2.4
 Name: decompressed-cli
-Version: 0.1.0
+Version: 0.1.2
 Summary: CLI for Decompressed - Git-like version control for vector datasets
 Author-email: Decompressed <support@decompressed.io>
-License: MIT
+License-Expression: MIT
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10

{decompressed_cli-0.1.0 → decompressed_cli-0.1.2}/pyproject.toml RENAMED Viewed

@@ -4,18 +4,17 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "decompressed-cli"
-version = "0.1.0"
+version = "0.1.2"
 description = "CLI for Decompressed - Git-like version control for vector datasets"
 readme = "README.md"
 requires-python = ">=3.9"
-license = {text = "MIT"}
+license = "MIT"
 authors = [
     {name = "Decompressed", email = "support@decompressed.io"}
 ]
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
-    "License :: OSI Approved :: MIT License",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",

{decompressed_cli-0.1.0 → decompressed_cli-0.1.2}/src/decompressed_cli/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """Decompressed CLI - Git-like version control for vector datasets."""
-__version__ = "0.1.0"
+__version__ = "0.1.2"

decompressed_cli-0.1.2/src/decompressed_cli/commands/imports_cmd.py ADDED Viewed

@@ -0,0 +1,220 @@
+"""Import commands for pulling vectors from external databases."""
+import time
+import typer
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
+from ..config import get_client
+# Typer sub-application that the import commands below register on; it prints
+# help when invoked without arguments.
+app = typer.Typer(no_args_is_help=True)
+# Rich console used for all output produced by the commands in this module.
+console = Console()
+# Flow: initialise an import session, start its job, then (unless --no-wait is
+# given) poll the session status every 2 seconds until it reports "completed"
+# or "failed". Every error path prints a message and exits with status 1.
+# The docstring below doubles as the command's --help text.
+@app.command("pull")
+def pull(
+    connector: str = typer.Argument(..., help="Connector name or ID to import from"),
+    dataset_name: str = typer.Argument(..., help="Name for the new dataset"),
+    project: str = typer.Option(None, "--project", "-p", help="Project name or ID"),
+    no_metadata: bool = typer.Option(False, "--no-metadata", help="Skip importing metadata"),
+    batch_size: int = typer.Option(100, "--batch-size", "-b", help="Vectors per batch"),
+    no_wait: bool = typer.Option(False, "--no-wait", help="Don't wait for completion"),
+):
+    """
+    Pull vectors from an external database into a new dataset.
+    Example:
+        dcp imports pull pinecone-prod my-backup
+        dcp imports pull qdrant-index imported-vectors --project ml-team
+    """
+    try:
+        client = get_client()
+        console.print(f"[cyan]Connecting to connector:[/cyan] {connector}")
+        # Initialize import
+        session = client.imports.init(
+            connector_id=connector,
+            dataset_name=dataset_name,
+            project=project,
+            include_metadata=not no_metadata,
+            batch_size=batch_size,
+        )
+        console.print(Panel(
+            f"[green]✓[/green] Connected to [cyan]{session.connector_type}[/cyan]\n"
+            f"[dim]Estimated vectors:[/dim] {session.estimated_vectors:,}\n"
+            f"[dim]Dimension:[/dim] {session.dimension}\n"
+            f"[dim]Dataset ID:[/dim] {session.dataset_id}",
+            title="Import Initialized",
+            border_style="green"
+        ))
+        # Start the import
+        result = client.imports.start(session.import_session_id)
+        console.print(f"[cyan]Import job started:[/cyan] {result.job_id[:8]}...")
+        if no_wait:
+            console.print("\n[yellow]Import running in background.[/yellow]")
+            console.print(f"Check status: [cyan]dcp imports status {session.import_session_id}[/cyan]")
+            return
+        # Wait with progress
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+            console=console,
+        ) as progress:
+            task = progress.add_task("Importing vectors...", total=100)
+            while True:
+                job = client.imports.status(session.import_session_id)
+                if job.progress:
+                    progress.update(task, completed=job.progress)
+                if job.status == "completed":
+                    progress.update(task, completed=100)
+                    break
+                elif job.status == "failed":
+                    console.print(f"\n[red]✗ Import failed:[/red] {job.error_message}")
+                    raise typer.Exit(1)
+                time.sleep(2)
+        console.print(Panel(
+            f"[green]✓[/green] Import complete!\n"
+            f"[dim]Dataset:[/dim] {dataset_name}\n"
+            f"[dim]Dataset ID:[/dim] {session.dataset_id}",
+            title="Success",
+            border_style="green"
+        ))
+    except typer.Exit:
+        # typer.Exit is Click's Exit, a RuntimeError subclass. Without this
+        # clause the generic handler below catches the exit raised for a failed
+        # job and prints a second, empty "Error:" line.
+        raise
+    except Exception as e:
+        # Covers ValueError as well; the separate ValueError clause was identical.
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+# Flow: resolve the target dataset, initialise an append-import session, start
+# its job, then (unless --no-wait is given) poll the session status every
+# 2 seconds until it reports "completed" or "failed". Every error path prints a
+# message and exits with status 1.
+# The docstring below doubles as the command's --help text.
+@app.command("append")
+def append(
+    connector: str = typer.Argument(..., help="Connector name or ID to import from"),
+    dataset: str = typer.Argument(..., help="Existing dataset name or ID to append to"),
+    no_metadata: bool = typer.Option(False, "--no-metadata", help="Skip importing metadata"),
+    batch_size: int = typer.Option(100, "--batch-size", "-b", help="Vectors per batch"),
+    no_wait: bool = typer.Option(False, "--no-wait", help="Don't wait for completion"),
+):
+    """
+    Append vectors from an external database to an existing dataset.
+    Example:
+        dcp imports append pinecone-prod my-dataset
+    """
+    try:
+        client = get_client()
+        # Resolve dataset ID
+        ds = client.datasets.get(dataset)
+        console.print(f"[cyan]Connecting to connector:[/cyan] {connector}")
+        console.print(f"[cyan]Appending to dataset:[/cyan] {ds.name} (v{ds.current_version})")
+        # Initialize append import
+        session = client.imports.init_append(
+            connector_id=connector,
+            dataset_id=ds.id,
+            include_metadata=not no_metadata,
+            batch_size=batch_size,
+        )
+        console.print(Panel(
+            f"[green]✓[/green] Connected to [cyan]{session.connector_type}[/cyan]\n"
+            f"[dim]Estimated vectors:[/dim] {session.estimated_vectors:,}\n"
+            f"[dim]Dimension:[/dim] {session.dimension}",
+            title="Append Import Initialized",
+            border_style="green"
+        ))
+        # Start the import
+        result = client.imports.start(session.import_session_id)
+        console.print(f"[cyan]Import job started:[/cyan] {result.job_id[:8]}...")
+        if no_wait:
+            console.print("\n[yellow]Import running in background.[/yellow]")
+            console.print(f"Check status: [cyan]dcp imports status {session.import_session_id}[/cyan]")
+            return
+        # Wait with progress
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+            console=console,
+        ) as progress:
+            task = progress.add_task("Importing vectors...", total=100)
+            while True:
+                job = client.imports.status(session.import_session_id)
+                if job.progress:
+                    progress.update(task, completed=job.progress)
+                if job.status == "completed":
+                    progress.update(task, completed=100)
+                    break
+                elif job.status == "failed":
+                    console.print(f"\n[red]✗ Import failed:[/red] {job.error_message}")
+                    raise typer.Exit(1)
+                time.sleep(2)
+        console.print(f"[green]✓[/green] Appended vectors to [cyan]{ds.name}[/cyan]")
+    except typer.Exit:
+        # typer.Exit is Click's Exit, a RuntimeError subclass. Without this
+        # clause the generic handler below catches the exit raised for a failed
+        # job and prints a second, empty "Error:" line.
+        raise
+    except Exception as e:
+        # Covers ValueError as well; the separate ValueError clause was identical.
+        console.print(f"[red]Error:[/red] {e}")
+        raise typer.Exit(1)
+# Fetches one import session's status and renders it as a panel; any error is
+# printed and the process exits with status 1.
+@app.command("status")
+def status(
+    session_id: str = typer.Argument(..., help="Import session ID"),
+):
+    """Check status of an import job."""
+    # Colour applied to both the status text and the panel border; statuses
+    # not listed here fall back to white.
+    colors_by_status = {
+        "initialized": "yellow",
+        "in_progress": "cyan",
+        "completed": "green",
+        "failed": "red",
+    }
+    try:
+        job = get_client().imports.status(session_id)
+        color = colors_by_status.get(job.status, "white")
+        lines = [
+            f"[cyan]Session ID:[/cyan] {job.import_session_id}",
+            f"[cyan]Dataset ID:[/cyan] {job.dataset_id or 'N/A'}",
+            f"[cyan]Status:[/cyan] [{color}]{job.status}[/{color}]",
+            f"[cyan]Progress:[/cyan] {job.progress or 0}%",
+            f"[cyan]Job ID:[/cyan] {job.job_id or 'N/A'}",
+        ]
+        if job.error_message:
+            lines.append(f"[red]Error:[/red] {job.error_message}")
+        console.print(Panel("\n".join(lines), title="Import Status", border_style=color))
+    except Exception as exc:
+        # One handler is enough: ValueError is an Exception and was reported
+        # in exactly the same way.
+        console.print(f"[red]Error:[/red] {exc}")
+        raise typer.Exit(1)

decompressed_cli-0.1.2/src/decompressed_cli/commands/sync_cmd.py ADDED Viewed

@@ -0,0 +1,283 @@
+"""CLI commands for syncing datasets to external vector databases."""
+import typer
+import time
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
+from typing import Optional
+from ..config import get_config
+# Typer sub-application that the sync commands below register on; it prints
+# help when invoked without arguments.
+app = typer.Typer(no_args_is_help=True)
+# Rich console used for all output produced by the commands in this module.
+console = Console()
+def _api_headers():
+    """Build the request headers: bearer auth from the configured API key plus a JSON content type."""
+    api_key = get_config()["api_key"]
+    return {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
+def _api_url(path: str) -> str:
+    """Return the URL for *path* under ``/api/v1`` of the configured base URL."""
+    base_url = get_config()["base_url"]
+    return f"{base_url}/api/v1{path}"
+def _error_detail(response, default):
+    """Return the ``detail`` field of a JSON error response.
+
+    Falls back to *default* when the body is not JSON, is not a JSON object,
+    or has no ``detail`` key.
+    """
+    try:
+        payload = response.json()
+    except ValueError:
+        return default
+    if isinstance(payload, dict):
+        return payload.get("detail", default)
+    return default
+# Flow: resolve the dataset, print the sync plan, create the sync job, poll the
+# job every 2 seconds until it is "completed" or "failed", then fetch and print
+# the sync result. Exits with status 1 on any request failure or when the sync
+# does not end as "completed".
+# The docstring below doubles as the command's --help text.
+@app.command("push")
+def push(
+    dataset: str = typer.Argument(..., help="Dataset name or ID"),
+    connector: str = typer.Argument(..., help="Connector name or ID"),
+    version: Optional[int] = typer.Option(None, "--version", "-v", help="Source version (default: current)"),
+    batch_size: int = typer.Option(100, "--batch-size", "-b", help="Vectors per batch"),
+    mode: str = typer.Option("auto", "--mode", "-m", help="Sync mode: auto or full"),
+    force: bool = typer.Option(False, "--force", "-f", help="Force sync, overwrite destination drift"),
+):
+    """
+    Push dataset to a vector database connector.
+    Decompressed is the source of truth. This command deploys your dataset
+    version to the destination. If the destination was modified externally,
+    you will be warned (use --force to override).
+    Examples:
+        dcp sync push my-dataset my-pinecone-connector
+        dcp sync push my-dataset my-pinecone --version 3
+        dcp sync push my-dataset my-pinecone --mode full --force
+    """
+    import httpx
+    # Consecutive failed status polls tolerated before giving up. Isolated
+    # failures are still ignored, but a persistent one no longer loops forever.
+    max_failed_polls = 5
+    headers = _api_headers()
+    # Resolve dataset ID
+    console.print(f"[dim]Resolving dataset '{dataset}'...[/dim]")
+    try:
+        r = httpx.get(_api_url(f"/datasets/{dataset}"), headers=headers, timeout=15)
+        r.raise_for_status()
+        ds = r.json()
+        dataset_id = ds["id"]
+        ds_name = ds.get("name", dataset_id)
+        current_ver = ds.get("current_version", 1)
+        num_vectors = ds.get("num_vectors", 0)
+    except httpx.HTTPStatusError as e:
+        console.print(f"[red]Dataset not found: {e.response.text}[/red]")
+        raise typer.Exit(1)
+    except httpx.RequestError as e:
+        # Connection/timeout errors carry no response; report them instead of
+        # letting a traceback escape.
+        console.print(f"[red]Request failed: {e}[/red]")
+        raise typer.Exit(1)
+    # Only fall back to the current version when --version was not given at all.
+    source_version = version if version is not None else current_ver
+    # Show sync plan
+    console.print()
+    console.print(Panel(
+        f"[bold]Dataset:[/bold] {ds_name}\n"
+        f"[bold]Version:[/bold] v{source_version} ({num_vectors:,} vectors)\n"
+        f"[bold]Connector:[/bold] {connector}\n"
+        f"[bold]Mode:[/bold] {mode}\n"
+        f"[bold]Force:[/bold] {'yes' if force else 'no'}",
+        title="[bold green]Sync Plan[/bold green]",
+        border_style="green",
+    ))
+    if mode == "auto":
+        console.print(
+            "[dim]Auto mode: will use incremental sync if a previous sync exists, "
+            "otherwise full upload.[/dim]"
+        )
+    elif mode == "full":
+        console.print("[dim]Full mode: all vectors will be re-uploaded.[/dim]")
+    console.print()
+    # Create sync job
+    config = {
+        "batch_size": batch_size,
+        "sync_mode": mode if mode == "full" else None,
+        "force_sync": force or None,
+    }
+    # Remove None values
+    config = {k: v for k, v in config.items() if v is not None}
+    try:
+        r = httpx.post(
+            _api_url("/syncs"),
+            headers=headers,
+            json={
+                "dataset_id": dataset_id,
+                "connector_id": connector,
+                "source_version": source_version,
+                "config": config,
+            },
+            timeout=30,
+        )
+        r.raise_for_status()
+        result = r.json()
+        job_id = result.get("job_id")
+        sync_job_id = result.get("sync_job_id")
+    except httpx.HTTPStatusError as e:
+        # The error body is not guaranteed to be a JSON object, so parse it
+        # defensively.
+        detail = _error_detail(e.response, str(e))
+        if isinstance(detail, dict):
+            console.print(f"[red]{detail.get('message', str(detail))}[/red]")
+            if detail.get("suggestions"):
+                for s in detail["suggestions"]:
+                    console.print(f"  [yellow]→ {s}[/yellow]")
+        else:
+            console.print(f"[red]{detail}[/red]")
+        raise typer.Exit(1)
+    except httpx.RequestError as e:
+        console.print(f"[red]Request failed: {e}[/red]")
+        raise typer.Exit(1)
+    if not job_id:
+        # Without a job ID there is nothing to poll, and slicing None below
+        # would raise TypeError.
+        console.print("[red]Sync request was accepted but the response contained no job_id.[/red]")
+        raise typer.Exit(1)
+    console.print(f"[green]✓ Sync job created[/green] (job: {job_id[:8]}...)")
+    console.print()
+    # Poll for completion
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TextColumn("{task.percentage:>3.0f}%"),
+        console=console,
+    ) as progress:
+        task = progress.add_task("Syncing...", total=100)
+        failed_polls = 0
+        while True:
+            time.sleep(2)
+            try:
+                r = httpx.get(
+                    _api_url(f"/jobs/{job_id}"),
+                    headers=headers,
+                    timeout=15,
+                )
+                r.raise_for_status()
+                job = r.json()
+            except (httpx.HTTPError, ValueError) as e:
+                # Transient poll failures are skipped, but only up to a limit.
+                failed_polls += 1
+                if failed_polls >= max_failed_polls:
+                    console.print(f"[red]Could not read status of job {job_id}: {e}[/red]")
+                    raise typer.Exit(1)
+                continue
+            failed_polls = 0
+            job_status = job.get("status", "unknown")
+            pct = job.get("progress", 0) or 0
+            stage = (job.get("progress_details") or {}).get("stage", "")
+            progress.update(task, completed=pct, description=stage or f"Status: {job_status}")
+            if job_status in ("completed", "failed"):
+                progress.update(task, completed=100)
+                break
+    # Show results
+    console.print()
+    try:
+        r = httpx.get(_api_url(f"/syncs/{sync_job_id}"), headers=headers, timeout=15)
+        r.raise_for_status()
+        sync_result = r.json()
+    except Exception:
+        # Best effort: with no result the summary below reports a failure with
+        # "Unknown error" and zero counts.
+        sync_result = {}
+    # The sync record's "error_details" field also carries the sync mode, diff
+    # summary, drift report and warnings read below.
+    error_details = sync_result.get("error_details") or {}
+    sync_mode_used = error_details.get("sync_mode", "full")
+    drift = error_details.get("drift_report")
+    diff = error_details.get("diff_summary")
+    warnings = error_details.get("warnings", [])
+    if sync_result.get("status") == "completed":
+        console.print("[bold green]✓ Sync completed successfully[/bold green]")
+    else:
+        console.print(f"[bold red]✗ Sync failed[/bold red]: {sync_result.get('error_message', 'Unknown error')}")
+    # Summary table
+    table = Table(show_header=False, box=None, padding=(0, 2))
+    table.add_column(style="dim")
+    table.add_column()
+    table.add_row("Mode", f"[cyan]{sync_mode_used}[/cyan]")
+    table.add_row("Vectors synced", f"[green]{sync_result.get('vectors_synced', 0):,}[/green]")
+    if error_details.get("vectors_deleted"):
+        table.add_row("Vectors deleted", f"[red]{error_details['vectors_deleted']:,}[/red]")
+    if diff:
+        table.add_row("Added", f"[green]+{diff.get('added', 0)}[/green]")
+        table.add_row("Deleted", f"[red]-{diff.get('deleted', 0)}[/red]")
+        table.add_row("Updated", f"[yellow]~{diff.get('updated', 0)}[/yellow]")
+        table.add_row("Unchanged", f"[dim]{diff.get('unchanged', 0)}[/dim]")
+    table.add_row("Batches", f"{sync_result.get('batches_completed', 0)} ok / {sync_result.get('batches_failed', 0)} failed")
+    console.print(table)
+    if drift and drift.get("has_drift"):
+        console.print()
+        console.print("[yellow]⚠ Drift detected in destination:[/yellow]")
+        for d in drift.get("details", []):
+            console.print(f"  [yellow]• {d}[/yellow]")
+    if warnings:
+        console.print()
+        # Only the first five warnings are shown.
+        for w in warnings[:5]:
+            console.print(f"[yellow]⚠ {w}[/yellow]")
+    if sync_result.get("status") != "completed":
+        raise typer.Exit(1)
+# Fetches the sync state of a dataset and prints one table row per linked
+# connector. Exits with status 1 when the request fails.
+# The docstring below doubles as the command's --help text.
+@app.command("status")
+def status(
+    dataset: str = typer.Argument(..., help="Dataset name or ID"),
+):
+    """Show sync state for all connectors linked to a dataset."""
+    import httpx
+    headers = _api_headers()
+    try:
+        r = httpx.get(
+            _api_url(f"/syncs/state/{dataset}"),
+            headers=headers,
+            timeout=15,
+        )
+        r.raise_for_status()
+        data = r.json()
+    except httpx.HTTPStatusError as e:
+        console.print(f"[red]{e.response.text}[/red]")
+        raise typer.Exit(1)
+    except httpx.RequestError as e:
+        # Connection/timeout errors carry no response; report them instead of
+        # letting a traceback escape.
+        console.print(f"[red]Request failed: {e}[/red]")
+        raise typer.Exit(1)
+    states = data.get("sync_states", [])
+    current_ver = data.get("current_version", 1)
+    if not states:
+        console.print("[dim]No connectors linked to this dataset.[/dim]")
+        console.print("[dim]Use the dashboard or API to sync to a vector database.[/dim]")
+        return
+    console.print(f"[bold]Dataset version:[/bold] v{current_ver}")
+    console.print()
+    table = Table(title="Connected Destinations")
+    table.add_column("Connector", style="bold")
+    table.add_column("Type")
+    table.add_column("Status")
+    table.add_column("Synced Version")
+    table.add_column("Vectors")
+    table.add_column("Auto-sync")
+    table.add_column("Last Synced")
+    for s in states:
+        sync_status = s.get("sync_status", "unknown")
+        status_style = {
+            "in_sync": "[green]✓ In sync[/green]",
+            "behind": f"[yellow]⚠ {s.get('versions_behind', 0)} behind[/yellow]",
+            "never_synced": "[dim]Never synced[/dim]",
+        }.get(sync_status, f"[dim]{sync_status}[/dim]")
+        auto = "[green]on[/green]" if s.get("auto_sync_enabled") else "[dim]off[/dim]"
+        # "or" also covers a key that is present with a null value, which a
+        # .get() default alone would not.
+        last = s.get("last_synced_at") or "Never"
+        if last != "Never":
+            # Keeps the first 16 characters and swaps the "T" separator for a space.
+            last = last[:16].replace("T", " ")
+        table.add_row(
+            s.get("connector_name", "?"),
+            s.get("connector_type", "?"),
+            status_style,
+            f"v{s.get('last_synced_version') or 0}",
+            f"{(s.get('vectors_in_destination') or 0):,}",
+            auto,
+            last,
+        )
+    console.print(table)

{decompressed_cli-0.1.0 → decompressed_cli-0.1.2}/src/decompressed_cli/main.py RENAMED Viewed

@@ -4,7 +4,7 @@ import typer
 from rich.console import Console
 from . import __version__
-from .commands import config_cmd, datasets_cmd, versions_cmd, data_cmd
+from .commands import config_cmd, datasets_cmd, versions_cmd, data_cmd, imports_cmd, sync_cmd
 app = typer.Typer(
     name="dcp",
@@ -17,6 +17,8 @@ console = Console()
 app.add_typer(config_cmd.app, name="config", help="Manage CLI configuration")
 app.add_typer(datasets_cmd.app, name="datasets", help="Manage datasets")
 app.add_typer(data_cmd.app, name="data", help="Pull/push data (alias)")
+app.add_typer(imports_cmd.app, name="imports", help="Import vectors from external databases")
+app.add_typer(sync_cmd.app, name="sync", help="Sync datasets to vector databases")
 # Register top-level versioning commands
 app.command("log")(versions_cmd.log)

{decompressed_cli-0.1.0 → decompressed_cli-0.1.2}/src/decompressed_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,12 +1,11 @@
 Metadata-Version: 2.4
 Name: decompressed-cli
-Version: 0.1.0
+Version: 0.1.2
 Summary: CLI for Decompressed - Git-like version control for vector datasets
 Author-email: Decompressed <support@decompressed.io>
-License: MIT
+License-Expression: MIT
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10

{decompressed_cli-0.1.0 → decompressed_cli-0.1.2}/src/decompressed_cli.egg-info/SOURCES.txt RENAMED Viewed

@@ -13,4 +13,6 @@ src/decompressed_cli/commands/__init__.py
 src/decompressed_cli/commands/config_cmd.py
 src/decompressed_cli/commands/data_cmd.py
 src/decompressed_cli/commands/datasets_cmd.py
+src/decompressed_cli/commands/imports_cmd.py
+src/decompressed_cli/commands/sync_cmd.py
 src/decompressed_cli/commands/versions_cmd.py