decompressed-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ Metadata-Version: 2.4
2
+ Name: decompressed-cli
3
+ Version: 0.1.0
4
+ Summary: CLI for Decompressed - Git-like version control for vector datasets
5
+ Author-email: Decompressed <support@decompressed.io>
6
+ License: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.9
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: typer>=0.9.0
18
+ Requires-Dist: rich>=13.0.0
19
+ Requires-Dist: decompressed-sdk>=0.1.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
22
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
23
+
24
+ # Decompressed CLI
25
+
26
+ Git-like version control for vector datasets.
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install decompressed-cli
32
+ ```
33
+
34
+ Or install from source:
35
+ ```bash
36
+ cd sdk/cli
37
+ pip install -e .
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ```bash
43
+ # Configure your API key
44
+ dcp config set api_key YOUR_API_KEY
45
+ dcp config set base_url https://api.decompressed.io
46
+
47
+ # List your datasets
48
+ dcp datasets list
49
+
50
+ # Get dataset info
51
+ dcp datasets info my-dataset
52
+
53
+ # Version control commands
54
+ dcp log my-dataset # Show version history
55
+ dcp checkout my-dataset --version 3 # Pin to version 3
56
+ dcp commit my-dataset -m "Added data" # Show current version with a message (versions are created on push)
57
+ dcp tag my-dataset 3 production # Tag version 3 as 'production'
58
+ dcp diff my-dataset 2 3 # Compare versions
59
+
60
+ # Data commands
61
+ dcp pull my-dataset -o ./data/ # Download dataset
62
+ dcp push ./vectors.npy my-dataset # Upload/append vectors
63
+ ```
64
+
65
+ ## Commands
66
+
67
+ ### Configuration
68
+ - `dcp config set <key> <value>` - Set config value
69
+ - `dcp config get <key>` - Get config value
70
+ - `dcp config list` - List all config
71
+
72
+ ### Datasets
73
+ - `dcp datasets list` - List all datasets
74
+ - `dcp datasets info <dataset>` - Show dataset details
75
+ - `dcp datasets delete <dataset>` - Delete a dataset
76
+
77
+ ### Versioning
78
+ - `dcp log <dataset>` - Show version history
79
+ - `dcp checkout <dataset> --version <n>` - Pin to specific version
80
+ - `dcp commit <dataset> -m <message>` - Commit pending changes
81
+ - `dcp tag <dataset> <version> <name>` - Create named ref
82
+ - `dcp diff <dataset> <v1> <v2>` - Compare two versions
83
+
84
+ ### Data
85
+ - `dcp pull <dataset> -o <path>` - Download dataset vectors
86
+ - `dcp push <file> <dataset>` - Upload/append vectors
87
+
88
+ ## Configuration
89
+
90
+ Config is stored in `~/.decompressed/config.json`:
91
+
92
+ ```json
93
+ {
94
+ "api_key": "dcp_xxx",
95
+ "base_url": "https://api.decompressed.io"
96
+ }
97
+ ```
@@ -0,0 +1,74 @@
1
+ # Decompressed CLI
2
+
3
+ Git-like version control for vector datasets.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install decompressed-cli
9
+ ```
10
+
11
+ Or install from source:
12
+ ```bash
13
+ cd sdk/cli
14
+ pip install -e .
15
+ ```
16
+
17
+ ## Quick Start
18
+
19
+ ```bash
20
+ # Configure your API key
21
+ dcp config set api_key YOUR_API_KEY
22
+ dcp config set base_url https://api.decompressed.io
23
+
24
+ # List your datasets
25
+ dcp datasets list
26
+
27
+ # Get dataset info
28
+ dcp datasets info my-dataset
29
+
30
+ # Version control commands
31
+ dcp log my-dataset # Show version history
32
+ dcp checkout my-dataset --version 3 # Pin to version 3
33
+ dcp commit my-dataset -m "Added data" # Show current version with a message (versions are created on push)
34
+ dcp tag my-dataset 3 production # Tag version 3 as 'production'
35
+ dcp diff my-dataset 2 3 # Compare versions
36
+
37
+ # Data commands
38
+ dcp pull my-dataset -o ./data/ # Download dataset
39
+ dcp push ./vectors.npy my-dataset # Upload/append vectors
40
+ ```
41
+
42
+ ## Commands
43
+
44
+ ### Configuration
45
+ - `dcp config set <key> <value>` - Set config value
46
+ - `dcp config get <key>` - Get config value
47
+ - `dcp config list` - List all config
48
+
49
+ ### Datasets
50
+ - `dcp datasets list` - List all datasets
51
+ - `dcp datasets info <dataset>` - Show dataset details
52
+ - `dcp datasets delete <dataset>` - Delete a dataset
53
+
54
+ ### Versioning
55
+ - `dcp log <dataset>` - Show version history
56
+ - `dcp checkout <dataset> --version <n>` - Pin to specific version
57
+ - `dcp commit <dataset> -m <message>` - Commit pending changes
58
+ - `dcp tag <dataset> <version> <name>` - Create named ref
59
+ - `dcp diff <dataset> <v1> <v2>` - Compare two versions
60
+
61
+ ### Data
62
+ - `dcp pull <dataset> -o <path>` - Download dataset vectors
63
+ - `dcp push <file> <dataset>` - Upload/append vectors
64
+
65
+ ## Configuration
66
+
67
+ Config is stored in `~/.decompressed/config.json`:
68
+
69
+ ```json
70
+ {
71
+ "api_key": "dcp_xxx",
72
+ "base_url": "https://api.decompressed.io"
73
+ }
74
+ ```
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "decompressed-cli"
7
+ version = "0.1.0"
8
+ description = "CLI for Decompressed - Git-like version control for vector datasets"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Decompressed", email = "support@decompressed.io"}
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ ]
25
+ dependencies = [
26
+ "typer>=0.9.0",
27
+ "rich>=13.0.0",
28
+ "decompressed-sdk>=0.1.0",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ dev = [
33
+ "pytest>=7.0.0",
34
+ "pytest-cov>=4.0.0",
35
+ ]
36
+
37
+ [project.scripts]
38
+ dcp = "decompressed_cli.main:app"
39
+
40
+ [tool.setuptools.packages.find]
41
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """Decompressed CLI - Git-like version control for vector datasets."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """CLI command modules."""
@@ -0,0 +1,70 @@
1
+ """Configuration commands."""
2
+
3
+ import typer
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+
7
+ from ..config import load_config, set_config_value, get_config_value, CONFIG_FILE
8
+
9
+ app = typer.Typer(no_args_is_help=True)
10
+ console = Console()
11
+
12
+
13
@app.command("set")
def config_set(
    key: str = typer.Argument(..., help="Config key (api_key, base_url)"),
    value: str = typer.Argument(..., help="Config value"),
):
    """Set a configuration value."""
    # Only these keys may be written; anything else is rejected with exit code 1.
    allowed = ["api_key", "base_url"]
    if key not in allowed:
        console.print(f"[red]Unknown config key: {key}[/red]")
        console.print(f"Valid keys: {', '.join(allowed)}")
        raise typer.Exit(1)

    set_config_value(key, value)

    # Echo what was stored, but show only the first 8 characters of a long API key.
    if key == "api_key" and len(value) > 8:
        shown = value[:8] + "..."
    else:
        shown = value
    console.print(f"[green]✓[/green] Set {key} = {shown}")
30
+
31
+
32
@app.command("get")
def config_get(
    key: str = typer.Argument(..., help="Config key to get"),
):
    """Get a configuration value."""
    value = get_config_value(key)
    if value is None:
        console.print(f"[yellow]Not set:[/yellow] {key}")
        return

    # The config file is plain JSON that users can edit by hand, so the stored
    # value is not guaranteed to be a string. Convert once before measuring and
    # slicing; slicing a raw int/bool/list would raise or give a wrong result.
    text = str(value)
    if key == "api_key" and len(text) > 8:
        # Show only the first 8 characters of the API key.
        text = text[:8] + "..."
    console.print(f"{key} = {text}")
44
+
45
+
46
@app.command("list")
def config_list():
    """List all configuration values."""
    config = load_config()

    table = Table(title=f"Config ({CONFIG_FILE})")
    table.add_column("Key", style="cyan")
    table.add_column("Value", style="green")

    for key, value in config.items():
        if value is None:
            display_value = "[dim]not set[/dim]"
        else:
            # Values come from user-editable JSON and may not be strings;
            # stringify before slicing so a non-string api_key cannot raise.
            display_value = str(value)
            if key == "api_key" and len(display_value) > 8:
                # Show only the first 8 characters of the API key.
                display_value = display_value[:8] + "..."
        table.add_row(key, display_value)

    console.print(table)
65
+
66
+
67
@app.command("path")
def config_path():
    """Show config file path."""
    # CONFIG_FILE is a path object imported from the config module; print it as text.
    location = str(CONFIG_FILE)
    console.print(location)
@@ -0,0 +1,161 @@
1
+ """Data pull/push commands."""
2
+
3
+ import typer
4
+ from rich.console import Console
5
+ from rich.progress import Progress, SpinnerColumn, TextColumn
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from ..config import get_client
10
+
11
+ app = typer.Typer(no_args_is_help=True)
12
+ console = Console()
13
+
14
+
15
def pull(
    dataset: str = typer.Argument(..., help="Dataset name or ID"),
    output: Path = typer.Option(
        Path("./"), "--output", "-o", help="Output directory"
    ),
    version: Optional[int] = typer.Option(
        None, "--version", "-v", help="Specific version (default: latest)"
    ),
    project: str = typer.Option(None, "--project", "-p", help="Project name"),
    format: str = typer.Option("npy", "--format", "-f", help="Output format (npy, csv)"),
):
    """
    Download dataset vectors to local files.

    Example:
        dcp pull my-dataset -o ./data/
        dcp pull my-dataset --version 3 -o ./data/
    """
    # Validate the requested format up front (outside the try block) so an
    # unsupported value fails before any network call or directory creation.
    fmt = format.lower()
    if fmt not in ("npy", "csv"):
        console.print(
            f"[red]Error:[/red] Unsupported format: {format} (expected npy or csv)"
        )
        raise typer.Exit(1)

    try:
        # Imported lazily so the rest of the CLI does not need them at startup;
        # hoisted out of the download loop because they are loop-invariant.
        import json

        import numpy as np

        client = get_client()
        ds = client.datasets.get(dataset, project=project)

        # Fall back to the dataset's current version when none was requested.
        target_version = version or ds.current_version
        output.mkdir(parents=True, exist_ok=True)

        console.print(f"Pulling {dataset} v{target_version}...")

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task("Downloading blocks...", total=None)

            block_count = 0
            vector_count = 0

            for block in client.datasets.iter_blocks(
                dataset,
                project=project,
                version=target_version,
                include_metadata=True,
                prefetch=True,
            ):
                block_count += 1
                vector_count += block.num_vectors
                progress.update(
                    task,
                    description=f"Block {block.block_index + 1}/{block.total_blocks} ({vector_count:,} vectors)"
                )

                # One output file per block, named after the block id.
                block_file = output / f"{block.block_id}.{fmt}"
                if fmt == "csv":
                    # assumes block.vectors is a 1-D or 2-D numeric array
                    # (np.savetxt's requirement) -- TODO confirm against the SDK
                    np.savetxt(block_file, block.vectors, delimiter=",")
                else:
                    np.save(block_file, block.vectors)

                # Save metadata alongside the vectors when any entry is non-empty.
                if block.metadata and any(block.metadata):
                    meta_file = output / f"{block.block_id}_metadata.json"
                    with open(meta_file, "w", encoding="utf-8") as f:
                        json.dump(block.metadata, f)

        console.print(f"\n[green]✓[/green] Downloaded {vector_count:,} vectors in {block_count} blocks")
        console.print(f" Output: {output.absolute()}")

    except typer.Exit:
        # Never report a deliberate exit as an error.
        raise
    except Exception as e:
        # Single boundary handler: covers ValueError from get_client() (missing
        # API key) as well as SDK and filesystem failures.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
87
+
88
+
89
def push(
    file: Path = typer.Argument(..., help="File to upload (.npy, .csv, .parquet)"),
    dataset: str = typer.Argument(..., help="Dataset name (creates new or appends)"),
    project: str = typer.Option(None, "--project", "-p", help="Project name"),
    message: str = typer.Option(None, "--message", "-m", help="Version message"),
    compression: str = typer.Option(None, "--compression", "-c", help="Compression (fp16, int8)"),
):
    """
    Upload vectors to a dataset (creates new or appends).

    Example:
        dcp push ./vectors.npy my-dataset
        dcp push ./vectors.npy my-dataset -m "Added January data"
    """
    # This check lives outside the try block on purpose: typer.Exit derives from
    # RuntimeError, so raising it inside the try would be caught by the generic
    # handler below and reported a second time as "Error: ...".
    if not file.exists():
        console.print(f"[red]Error:[/red] File not found: {file}")
        raise typer.Exit(1)

    try:
        client = get_client()

        # Decide between append and create by probing for the dataset.
        # NOTE(review): any failure of the lookup (not only "not found") is
        # treated as "dataset does not exist" -- confirm which exception the SDK
        # raises for a missing dataset and narrow this handler.
        try:
            existing = client.datasets.get(dataset, project=project)
            is_append = True
            console.print(f"Appending to existing dataset: {dataset} (v{existing.current_version})")
        except Exception:
            is_append = False
            console.print(f"Creating new dataset: {dataset}")

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task("Uploading...", total=None)

            if is_append:
                progress.update(task, description="Appending vectors...")
                result = client.datasets.append(
                    file,
                    dataset,
                    project=project,
                    compression=compression,
                    description=message,
                )
                new_version = result.new_version
            else:
                progress.update(task, description="Uploading new dataset...")
                # The message is not forwarded on this path; a brand-new dataset
                # is reported as version 1.
                client.datasets.upload(
                    file,
                    dataset,
                    project=project,
                    compression=compression,
                )
                new_version = 1

        console.print(f"\n[green]✓[/green] {'Appended to' if is_append else 'Created'} {dataset}")
        console.print(f" Version: v{new_version}")
        if message:
            console.print(f" Message: {message}")

    except typer.Exit:
        # Never report a deliberate exit as an error.
        raise
    except Exception as e:
        # Single boundary handler: covers ValueError from get_client() (missing
        # API key) as well as SDK failures.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
157
+
158
+
159
+ # Register commands on the app
160
+ app.command("pull")(pull)
161
+ app.command("push")(push)
@@ -0,0 +1,104 @@
1
+ """Dataset management commands."""
2
+
3
+ import typer
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+ from rich.panel import Panel
7
+
8
+ from ..config import get_client
9
+
10
+ app = typer.Typer(no_args_is_help=True)
11
+ console = Console()
12
+
13
+
14
@app.command("list")
def list_datasets():
    """List all datasets."""
    try:
        found = get_client().datasets.list()

        if not found:
            console.print("[yellow]No datasets found.[/yellow]")
            return

        # Column header -> keyword options passed to Table.add_column.
        column_specs = (
            ("Name", {"style": "cyan"}),
            ("ID", {"style": "dim"}),
            ("Vectors", {"justify": "right"}),
            ("Dims", {"justify": "right"}),
            ("Version", {"justify": "right", "style": "green"}),
        )
        table = Table(title="Datasets")
        for header, options in column_specs:
            table.add_column(header, **options)

        for ds in found:
            name = ds.name
            # Only the first 8 characters of the id are shown.
            short_id = ds.id[:8] + "..."
            # Falsy values (None, 0, empty) are rendered as a dash.
            vectors = f"{ds.num_vectors:,}" if ds.num_vectors else "-"
            dims = str(ds.dimensions) if ds.dimensions else "-"
            current = f"v{ds.current_version}" if ds.current_version else "-"
            table.add_row(name, short_id, vectors, dims, current)

        console.print(table)
        console.print(f"\n[dim]{len(found)} dataset(s)[/dim]")

    except Exception as e:
        # Covers ValueError from get_client() (missing API key) and any SDK
        # failure; every error is reported the same way and exits with status 1.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
50
+
51
+
52
@app.command("info")
def info(
    dataset: str = typer.Argument(..., help="Dataset name or ID"),
    project: str = typer.Option(None, "--project", "-p", help="Project name"),
):
    """Show detailed dataset information."""
    try:
        client = get_client()
        ds = client.datasets.get(dataset, project=project)

        # The list command in this module tolerates missing counts; do the same
        # here. Formatting None with ':,' would raise TypeError, and the other
        # fields would otherwise render as "None" / "vNone".
        vectors = f"{ds.num_vectors:,} vectors" if ds.num_vectors is not None else "-"
        dimensions = ds.dimensions if ds.dimensions is not None else "-"
        current = f"v{ds.current_version}" if ds.current_version is not None else "-"

        panel_content = "\n".join(
            [
                f"[cyan]Name:[/cyan] {ds.name}",
                f"[cyan]ID:[/cyan] {ds.id}",
                f"[cyan]Vectors:[/cyan] {vectors}",
                f"[cyan]Dimensions:[/cyan] {dimensions}",
                f"[cyan]Current Version:[/cyan] {current}",
                f"[cyan]Created:[/cyan] {ds.created_at or 'Unknown'}",
                f"[cyan]Updated:[/cyan] {ds.updated_at or 'Unknown'}",
            ]
        )

        console.print(Panel(panel_content, title=f"Dataset: {ds.name}", border_style="green"))

    except Exception as e:
        # Covers ValueError from get_client() (missing API key) and SDK failures.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
78
+
79
+
80
@app.command("delete")
def delete(
    dataset: str = typer.Argument(..., help="Dataset name or ID"),
    project: str = typer.Option(None, "--project", "-p", help="Project name"),
    force: bool = typer.Option(False, "--force", "-f", help="Skip confirmation"),
):
    """Delete a dataset."""
    try:
        # Build the client first so a missing API key is reported before the
        # user is asked to confirm anything.
        client = get_client()

        if not force:
            confirmed = typer.confirm(f"Delete dataset '{dataset}'? This cannot be undone")
            if not confirmed:
                console.print("[yellow]Cancelled.[/yellow]")
                raise typer.Exit(0)

        client.datasets.delete(dataset, project=project, delete_file=True)
        console.print(f"[green]✓[/green] Deleted dataset: {dataset}")

    except (typer.Exit, typer.Abort):
        # typer.Exit / typer.Abort derive from RuntimeError. Without this
        # clause the generic handler below would catch the clean cancel above,
        # print a bogus "Error:" line and turn exit status 0 into 1.
        raise
    except Exception as e:
        # Covers ValueError from get_client() (missing API key) and SDK failures.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
@@ -0,0 +1,204 @@
1
+ """Version control commands - the core git-like functionality."""
2
+
3
+ import typer
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+ from rich.panel import Panel
7
+ from typing import Optional
8
+
9
+ from ..config import get_client
10
+
11
+ console = Console()
12
+
13
+
14
def log(
    dataset: str = typer.Argument(..., help="Dataset name or ID"),
    project: str = typer.Option(None, "--project", "-p", help="Project name"),
    limit: int = typer.Option(20, "--limit", "-n", help="Number of versions to show"),
):
    """Show version history for a dataset (like git log)."""
    # NOTE(review): `project` is accepted but never passed to history() below;
    # confirm whether the SDK call should receive it.
    try:
        events = get_client().datasets.history(dataset, limit=limit)

        if not events:
            console.print("[yellow]No version history found.[/yellow]")
            return

        console.print(f"[bold]Version history for {dataset}[/bold]\n")

        # One git-log style stanza per event; absent/empty fields are skipped.
        for entry in events:
            label = f"v{entry.version}" if entry.version else "?"
            console.print(f"[yellow]commit {label}[/yellow]")

            when = entry.created_at
            if when:
                console.print(f"Date: {when}")

            action = entry.action
            if action:
                console.print(f"Action: {action}")

            note = entry.description
            if note:
                console.print(f"\n {note}")

            # Blank line between stanzas.
            console.print()

    except Exception as e:
        # Covers ValueError from get_client() (missing API key) and SDK failures.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
49
+
50
+
51
def checkout(
    dataset: str = typer.Argument(..., help="Dataset name or ID"),
    version: int = typer.Option(..., "--version", "-v", help="Version number to checkout"),
    project: str = typer.Option(None, "--project", "-p", help="Project name"),
):
    """
    Pin to a specific dataset version (like git checkout).

    This sets the working version for subsequent operations.
    """
    try:
        client = get_client()
        ds = client.datasets.get(dataset, project=project)

        # Fetching the version's info doubles as the "does it exist" check.
        pinned = ds.version(version)
        info = pinned.info()

        def _field(name):
            # info() may return either a dict or an attribute-style object.
            if isinstance(info, dict):
                return info.get(name)
            return getattr(info, name, None)

        num_vectors = _field("num_vectors")
        num_blocks = _field("num_blocks")

        console.print(f"[green]✓[/green] Checked out {dataset} @ v{version}")
        # Compare against None rather than truthiness so a real count of 0 is
        # shown as 0 instead of "unknown".
        console.print(f" Vectors: {num_vectors:,}" if num_vectors is not None else " Vectors: unknown")
        console.print(f" Blocks: {num_blocks}" if num_blocks is not None else " Blocks: unknown")

        # Nothing is persisted locally; the version must be passed explicitly
        # to other commands.
        console.print(f"\n[dim]Tip: Use --version {version} in other commands to use this version[/dim]")

    except Exception as e:
        # Covers ValueError from get_client() (missing API key) and SDK failures.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
86
+
87
+
88
def commit(
    dataset: str = typer.Argument(..., help="Dataset name or ID"),
    message: str = typer.Option(..., "--message", "-m", help="Commit message"),
    project: str = typer.Option(None, "--project", "-p", help="Project name"),
):
    """
    Commit pending changes to create a new version (like git commit).

    Note: In Decompressed, versions are created automatically when you
    push data. This command is for adding a description to the current version.
    """
    try:
        ds = get_client().datasets.get(dataset, project=project)

        # Currently informational only: the dataset's current version and the
        # supplied message are printed; nothing is written back.
        report = (
            f"[green]✓[/green] Current version: v{ds.current_version}",
            f" Message: {message}",
            "\n[dim]Note: Versions are created automatically when pushing data.[/dim]",
        )
        for line in report:
            console.print(line)

    except Exception as e:
        # Covers ValueError from get_client() (missing API key) and SDK failures.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
115
+
116
+
117
def tag(
    dataset: str = typer.Argument(..., help="Dataset name or ID"),
    version: int = typer.Argument(..., help="Version number to tag"),
    name: str = typer.Argument(..., help="Tag name (e.g., 'production', 'stable')"),
    project: str = typer.Option(None, "--project", "-p", help="Project name"),
):
    """
    Create a named ref for a version (like git tag).

    Example: dcp tag my-dataset 3 production
    """
    try:
        # Resolve dataset -> pinned version, then promote that version under `name`.
        target = get_client().datasets.get(dataset, project=project)
        target.version(version).promote(name)

        console.print(f"[green]✓[/green] Tagged {dataset} v{version} as '{name}'")
        console.print(f"\n[dim]Access via: dataset.ref('{name}')[/dim]")

    except Exception as e:
        # Covers ValueError from get_client() (missing API key) and SDK failures.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
145
+
146
+
147
def diff(
    dataset: str = typer.Argument(..., help="Dataset name or ID"),
    version1: int = typer.Argument(..., help="First version"),
    version2: int = typer.Argument(..., help="Second version"),
    project: str = typer.Option(None, "--project", "-p", help="Project name"),
):
    """
    Compare two versions of a dataset (like git diff).

    Example: dcp diff my-dataset 2 3
    """
    try:
        client = get_client()
        ds = client.datasets.get(dataset, project=project)

        # Get info for both versions
        v1 = ds.version(version1)
        v2 = ds.version(version2)

        info1 = v1.info()
        info2 = v2.info()

        def get_val(info, key, default=0):
            # info() may return a dict or an attribute-style object. In both
            # cases a missing OR None value falls back to the default, so the
            # arithmetic below never sees None.
            if isinstance(info, dict):
                value = info.get(key)
            else:
                value = getattr(info, key, None)
            return default if value is None else value

        def colored(delta, text):
            # Green with explicit '+' for growth, red for shrinkage, plain "0" otherwise.
            if delta > 0:
                return f"[green]+{text}[/green]"
            if delta < 0:
                return f"[red]{text}[/red]"
            return "0"

        console.print(f"[bold]Comparing {dataset}: v{version1} → v{version2}[/bold]\n")

        table = Table()
        table.add_column("Metric", style="cyan")
        table.add_column(f"v{version1}", justify="right")
        table.add_column(f"v{version2}", justify="right")
        table.add_column("Change", justify="right")

        # Vectors (thousands separators)
        v1_vecs = get_val(info1, "num_vectors", 0)
        v2_vecs = get_val(info2, "num_vectors", 0)
        vec_diff = v2_vecs - v1_vecs
        table.add_row("Vectors", f"{v1_vecs:,}", f"{v2_vecs:,}", colored(vec_diff, f"{vec_diff:,}"))

        # Blocks (plain integers)
        v1_blocks = get_val(info1, "num_blocks", 0)
        v2_blocks = get_val(info2, "num_blocks", 0)
        block_diff = v2_blocks - v1_blocks
        table.add_row("Blocks", str(v1_blocks), str(v2_blocks), colored(block_diff, f"{block_diff}"))

        console.print(table)

    except Exception as e:
        # Covers ValueError from get_client() (missing API key) and SDK failures.
        console.print(f"[red]Error:[/red] {e}")
        raise typer.Exit(1)
@@ -0,0 +1,74 @@
1
+ """Configuration management for Decompressed CLI."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Dict, Optional
6
+
7
+ CONFIG_DIR = Path.home() / ".decompressed"
8
+ CONFIG_FILE = CONFIG_DIR / "config.json"
9
+
10
+ DEFAULT_CONFIG = {
11
+ "api_key": None,
12
+ "base_url": "http://localhost:8000",
13
+ }
14
+
15
+
16
def ensure_config_dir() -> None:
    """Create CONFIG_DIR (and any missing parents); do nothing if it already exists."""
    CONFIG_DIR.mkdir(exist_ok=True, parents=True)
19
+
20
+
21
def load_config() -> Dict[str, Any]:
    """Load the CLI config, falling back to defaults when it is missing or unusable.

    Returns:
        A dict containing at least every key of DEFAULT_CONFIG. If the config
        file does not exist it is created with the defaults. If it cannot be
        read, is not valid JSON, or does not contain a JSON object, a copy of
        the defaults is returned and the file is left untouched.
    """
    ensure_config_dir()

    if not CONFIG_FILE.exists():
        # First run: write the defaults to disk so the file exists from now on.
        save_config(DEFAULT_CONFIG)
        return DEFAULT_CONFIG.copy()

    try:
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return DEFAULT_CONFIG.copy()

    # A hand-edited file may hold valid JSON that is not an object (e.g. a list
    # or a string); treat it like a corrupt file instead of crashing below.
    if not isinstance(config, dict):
        return DEFAULT_CONFIG.copy()

    # Fill in any keys the file lacks, keeping the file's own key order.
    for key, value in DEFAULT_CONFIG.items():
        config.setdefault(key, value)
    return config
39
+
40
+
41
def save_config(config: Dict[str, Any]) -> None:
    """Write ``config`` to CONFIG_FILE as indented JSON, readable by the owner only.

    The file stores the API key, so it is created with mode 0o600 instead of
    the process's default umask permissions.

    Args:
        config: JSON-serializable mapping to persist.
    """
    import os

    ensure_config_dir()

    # The mode passed to os.open only applies when the file is newly created.
    fd = os.open(CONFIG_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)

    # Tighten permissions on a file that already existed with a looser mode.
    # Best effort: some platforms/filesystems do not support this, and failing
    # to chmod must not prevent the config from being saved.
    try:
        os.chmod(CONFIG_FILE, 0o600)
    except OSError:
        pass
46
+
47
+
48
def get_config_value(key: str) -> Optional[Any]:
    """Return the loaded config's value for ``key``, or None if the key is absent."""
    return load_config().get(key)
52
+
53
+
54
def set_config_value(key: str, value: Any) -> None:
    """Store ``value`` under ``key`` and write the whole config back to disk."""
    # Read-modify-write: load the current settings, change one entry, save all.
    settings = load_config()
    settings[key] = value
    save_config(settings)
59
+
60
+
61
def get_client():
    """Build a DecompressedClient from the stored configuration.

    Returns:
        A ``DecompressedClient`` constructed with the configured ``base_url``
        and ``api_key``.

    Raises:
        ValueError: If no API key is configured.
    """
    # Imported here rather than at module level, so the SDK is only loaded
    # when a client is actually requested.
    from decompressed_sdk import DecompressedClient

    settings = load_config()

    api_key = settings.get("api_key")
    if not api_key:
        raise ValueError(
            "API key not configured. Run: dcp config set api_key YOUR_KEY"
        )

    base_url = settings.get("base_url", "http://localhost:8000")
    return DecompressedClient(base_url=base_url, api_key=api_key)
@@ -0,0 +1,46 @@
1
+ """Main CLI entry point."""
2
+
3
+ import typer
4
+ from rich.console import Console
5
+
6
+ from . import __version__
7
+ from .commands import config_cmd, datasets_cmd, versions_cmd, data_cmd
8
+
9
+ app = typer.Typer(
10
+ name="dcp",
11
+ help="Decompressed CLI - Git-like version control for vector datasets",
12
+ no_args_is_help=True,
13
+ )
14
+ console = Console()
15
+
16
+ # Register command groups
17
+ app.add_typer(config_cmd.app, name="config", help="Manage CLI configuration")
18
+ app.add_typer(datasets_cmd.app, name="datasets", help="Manage datasets")
19
+ app.add_typer(data_cmd.app, name="data", help="Pull/push data (alias)")
20
+
21
+ # Register top-level versioning commands
22
+ app.command("log")(versions_cmd.log)
23
+ app.command("checkout")(versions_cmd.checkout)
24
+ app.command("commit")(versions_cmd.commit)
25
+ app.command("tag")(versions_cmd.tag)
26
+ app.command("diff")(versions_cmd.diff)
27
+
28
+ # Shortcuts for common operations
29
+ app.command("pull")(data_cmd.pull)
30
+ app.command("push")(data_cmd.push)
31
+
32
+
33
@app.command()
def version():
    """Show CLI version."""
    # __version__ is imported from the package's __init__ module.
    banner = f"dcp version {__version__}"
    console.print(banner)
37
+
38
+
39
@app.callback()
def main_callback():
    """Decompressed CLI - Git-like version control for vector datasets."""
    # Intentionally empty: registered only as the app-level callback.
    return None
43
+
44
+
45
+ if __name__ == "__main__":
46
+ app()
@@ -0,0 +1,97 @@
1
+ Metadata-Version: 2.4
2
+ Name: decompressed-cli
3
+ Version: 0.1.0
4
+ Summary: CLI for Decompressed - Git-like version control for vector datasets
5
+ Author-email: Decompressed <support@decompressed.io>
6
+ License: MIT
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Requires-Python: >=3.9
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: typer>=0.9.0
18
+ Requires-Dist: rich>=13.0.0
19
+ Requires-Dist: decompressed-sdk>=0.1.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
22
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
23
+
24
+ # Decompressed CLI
25
+
26
+ Git-like version control for vector datasets.
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install decompressed-cli
32
+ ```
33
+
34
+ Or install from source:
35
+ ```bash
36
+ cd sdk/cli
37
+ pip install -e .
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ```bash
43
+ # Configure your API key
44
+ dcp config set api_key YOUR_API_KEY
45
+ dcp config set base_url https://api.decompressed.io
46
+
47
+ # List your datasets
48
+ dcp datasets list
49
+
50
+ # Get dataset info
51
+ dcp datasets info my-dataset
52
+
53
+ # Version control commands
54
+ dcp log my-dataset # Show version history
55
+ dcp checkout my-dataset --version 3 # Pin to version 3
56
+ dcp commit my-dataset -m "Added data" # Show current version with a message (versions are created on push)
57
+ dcp tag my-dataset 3 production # Tag version 3 as 'production'
58
+ dcp diff my-dataset 2 3 # Compare versions
59
+
60
+ # Data commands
61
+ dcp pull my-dataset -o ./data/ # Download dataset
62
+ dcp push ./vectors.npy my-dataset # Upload/append vectors
63
+ ```
64
+
65
+ ## Commands
66
+
67
+ ### Configuration
68
+ - `dcp config set <key> <value>` - Set config value
69
+ - `dcp config get <key>` - Get config value
70
+ - `dcp config list` - List all config
71
+
72
+ ### Datasets
73
+ - `dcp datasets list` - List all datasets
74
+ - `dcp datasets info <dataset>` - Show dataset details
75
+ - `dcp datasets delete <dataset>` - Delete a dataset
76
+
77
+ ### Versioning
78
+ - `dcp log <dataset>` - Show version history
79
+ - `dcp checkout <dataset> --version <n>` - Pin to specific version
80
+ - `dcp commit <dataset> -m <message>` - Commit pending changes
81
+ - `dcp tag <dataset> <version> <name>` - Create named ref
82
+ - `dcp diff <dataset> <v1> <v2>` - Compare two versions
83
+
84
+ ### Data
85
+ - `dcp pull <dataset> -o <path>` - Download dataset vectors
86
+ - `dcp push <file> <dataset>` - Upload/append vectors
87
+
88
+ ## Configuration
89
+
90
+ Config is stored in `~/.decompressed/config.json`:
91
+
92
+ ```json
93
+ {
94
+ "api_key": "dcp_xxx",
95
+ "base_url": "https://api.decompressed.io"
96
+ }
97
+ ```
@@ -0,0 +1,16 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/decompressed_cli/__init__.py
4
+ src/decompressed_cli/config.py
5
+ src/decompressed_cli/main.py
6
+ src/decompressed_cli.egg-info/PKG-INFO
7
+ src/decompressed_cli.egg-info/SOURCES.txt
8
+ src/decompressed_cli.egg-info/dependency_links.txt
9
+ src/decompressed_cli.egg-info/entry_points.txt
10
+ src/decompressed_cli.egg-info/requires.txt
11
+ src/decompressed_cli.egg-info/top_level.txt
12
+ src/decompressed_cli/commands/__init__.py
13
+ src/decompressed_cli/commands/config_cmd.py
14
+ src/decompressed_cli/commands/data_cmd.py
15
+ src/decompressed_cli/commands/datasets_cmd.py
16
+ src/decompressed_cli/commands/versions_cmd.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ dcp = decompressed_cli.main:app
@@ -0,0 +1,7 @@
1
+ typer>=0.9.0
2
+ rich>=13.0.0
3
+ decompressed-sdk>=0.1.0
4
+
5
+ [dev]
6
+ pytest>=7.0.0
7
+ pytest-cov>=4.0.0
@@ -0,0 +1 @@
1
+ decompressed_cli