cyborgdb-migrate 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. cyborgdb_migrate-0.1.0/LICENSE +21 -0
  2. cyborgdb_migrate-0.1.0/PKG-INFO +50 -0
  3. cyborgdb_migrate-0.1.0/README.md +159 -0
  4. cyborgdb_migrate-0.1.0/pyproject.toml +88 -0
  5. cyborgdb_migrate-0.1.0/setup.cfg +4 -0
  6. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/__init__.py +1 -0
  7. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/app.py +58 -0
  8. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/checkpoint.py +77 -0
  9. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/cli.py +225 -0
  10. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/clipboard.py +34 -0
  11. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/config.py +118 -0
  12. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/destination.py +146 -0
  13. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/engine.py +343 -0
  14. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/models.py +96 -0
  15. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/__init__.py +0 -0
  16. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/cyborgdb_connect.py +101 -0
  17. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/dest_index.py +305 -0
  18. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/migrate.py +183 -0
  19. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/source_credentials.py +69 -0
  20. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/source_inspect.py +148 -0
  21. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/source_select.py +55 -0
  22. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/summary.py +189 -0
  23. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/welcome.py +82 -0
  24. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/__init__.py +13 -0
  25. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/base.py +66 -0
  26. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/chromadb.py +172 -0
  27. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/milvus.py +214 -0
  28. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/pinecone.py +128 -0
  29. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/qdrant.py +120 -0
  30. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/weaviate.py +160 -0
  31. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/__init__.py +0 -0
  32. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/key_warning.py +54 -0
  33. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/logo.py +26 -0
  34. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/source_form.py +106 -0
  35. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/step_header.py +48 -0
  36. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/PKG-INFO +50 -0
  37. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/SOURCES.txt +47 -0
  38. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/dependency_links.txt +1 -0
  39. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/entry_points.txt +2 -0
  40. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/requires.txt +37 -0
  41. cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/top_level.txt +1 -0
  42. cyborgdb_migrate-0.1.0/tests/test_app.py +91 -0
  43. cyborgdb_migrate-0.1.0/tests/test_checkpoint.py +99 -0
  44. cyborgdb_migrate-0.1.0/tests/test_cli.py +210 -0
  45. cyborgdb_migrate-0.1.0/tests/test_clipboard.py +51 -0
  46. cyborgdb_migrate-0.1.0/tests/test_config.py +197 -0
  47. cyborgdb_migrate-0.1.0/tests/test_destination.py +250 -0
  48. cyborgdb_migrate-0.1.0/tests/test_engine.py +306 -0
  49. cyborgdb_migrate-0.1.0/tests/test_models.py +137 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Cyborg Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,50 @@
1
+ Metadata-Version: 2.4
2
+ Name: cyborgdb-migrate
3
+ Version: 0.1.0
4
+ Summary: TUI wizard and CLI for migrating vector data into CyborgDB
5
+ Author: Cyborg Inc.
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/cyborginc/cyborgdb-migrate
8
+ Project-URL: Repository, https://github.com/cyborginc/cyborgdb-migrate
9
+ Project-URL: Issues, https://github.com/cyborginc/cyborgdb-migrate/issues
10
+ Keywords: vector-database,migration,cyborgdb,pinecone,qdrant,weaviate,chromadb,milvus,embeddings,tui
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.10
22
+ License-File: LICENSE
23
+ Requires-Dist: textual>=0.86.0
24
+ Requires-Dist: cyborgdb
25
+ Requires-Dist: numpy>=1.21.0
26
+ Requires-Dist: tomli>=1.1.0; python_version < "3.11"
27
+ Provides-Extra: pinecone
28
+ Requires-Dist: pinecone; extra == "pinecone"
29
+ Provides-Extra: qdrant
30
+ Requires-Dist: qdrant-client; extra == "qdrant"
31
+ Provides-Extra: weaviate
32
+ Requires-Dist: weaviate-client; extra == "weaviate"
33
+ Provides-Extra: chromadb
34
+ Requires-Dist: chromadb; extra == "chromadb"
35
+ Provides-Extra: milvus
36
+ Requires-Dist: pymilvus; extra == "milvus"
37
+ Provides-Extra: syntax
38
+ Requires-Dist: tree-sitter; extra == "syntax"
39
+ Requires-Dist: tree-sitter-python; extra == "syntax"
40
+ Provides-Extra: all
41
+ Requires-Dist: pinecone; extra == "all"
42
+ Requires-Dist: qdrant-client; extra == "all"
43
+ Requires-Dist: weaviate-client; extra == "all"
44
+ Requires-Dist: chromadb; extra == "all"
45
+ Requires-Dist: pymilvus; extra == "all"
46
+ Provides-Extra: dev
47
+ Requires-Dist: pytest; extra == "dev"
48
+ Requires-Dist: pytest-asyncio; extra == "dev"
49
+ Requires-Dist: textual-dev; extra == "dev"
50
+ Dynamic: license-file
@@ -0,0 +1,159 @@
1
+ # cyborgdb-migrate
2
+
3
+ A TUI wizard and CLI for migrating vector data from popular vector databases into [CyborgDB](https://cyborgdb.com) — the encrypted vector database.
4
+
5
+ ## Features
6
+
7
+ - **Interactive TUI** — step-by-step wizard powered by [Textual](https://textual.textualize.io/)
8
+ - **Headless CLI** — non-interactive mode for scripts and CI/CD pipelines
9
+ - **5 source connectors** — Pinecone, Qdrant, Weaviate, ChromaDB, Milvus
10
+ - **Encrypted at rest** — every index is AES-encrypted with a key you control
11
+ - **Checkpoint & resume** — automatically saves progress; resume interrupted migrations
12
+ - **Spot-check verification** — post-migration vector and metadata integrity checks
13
+ - **Double-buffered I/O** — overlaps extraction and upsert for maximum throughput
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ pip install cyborgdb-migrate
19
+ ```
20
+
21
+ Install with support for your source database:
22
+
23
+ ```bash
24
+ # Individual sources
25
+ pip install "cyborgdb-migrate[pinecone]"
26
+ pip install "cyborgdb-migrate[qdrant]"
27
+ pip install "cyborgdb-migrate[weaviate]"
28
+ pip install "cyborgdb-migrate[chromadb]"
29
+ pip install "cyborgdb-migrate[milvus]"
30
+
31
+ # All sources at once
32
+ pip install "cyborgdb-migrate[all]"
33
+ ```
34
+
35
+ Optional syntax highlighting for the summary code snippet:
36
+
37
+ ```bash
38
+ pip install "cyborgdb-migrate[syntax]"
39
+ ```
40
+
41
+ ## Quick Start
42
+
43
+ ### Interactive (TUI)
44
+
45
+ ```bash
46
+ cyborgdb-migrate
47
+ ```
48
+
49
+ The wizard walks you through:
50
+
51
+ 1. Selecting a source database
52
+ 2. Entering credentials and picking an index/collection
53
+ 3. Connecting to CyborgDB
54
+ 4. Creating or selecting a destination index
55
+ 5. Running the migration with live progress
56
+ 6. Viewing verification results and a Python quickstart snippet
57
+
58
+ ### Headless (CLI)
59
+
60
+ Create a TOML config file (see [`example-config.toml`](example-config.toml)):
61
+
62
+ ```toml
63
+ [source]
64
+ type = "pinecone"
65
+ api_key = "${PINECONE_API_KEY}"
66
+ index = "my-index"
67
+
68
+ [destination]
69
+ host = "http://localhost:8000"
70
+ api_key = "${CYBORGDB_API_KEY}"
71
+ create_index = true
72
+ index_name = "my-cyborgdb-index"
73
+ index_type = "ivfflat"
74
+
75
+ [options]
76
+ batch_size = 200
77
+ checkpoint_every = 10
78
+ ```
79
+
80
+ Run the migration:
81
+
82
+ ```bash
83
+ cyborgdb-migrate --config migration.toml
84
+ ```
85
+
86
+ Resume an interrupted migration:
87
+
88
+ ```bash
89
+ cyborgdb-migrate --config migration.toml --resume
90
+ ```
91
+
92
+ ## Configuration Reference
93
+
94
+ ### `[source]`
95
+
96
+ | Key | Required | Description |
97
+ |-----|----------|-------------|
98
+ | `type` | Yes | Source database: `pinecone`, `qdrant`, `weaviate`, `chromadb`, `milvus` |
99
+ | `index` | Yes | Index or collection name to migrate from |
100
+ | `namespace` | No | Namespace/partition to migrate (Pinecone, Milvus) |
101
+ | *(other keys)* | Varies | Passed as credentials to the source connector (e.g. `api_key`, `host`) |
102
+
103
+ ### `[destination]`
104
+
105
+ | Key | Required | Description |
106
+ |-----|----------|-------------|
107
+ | `host` | Yes | CyborgDB server URL |
108
+ | `api_key` | Yes | CyborgDB API key |
109
+ | `index_name` | Yes | Destination index name |
110
+ | `create_index` | No | `true` (default) to create a new index, `false` to use existing |
111
+ | `index_type` | No | `ivfflat` (default) or `ivfpq` |
112
+ | `index_key` | No | Hex-encoded encryption key for an existing index; when `create_index = false`, either `index_key` or `key_file` must be set |
113
+ | `key_file` | No | Path to a file containing the encryption key for an existing index; used only when `index_key` is not set |
114
+
115
+ ### `[options]`
116
+
117
+ | Key | Default | Description |
118
+ |-----|---------|-------------|
119
+ | `batch_size` | `100` | Vectors per batch |
120
+ | `checkpoint_every` | `10` | Save checkpoint every N batches |
121
+ | `spot_check_per_batch` | `4` | Vectors sampled per batch for verification |
122
+
123
+ Environment variables can be referenced as `${VAR_NAME}` anywhere in the config.
124
+
125
+ ## CLI Options
126
+
127
+ ```
128
+ cyborgdb-migrate [OPTIONS]
129
+
130
+ Options:
131
+ --config FILE TOML config file for non-interactive mode
132
+ --resume Resume from checkpoint (requires --config)
133
+ --batch-size INT Override batch size (default: 100)
134
+ --log-file FILE Log file path (default: ./cyborgdb-migrate.log)
135
+ --quiet Minimal output (non-interactive only)
136
+ --version Show version and exit
137
+ ```
138
+
139
+ ## Supported Sources
140
+
141
+ | Source | Extras | Notes |
142
+ |--------|--------|-------|
143
+ | [Pinecone](https://www.pinecone.io/) | `pinecone` | Supports namespaces |
144
+ | [Qdrant](https://qdrant.tech/) | `qdrant` | Scroll-based pagination |
145
+ | [Weaviate](https://weaviate.io/) | `weaviate` | Supports named vectors |
146
+ | [ChromaDB](https://www.trychroma.com/) | `chromadb` | Local and remote modes |
147
+ | [Milvus](https://milvus.io/) | `milvus` | Supports partitions, content field heuristic |
148
+
149
+ ## Exit Codes
150
+
151
+ | Code | Meaning |
152
+ |------|---------|
153
+ | `0` | Success |
154
+ | `1` | Configuration or connection error |
155
+ | `2` | Migration completed but spot-check verification failed |
156
+
157
+ ## License
158
+
159
+ [MIT](LICENSE)
@@ -0,0 +1,88 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "cyborgdb-migrate"
7
+ version = "0.1.0"
8
+ description = "TUI wizard and CLI for migrating vector data into CyborgDB"
9
+ requires-python = ">=3.10"
10
+ license = "MIT"
11
+ authors = [
12
+ {name = "Cyborg Inc."},
13
+ ]
14
+ keywords = [
15
+ "vector-database",
16
+ "migration",
17
+ "cyborgdb",
18
+ "pinecone",
19
+ "qdrant",
20
+ "weaviate",
21
+ "chromadb",
22
+ "milvus",
23
+ "embeddings",
24
+ "tui",
25
+ ]
26
+ classifiers = [
27
+ "Environment :: Console",
28
+ "Intended Audience :: Developers",
29
+ "Programming Language :: Python :: 3",
30
+ "Programming Language :: Python :: 3.10",
31
+ "Programming Language :: Python :: 3.11",
32
+ "Programming Language :: Python :: 3.12",
33
+ "Programming Language :: Python :: 3.13",
34
+ "Programming Language :: Python :: 3.14",
35
+ "Topic :: Database",
36
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
37
+ ]
38
+ dependencies = [
39
+ "textual>=0.86.0",
40
+ "cyborgdb",
41
+ "numpy>=1.21.0",
42
+ "tomli>=1.1.0;python_version<'3.11'",
43
+ ]
44
+
45
+ [project.optional-dependencies]
46
+ pinecone = ["pinecone"]
47
+ qdrant = ["qdrant-client"]
48
+ weaviate = ["weaviate-client"]
49
+ chromadb = ["chromadb"]
50
+ milvus = ["pymilvus"]
51
+ syntax = [
52
+ "tree-sitter",
53
+ "tree-sitter-python",
54
+ ]
55
+ all = [
56
+ "pinecone",
57
+ "qdrant-client",
58
+ "weaviate-client",
59
+ "chromadb",
60
+ "pymilvus",
61
+ ]
62
+ dev = [
63
+ "pytest",
64
+ "pytest-asyncio",
65
+ "textual-dev",
66
+ ]
67
+
68
+ [project.scripts]
69
+ cyborgdb-migrate = "cyborgdb_migrate.cli:main"
70
+
71
+ [project.urls]
72
+ Homepage = "https://github.com/cyborginc/cyborgdb-migrate"
73
+ Repository = "https://github.com/cyborginc/cyborgdb-migrate"
74
+ Issues = "https://github.com/cyborginc/cyborgdb-migrate/issues"
75
+
76
+ [tool.setuptools.packages.find]
77
+ where = ["src"]
78
+
79
+ [tool.ruff]
80
+ line-length = 100
81
+ target-version = "py310"
82
+
83
+ [tool.ruff.lint]
84
+ select = ["E", "F", "I", "W"]
85
+
86
+ [tool.pytest.ini_options]
87
+ testpaths = ["tests"]
88
+ asyncio_mode = "auto"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,58 @@
1
+ import time
2
+
3
+ from textual.app import App
4
+ from textual.theme import Theme
5
+
6
+ from cyborgdb_migrate.models import MigrationState
7
+ from cyborgdb_migrate.screens.welcome import WelcomeScreen
8
+
9
# Terminal dimensions in columns and rows. Neither constant is referenced in
# this module; presumably the smallest terminal size the wizard is meant to
# support -- confirm against the screens that import them.
MIN_WIDTH, MIN_HEIGHT = 80, 24

# Dark colour theme; MigrateApp.on_mount registers it and selects it by name.
CYBORGDB_THEME = Theme(
    name="cyborgdb",
    dark=True,
    # Main colours.
    primary="#217684",
    secondary="#56D3DB",
    accent="#38C3EE",
    # Backgrounds.
    background="#111d20",
    surface="#1a2a2e",
    panel="#1e3438",
    # Status colours.
    success="#2ecc71",
    warning="#f5a623",
    error="#e74c3c",
)
25
+
26
+
27
class MigrateApp(App):
    """Textual application hosting the CyborgDB migration wizard.

    The default quit action is replaced by a no-op; the app exits only when
    Ctrl+C is pressed twice within one second.
    """

    TITLE = "CyborgDB Migration Wizard"
    # NOTE(review): the stylesheet is referenced by relative name; confirm that
    # theme.css is actually shipped next to this module in the distribution.
    CSS_PATH = "theme.css"

    BINDINGS = [
        ("ctrl+q", "", ""),  # unbind default quit
    ]

    # time.monotonic() reading taken at the most recent unconfirmed Ctrl+C press.
    _ctrl_c_time: float = 0.0

    def __init__(self, state: MigrationState | None = None) -> None:
        """Create the app, using *state* if given or a fresh MigrationState."""
        super().__init__()
        self.state = state if state else MigrationState()

    def action_quit(self) -> None:
        """Disable the default quit action."""

    def _on_key(self, event) -> None:
        """Exit on a second Ctrl+C within one second; otherwise prompt for it."""
        if event.key != "ctrl+c":
            return

        pressed_at = time.monotonic()
        if pressed_at - self._ctrl_c_time < 1.0:
            self.exit()
            return

        # First press (or the previous one is too old): remember it and ask
        # the user to confirm.
        self._ctrl_c_time = pressed_at
        self.notify("Press Ctrl+C again to quit", timeout=1)

    def on_mount(self) -> None:
        """Apply the CyborgDB theme and show the welcome screen."""
        self.register_theme(CYBORGDB_THEME)
        self.theme = "cyborgdb"
        self.push_screen(WelcomeScreen(self.state))
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import tempfile
6
+ from dataclasses import asdict, dataclass
7
+ from datetime import datetime, timezone
8
+ from pathlib import Path
9
+
10
+ CHECKPOINT_DIR = "./cyborgdb-migrate-checkpoints"
11
+
12
+
13
+ @dataclass
14
+ class CheckpointData:
15
+ version: int = 1
16
+ source_type: str = ""
17
+ source_index: str = ""
18
+ dest_index: str = ""
19
+ namespace: str | None = None
20
+ cursor: str | None = None
21
+ vectors_migrated: int = 0
22
+ vectors_total: int = 0
23
+ started_at: str = ""
24
+ updated_at: str = ""
25
+ batch_size: int = 100
26
+ batches_completed: int = 0
27
+
28
+
29
def checkpoint_path(source_type: str, source_index: str, dest_index: str) -> Path:
    """Build the checkpoint file location for one migration.

    The file is named ``<source_type>_<source_index>_<dest_index>.json`` and
    lives directly inside ``CHECKPOINT_DIR``. Forward slashes and backslashes
    in the name are replaced by underscores so it never contains a path
    separator.
    """
    raw_name = f"{source_type}_{source_index}_{dest_index}.json"
    # One pass that maps both separator characters to "_".
    file_name = raw_name.translate(str.maketrans("/\\", "__"))
    return Path(CHECKPOINT_DIR, file_name)
35
+
36
+
37
def save_checkpoint(data: CheckpointData) -> Path:
    """Atomically write *data* to its checkpoint file and return the file path.

    Side effects on *data*: ``updated_at`` is set to the current UTC time in
    ISO-8601 form, and ``started_at`` is set to the same value if it was empty.

    The JSON document is written to a temporary file in the destination
    directory and then moved over the final path with ``os.replace``, so a
    reader never sees a partially written checkpoint.

    Raises:
        OSError: if the directory or file cannot be created or written.
    """
    path = checkpoint_path(data.source_type, data.source_index, data.dest_index)
    path.parent.mkdir(parents=True, exist_ok=True)

    data.updated_at = datetime.now(timezone.utc).isoformat()
    if not data.started_at:
        data.started_at = data.updated_at

    # The temp file must live in the same directory as the target so that
    # os.replace is a rename rather than a cross-filesystem copy.
    fd, tmp_path = tempfile.mkstemp(dir=path.parent, suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(asdict(data), f, indent=2)
        os.replace(tmp_path, path)
    except BaseException:
        # BaseException rather than Exception: an interrupt (Ctrl+C) during
        # the write must not leave a stray *.tmp file behind either.
        try:
            os.unlink(tmp_path)
        except OSError:
            # Best effort only; the original error is what matters.
            pass
        raise

    return path
61
+
62
+
63
def load_checkpoint(source_type: str, source_index: str, dest_index: str) -> CheckpointData | None:
    """Load the checkpoint for a migration, or return None if there is none.

    Raises:
        json.JSONDecodeError: if the checkpoint file is not valid JSON.
        TypeError: if the document contains keys that are not CheckpointData
            fields.
    """
    path = checkpoint_path(source_type, source_index, dest_index)
    try:
        # Open directly instead of testing for existence first, so a file
        # removed in between still yields None rather than an exception.
        with open(path, encoding="utf-8") as f:
            raw = json.load(f)
    except FileNotFoundError:
        return None
    return CheckpointData(**raw)
71
+
72
+
73
def delete_checkpoint(source_type: str, source_index: str, dest_index: str) -> None:
    """Delete the checkpoint file for a migration; do nothing if it is absent."""
    path = checkpoint_path(source_type, source_index, dest_index)
    # missing_ok avoids the check-then-delete race of exists() + unlink().
    path.unlink(missing_ok=True)
@@ -0,0 +1,225 @@
1
+ import argparse
2
+ import sys
3
+
4
+ from cyborgdb_migrate import __version__
5
+
6
+
7
def main():
    """Command-line entry point for ``cyborgdb-migrate``.

    With ``--config`` the migration runs non-interactively via run_headless;
    without it the Textual wizard is started. Logging to ``--log-file`` is
    configured in both modes.

    Raises:
        SystemExit: with status 1 if ``--batch-size`` is not positive or if
            ``--resume`` is given without ``--config``.
    """
    parser = argparse.ArgumentParser(
        prog="cyborgdb-migrate",
        description="Migrate vector data from other databases into CyborgDB",
    )
    parser.add_argument(
        "--config", metavar="FILE",
        help="TOML config file for non-interactive mode",
    )
    parser.add_argument(
        "--resume", action="store_true",
        help="Resume from checkpoint (non-interactive only)",
    )
    parser.add_argument(
        "--batch-size", type=int, default=100,
        help="Vectors per batch (default: 100)",
    )
    parser.add_argument(
        "--log-file", metavar="FILE",
        default="./cyborgdb-migrate.log",
        help="Log file path (default: ./cyborgdb-migrate.log)",
    )
    parser.add_argument(
        "--quiet", action="store_true",
        help="Minimal output (non-interactive only)",
    )
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    args = parser.parse_args()

    # A batch must contain at least one vector; reject nonsense values up
    # front, in the same style as the --resume check below.
    if args.batch_size < 1:
        print("Error: --batch-size must be a positive integer", file=sys.stderr)
        raise SystemExit(1)

    if args.config:
        setup_logging(args.log_file)
        # NOTE: run_headless treats a batch size of 100 as "not given on the
        # command line" and then keeps the value from the config file.
        run_headless(args.config, args.batch_size, args.resume, args.log_file, args.quiet)
    else:
        if args.resume:
            print("Error: --resume is only supported with --config", file=sys.stderr)
            raise SystemExit(1)
        setup_logging(args.log_file)
        # Imported lazily so that headless runs do not load the TUI modules.
        from cyborgdb_migrate.app import MigrateApp
        from cyborgdb_migrate.models import MigrationState

        state = MigrationState()
        state.batch_size = args.batch_size
        app = MigrateApp(state)
        app.run()
52
+
53
+
54
def setup_logging(log_file: str) -> None:
    """Configure logging via ``logging.basicConfig`` to write to *log_file*.

    Records at INFO level and above are written as
    ``<timestamp> [<LEVEL>] <logger name>: <message>``.
    """
    import logging

    record_layout = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    timestamp_layout = "%Y-%m-%d %H:%M:%S"

    logging.basicConfig(
        level=logging.INFO,
        filename=log_file,
        datefmt=timestamp_layout,
        format=record_layout,
    )
63
+
64
+
65
def _resolve_source_class(registry, source_type):
    """Return the connector class registered for *source_type*, or None.

    Matching is case-insensitive. An exact name match always wins; only when
    there is none is the first registry name containing *source_type* as a
    substring used. An empty *source_type* never matches.
    """
    wanted = source_type.lower()
    if not wanted:
        # "" is a substring of every name and would select an arbitrary connector.
        return None

    candidates = [(name.lower(), cls) for name, cls in registry.items()]
    for name, cls in candidates:
        if name == wanted:
            return cls
    for name, cls in candidates:
        if wanted in name:
            return cls
    return None


def run_headless(
    config_path: str,
    batch_size: int,
    resume: bool,
    log_file: str,
    quiet: bool,
) -> None:
    """Run a migration non-interactively from a TOML config file.

    Steps: load the config, connect to and inspect the source, connect to
    CyborgDB, create or load the destination index, run the migration engine
    and report the result.

    Args:
        config_path: Path of the TOML config file.
        batch_size: Batch size from the command line. The value 100 (the CLI
            default) means "keep the value from the config file".
        resume: Passed to the engine to resume from a checkpoint.
        log_file: Accepted for interface compatibility; not used here.
        quiet: Suppress progress and status output. A newly generated
            encryption key is printed regardless.

    Raises:
        SystemExit: status 1 for an unknown source type, a missing key for an
            existing index, or a dimension mismatch; status 2 when the
            migration finished but the spot check failed.
    """
    import logging
    import threading

    from rich.console import Console
    from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn

    from cyborgdb_migrate.config import load_config
    from cyborgdb_migrate.destination import CyborgDestination
    from cyborgdb_migrate.engine import MigrationEngine
    from cyborgdb_migrate.sources import SOURCE_REGISTRY

    logger = logging.getLogger("cyborgdb_migrate.headless")
    console = Console(stderr=True)

    config = load_config(config_path)

    # Override batch_size from CLI if provided and different from default.
    # NOTE(review): an explicit "--batch-size 100" cannot be told apart from
    # the default here, so it does not override a different config value.
    if batch_size != 100:
        config.batch_size = batch_size

    # Resolve source
    source_type = config.source_type
    source_cls = _resolve_source_class(SOURCE_REGISTRY, source_type)
    if source_cls is None:
        console.print(f"[red]Unknown source type: {source_type}[/red]")
        raise SystemExit(1)

    source = source_cls()
    source.configure(config.source_credentials)
    if not quiet:
        console.print(f"Connecting to {source.name()}...")
    source.connect()
    if not quiet:
        console.print(f"[green]Connected to {source.name()}[/green]")

    # Inspect source
    source_info = source.inspect(config.source_index)
    if not quiet:
        console.print(
            f"Source: {source_info.index_or_collection_name} "
            f"({source_info.dimension}d, {source_info.vector_count:,} vectors)"
        )

    # Connect to CyborgDB
    destination = CyborgDestination()
    destination.connect(config.destination_host, config.destination_api_key)
    if not quiet:
        console.print("[green]Connected to CyborgDB[/green]")

    # Set up index
    if config.create_index:
        from cyborgdb import Client

        from cyborgdb_migrate.destination import compute_n_lists

        index_key = Client.generate_key(save=False)
        # Printed even with --quiet: the key is generated with save=False and
        # this function surfaces it nowhere else, so suppressing the line
        # would leave the new index without a known key.
        console.print(f"Generated encryption key (hex): {index_key.hex()}")

        n_lists = compute_n_lists(source_info.vector_count)
        destination.create_index(
            name=config.index_name,
            dimension=source_info.dimension,
            index_type=config.index_type or "ivfflat",
            index_key=index_key,
            n_lists=n_lists,
            metric=source_info.metric,
        )
    else:
        # An inline key takes precedence over a key file.
        if config.index_key:
            index_key = _decode_key(config.index_key)
        elif config.key_file:
            with open(config.key_file, encoding="utf-8") as f:
                index_key = _decode_key(f.read().strip())
        else:
            console.print("[red]No index key provided for existing index[/red]")
            raise SystemExit(1)
        destination.load_index(config.index_name, index_key)

        # Validate dimension match for existing index
        dest_dim = destination.get_index_dimension()
        if dest_dim is not None and dest_dim != source_info.dimension:
            console.print(
                f"[red]Dimension mismatch: source has {source_info.dimension}d, "
                f"destination has {dest_dim}d[/red]"
            )
            raise SystemExit(1)

    # Run migration
    cancel_event = threading.Event()

    def on_progress(update):
        pass  # Progress handled by rich progress bar below

    engine = MigrationEngine(
        source=source,
        destination=destination,
        source_info=source_info,
        batch_size=config.batch_size,
        checkpoint_every=config.checkpoint_every,
        spot_check_per_batch=config.spot_check_per_batch,
        on_progress=on_progress,
        cancel_event=cancel_event,
    )

    if quiet:
        result = engine.run(
            namespace=config.source_namespace,
            resume=resume,
        )
    else:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
            TimeElapsedColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Migrating...", total=source_info.vector_count)

            def progress_callback(update):
                progress.update(task, completed=update.vectors_migrated)

            # Swap the no-op callback for one that drives the progress bar.
            engine.on_progress = progress_callback
            result = engine.run(
                namespace=config.source_namespace,
                resume=resume,
            )

    if not quiet:
        console.print("\n[green]Migration complete![/green]")
        console.print(f"  Vectors: {result.vectors_migrated:,} / {result.vectors_expected:,}")
        console.print(f"  Duration: {result.duration_seconds:.1f}s")
        console.print(f"  Spot check: {'PASSED' if result.spot_check_passed else 'FAILED'}")
        console.print(f"  Details: {result.spot_check_details}")

    if not result.spot_check_passed:
        logger.warning("Spot check failed: %s", result.spot_check_details)
        raise SystemExit(2)
216
+
217
+
218
def _decode_key(value: str) -> bytes:
    """Decode a key from hex, falling back to base64 for backwards compatibility.

    Hex is tried first. If *value* is not valid hex it is decoded as standard
    base64, ignoring whitespace such as line breaks.

    Raises:
        ValueError: if *value* is neither valid hex nor valid base64
            (``binascii.Error`` is a subclass of ``ValueError``).
    """
    try:
        return bytes.fromhex(value)
    except ValueError:
        import base64

        # Remove whitespace ourselves and then decode strictly. Without
        # validate=True, b64decode silently skips every character outside the
        # base64 alphabet and would return a wrong key for a malformed value
        # instead of failing.
        compact = "".join(value.split())
        return base64.b64decode(compact, validate=True)