cyborgdb-migrate 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cyborgdb_migrate-0.1.0/LICENSE +21 -0
- cyborgdb_migrate-0.1.0/PKG-INFO +50 -0
- cyborgdb_migrate-0.1.0/README.md +159 -0
- cyborgdb_migrate-0.1.0/pyproject.toml +88 -0
- cyborgdb_migrate-0.1.0/setup.cfg +4 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/__init__.py +1 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/app.py +58 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/checkpoint.py +77 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/cli.py +225 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/clipboard.py +34 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/config.py +118 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/destination.py +146 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/engine.py +343 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/models.py +96 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/__init__.py +0 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/cyborgdb_connect.py +101 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/dest_index.py +305 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/migrate.py +183 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/source_credentials.py +69 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/source_inspect.py +148 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/source_select.py +55 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/summary.py +189 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/screens/welcome.py +82 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/__init__.py +13 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/base.py +66 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/chromadb.py +172 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/milvus.py +214 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/pinecone.py +128 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/qdrant.py +120 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/sources/weaviate.py +160 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/__init__.py +0 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/key_warning.py +54 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/logo.py +26 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/source_form.py +106 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate/widgets/step_header.py +48 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/PKG-INFO +50 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/SOURCES.txt +47 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/dependency_links.txt +1 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/entry_points.txt +2 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/requires.txt +37 -0
- cyborgdb_migrate-0.1.0/src/cyborgdb_migrate.egg-info/top_level.txt +1 -0
- cyborgdb_migrate-0.1.0/tests/test_app.py +91 -0
- cyborgdb_migrate-0.1.0/tests/test_checkpoint.py +99 -0
- cyborgdb_migrate-0.1.0/tests/test_cli.py +210 -0
- cyborgdb_migrate-0.1.0/tests/test_clipboard.py +51 -0
- cyborgdb_migrate-0.1.0/tests/test_config.py +197 -0
- cyborgdb_migrate-0.1.0/tests/test_destination.py +250 -0
- cyborgdb_migrate-0.1.0/tests/test_engine.py +306 -0
- cyborgdb_migrate-0.1.0/tests/test_models.py +137 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Cyborg Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cyborgdb-migrate
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: TUI wizard and CLI for migrating vector data into CyborgDB
|
|
5
|
+
Author: Cyborg Inc.
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/cyborginc/cyborgdb-migrate
|
|
8
|
+
Project-URL: Repository, https://github.com/cyborginc/cyborgdb-migrate
|
|
9
|
+
Project-URL: Issues, https://github.com/cyborginc/cyborgdb-migrate/issues
|
|
10
|
+
Keywords: vector-database,migration,cyborgdb,pinecone,qdrant,weaviate,chromadb,milvus,embeddings,tui
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: textual>=0.86.0
|
|
24
|
+
Requires-Dist: cyborgdb
|
|
25
|
+
Requires-Dist: numpy>=1.21.0
|
|
26
|
+
Requires-Dist: tomli>=1.1.0; python_version < "3.11"
|
|
27
|
+
Provides-Extra: pinecone
|
|
28
|
+
Requires-Dist: pinecone; extra == "pinecone"
|
|
29
|
+
Provides-Extra: qdrant
|
|
30
|
+
Requires-Dist: qdrant-client; extra == "qdrant"
|
|
31
|
+
Provides-Extra: weaviate
|
|
32
|
+
Requires-Dist: weaviate-client; extra == "weaviate"
|
|
33
|
+
Provides-Extra: chromadb
|
|
34
|
+
Requires-Dist: chromadb; extra == "chromadb"
|
|
35
|
+
Provides-Extra: milvus
|
|
36
|
+
Requires-Dist: pymilvus; extra == "milvus"
|
|
37
|
+
Provides-Extra: syntax
|
|
38
|
+
Requires-Dist: tree-sitter; extra == "syntax"
|
|
39
|
+
Requires-Dist: tree-sitter-python; extra == "syntax"
|
|
40
|
+
Provides-Extra: all
|
|
41
|
+
Requires-Dist: pinecone; extra == "all"
|
|
42
|
+
Requires-Dist: qdrant-client; extra == "all"
|
|
43
|
+
Requires-Dist: weaviate-client; extra == "all"
|
|
44
|
+
Requires-Dist: chromadb; extra == "all"
|
|
45
|
+
Requires-Dist: pymilvus; extra == "all"
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: pytest; extra == "dev"
|
|
48
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
49
|
+
Requires-Dist: textual-dev; extra == "dev"
|
|
50
|
+
Dynamic: license-file
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# cyborgdb-migrate
|
|
2
|
+
|
|
3
|
+
A TUI wizard and CLI for migrating vector data from popular vector databases into [CyborgDB](https://cyborgdb.com) — the encrypted vector database.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Interactive TUI** — step-by-step wizard powered by [Textual](https://textual.textualize.io/)
|
|
8
|
+
- **Headless CLI** — non-interactive mode for scripts and CI/CD pipelines
|
|
9
|
+
- **5 source connectors** — Pinecone, Qdrant, Weaviate, ChromaDB, Milvus
|
|
10
|
+
- **Encrypted at rest** — every index is AES-encrypted with a key you control
|
|
11
|
+
- **Checkpoint & resume** — automatically saves progress; resume interrupted migrations
|
|
12
|
+
- **Spot-check verification** — post-migration vector and metadata integrity checks
|
|
13
|
+
- **Double-buffered I/O** — overlaps extraction and upsert for maximum throughput
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install cyborgdb-migrate
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Install with support for your source database:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Individual sources
|
|
25
|
+
pip install "cyborgdb-migrate[pinecone]"
|
|
26
|
+
pip install "cyborgdb-migrate[qdrant]"
|
|
27
|
+
pip install "cyborgdb-migrate[weaviate]"
|
|
28
|
+
pip install "cyborgdb-migrate[chromadb]"
|
|
29
|
+
pip install "cyborgdb-migrate[milvus]"
|
|
30
|
+
|
|
31
|
+
# All sources at once
|
|
32
|
+
pip install "cyborgdb-migrate[all]"
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Optional syntax highlighting for the summary code snippet:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install "cyborgdb-migrate[syntax]"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
### Interactive (TUI)
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
cyborgdb-migrate
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
The wizard walks you through:
|
|
50
|
+
|
|
51
|
+
1. Selecting a source database
|
|
52
|
+
2. Entering credentials and picking an index/collection
|
|
53
|
+
3. Connecting to CyborgDB
|
|
54
|
+
4. Creating or selecting a destination index
|
|
55
|
+
5. Running the migration with live progress
|
|
56
|
+
6. Viewing verification results and a Python quickstart snippet
|
|
57
|
+
|
|
58
|
+
### Headless (CLI)
|
|
59
|
+
|
|
60
|
+
Create a TOML config file (see [`example-config.toml`](example-config.toml)):
|
|
61
|
+
|
|
62
|
+
```toml
|
|
63
|
+
[source]
|
|
64
|
+
type = "pinecone"
|
|
65
|
+
api_key = "${PINECONE_API_KEY}"
|
|
66
|
+
index = "my-index"
|
|
67
|
+
|
|
68
|
+
[destination]
|
|
69
|
+
host = "http://localhost:8000"
|
|
70
|
+
api_key = "${CYBORGDB_API_KEY}"
|
|
71
|
+
create_index = true
|
|
72
|
+
index_name = "my-cyborgdb-index"
|
|
73
|
+
index_type = "ivfflat"
|
|
74
|
+
|
|
75
|
+
[options]
|
|
76
|
+
batch_size = 200
|
|
77
|
+
checkpoint_every = 10
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Run the migration:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
cyborgdb-migrate --config migration.toml
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Resume an interrupted migration:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
cyborgdb-migrate --config migration.toml --resume
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Configuration Reference
|
|
93
|
+
|
|
94
|
+
### `[source]`
|
|
95
|
+
|
|
96
|
+
| Key | Required | Description |
|
|
97
|
+
|-----|----------|-------------|
|
|
98
|
+
| `type` | Yes | Source database: `pinecone`, `qdrant`, `weaviate`, `chromadb`, `milvus` |
|
|
99
|
+
| `index` | Yes | Index or collection name to migrate from |
|
|
100
|
+
| `namespace` | No | Namespace/partition to migrate (Pinecone, Milvus) |
|
|
101
|
+
| *(other keys)* | Varies | Passed as credentials to the source connector (e.g. `api_key`, `host`) |
|
|
102
|
+
|
|
103
|
+
### `[destination]`
|
|
104
|
+
|
|
105
|
+
| Key | Required | Description |
|
|
106
|
+
|-----|----------|-------------|
|
|
107
|
+
| `host` | Yes | CyborgDB server URL |
|
|
108
|
+
| `api_key` | Yes | CyborgDB API key |
|
|
109
|
+
| `index_name` | Yes | Destination index name |
|
|
110
|
+
| `create_index` | No | `true` (default) to create a new index, `false` to use existing |
|
|
111
|
+
| `index_type` | No | `ivfflat` (default) or `ivfpq` |
|
|
112
|
+
| `index_key` | No | Hex-encoded encryption key (for existing indexes); base64 is also accepted for backwards compatibility |
|
|
113
|
+
| `key_file` | No | Path to encryption key file (for existing indexes) |
|
|
114
|
+
|
|
115
|
+
### `[options]`
|
|
116
|
+
|
|
117
|
+
| Key | Default | Description |
|
|
118
|
+
|-----|---------|-------------|
|
|
119
|
+
| `batch_size` | `100` | Vectors per batch |
|
|
120
|
+
| `checkpoint_every` | `10` | Save checkpoint every N batches |
|
|
121
|
+
| `spot_check_per_batch` | `4` | Vectors sampled per batch for verification |
|
|
122
|
+
|
|
123
|
+
Environment variables can be referenced as `${VAR_NAME}` anywhere in the config.
|
|
124
|
+
|
|
125
|
+
## CLI Options
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
cyborgdb-migrate [OPTIONS]
|
|
129
|
+
|
|
130
|
+
Options:
|
|
131
|
+
--config FILE TOML config file for non-interactive mode
|
|
132
|
+
--resume Resume from checkpoint (requires --config)
|
|
133
|
+
--batch-size INT Override batch size (default: 100)
|
|
134
|
+
--log-file FILE Log file path (default: ./cyborgdb-migrate.log)
|
|
135
|
+
--quiet Minimal output (non-interactive only)
|
|
136
|
+
--version Show version and exit
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Supported Sources
|
|
140
|
+
|
|
141
|
+
| Source | Extras | Notes |
|
|
142
|
+
|--------|--------|-------|
|
|
143
|
+
| [Pinecone](https://www.pinecone.io/) | `pinecone` | Supports namespaces |
|
|
144
|
+
| [Qdrant](https://qdrant.tech/) | `qdrant` | Scroll-based pagination |
|
|
145
|
+
| [Weaviate](https://weaviate.io/) | `weaviate` | Supports named vectors |
|
|
146
|
+
| [ChromaDB](https://www.trychroma.com/) | `chromadb` | Local and remote modes |
|
|
147
|
+
| [Milvus](https://milvus.io/) | `milvus` | Supports partitions, content field heuristic |
|
|
148
|
+
|
|
149
|
+
## Exit Codes
|
|
150
|
+
|
|
151
|
+
| Code | Meaning |
|
|
152
|
+
|------|---------|
|
|
153
|
+
| `0` | Success |
|
|
154
|
+
| `1` | Configuration or connection error |
|
|
155
|
+
| `2` | Migration completed but spot-check verification failed |
|
|
156
|
+
|
|
157
|
+
## License
|
|
158
|
+
|
|
159
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cyborgdb-migrate"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "TUI wizard and CLI for migrating vector data into CyborgDB"
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Cyborg Inc."},
|
|
13
|
+
]
|
|
14
|
+
keywords = [
|
|
15
|
+
"vector-database",
|
|
16
|
+
"migration",
|
|
17
|
+
"cyborgdb",
|
|
18
|
+
"pinecone",
|
|
19
|
+
"qdrant",
|
|
20
|
+
"weaviate",
|
|
21
|
+
"chromadb",
|
|
22
|
+
"milvus",
|
|
23
|
+
"embeddings",
|
|
24
|
+
"tui",
|
|
25
|
+
]
|
|
26
|
+
classifiers = [
|
|
27
|
+
"Environment :: Console",
|
|
28
|
+
"Intended Audience :: Developers",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Programming Language :: Python :: 3.12",
|
|
33
|
+
"Programming Language :: Python :: 3.13",
|
|
34
|
+
"Programming Language :: Python :: 3.14",
|
|
35
|
+
"Topic :: Database",
|
|
36
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
37
|
+
]
|
|
38
|
+
dependencies = [
|
|
39
|
+
"textual>=0.86.0",
|
|
40
|
+
"cyborgdb",
|
|
41
|
+
"numpy>=1.21.0",
|
|
42
|
+
"tomli>=1.1.0;python_version<'3.11'",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.optional-dependencies]
|
|
46
|
+
pinecone = ["pinecone"]
|
|
47
|
+
qdrant = ["qdrant-client"]
|
|
48
|
+
weaviate = ["weaviate-client"]
|
|
49
|
+
chromadb = ["chromadb"]
|
|
50
|
+
milvus = ["pymilvus"]
|
|
51
|
+
syntax = [
|
|
52
|
+
"tree-sitter",
|
|
53
|
+
"tree-sitter-python",
|
|
54
|
+
]
|
|
55
|
+
all = [
|
|
56
|
+
"pinecone",
|
|
57
|
+
"qdrant-client",
|
|
58
|
+
"weaviate-client",
|
|
59
|
+
"chromadb",
|
|
60
|
+
"pymilvus",
|
|
61
|
+
]
|
|
62
|
+
dev = [
|
|
63
|
+
"pytest",
|
|
64
|
+
"pytest-asyncio",
|
|
65
|
+
"textual-dev",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
[project.scripts]
|
|
69
|
+
cyborgdb-migrate = "cyborgdb_migrate.cli:main"
|
|
70
|
+
|
|
71
|
+
[project.urls]
|
|
72
|
+
Homepage = "https://github.com/cyborginc/cyborgdb-migrate"
|
|
73
|
+
Repository = "https://github.com/cyborginc/cyborgdb-migrate"
|
|
74
|
+
Issues = "https://github.com/cyborginc/cyborgdb-migrate/issues"
|
|
75
|
+
|
|
76
|
+
[tool.setuptools.packages.find]
|
|
77
|
+
where = ["src"]
|
|
78
|
+
|
|
79
|
+
[tool.ruff]
|
|
80
|
+
line-length = 100
|
|
81
|
+
target-version = "py310"
|
|
82
|
+
|
|
83
|
+
[tool.ruff.lint]
|
|
84
|
+
select = ["E", "F", "I", "W"]
|
|
85
|
+
|
|
86
|
+
[tool.pytest.ini_options]
|
|
87
|
+
testpaths = ["tests"]
|
|
88
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
from textual.app import App
|
|
4
|
+
from textual.theme import Theme
|
|
5
|
+
|
|
6
|
+
from cyborgdb_migrate.models import MigrationState
|
|
7
|
+
from cyborgdb_migrate.screens.welcome import WelcomeScreen
|
|
8
|
+
|
|
9
|
+
# Terminal size constants. They are not referenced anywhere in this module;
# presumably other screens use them as the minimum supported size — TODO confirm.
MIN_WIDTH = 80
MIN_HEIGHT = 24

# Dark colour theme registered and activated by MigrateApp.on_mount under the
# name "cyborgdb".
CYBORGDB_THEME = Theme(
    name="cyborgdb",
    dark=True,
    # Brand colours
    primary="#217684",
    secondary="#56D3DB",
    accent="#38C3EE",
    # Backgrounds
    background="#111d20",
    surface="#1a2a2e",
    panel="#1e3438",
    # Status colours
    success="#2ecc71",
    warning="#f5a623",
    error="#e74c3c",
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class MigrateApp(App):
    """Textual application hosting the CyborgDB migration wizard.

    The app owns a single ``MigrationState`` object that is handed to the
    first screen (``WelcomeScreen``) when the app is mounted. Quitting is
    only possible by pressing Ctrl+C twice within one second.
    """

    # NOTE(review): no theme.css appears in the file list of this release
    # (only .py modules are listed) — confirm the stylesheet is actually
    # shipped as package data.
    CSS_PATH = "theme.css"
    TITLE = "CyborgDB Migration Wizard"

    BINDINGS = [
        ("ctrl+q", "", ""),  # unbind default quit
    ]

    # time.monotonic() value of the most recent "first" Ctrl+C press.
    _ctrl_c_time: float = 0.0

    def __init__(self, state: MigrationState | None = None) -> None:
        """Create the app, using *state* or a fresh ``MigrationState``."""
        super().__init__()
        self.state = state if state else MigrationState()

    def action_quit(self) -> None:
        """Disable the default quit action."""

    def _on_key(self, event) -> None:
        """Exit on a second Ctrl+C arriving less than one second after the first."""
        if event.key != "ctrl+c":
            return

        pressed_at = time.monotonic()
        if pressed_at - self._ctrl_c_time < 1.0:
            self.exit()
            return

        # First press (or the previous one is too old): remember it and
        # tell the user how to confirm.
        self._ctrl_c_time = pressed_at
        self.notify("Press Ctrl+C again to quit", timeout=1)

    def on_mount(self) -> None:
        """Activate the CyborgDB theme and show the welcome screen."""
        self.register_theme(CYBORGDB_THEME)
        self.theme = "cyborgdb"
        self.push_screen(WelcomeScreen(self.state))
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import tempfile
|
|
6
|
+
from dataclasses import asdict, dataclass
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
CHECKPOINT_DIR = "./cyborgdb-migrate-checkpoints"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
|
|
14
|
+
class CheckpointData:
|
|
15
|
+
version: int = 1
|
|
16
|
+
source_type: str = ""
|
|
17
|
+
source_index: str = ""
|
|
18
|
+
dest_index: str = ""
|
|
19
|
+
namespace: str | None = None
|
|
20
|
+
cursor: str | None = None
|
|
21
|
+
vectors_migrated: int = 0
|
|
22
|
+
vectors_total: int = 0
|
|
23
|
+
started_at: str = ""
|
|
24
|
+
updated_at: str = ""
|
|
25
|
+
batch_size: int = 100
|
|
26
|
+
batches_completed: int = 0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def checkpoint_path(source_type: str, source_index: str, dest_index: str) -> Path:
    """Build the checkpoint file location for one migration.

    The file name is ``<source_type>_<source_index>_<dest_index>.json`` with
    every forward slash and backslash replaced by an underscore, placed
    inside ``CHECKPOINT_DIR``.
    """
    # Map both path separators to "_" so the name stays a single path component.
    separators_to_underscore = {ord("/"): "_", ord("\\"): "_"}
    file_name = "_".join((source_type, source_index, dest_index)) + ".json"
    return Path(CHECKPOINT_DIR) / file_name.translate(separators_to_underscore)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def save_checkpoint(data: CheckpointData) -> Path:
    """Atomically save checkpoint data to disk.

    ``data.updated_at`` is set to the current UTC time, and ``data.started_at``
    is set to the same value if it is still empty. The record is written to a
    temporary file in the checkpoint directory and then renamed over the
    final path, so a reader never observes a partially written checkpoint.

    Args:
        data: Checkpoint to persist. Its timestamp fields are mutated.

    Returns:
        The path of the checkpoint file that was written.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    path = checkpoint_path(data.source_type, data.source_index, data.dest_index)
    path.parent.mkdir(parents=True, exist_ok=True)

    data.updated_at = datetime.now(timezone.utc).isoformat()
    if not data.started_at:
        data.started_at = data.updated_at

    # The temp file lives in the destination directory so os.replace() stays
    # on one filesystem.
    fd, tmp_path = tempfile.mkstemp(dir=path.parent, suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(asdict(data), f, indent=2)
            # Push the bytes to disk before the rename so a crash cannot
            # leave an empty or truncated file under the final name.
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_path, path)
    except BaseException:
        # BaseException (not just Exception) so that Ctrl+C in the middle of
        # a write does not leave a stray .tmp file behind. Always re-raised.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise

    return path
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def load_checkpoint(source_type: str, source_index: str, dest_index: str) -> CheckpointData | None:
    """Load the checkpoint for a migration, if one exists.

    Args:
        source_type: Source database type used in the checkpoint file name.
        source_index: Source index/collection name.
        dest_index: Destination index name.

    Returns:
        The stored ``CheckpointData``, or ``None`` when no checkpoint file
        exists for this migration.

    Raises:
        json.JSONDecodeError: If the file is not valid JSON.
        TypeError: If the stored object has keys ``CheckpointData`` does not accept.
    """
    path = checkpoint_path(source_type, source_index, dest_index)
    # Open directly instead of checking exists() first: the file may be
    # removed between the check and the open.
    try:
        # json.dump writes ASCII-only output by default, so files written
        # under any platform encoding remain readable as UTF-8.
        with open(path, encoding="utf-8") as f:
            raw = json.load(f)
    except FileNotFoundError:
        return None
    return CheckpointData(**raw)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def delete_checkpoint(source_type: str, source_index: str, dest_index: str) -> None:
    """Delete the checkpoint file for a migration; do nothing if it is absent.

    Args:
        source_type: Source database type used in the checkpoint file name.
        source_index: Source index/collection name.
        dest_index: Destination index name.
    """
    path = checkpoint_path(source_type, source_index, dest_index)
    # missing_ok avoids the race of a separate exists() check followed by unlink().
    path.unlink(missing_ok=True)
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
from cyborgdb_migrate import __version__
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def main(argv: list[str] | None = None) -> None:
    """Command-line entry point for ``cyborgdb-migrate``.

    With ``--config`` the migration runs non-interactively via
    ``run_headless``; otherwise the Textual wizard is started.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        SystemExit: With code 1 when ``--batch-size`` is not positive or when
            ``--resume`` is given without ``--config``; argparse itself exits
            on unparseable arguments and for ``--version``.
    """
    parser = argparse.ArgumentParser(
        prog="cyborgdb-migrate",
        description="Migrate vector data from other databases into CyborgDB",
    )
    parser.add_argument(
        "--config",
        metavar="FILE",
        help="TOML config file for non-interactive mode",
    )
    parser.add_argument(
        "--resume",
        action="store_true",
        help="Resume from checkpoint (non-interactive only)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=100,
        help="Vectors per batch (default: 100)",
    )
    parser.add_argument(
        "--log-file",
        metavar="FILE",
        default="./cyborgdb-migrate.log",
        help="Log file path (default: ./cyborgdb-migrate.log)",
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Minimal output (non-interactive only)",
    )
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    args = parser.parse_args(argv)

    # A zero or negative batch size can never make progress; reject it up
    # front. Exit code 1 matches the other usage error reported below.
    if args.batch_size < 1:
        print("Error: --batch-size must be a positive integer", file=sys.stderr)
        raise SystemExit(1)

    if args.config:
        setup_logging(args.log_file)
        run_headless(args.config, args.batch_size, args.resume, args.log_file, args.quiet)
        return

    if args.resume:
        print("Error: --resume is only supported with --config", file=sys.stderr)
        raise SystemExit(1)

    setup_logging(args.log_file)

    # Imported here so the headless path and --version do not load the TUI modules.
    from cyborgdb_migrate.app import MigrateApp
    from cyborgdb_migrate.models import MigrationState

    state = MigrationState()
    state.batch_size = args.batch_size
    app = MigrateApp(state)
    app.run()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def setup_logging(log_file: str) -> None:
    """Configure the root logger to write INFO-and-above records to *log_file*.

    Args:
        log_file: Path of the log file passed to ``logging.basicConfig``.
    """
    import logging

    logging.basicConfig(
        filename=log_file,
        # Pin the encoding so records containing non-ASCII text (for example
        # index names) are written the same way on every platform.
        encoding="utf-8",
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _resolve_source_class(registry, source_type):
    """Return the connector class registered for *source_type*, or ``None``.

    An exact case-insensitive name match always wins; only when there is none
    is a substring match (``source_type`` contained in a registry name)
    accepted. An empty ``source_type`` matches nothing.
    """
    wanted = source_type.lower()
    for name, cls in registry.items():
        if name.lower() == wanted:
            return cls
    if not wanted:
        # "" is a substring of every name; never treat it as a match.
        return None
    for name, cls in registry.items():
        if wanted in name.lower():
            return cls
    return None


def run_headless(
    config_path: str,
    batch_size: int,
    resume: bool,
    log_file: str,
    quiet: bool,
) -> None:
    """Run a migration non-interactively from a TOML config file.

    Steps: load the config, connect to and inspect the source, connect to
    CyborgDB, create or load the destination index, run the migration engine
    and report the result on stderr.

    Args:
        config_path: Path of the TOML config file.
        batch_size: Batch size from the command line. The value 100 is treated
            as "not given" and leaves the config value untouched.
        resume: Passed to the engine to resume from a checkpoint.
        log_file: Log file path. Not used in this function; logging is
            configured by the caller.
        quiet: Suppress progress and status output.

    Raises:
        SystemExit: Code 1 for an unknown source type, a missing key for an
            existing index, or a dimension mismatch; code 2 when the
            migration finished but the spot check failed.
    """
    import logging
    import threading

    from rich.console import Console
    from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn

    from cyborgdb_migrate.config import load_config
    from cyborgdb_migrate.destination import CyborgDestination
    from cyborgdb_migrate.engine import MigrationEngine
    from cyborgdb_migrate.sources import SOURCE_REGISTRY

    logger = logging.getLogger("cyborgdb_migrate.headless")
    console = Console(stderr=True)

    config = load_config(config_path)

    # Override batch_size from CLI if provided and different from default.
    # NOTE: the CLI default is also 100, so an explicit "--batch-size 100"
    # cannot be told apart from "not given" and does not override the config.
    if batch_size != 100:
        config.batch_size = batch_size

    # Resolve source
    source_type = config.source_type
    source_cls = _resolve_source_class(SOURCE_REGISTRY, source_type)
    if source_cls is None:
        console.print(f"[red]Unknown source type: {source_type}[/red]")
        raise SystemExit(1)

    source = source_cls()
    source.configure(config.source_credentials)
    if not quiet:
        console.print(f"Connecting to {source.name()}...")
    source.connect()
    if not quiet:
        console.print(f"[green]Connected to {source.name()}[/green]")

    # Inspect source
    source_info = source.inspect(config.source_index)
    if not quiet:
        console.print(
            f"Source: {source_info.index_or_collection_name} "
            f"({source_info.dimension}d, {source_info.vector_count:,} vectors)"
        )

    # Connect to CyborgDB
    destination = CyborgDestination()
    destination.connect(config.destination_host, config.destination_api_key)
    if not quiet:
        console.print("[green]Connected to CyborgDB[/green]")

    # Set up index
    if config.create_index:
        from cyborgdb import Client

        index_key = Client.generate_key(save=False)
        # Printed even with --quiet: this is the only place the freshly
        # generated key is shown, and (as far as this function shows) it is
        # not stored anywhere else, so hiding it would presumably leave the
        # new index unreadable. It is deliberately not written to the log file.
        console.print(f"Generated encryption key (hex): {index_key.hex()}")

        from cyborgdb_migrate.destination import compute_n_lists

        n_lists = compute_n_lists(source_info.vector_count)
        destination.create_index(
            name=config.index_name,
            dimension=source_info.dimension,
            index_type=config.index_type or "ivfflat",
            index_key=index_key,
            n_lists=n_lists,
            metric=source_info.metric,
        )
    else:
        if config.index_key:
            index_key = _decode_key(config.index_key)
        elif config.key_file:
            with open(config.key_file, encoding="utf-8") as f:
                index_key = _decode_key(f.read().strip())
        else:
            console.print("[red]No index key provided for existing index[/red]")
            raise SystemExit(1)
        destination.load_index(config.index_name, index_key)

        # Validate dimension match for existing index
        dest_dim = destination.get_index_dimension()
        if dest_dim is not None and dest_dim != source_info.dimension:
            console.print(
                f"[red]Dimension mismatch: source has {source_info.dimension}d, "
                f"destination has {dest_dim}d[/red]"
            )
            raise SystemExit(1)

    # Run migration
    cancel_event = threading.Event()

    def on_progress(update):
        pass  # Progress handled by rich progress bar below

    engine = MigrationEngine(
        source=source,
        destination=destination,
        source_info=source_info,
        batch_size=config.batch_size,
        checkpoint_every=config.checkpoint_every,
        spot_check_per_batch=config.spot_check_per_batch,
        on_progress=on_progress,
        cancel_event=cancel_event,
    )

    if quiet:
        result = engine.run(
            namespace=config.source_namespace,
            resume=resume,
        )
    else:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
            TimeElapsedColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Migrating...", total=source_info.vector_count)

            def progress_callback(update):
                progress.update(task, completed=update.vectors_migrated)

            # Swap the no-op callback for one that drives the progress bar.
            engine.on_progress = progress_callback
            result = engine.run(
                namespace=config.source_namespace,
                resume=resume,
            )

    if not quiet:
        console.print("\n[green]Migration complete![/green]")
        console.print(f"  Vectors:    {result.vectors_migrated:,} / {result.vectors_expected:,}")
        console.print(f"  Duration:   {result.duration_seconds:.1f}s")
        console.print(f"  Spot check: {'PASSED' if result.spot_check_passed else 'FAILED'}")
        console.print(f"  Details:    {result.spot_check_details}")

    if not result.spot_check_passed:
        logger.warning("Spot check failed: %s", result.spot_check_details)
        raise SystemExit(2)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _decode_key(value: str) -> bytes:
|
|
219
|
+
"""Decode a key from hex, falling back to base64 for backwards compatibility."""
|
|
220
|
+
try:
|
|
221
|
+
return bytes.fromhex(value)
|
|
222
|
+
except ValueError:
|
|
223
|
+
import base64
|
|
224
|
+
|
|
225
|
+
return base64.b64decode(value)
|