foodforthought-cli 0.2.7__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. ate/__init__.py +6 -0
  2. ate/__main__.py +16 -0
  3. ate/auth/__init__.py +1 -0
  4. ate/auth/device_flow.py +141 -0
  5. ate/auth/token_store.py +96 -0
  6. ate/behaviors/__init__.py +100 -0
  7. ate/behaviors/approach.py +399 -0
  8. ate/behaviors/common.py +686 -0
  9. ate/behaviors/tree.py +454 -0
  10. ate/cli.py +855 -3995
  11. ate/client.py +90 -0
  12. ate/commands/__init__.py +168 -0
  13. ate/commands/auth.py +389 -0
  14. ate/commands/bridge.py +448 -0
  15. ate/commands/data.py +185 -0
  16. ate/commands/deps.py +111 -0
  17. ate/commands/generate.py +384 -0
  18. ate/commands/memory.py +907 -0
  19. ate/commands/parts.py +166 -0
  20. ate/commands/primitive.py +399 -0
  21. ate/commands/protocol.py +288 -0
  22. ate/commands/recording.py +524 -0
  23. ate/commands/repo.py +154 -0
  24. ate/commands/simulation.py +291 -0
  25. ate/commands/skill.py +303 -0
  26. ate/commands/skills.py +487 -0
  27. ate/commands/team.py +147 -0
  28. ate/commands/workflow.py +271 -0
  29. ate/detection/__init__.py +38 -0
  30. ate/detection/base.py +142 -0
  31. ate/detection/color_detector.py +399 -0
  32. ate/detection/trash_detector.py +322 -0
  33. ate/drivers/__init__.py +39 -0
  34. ate/drivers/ble_transport.py +405 -0
  35. ate/drivers/mechdog.py +942 -0
  36. ate/drivers/wifi_camera.py +477 -0
  37. ate/interfaces/__init__.py +187 -0
  38. ate/interfaces/base.py +273 -0
  39. ate/interfaces/body.py +267 -0
  40. ate/interfaces/detection.py +282 -0
  41. ate/interfaces/locomotion.py +422 -0
  42. ate/interfaces/manipulation.py +408 -0
  43. ate/interfaces/navigation.py +389 -0
  44. ate/interfaces/perception.py +362 -0
  45. ate/interfaces/sensors.py +247 -0
  46. ate/interfaces/types.py +371 -0
  47. ate/llm_proxy.py +239 -0
  48. ate/mcp_server.py +387 -0
  49. ate/memory/__init__.py +35 -0
  50. ate/memory/cloud.py +244 -0
  51. ate/memory/context.py +269 -0
  52. ate/memory/embeddings.py +184 -0
  53. ate/memory/export.py +26 -0
  54. ate/memory/merge.py +146 -0
  55. ate/memory/migrate/__init__.py +34 -0
  56. ate/memory/migrate/base.py +89 -0
  57. ate/memory/migrate/pipeline.py +189 -0
  58. ate/memory/migrate/sources/__init__.py +13 -0
  59. ate/memory/migrate/sources/chroma.py +170 -0
  60. ate/memory/migrate/sources/pinecone.py +120 -0
  61. ate/memory/migrate/sources/qdrant.py +110 -0
  62. ate/memory/migrate/sources/weaviate.py +160 -0
  63. ate/memory/reranker.py +353 -0
  64. ate/memory/search.py +26 -0
  65. ate/memory/store.py +548 -0
  66. ate/recording/__init__.py +83 -0
  67. ate/recording/demonstration.py +378 -0
  68. ate/recording/session.py +415 -0
  69. ate/recording/upload.py +304 -0
  70. ate/recording/visual.py +416 -0
  71. ate/recording/wrapper.py +95 -0
  72. ate/robot/__init__.py +221 -0
  73. ate/robot/agentic_servo.py +856 -0
  74. ate/robot/behaviors.py +493 -0
  75. ate/robot/ble_capture.py +1000 -0
  76. ate/robot/ble_enumerate.py +506 -0
  77. ate/robot/calibration.py +668 -0
  78. ate/robot/calibration_state.py +388 -0
  79. ate/robot/commands.py +3735 -0
  80. ate/robot/direction_calibration.py +554 -0
  81. ate/robot/discovery.py +441 -0
  82. ate/robot/introspection.py +330 -0
  83. ate/robot/llm_system_id.py +654 -0
  84. ate/robot/locomotion_calibration.py +508 -0
  85. ate/robot/manager.py +270 -0
  86. ate/robot/marker_generator.py +611 -0
  87. ate/robot/perception.py +502 -0
  88. ate/robot/primitives.py +614 -0
  89. ate/robot/profiles.py +281 -0
  90. ate/robot/registry.py +322 -0
  91. ate/robot/servo_mapper.py +1153 -0
  92. ate/robot/skill_upload.py +675 -0
  93. ate/robot/target_calibration.py +500 -0
  94. ate/robot/teach.py +515 -0
  95. ate/robot/types.py +242 -0
  96. ate/robot/visual_labeler.py +1048 -0
  97. ate/robot/visual_servo_loop.py +494 -0
  98. ate/robot/visual_servoing.py +570 -0
  99. ate/robot/visual_system_id.py +906 -0
  100. ate/transports/__init__.py +121 -0
  101. ate/transports/base.py +394 -0
  102. ate/transports/ble.py +405 -0
  103. ate/transports/hybrid.py +444 -0
  104. ate/transports/serial.py +345 -0
  105. ate/urdf/__init__.py +30 -0
  106. ate/urdf/capture.py +582 -0
  107. ate/urdf/cloud.py +491 -0
  108. ate/urdf/collision.py +271 -0
  109. ate/urdf/commands.py +708 -0
  110. ate/urdf/depth.py +360 -0
  111. ate/urdf/inertial.py +312 -0
  112. ate/urdf/kinematics.py +330 -0
  113. ate/urdf/lifting.py +415 -0
  114. ate/urdf/meshing.py +300 -0
  115. ate/urdf/models/__init__.py +110 -0
  116. ate/urdf/models/depth_anything.py +253 -0
  117. ate/urdf/models/sam2.py +324 -0
  118. ate/urdf/motion_analysis.py +396 -0
  119. ate/urdf/pipeline.py +468 -0
  120. ate/urdf/scale.py +256 -0
  121. ate/urdf/scan_session.py +411 -0
  122. ate/urdf/segmentation.py +299 -0
  123. ate/urdf/synthesis.py +319 -0
  124. ate/urdf/topology.py +336 -0
  125. ate/urdf/validation.py +371 -0
  126. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/METADATA +9 -1
  127. foodforthought_cli-0.3.0.dist-info/RECORD +166 -0
  128. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/WHEEL +1 -1
  129. foodforthought_cli-0.2.7.dist-info/RECORD +0 -44
  130. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/entry_points.txt +0 -0
  131. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/top_level.txt +0 -0
ate/memory/migrate/base.py
@@ -0,0 +1,89 @@
+ """Base classes and data structures for migration."""
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from typing import Optional, List, Dict, Any, Tuple
+
+
+ @dataclass
+ class VectorRecord:
+     """A vector record with metadata."""
+     id: str
+     vector: List[float]
+     text: Optional[str] = None
+     metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class MigrationEstimate:
+     """Estimation of migration resources and time."""
+     total_vectors: int
+     dimensions: int
+     estimated_mv2_bytes: int
+     estimated_seconds: float
+     source_size_bytes: Optional[int] = None
+     compression_ratio: Optional[float] = None
+
+
+ @dataclass
+ class MigrationResult:
+     """Result of a migration operation."""
+     source_type: str
+     source_name: str
+     output_path: str
+     total_migrated: int
+     total_skipped: int
+     duration_seconds: float
+     output_size_bytes: int
+     compression_ratio: Optional[float] = None
+     errors: List[str] = field(default_factory=list)
+
+
+ @dataclass
+ class MigrationCheckpoint:
+     """Migration checkpoint for resuming."""
+     source_type: str
+     source_name: str
+     output_path: str
+     last_cursor: Optional[str]
+     records_completed: int
+     started_at: str
+
+
+ class MigrationSource(ABC):
+     """Abstract base class for migration sources."""
+
+     @property
+     @abstractmethod
+     def source_type(self) -> str:
+         """Return the source type (e.g., 'pinecone', 'qdrant')."""
+         pass
+
+     @property
+     @abstractmethod
+     def source_name(self) -> str:
+         """Return the source name (e.g., index name, collection name)."""
+         pass
+
+     @abstractmethod
+     def connect(self) -> None:
+         """Connect to the source."""
+         pass
+
+     @abstractmethod
+     def estimate(self) -> MigrationEstimate:
+         """Estimate migration size and time."""
+         pass
+
+     @abstractmethod
+     def fetch_batch(self, batch_size: int = 10000, cursor: Optional[str] = None) -> Tuple[List[VectorRecord], Optional[str]]:
+         """Fetch a batch of records.
+
+         Returns:
+             Tuple of (records, next_cursor). next_cursor is None if this is the last batch.
+         """
+         pass
+
+     @abstractmethod
+     def close(self) -> None:
+         """Close the connection and clean up resources."""
+         pass
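
The ABC above is the entire contract a new backend has to satisfy: two identifying properties plus connect(), estimate(), fetch_batch(), and close(). As a minimal sketch (this class is illustrative, not part of the package), an in-memory source fits in a few lines:

from typing import List, Optional, Tuple

from ate.memory.migrate.base import MigrationEstimate, MigrationSource, VectorRecord


class ListMigrationSource(MigrationSource):
    """Hypothetical source that serves records from a Python list."""

    def __init__(self, records: List[VectorRecord]):
        self._records = records

    @property
    def source_type(self) -> str:
        return "list"

    @property
    def source_name(self) -> str:
        return "in-memory"

    def connect(self) -> None:
        pass  # nothing to connect to

    def estimate(self) -> MigrationEstimate:
        dims = len(self._records[0].vector) if self._records else 0
        return MigrationEstimate(
            total_vectors=len(self._records),
            dimensions=dims,
            estimated_mv2_bytes=len(self._records) * (dims * 4 + 1024),
            estimated_seconds=0.0,
        )

    def fetch_batch(self, batch_size: int = 10000,
                    cursor: Optional[str] = None) -> Tuple[List[VectorRecord], Optional[str]]:
        # The cursor is an opaque string; here it encodes a list offset
        offset = int(cursor) if cursor else 0
        batch = self._records[offset:offset + batch_size]
        next_cursor = str(offset + batch_size) if offset + batch_size < len(self._records) else None
        return batch, next_cursor

    def close(self) -> None:
        pass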
ate/memory/migrate/pipeline.py
@@ -0,0 +1,189 @@
+ """Migration pipeline for orchestrating the migration process."""
+ import json
+ import os
+ import time
+ from typing import Optional, Callable
+
+ from .base import MigrationSource, MigrationResult, MigrationCheckpoint
+
+
+ class MigrationPipeline:
+     """Pipeline for orchestrating vector database migrations."""
+
+     def __init__(self, source: MigrationSource, output_path: str,
+                  batch_size: int = 10000, checkpoint_path: Optional[str] = None):
+         """Initialize the migration pipeline.
+
+         Args:
+             source: The migration source to read from
+             output_path: Path to write the .mv2 output file
+             batch_size: Number of records to fetch per batch
+             checkpoint_path: Optional path for the checkpoint file
+         """
+         self.source = source
+         self.output_path = output_path
+         self.batch_size = batch_size
+         self.checkpoint_path = checkpoint_path
+         self.progress_callbacks = []
+
+     def on_progress(self, callback: Callable[[int, int], None]) -> None:
+         """Register a progress callback function.
+
+         Args:
+             callback: Function called with (records_done, total_estimate)
+         """
+         self.progress_callbacks.append(callback)
+
+     def run(self, dry_run: bool = False) -> MigrationResult:
+         """Run the migration.
+
+         Args:
+             dry_run: If True, only estimate without migrating
+
+         Returns:
+             MigrationResult with operation details
+         """
+         start_time = time.time()
+
+         try:
+             self.source.connect()
+
+             # Get estimate
+             estimate = self.source.estimate()
+
+             if dry_run:
+                 # For dry run, return early with an estimate-based result
+                 return MigrationResult(
+                     source_type=self.source.source_type,
+                     source_name=self.source.source_name,
+                     output_path=self.output_path,
+                     total_migrated=0,
+                     total_skipped=0,
+                     duration_seconds=time.time() - start_time,
+                     output_size_bytes=0
+                 )
+
+             # Create output .mv2 using MemoryStore
+             parent_dir = os.path.dirname(self.output_path)
+             if parent_dir:
+                 os.makedirs(parent_dir, exist_ok=True)
+
+             from ..store import MemoryStore
+             output_store = MemoryStore.create(self.output_path)
+
+             total_migrated = 0
+             total_skipped = 0
+             errors = []
+             cursor = None
+
+             try:
+                 while True:
+                     try:
+                         records, next_cursor = self.source.fetch_batch(self.batch_size, cursor)
+
+                         if not records:
+                             break
+
+                         # Write records to .mv2 via MemoryStore
+                         for record in records:
+                             text = record.text or f"[vector-only record id={record.id}]"
+                             metadata = dict(record.metadata) if record.metadata else {}
+                             metadata['_source_id'] = record.id
+                             metadata['_source_type'] = self.source.source_type
+
+                             try:
+                                 output_store.add(
+                                     text=text,
+                                     metadata=metadata
+                                 )
+                                 total_migrated += 1
+                             except Exception as e:
+                                 total_skipped += 1
+                                 errors.append(f"Record {record.id}: {str(e)}")
+
+                         # Update checkpoint
+                         if self.checkpoint_path:
+                             checkpoint = MigrationCheckpoint(
+                                 source_type=self.source.source_type,
+                                 source_name=self.source.source_name,
+                                 output_path=self.output_path,
+                                 last_cursor=cursor,
+                                 records_completed=total_migrated,
+                                 started_at=time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(start_time))
+                             )
+                             self._write_checkpoint(checkpoint)
+
+                         # Call progress callbacks
+                         for callback in self.progress_callbacks:
+                             callback(total_migrated, estimate.total_vectors)
+
+                         cursor = next_cursor
+                         if cursor is None:
+                             break
+
+                     except Exception as e:
+                         errors.append(str(e))
+                         break
+             finally:
+                 output_store.close()
+
+             # Calculate output file size
+             output_size = os.path.getsize(self.output_path) if os.path.exists(self.output_path) else 0
+
+             return MigrationResult(
+                 source_type=self.source.source_type,
+                 source_name=self.source.source_name,
+                 output_path=self.output_path,
+                 total_migrated=total_migrated,
+                 total_skipped=total_skipped,
+                 duration_seconds=time.time() - start_time,
+                 output_size_bytes=output_size,
+                 errors=errors
+             )
+
+         except Exception as e:
+             return MigrationResult(
+                 source_type=self.source.source_type,
+                 source_name=self.source.source_name,
+                 output_path=self.output_path,
+                 total_migrated=0,
+                 total_skipped=0,
+                 duration_seconds=time.time() - start_time,
+                 output_size_bytes=0,
+                 errors=[str(e)]
+             )
+         finally:
+             try:
+                 self.source.close()
+             except Exception:
+                 pass
+
+     def resume(self) -> MigrationResult:
+         """Resume migration from checkpoint.
+
+         Raises:
+             NotImplementedError: Resume is not yet implemented.
+         """
+         raise NotImplementedError(
+             "Resume not yet implemented; use run() to restart migration"
+         )
+
+     def _write_checkpoint(self, checkpoint: MigrationCheckpoint) -> None:
+         """Write checkpoint to file."""
+         if not self.checkpoint_path:
+             return
+
+         checkpoint_data = {
+             'source_type': checkpoint.source_type,
+             'source_name': checkpoint.source_name,
+             'output_path': checkpoint.output_path,
+             'last_cursor': checkpoint.last_cursor,
+             'records_completed': checkpoint.records_completed,
+             'started_at': checkpoint.started_at
+         }
+
+         parent = os.path.dirname(self.checkpoint_path)
+         if parent:
+             os.makedirs(parent, exist_ok=True)
+         with open(self.checkpoint_path, 'w') as f:
+             json.dump(checkpoint_data, f)
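
End to end, the pipeline reads batches from any MigrationSource, streams them into a .mv2 MemoryStore, checkpoints after each batch, and reports progress. A usage sketch, assuming qdrant-client is installed; the URL, collection, and output paths are placeholders:

from ate.memory.migrate.pipeline import MigrationPipeline
from ate.memory.migrate.sources import QdrantMigrationSource

source = QdrantMigrationSource(url="http://localhost:6333", collection_name="notes")
pipeline = MigrationPipeline(
    source,
    output_path="out/notes.mv2",
    batch_size=1000,
    checkpoint_path="out/notes.checkpoint.json",
)
pipeline.on_progress(lambda done, total: print(f"{done}/{total} records"))

# Dry run: connects and estimates, but moves no data
print(pipeline.run(dry_run=True))

# Full run: per-record failures are counted and collected, not fatal
result = pipeline.run()
print(result.total_migrated, result.total_skipped, result.errors[:5])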
ate/memory/migrate/sources/__init__.py
@@ -0,0 +1,13 @@
+ """Migration sources for various vector databases."""
+
+ from .pinecone import PineconeMigrationSource
+ from .qdrant import QdrantMigrationSource
+ from .weaviate import WeaviateMigrationSource
+ from .chroma import ChromaMigrationSource
+
+ __all__ = [
+     'PineconeMigrationSource',
+     'QdrantMigrationSource',
+     'WeaviateMigrationSource',
+     'ChromaMigrationSource'
+ ]
ate/memory/migrate/sources/chroma.py
@@ -0,0 +1,170 @@
+ """Chroma migration source implementation."""
+ from typing import Optional, List, Tuple
+
+ try:
+     import chromadb
+ except ImportError:
+     # Create a simple mock structure for testing
+     class ChromaDBMock:
+         PersistentClient = None
+         HttpClient = None
+         Client = None
+
+     chromadb = ChromaDBMock()
+
+ from ..base import MigrationSource, VectorRecord, MigrationEstimate
+
+
+ class ChromaMigrationSource(MigrationSource):
+     """Migration source for Chroma vector database."""
+
+     def __init__(self, path: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None,
+                  collection_name: str = "default", api_key: Optional[str] = None):
+         """Initialize Chroma migration source.
+
+         Args:
+             path: Path to persistent Chroma database (for PersistentClient)
+             host: Host for Chroma server (for HttpClient)
+             port: Port for Chroma server (for HttpClient)
+             collection_name: Name of the collection to migrate from
+             api_key: Optional API key for authentication
+         """
+         self.path = path
+         self.host = host
+         self.port = port
+         self.collection_name = collection_name
+         self.api_key = api_key
+         self.client = None
+         self.collection = None
+
+     @property
+     def source_type(self) -> str:
+         """Return the source type."""
+         return "chroma"
+
+     @property
+     def source_name(self) -> str:
+         """Return the source name."""
+         return self.collection_name
+
+     def connect(self) -> None:
+         """Connect to Chroma."""
+         if getattr(chromadb, 'Client', None) is None:  # True for the mock fallback above
+             raise ImportError("chromadb library is required for Chroma migration")
+
+         # Choose client type based on configuration
+         if self.path:
+             # Use persistent client
+             self.client = chromadb.PersistentClient(path=self.path)
+         elif self.host and self.port:
+             # Use HTTP client
+             from chromadb.config import Settings  # HttpClient expects a Settings object, not a dict
+             settings = {}
+             if self.api_key:
+                 settings['chroma_api_impl'] = 'chromadb.api.fastapi.FastAPI'
+                 settings['chroma_server_auth_credentials'] = self.api_key
+             self.client = chromadb.HttpClient(host=self.host, port=self.port, settings=Settings(**settings))
+         else:
+             # Use in-memory client
+             self.client = chromadb.Client()
+
+         # Fetch the collection (get_collection raises if it does not exist)
+         self.collection = self.client.get_collection(name=self.collection_name)
+
+     def estimate(self) -> MigrationEstimate:
+         """Estimate migration size and time."""
+         if not self.collection:
+             raise RuntimeError("Must call connect() first")
+
+         # Get collection count
+         total_vectors = self.collection.count()
+
+         # Peek at one record to get dimensions
+         dimensions = 768  # Default assumption
+         if total_vectors > 0:
+             try:
+                 peek_result = self.collection.peek(limit=1)
+                 if peek_result.get('embeddings') and len(peek_result['embeddings']) > 0:
+                     dimensions = len(peek_result['embeddings'][0])
+             except Exception:
+                 pass  # Use default
+
+         # Rough estimates
+         bytes_per_vector = dimensions * 4 + 1024  # 4 bytes per float + metadata overhead
+         estimated_mv2_bytes = total_vectors * bytes_per_vector
+         estimated_seconds = total_vectors / 1200.0  # Rough estimate of 1200 vectors/second
+
+         return MigrationEstimate(
+             total_vectors=total_vectors,
+             dimensions=dimensions,
+             estimated_mv2_bytes=estimated_mv2_bytes,
+             estimated_seconds=estimated_seconds
+         )
+
+     def fetch_batch(self, batch_size: int = 10000, cursor: Optional[str] = None) -> Tuple[List[VectorRecord], Optional[str]]:
+         """Fetch a batch of records from Chroma."""
+         if not self.collection:
+             raise RuntimeError("Must call connect() first")
+
+         # Parse cursor as offset if provided
+         offset = 0
+         if cursor:
+             try:
+                 offset = int(cursor)
+             except (ValueError, TypeError):
+                 offset = 0
+
+         # Get batch of records
+         result = self.collection.get(
+             limit=batch_size,
+             offset=offset,
+             include=['embeddings', 'metadatas', 'documents']
+         )
+
+         ids = result.get('ids', [])
+         embeddings = result.get('embeddings', [])
+         metadatas = result.get('metadatas', [])
+         documents = result.get('documents', [])
+
+         records = []
+         for i, record_id in enumerate(ids):
+             embedding = embeddings[i] if i < len(embeddings) else []
+             metadata = metadatas[i] if i < len(metadatas) else {}
+             document = documents[i] if i < len(documents) else None
+
+             # Extract text from document or metadata
+             text = document
+             if not text and metadata:
+                 text = metadata.get('text') or metadata.get('content')
+
+             # Create clean metadata without text fields
+             clean_metadata = {}
+             if metadata:
+                 clean_metadata = {k: v for k, v in metadata.items() if k not in ('text', 'content')}
+
+             record = VectorRecord(
+                 id=str(record_id),
+                 vector=embedding,
+                 text=text,
+                 metadata=clean_metadata
+             )
+             records.append(record)
+
+         # Determine whether more results remain: a full batch may be followed by
+         # more; a short first page still yields one optimistic probe cursor, and
+         # the following (empty) fetch terminates the caller's loop cleanly.
+         if len(ids) == batch_size:
+             next_cursor = str(offset + batch_size)
+         elif len(ids) > 0 and offset == 0:
+             # First batch with some results - optimistically assume there might be more
+             next_cursor = str(offset + batch_size)
+         else:
+             next_cursor = None
+
+         return records, next_cursor
+
+     def close(self) -> None:
+         """Close the connection and clean up resources."""
+         # Chroma doesn't require explicit cleanup
+         self.client = None
+         self.collection = None
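
connect() selects a client from the constructor arguments: path takes precedence, then host and port together, with an in-memory client as the fallback. A sketch of the three modes (paths, host, and collection name are placeholders):

from ate.memory.migrate.sources import ChromaMigrationSource

# Local persistent database
local = ChromaMigrationSource(path="./chroma_db", collection_name="docs")

# Remote Chroma server (api_key is optional)
remote = ChromaMigrationSource(host="chroma.internal", port=8000, collection_name="docs")

# In-memory client, mainly useful in tests
ephemeral = ChromaMigrationSource(collection_name="docs")

local.connect()
try:
    print(local.estimate())  # count plus sampled dimensions
    records, cursor = local.fetch_batch(batch_size=500)
finally:
    local.close()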
ate/memory/migrate/sources/pinecone.py
@@ -0,0 +1,120 @@
+ """Pinecone migration source implementation."""
+ from typing import Optional, List, Tuple
+
+ try:
+     import pinecone
+ except ImportError:
+     pinecone = None
+
+ from ..base import MigrationSource, VectorRecord, MigrationEstimate
+
+
+ class PineconeMigrationSource(MigrationSource):
+     """Migration source for Pinecone vector database."""
+
+     def __init__(self, api_key: str, index_name: str, environment: str, namespace: Optional[str] = None):
+         """Initialize Pinecone migration source.
+
+         Args:
+             api_key: Pinecone API key
+             index_name: Name of the Pinecone index
+             environment: Pinecone environment
+             namespace: Optional namespace to migrate from
+         """
+         self.api_key = api_key
+         self.index_name = index_name
+         self.environment = environment
+         self.namespace = namespace
+         self.index = None
+
+     @property
+     def source_type(self) -> str:
+         """Return the source type."""
+         return "pinecone"
+
+     @property
+     def source_name(self) -> str:
+         """Return the source name."""
+         return self.index_name
+
+     def connect(self) -> None:
+         """Connect to Pinecone."""
+         if pinecone is None:
+             raise ImportError("pinecone library is required for Pinecone migration")
+
+         pinecone.init(api_key=self.api_key, environment=self.environment)
+         self.index = pinecone.Index(self.index_name)
+
+     def estimate(self) -> MigrationEstimate:
+         """Estimate migration size and time."""
+         if not self.index:
+             raise RuntimeError("Must call connect() first")
+
+         stats = self.index.describe_index_stats()
+
+         # If namespace is specified, use namespace count, otherwise use total
+         if self.namespace and 'namespaces' in stats and self.namespace in stats['namespaces']:
+             total_vectors = stats['namespaces'][self.namespace]['vector_count']
+         else:
+             total_vectors = stats.get('total_vector_count', 0)
+
+         dimensions = stats.get('dimension', 768)
+
+         # Rough estimates
+         bytes_per_vector = dimensions * 4 + 1024  # 4 bytes per float + metadata overhead
+         estimated_mv2_bytes = total_vectors * bytes_per_vector
+         estimated_seconds = total_vectors / 1000.0  # Rough estimate of 1000 vectors/second
+
+         return MigrationEstimate(
+             total_vectors=total_vectors,
+             dimensions=dimensions,
+             estimated_mv2_bytes=estimated_mv2_bytes,
+             estimated_seconds=estimated_seconds
+         )
+
+     def fetch_batch(self, batch_size: int = 10000, cursor: Optional[str] = None) -> Tuple[List[VectorRecord], Optional[str]]:
+         """Fetch a batch of records from Pinecone."""
+         if not self.index:
+             raise RuntimeError("Must call connect() first")
+
+         # List vector IDs with pagination
+         list_response = self.index.list(
+             namespace=self.namespace,
+             limit=batch_size,
+             pagination_token=cursor
+         )
+
+         vector_ids = [vec['id'] for vec in list_response.get('vectors', [])]
+
+         if not vector_ids:
+             return [], None
+
+         # Fetch the actual vectors
+         fetch_response = self.index.fetch(vector_ids, namespace=self.namespace)
+
+         records = []
+         for vector_id, vector_data in fetch_response.get('vectors', {}).items():
+             # Extract text from metadata if present
+             metadata = vector_data.get('metadata', {})
+             text = metadata.get('text')
+
+             # Create clean metadata without text (since text has its own field)
+             clean_metadata = {k: v for k, v in metadata.items() if k != 'text'}
+
+             record = VectorRecord(
+                 id=vector_data['id'],
+                 vector=vector_data['values'],
+                 text=text,
+                 metadata=clean_metadata
+             )
+             records.append(record)
+
+         # Get next cursor from pagination
+         next_cursor = list_response.get('pagination', {}).get('next')
+
+         return records, next_cursor
+
+     def close(self) -> None:
+         """Close the connection and clean up resources."""
+         # Pinecone doesn't require explicit cleanup
+         self.index = None
1
+ """Qdrant migration source implementation."""
2
+ from typing import Optional, List, Tuple
3
+
4
+ try:
5
+ from qdrant_client import QdrantClient
6
+ except ImportError:
7
+ QdrantClient = None
8
+
9
+ from ..base import MigrationSource, VectorRecord, MigrationEstimate
10
+
11
+
12
+ class QdrantMigrationSource(MigrationSource):
13
+ """Migration source for Qdrant vector database."""
14
+
15
+ def __init__(self, url: str, collection_name: str, api_key: Optional[str] = None):
16
+ """Initialize Qdrant migration source.
17
+
18
+ Args:
19
+ url: Qdrant server URL
20
+ collection_name: Name of the collection to migrate from
21
+ api_key: Optional API key for authentication
22
+ """
23
+ self.url = url
24
+ self.collection_name = collection_name
25
+ self.api_key = api_key
26
+ self.client = None
27
+
28
+ @property
29
+ def source_type(self) -> str:
30
+ """Return the source type."""
31
+ return "qdrant"
32
+
33
+ @property
34
+ def source_name(self) -> str:
35
+ """Return the source name."""
36
+ return self.collection_name
37
+
38
+ def connect(self) -> None:
39
+ """Connect to Qdrant."""
40
+ if QdrantClient is None:
41
+ raise ImportError("qdrant-client library is required for Qdrant migration")
42
+
43
+ self.client = QdrantClient(url=self.url, api_key=self.api_key)
44
+
45
+ def estimate(self) -> MigrationEstimate:
46
+ """Estimate migration size and time."""
47
+ if not self.client:
48
+ raise RuntimeError("Must call connect() first")
49
+
50
+ collection_info = self.client.get_collection(self.collection_name)
51
+
52
+ total_vectors = collection_info.points_count
53
+ dimensions = collection_info.config.params.vectors.size
54
+
55
+ # Rough estimates
56
+ bytes_per_vector = dimensions * 4 + 1024 # 4 bytes per float + metadata overhead
57
+ estimated_mv2_bytes = total_vectors * bytes_per_vector
58
+ estimated_seconds = total_vectors / 1000.0 # Rough estimate of 1000 vectors/second
59
+
60
+ return MigrationEstimate(
61
+ total_vectors=total_vectors,
62
+ dimensions=dimensions,
63
+ estimated_mv2_bytes=estimated_mv2_bytes,
64
+ estimated_seconds=estimated_seconds
65
+ )
66
+
67
+ def fetch_batch(self, batch_size: int = 10000, cursor: Optional[str] = None) -> Tuple[List[VectorRecord], Optional[str]]:
68
+ """Fetch a batch of records from Qdrant."""
69
+ if not self.client:
70
+ raise RuntimeError("Must call connect() first")
71
+
72
+ # Use cursor directly as offset (Qdrant handles different offset types)
73
+ offset = cursor
74
+
75
+ # Scroll through points
76
+ points, next_page_offset = self.client.scroll(
77
+ collection_name=self.collection_name,
78
+ limit=batch_size,
79
+ offset=offset,
80
+ with_payload=True,
81
+ with_vectors=True
82
+ )
83
+
84
+ records = []
85
+ for point in points:
86
+ # Extract text from payload if present
87
+ payload = point.payload or {}
88
+ text = payload.get('text')
89
+
90
+ # Create clean metadata without text
91
+ clean_metadata = {k: v for k, v in payload.items() if k != 'text'}
92
+
93
+ record = VectorRecord(
94
+ id=str(point.id),
95
+ vector=point.vector,
96
+ text=text,
97
+ metadata=clean_metadata
98
+ )
99
+ records.append(record)
100
+
101
+ # Convert next_page_offset to cursor string
102
+ next_cursor = str(next_page_offset) if next_page_offset is not None else None
103
+
104
+ return records, next_cursor
105
+
106
+ def close(self) -> None:
107
+ """Close the connection and clean up resources."""
108
+ if self.client:
109
+ self.client.close()
110
+ self.client = None
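
The scroll position Qdrant returns is round-tripped through the string cursor, so the batch protocol can also be driven by hand. A sketch with placeholder connection details; one caveat: str(next_page_offset) round-trips cleanly for UUID point ids, while collections with integer ids may need the cursor converted back to int before the next call:

from ate.memory.migrate.sources import QdrantMigrationSource

source = QdrantMigrationSource(url="http://localhost:6333", collection_name="notes")
source.connect()
try:
    total = 0
    cursor = None
    while True:
        records, cursor = source.fetch_batch(batch_size=256, cursor=cursor)
        total += len(records)  # each item is a VectorRecord(id, vector, text, metadata)
        if cursor is None:
            break
    print(f"fetched {total} records")
finally:
    source.close()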