foodforthought-cli 0.2.7__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. ate/__init__.py +6 -0
  2. ate/__main__.py +16 -0
  3. ate/auth/__init__.py +1 -0
  4. ate/auth/device_flow.py +141 -0
  5. ate/auth/token_store.py +96 -0
  6. ate/behaviors/__init__.py +100 -0
  7. ate/behaviors/approach.py +399 -0
  8. ate/behaviors/common.py +686 -0
  9. ate/behaviors/tree.py +454 -0
  10. ate/cli.py +855 -3995
  11. ate/client.py +90 -0
  12. ate/commands/__init__.py +168 -0
  13. ate/commands/auth.py +389 -0
  14. ate/commands/bridge.py +448 -0
  15. ate/commands/data.py +185 -0
  16. ate/commands/deps.py +111 -0
  17. ate/commands/generate.py +384 -0
  18. ate/commands/memory.py +907 -0
  19. ate/commands/parts.py +166 -0
  20. ate/commands/primitive.py +399 -0
  21. ate/commands/protocol.py +288 -0
  22. ate/commands/recording.py +524 -0
  23. ate/commands/repo.py +154 -0
  24. ate/commands/simulation.py +291 -0
  25. ate/commands/skill.py +303 -0
  26. ate/commands/skills.py +487 -0
  27. ate/commands/team.py +147 -0
  28. ate/commands/workflow.py +271 -0
  29. ate/detection/__init__.py +38 -0
  30. ate/detection/base.py +142 -0
  31. ate/detection/color_detector.py +399 -0
  32. ate/detection/trash_detector.py +322 -0
  33. ate/drivers/__init__.py +39 -0
  34. ate/drivers/ble_transport.py +405 -0
  35. ate/drivers/mechdog.py +942 -0
  36. ate/drivers/wifi_camera.py +477 -0
  37. ate/interfaces/__init__.py +187 -0
  38. ate/interfaces/base.py +273 -0
  39. ate/interfaces/body.py +267 -0
  40. ate/interfaces/detection.py +282 -0
  41. ate/interfaces/locomotion.py +422 -0
  42. ate/interfaces/manipulation.py +408 -0
  43. ate/interfaces/navigation.py +389 -0
  44. ate/interfaces/perception.py +362 -0
  45. ate/interfaces/sensors.py +247 -0
  46. ate/interfaces/types.py +371 -0
  47. ate/llm_proxy.py +239 -0
  48. ate/mcp_server.py +387 -0
  49. ate/memory/__init__.py +35 -0
  50. ate/memory/cloud.py +244 -0
  51. ate/memory/context.py +269 -0
  52. ate/memory/embeddings.py +184 -0
  53. ate/memory/export.py +26 -0
  54. ate/memory/merge.py +146 -0
  55. ate/memory/migrate/__init__.py +34 -0
  56. ate/memory/migrate/base.py +89 -0
  57. ate/memory/migrate/pipeline.py +189 -0
  58. ate/memory/migrate/sources/__init__.py +13 -0
  59. ate/memory/migrate/sources/chroma.py +170 -0
  60. ate/memory/migrate/sources/pinecone.py +120 -0
  61. ate/memory/migrate/sources/qdrant.py +110 -0
  62. ate/memory/migrate/sources/weaviate.py +160 -0
  63. ate/memory/reranker.py +353 -0
  64. ate/memory/search.py +26 -0
  65. ate/memory/store.py +548 -0
  66. ate/recording/__init__.py +83 -0
  67. ate/recording/demonstration.py +378 -0
  68. ate/recording/session.py +415 -0
  69. ate/recording/upload.py +304 -0
  70. ate/recording/visual.py +416 -0
  71. ate/recording/wrapper.py +95 -0
  72. ate/robot/__init__.py +221 -0
  73. ate/robot/agentic_servo.py +856 -0
  74. ate/robot/behaviors.py +493 -0
  75. ate/robot/ble_capture.py +1000 -0
  76. ate/robot/ble_enumerate.py +506 -0
  77. ate/robot/calibration.py +668 -0
  78. ate/robot/calibration_state.py +388 -0
  79. ate/robot/commands.py +3735 -0
  80. ate/robot/direction_calibration.py +554 -0
  81. ate/robot/discovery.py +441 -0
  82. ate/robot/introspection.py +330 -0
  83. ate/robot/llm_system_id.py +654 -0
  84. ate/robot/locomotion_calibration.py +508 -0
  85. ate/robot/manager.py +270 -0
  86. ate/robot/marker_generator.py +611 -0
  87. ate/robot/perception.py +502 -0
  88. ate/robot/primitives.py +614 -0
  89. ate/robot/profiles.py +281 -0
  90. ate/robot/registry.py +322 -0
  91. ate/robot/servo_mapper.py +1153 -0
  92. ate/robot/skill_upload.py +675 -0
  93. ate/robot/target_calibration.py +500 -0
  94. ate/robot/teach.py +515 -0
  95. ate/robot/types.py +242 -0
  96. ate/robot/visual_labeler.py +1048 -0
  97. ate/robot/visual_servo_loop.py +494 -0
  98. ate/robot/visual_servoing.py +570 -0
  99. ate/robot/visual_system_id.py +906 -0
  100. ate/transports/__init__.py +121 -0
  101. ate/transports/base.py +394 -0
  102. ate/transports/ble.py +405 -0
  103. ate/transports/hybrid.py +444 -0
  104. ate/transports/serial.py +345 -0
  105. ate/urdf/__init__.py +30 -0
  106. ate/urdf/capture.py +582 -0
  107. ate/urdf/cloud.py +491 -0
  108. ate/urdf/collision.py +271 -0
  109. ate/urdf/commands.py +708 -0
  110. ate/urdf/depth.py +360 -0
  111. ate/urdf/inertial.py +312 -0
  112. ate/urdf/kinematics.py +330 -0
  113. ate/urdf/lifting.py +415 -0
  114. ate/urdf/meshing.py +300 -0
  115. ate/urdf/models/__init__.py +110 -0
  116. ate/urdf/models/depth_anything.py +253 -0
  117. ate/urdf/models/sam2.py +324 -0
  118. ate/urdf/motion_analysis.py +396 -0
  119. ate/urdf/pipeline.py +468 -0
  120. ate/urdf/scale.py +256 -0
  121. ate/urdf/scan_session.py +411 -0
  122. ate/urdf/segmentation.py +299 -0
  123. ate/urdf/synthesis.py +319 -0
  124. ate/urdf/topology.py +336 -0
  125. ate/urdf/validation.py +371 -0
  126. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/METADATA +9 -1
  127. foodforthought_cli-0.3.0.dist-info/RECORD +166 -0
  128. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/WHEEL +1 -1
  129. foodforthought_cli-0.2.7.dist-info/RECORD +0 -44
  130. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/entry_points.txt +0 -0
  131. {foodforthought_cli-0.2.7.dist-info → foodforthought_cli-0.3.0.dist-info}/top_level.txt +0 -0
ate/memory/context.py ADDED
@@ -0,0 +1,269 @@
+ """Context management for git-like memory operations.
+
+ This module provides the ContextManager that tracks the active memory and train
+ of thought, similar to how git tracks the current repository and branch.
+ """
+
+ import json
+ import os
+ import re
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import List, Optional, Dict, Any
+
+ from .store import MemoryStore
+
+
+ @dataclass
+ class MemoryContext:
+     """Tracks active memory and train of thought."""
+     active_memory: str
+     active_train: str
+     path: str
+
+
+ @dataclass
+ class MemoryMetadata:
+     """Metadata for a memory."""
+     name: str
+     visibility: str
+     trains: List[str]
+     default_train: str
+     description: str = ""
+     created_at: Optional[str] = None
+     remote: Optional[str] = None
+
+
+ class ContextManager:
+     """Manages the active memory context (~/.ate/context.json)."""
+
+     CONFIG_DIR = os.path.expanduser("~/.ate")
+     CONTEXT_FILE = os.path.expanduser("~/.ate/context.json")
+     MEMORIES_DIR = os.path.expanduser("~/.ate/memories")
+
+     @classmethod
+     def get_context(cls) -> MemoryContext:
+         """Get current context. Auto-initializes on first use."""
+         if os.path.exists(cls.CONTEXT_FILE):
+             try:
+                 with open(cls.CONTEXT_FILE, 'r') as f:
+                     data = json.load(f)
+
+                 # Provide defaults for missing fields
+                 active_memory = data.get("active_memory", "default")
+                 active_train = data.get("active_train", "main")
+                 path = data.get("path")
+
+                 if not path:
+                     path = cls._train_to_path(active_memory, active_train)
+
+                 return MemoryContext(
+                     active_memory=active_memory,
+                     active_train=active_train,
+                     path=path
+                 )
+             except (json.JSONDecodeError, KeyError):
+                 pass  # Fall through to auto-init
+
+         # Auto-initialize on first use
+         return cls._auto_initialize()
+
+     @classmethod
+     def set_context(cls, memory: str, train: str) -> MemoryContext:
+         """Set active context."""
+         # Validate memory name
+         if not cls._is_valid_memory_name(memory):
+             raise ValueError(f"Invalid memory name '{memory}'. Use lowercase alphanumeric and hyphens only.")
+
+         # Ensure directories exist
+         os.makedirs(cls.CONFIG_DIR, exist_ok=True)
+         os.makedirs(cls.MEMORIES_DIR, exist_ok=True)
+         memory_dir = os.path.join(cls.MEMORIES_DIR, memory)
+         os.makedirs(memory_dir, exist_ok=True)
+
+         # Create path
+         path = cls._train_to_path(memory, train)
+
+         # Create .mv2 file if it doesn't exist (auto-create new trains)
+         if not os.path.exists(path):
+             try:
+                 store = MemoryStore.create(path)
+                 store.close()
+             except Exception:
+                 pass
+             # Ensure file exists on disk (memvid create may not touch filesystem)
+             if not os.path.exists(path):
+                 Path(path).touch()
+
+         # Update memory.json trains list
+         safe_train = train.replace('/', '-')
+         memory_json = os.path.join(memory_dir, "memory.json")
+         if os.path.exists(memory_json):
+             try:
+                 with open(memory_json, 'r') as f:
+                     data = json.load(f)
+                 if safe_train not in data.get("trains", []):
+                     data.setdefault("trains", []).append(safe_train)
+                     with open(memory_json, 'w') as f:
+                         json.dump(data, f, indent=2)
+             except (json.JSONDecodeError, KeyError):
+                 pass
+
+         # Create context
+         context = MemoryContext(
+             active_memory=memory,
+             active_train=train,
+             path=path
+         )
+
+         # Write context file
+         context_data = {
+             "active_memory": memory,
+             "active_train": train,
+             "path": path
+         }
+
+         with open(cls.CONTEXT_FILE, 'w') as f:
+             json.dump(context_data, f, indent=2)
+
+         return context
+
+     @classmethod
+     def resolve_path(cls, memory: Optional[str] = None, train: Optional[str] = None) -> str:
+         """Resolve .mv2 path from context or explicit args."""
+         if memory is not None and train is not None:
+             return cls._train_to_path(memory, train)
+
+         context = cls.get_context()
+         if memory is not None:
+             return cls._train_to_path(memory, context.active_train)
+         if train is not None:
+             return cls._train_to_path(context.active_memory, train)
+
+         return context.path
+
+     @classmethod
+     def ensure_memory(cls, name: str) -> str:
+         """Create memory dir + default train if doesn't exist."""
+         if not cls._is_valid_memory_name(name):
+             raise ValueError(f"Invalid memory name '{name}'. Use lowercase alphanumeric and hyphens only.")
+
+         # Create config and memories directories
+         os.makedirs(cls.CONFIG_DIR, exist_ok=True)
+         os.makedirs(cls.MEMORIES_DIR, exist_ok=True)
+
+         # Create memory directory
+         memory_dir = os.path.join(cls.MEMORIES_DIR, name)
+         os.makedirs(memory_dir, exist_ok=True)
+
+         # Create default train (main.mv2) if it doesn't exist
+         main_path = cls._train_to_path(name, "main")
+         if not os.path.exists(main_path):
+             try:
+                 store = MemoryStore.create(main_path)
+                 store.close()
+             except Exception:
+                 pass
+             # Ensure file exists on disk
+             if not os.path.exists(main_path):
+                 Path(main_path).touch()
+
+         # Create memory.json if it doesn't exist
+         memory_json_path = os.path.join(memory_dir, "memory.json")
+         if not os.path.exists(memory_json_path):
+             metadata = {
+                 "name": name,
+                 "visibility": "private",
+                 "trains": ["main"],
+                 "default_train": "main",
+                 "description": ""
+             }
+             with open(memory_json_path, 'w') as f:
+                 json.dump(metadata, f, indent=2)
+
+         return main_path
+
+     @classmethod
+     def list_memories(cls) -> List[MemoryMetadata]:
+         """List all local memories."""
+         memories = []
+
+         if not os.path.exists(cls.MEMORIES_DIR):
+             return memories
+
+         for item in os.listdir(cls.MEMORIES_DIR):
+             memory_dir = os.path.join(cls.MEMORIES_DIR, item)
+             if os.path.isdir(memory_dir):
+                 memory_json_path = os.path.join(memory_dir, "memory.json")
+                 if os.path.exists(memory_json_path):
+                     try:
+                         with open(memory_json_path, 'r') as f:
+                             data = json.load(f)
+
+                         metadata = MemoryMetadata(
+                             name=data["name"],
+                             visibility=data.get("visibility", "private"),
+                             trains=data.get("trains", ["main"]),
+                             default_train=data.get("default_train", "main"),
+                             description=data.get("description", ""),
+                             created_at=data.get("created_at"),
+                             remote=data.get("remote")
+                         )
+                         memories.append(metadata)
+                     except (json.JSONDecodeError, KeyError):
+                         continue  # Skip malformed memory.json files
+
+         return memories
+
+     @classmethod
+     def list_trains(cls, memory: Optional[str] = None) -> List[str]:
+         """List trains of thought in a memory."""
+         if memory is None:
+             context = cls.get_context()
+             memory = context.active_memory
+
+         memory_dir = os.path.join(cls.MEMORIES_DIR, memory)
+         if not os.path.exists(memory_dir):
+             raise FileNotFoundError(f"Memory '{memory}' does not exist")
+
+         trains = set()
+
+         # Source 1: .mv2 files on disk
+         for item in os.listdir(memory_dir):
+             if item.endswith('.mv2'):
+                 train_name = item[:-4]  # Remove .mv2 extension
+                 trains.add(train_name)
+
+         # Source 2: memory.json trains list (in case files haven't been created yet)
+         memory_json = os.path.join(memory_dir, "memory.json")
+         if os.path.exists(memory_json):
+             try:
+                 with open(memory_json, 'r') as f:
+                     data = json.load(f)
+                 for t in data.get("trains", []):
+                     trains.add(t)
+             except (json.JSONDecodeError, KeyError):
+                 pass
+
+         return sorted(trains)
+
+     @classmethod
+     def _auto_initialize(cls) -> MemoryContext:
+         """Auto-initialize default memory on first use."""
+         # Ensure default memory exists
+         cls.ensure_memory("default")
+
+         # Set context to default/main
+         return cls.set_context("default", "main")
+
+     @classmethod
+     def _train_to_path(cls, memory: str, train: str) -> str:
+         """Convert memory + train to .mv2 file path."""
+         # Convert train name to filename-safe format (slashes to hyphens)
+         safe_train = train.replace('/', '-')
+         return os.path.join(cls.MEMORIES_DIR, memory, f"{safe_train}.mv2")
+
+     @classmethod
+     def _is_valid_memory_name(cls, name: str) -> bool:
+         """Check if memory name is valid (alphanumeric + hyphens only)."""
+         return bool(re.match(r'^[a-z0-9-]+$', name))
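For orientation, here is a brief usage sketch of the ContextManager API added above. It calls only the classmethods shown in this diff; the memory and train names are made-up examples, and the ~/.ate paths come from the class constants. It is illustrative, not documentation shipped with the package.

from ate.memory.context import ContextManager

# First call auto-initializes ~/.ate/memories/default/main.mv2 and sets default/main.
ctx = ContextManager.get_context()
print(ctx.active_memory, ctx.active_train)   # default main

# Switch context; the memory dir, memory.json, and the train's .mv2 are created on demand.
ctx = ContextManager.set_context("field-notes", "experiments/vision")
print(ctx.path)   # ~/.ate/memories/field-notes/experiments-vision.mv2 (slashes become hyphens)

# Resolve a path without changing the active context, then enumerate what exists locally.
path = ContextManager.resolve_path(memory="field-notes")
print(ContextManager.list_trains("field-notes"))
print([m.name for m in ContextManager.list_memories()])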
ate/memory/embeddings.py ADDED
@@ -0,0 +1,184 @@
+ """Embedding configuration and management for ate memory."""
+
+ import os
+ import requests
+ from dataclasses import dataclass
+ from typing import Optional, List, Dict, Any
+
+ import memvid_sdk
+
+
+ @dataclass
+ class EmbeddingConfig:
+     """Embedding provider configuration."""
+     provider: str = "none"
+     model: Optional[str] = None
+     api_key: Optional[str] = None
+
+     def __post_init__(self):
+         """Validate provider after initialization."""
+         valid_providers = ["openai", "cohere", "voyage", "ollama", "none"]
+         if self.provider not in valid_providers:
+             raise ValueError(f"Invalid provider: {self.provider}. Must be one of {valid_providers}")
+
+         # Set default models based on provider
+         if self.model is None:
+             if self.provider == "openai":
+                 self.model = "text-embedding-3-small"
+             elif self.provider == "cohere":
+                 self.model = "embed-english-v3.0"
+             elif self.provider == "voyage":
+                 self.model = "voyage-2"
+             elif self.provider == "ollama":
+                 self.model = "nomic-embed-text"
+
+
+ class EmbeddingManager:
+     """Detects and manages embedding providers for ate memory."""
+
+     @staticmethod
+     def detect() -> EmbeddingConfig:
+         """Auto-detect best available embedding provider from env.
+
+         Detection order: OpenAI → Cohere → Voyage → Ollama → BM25-only
+
+         Returns:
+             EmbeddingConfig with detected provider and settings
+         """
+         # Check OpenAI first (highest priority)
+         openai_key = os.environ.get('OPENAI_API_KEY')
+         if openai_key:
+             return EmbeddingConfig(
+                 provider="openai",
+                 api_key=openai_key
+             )
+
+         # Check Cohere second
+         cohere_key = os.environ.get('COHERE_API_KEY')
+         if cohere_key:
+             return EmbeddingConfig(
+                 provider="cohere",
+                 api_key=cohere_key
+             )
+
+         # Check Voyage third
+         voyage_key = os.environ.get('VOYAGE_API_KEY')
+         if voyage_key:
+             return EmbeddingConfig(
+                 provider="voyage",
+                 api_key=voyage_key
+             )
+
+         # Check Ollama fourth (local service)
+         ollama_host = os.environ.get('OLLAMA_HOST', 'http://localhost:11434')
+         if EmbeddingManager._is_ollama_available(ollama_host):
+             return EmbeddingConfig(
+                 provider="ollama",
+                 model="nomic-embed-text"
+             )
+
+         # No providers available
+         return EmbeddingConfig(provider="none")
+
+     @staticmethod
+     def _is_ollama_available(host: str) -> bool:
+         """Check if Ollama is reachable at the given host."""
+         try:
+             response = requests.get(f'{host}/api/tags', timeout=2)
+             return response.status_code == 200
+         except:
+             return False
+
+     @staticmethod
+     def get_provider(config: EmbeddingConfig):
+         """Get a memvid_sdk EmbeddingProvider from config.
+
+         Args:
+             config: EmbeddingConfig instance
+
+         Returns:
+             EmbeddingProvider instance or None if provider is "none"
+         """
+         if config.provider == "none":
+             return None
+
+         return memvid_sdk.embeddings.get_embedder(
+             provider=config.provider,
+             model=config.model,
+             api_key=config.api_key
+         )
+
+     @staticmethod
+     def available_providers() -> List[Dict[str, Any]]:
+         """List all detected providers with status.
+
+         Returns:
+             List of provider status dictionaries
+         """
+         providers = []
+
+         # Check OpenAI
+         openai_key = os.environ.get('OPENAI_API_KEY')
+         if openai_key:
+             providers.append({
+                 "name": "openai",
+                 "available": True,
+                 "model": "text-embedding-3-small",
+                 "source": "OPENAI_API_KEY"
+             })
+         else:
+             providers.append({
+                 "name": "openai",
+                 "available": False,
+                 "reason": "OPENAI_API_KEY not set"
+             })
+
+         # Check Cohere
+         cohere_key = os.environ.get('COHERE_API_KEY')
+         if cohere_key:
+             providers.append({
+                 "name": "cohere",
+                 "available": True,
+                 "model": "embed-english-v3.0",
+                 "source": "COHERE_API_KEY"
+             })
+         else:
+             providers.append({
+                 "name": "cohere",
+                 "available": False,
+                 "reason": "COHERE_API_KEY not set"
+             })
+
+         # Check Voyage
+         voyage_key = os.environ.get('VOYAGE_API_KEY')
+         if voyage_key:
+             providers.append({
+                 "name": "voyage",
+                 "available": True,
+                 "model": "voyage-2",
+                 "source": "VOYAGE_API_KEY"
+             })
+         else:
+             providers.append({
+                 "name": "voyage",
+                 "available": False,
+                 "reason": "VOYAGE_API_KEY not set"
+             })
+
+         # Check Ollama
+         ollama_host = os.environ.get('OLLAMA_HOST', 'http://localhost:11434')
+         if EmbeddingManager._is_ollama_available(ollama_host):
+             providers.append({
+                 "name": "ollama",
+                 "available": True,
+                 "model": "nomic-embed-text",
+                 "source": ollama_host
+             })
+         else:
+             providers.append({
+                 "name": "ollama",
+                 "available": False,
+                 "reason": "OLLAMA_HOST not set, localhost:11434 not reachable"
+             })
+
+         return providers
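A short sketch of how the detection flow above behaves at runtime; which branch it takes depends entirely on the environment variables and on whether an Ollama instance answers at OLLAMA_HOST. The provider object itself comes from memvid_sdk, so this is illustrative only.

from ate.memory.embeddings import EmbeddingManager

config = EmbeddingManager.detect()   # order: OpenAI -> Cohere -> Voyage -> Ollama -> none
print(config.provider, config.model)

for status in EmbeddingManager.available_providers():
    print(status["name"], "available" if status["available"] else status["reason"])

embedder = EmbeddingManager.get_provider(config)
if embedder is None:
    # provider == "none": memory search falls back to lexical (BM25-only) retrieval
    print("No embedding provider detected")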
ate/memory/export.py ADDED
@@ -0,0 +1,26 @@
+ """Export operations and info structures."""
+
+ from dataclasses import dataclass
+ from typing import Optional
+
+
+ @dataclass
+ class MemoryInfo:
+     """Information about a memory store.
+
+     Attributes:
+         path: Path to the .mv2 file
+         frame_count: Number of memory frames stored
+         size_bytes: Total size in bytes
+         has_lex_index: Whether lexical indexing is enabled
+         has_vec_index: Whether vector indexing is enabled
+         has_time_index: Whether time indexing is enabled
+         created_at: ISO timestamp when created (optional)
+     """
+     path: str
+     frame_count: int
+     size_bytes: int
+     has_lex_index: bool
+     has_vec_index: bool
+     has_time_index: bool
+     created_at: Optional[str] = None
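MemoryInfo is a plain dataclass; in practice it is returned by MemoryStore.info() and by merge_memories() below rather than constructed by hand. The values here are invented purely to show the shape.

from ate.memory.export import MemoryInfo

info = MemoryInfo(
    path="/tmp/example.mv2",   # placeholder path
    frame_count=42,
    size_bytes=1_048_576,
    has_lex_index=True,
    has_vec_index=False,
    has_time_index=True,
)
print(f"{info.path}: {info.frame_count} frames, {info.size_bytes / 1024:.0f} KiB")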
ate/memory/merge.py ADDED
@@ -0,0 +1,146 @@
+ """Memory merging operations."""
+
+ from typing import List
+ import json
+
+ from .store import MemoryStore
+ from .export import MemoryInfo
+
+
+ def merge_memories(source_paths: List[str], output_path: str, dedup: bool = True) -> MemoryInfo:
+     """Merge multiple .mv2 files into a single output file.
+
+     Args:
+         source_paths: List of paths to source .mv2 files
+         output_path: Path where merged .mv2 file will be created
+         dedup: Whether to deduplicate identical content (default True)
+
+     Returns:
+         MemoryInfo about the merged output file
+     """
+     # Create the output memory store
+     output_store = MemoryStore.create(output_path)
+
+     seen_texts = set() if dedup else None
+     all_items = []
+
+     try:
+         # Process each source file
+         for source_path in source_paths:
+             source_store = MemoryStore.open(source_path)
+
+             try:
+                 # Try timeline-based iteration first (works with real memvid)
+                 items_from_source = []
+                 try:
+                     timeline = source_store._mem.timeline()
+                     if timeline:
+                         for entry in timeline:
+                             if isinstance(entry, dict):
+                                 uri = entry.get('uri', f"mv2://frames/{entry.get('frame_id', 0)}")
+                                 frame_data = source_store._mem.frame(uri)
+
+                                 title = None
+                                 tags = []
+                                 metadata = {}
+
+                                 # Get text from timeline preview (labels are just keywords)
+                                 text = entry.get('preview', '').split('\ntitle:')[0].split('\ntags:')[0].strip()
+
+                                 if isinstance(frame_data, dict):
+                                     title = frame_data.get('title')
+                                     tags = frame_data.get('tags', [])
+                                     for key, value in frame_data.get('extra_metadata', {}).items():
+                                         if key == 'extractous_metadata':
+                                             continue
+                                         try:
+                                             if isinstance(value, str) and (value.startswith('{') or value.startswith('"')):
+                                                 metadata[key] = json.loads(value)
+                                             else:
+                                                 metadata[key] = value
+                                         except json.JSONDecodeError:
+                                             metadata[key] = value
+
+                                 if text:
+                                     items_from_source.append({
+                                         'text': text,
+                                         'title': title,
+                                         'tags': tags,
+                                         'metadata': metadata
+                                     })
+                 except (AttributeError, TypeError):
+                     pass  # Fall through to search-based approach
+
+                 # Fallback: use search (for mocked tests)
+                 if not items_from_source:
+                     search_result = source_store.search("*", top_k=10000)
+
+                     if hasattr(search_result, 'hits'):
+                         search_results = search_result.hits
+                     elif isinstance(search_result, list):
+                         search_results = search_result
+                     else:
+                         search_results = []
+
+                     for search_item in search_results:
+                         text = getattr(search_item, 'snippet', getattr(search_item, 'text', ''))
+                         title = getattr(search_item, 'title', None)
+                         tags = getattr(search_item, 'tags', [])
+                         frame_id = getattr(search_item, 'frame_id', 0)
+
+                         metadata = {}
+                         try:
+                             frame_data = source_store._mem.frame(frame_id)
+                             if hasattr(frame_data, 'metadata'):
+                                 metadata = frame_data.metadata or {}
+                         except Exception:
+                             try:
+                                 if hasattr(search_item, 'metadata'):
+                                     metadata = search_item.metadata or {}
+                             except Exception:
+                                 metadata = {}
+
+                         items_from_source.append({
+                             'text': text,
+                             'title': title,
+                             'tags': tags,
+                             'metadata': metadata
+                         })
+
+                 for item in items_from_source:
+                     text = item['text']
+
+                     if dedup and text in seen_texts:
+                         continue
+
+                     if dedup:
+                         seen_texts.add(text)
+
+                     all_items.append(item)
+
+             finally:
+                 source_store.close()
+
+         # Add all collected items to the output store
+         output_store.add_batch(all_items)
+
+         # Get info about the merged result
+         info = output_store.info()
+
+     finally:
+         output_store.close()
+
+     # Handle mocked info object vs real MemoryInfo
+     if hasattr(info, 'frame_count') and not isinstance(info, MemoryInfo):
+         # It's a mock, create actual MemoryInfo
+         return MemoryInfo(
+             path=info.path,
+             frame_count=info.frame_count,
+             size_bytes=info.size_bytes,
+             has_lex_index=info.has_lex_index,
+             has_vec_index=info.has_vec_index,
+             has_time_index=info.has_time_index,
+             created_at=getattr(info, 'created_at', None)
+         )
+
+     return info
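A hedged usage sketch for merge_memories() as defined above. The source paths are placeholders and are assumed to point at existing .mv2 stores; with dedup=True, frames whose text is identical across sources are written only once.

import os
from ate.memory.merge import merge_memories

sources = [
    os.path.expanduser("~/.ate/memories/default/main.mv2"),      # placeholder paths
    os.path.expanduser("~/.ate/memories/field-notes/main.mv2"),
]
info = merge_memories(sources, output_path="/tmp/combined.mv2", dedup=True)
print(f"Merged {info.frame_count} frames into {info.path}")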
ate/memory/migrate/__init__.py ADDED
@@ -0,0 +1,34 @@
+ """Migration module for vector database migrations."""
+
+ from .base import (
+     VectorRecord,
+     MigrationEstimate,
+     MigrationResult,
+     MigrationCheckpoint,
+     MigrationSource
+ )
+ from .pipeline import MigrationPipeline
+ from .sources import (
+     PineconeMigrationSource,
+     QdrantMigrationSource,
+     WeaviateMigrationSource,
+     ChromaMigrationSource
+ )
+
+ __all__ = [
+     # Base classes and data structures
+     'VectorRecord',
+     'MigrationEstimate',
+     'MigrationResult',
+     'MigrationCheckpoint',
+     'MigrationSource',
+
+     # Pipeline
+     'MigrationPipeline',
+
+     # Sources
+     'PineconeMigrationSource',
+     'QdrantMigrationSource',
+     'WeaviateMigrationSource',
+     'ChromaMigrationSource'
+ ]
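The package re-exports the migration API as shown above; only this import surface is certain from the diff, since the constructors and method signatures live in base.py, pipeline.py, and sources/*.py, which are not reproduced in this section.

# Import surface only; constructor arguments are defined in files not shown here.
from ate.memory.migrate import (
    MigrationPipeline,
    PineconeMigrationSource,
    QdrantMigrationSource,
    WeaviateMigrationSource,
    ChromaMigrationSource,
)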