hqde-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of hqde might be problematic.

@@ -0,0 +1,394 @@
+ """
+ MapReduce-inspired ensemble management for HQDE framework.
+
+ This module implements distributed key-value storage and MapReduce patterns
+ for efficient ensemble weight management and aggregation.
+ """
+
+ import torch
+ import ray
+ import numpy as np
+ from typing import Dict, List, Optional, Tuple, Any, Callable
+ import hashlib
+ import pickle
+ import time
+ from collections import defaultdict
+ import logging
+
+
+ @ray.remote
+ class DistributedWeightStore:
+     """Distributed key-value store for ensemble weights."""
+
+     def __init__(self, store_id: str, replication_factor: int = 3):
+         self.store_id = store_id
+         self.replication_factor = replication_factor
+         self.weights = {}
+         self.metadata = {}
+         self.access_count = defaultdict(int)
+         self.last_access = {}
+
+     def put_weight(self, key: str, weight_tensor: torch.Tensor, metadata: Optional[Dict[str, Any]] = None):
+         """Store a weight tensor with optional metadata."""
+         self.weights[key] = weight_tensor.cpu()  # Store on CPU to save GPU memory
+         self.metadata[key] = metadata or {}
+         self.access_count[key] = 0
+         self.last_access[key] = time.time()
+
+     def get_weight(self, key: str) -> Optional[torch.Tensor]:
+         """Retrieve a weight tensor."""
+         if key in self.weights:
+             self.access_count[key] += 1
+             self.last_access[key] = time.time()
+             return self.weights[key]
+         return None
+
+     def get_metadata(self, key: str) -> Optional[Dict[str, Any]]:
+         """Retrieve metadata for a weight tensor."""
+         return self.metadata.get(key, None)
+
+     def list_keys(self) -> List[str]:
+         """List all stored weight keys."""
+         return list(self.weights.keys())
+
+     def delete_weight(self, key: str) -> bool:
+         """Delete a weight tensor."""
+         if key in self.weights:
+             del self.weights[key]
+             del self.metadata[key]
+             if key in self.access_count:
+                 del self.access_count[key]
+             if key in self.last_access:
+                 del self.last_access[key]
+             return True
+         return False
+
+     def get_store_stats(self) -> Dict[str, Any]:
+         """Get statistics about the store."""
+         return {
+             'store_id': self.store_id,
+             'num_weights': len(self.weights),
+             'total_memory_mb': sum(w.numel() * w.element_size() for w in self.weights.values()) / (1024 * 1024),
+             'access_counts': dict(self.access_count),
+             'last_access_times': dict(self.last_access)
+         }
+
+
+ @ray.remote
+ class MapperWorker:
+     """Worker node that performs map operations on ensemble data."""
+
+     def __init__(self, worker_id: str):
+         self.worker_id = worker_id
+         self.local_cache = {}
+
+     def map_operation(self,
+                       data_partition: List[Any],
+                       map_function: Callable,
+                       context: Optional[Dict[str, Any]] = None) -> List[Tuple[str, Any]]:
+         """Perform map operation on data partition."""
+         results = []
+         for item in data_partition:
+             try:
+                 mapped_result = map_function(item, context or {})
+                 # Create key-value pairs
+                 if isinstance(mapped_result, (list, tuple)):
+                     results.extend(mapped_result)
+                 else:
+                     results.append(mapped_result)
+             except Exception as e:
+                 logging.error(f"Map operation failed for item {item}: {e}")
+                 continue
+
+         return results
+
+     def cache_intermediate_result(self, key: str, value: Any):
+         """Cache intermediate results locally."""
+         self.local_cache[key] = value
+
+     def get_cached_result(self, key: str) -> Optional[Any]:
+         """Retrieve cached intermediate result."""
+         return self.local_cache.get(key, None)
+
+
+ @ray.remote
+ class ReducerWorker:
+     """Worker node that performs reduce operations on mapped data."""
+
+     def __init__(self, worker_id: str):
+         self.worker_id = worker_id
+
+     def reduce_operation(self,
+                          key: str,
+                          values: List[Any],
+                          reduce_function: Callable,
+                          context: Optional[Dict[str, Any]] = None) -> Tuple[str, Any]:
+         """Perform reduce operation on values for a given key."""
+         try:
+             reduced_result = reduce_function(key, values, context or {})
+             return key, reduced_result
+         except Exception as e:
+             logging.error(f"Reduce operation failed for key {key}: {e}")
+             return key, None
+
+     def aggregate_weights(self,
+                           weight_list: List[torch.Tensor],
+                           aggregation_method: str = "mean") -> torch.Tensor:
+         """Aggregate a list of weight tensors."""
+         if not weight_list:
+             return torch.empty(0)
+
+         if aggregation_method == "mean":
+             return torch.stack(weight_list).mean(dim=0)
+         elif aggregation_method == "weighted_mean":
+             # Assume equal weights for now
+             return torch.stack(weight_list).mean(dim=0)
+         elif aggregation_method == "median":
+             return torch.stack(weight_list).median(dim=0)[0]
+         else:
+             return torch.stack(weight_list).mean(dim=0)
+
+
+ class MapReduceEnsembleManager:
+     """MapReduce-inspired ensemble manager for distributed weight aggregation."""
+
+     def __init__(self,
+                  num_stores: int = 3,
+                  num_mappers: int = 4,
+                  num_reducers: int = 2,
+                  replication_factor: int = 3):
+         """
+         Initialize MapReduce ensemble manager.
+
+         Args:
+             num_stores: Number of distributed weight stores
+             num_mappers: Number of mapper workers
+             num_reducers: Number of reducer workers
+             replication_factor: Replication factor for fault tolerance
+         """
+         self.num_stores = num_stores
+         self.num_mappers = num_mappers
+         self.num_reducers = num_reducers
+         self.replication_factor = replication_factor
+
+         # Initialize distributed components
+         self.weight_stores = []
+         self.mappers = []
+         self.reducers = []
+
+         self._initialize_workers()
+
+     def _initialize_workers(self):
+         """Initialize all distributed workers."""
+         # Initialize weight stores
+         for i in range(self.num_stores):
+             store = DistributedWeightStore.remote(f"store_{i}", self.replication_factor)
+             self.weight_stores.append(store)
+
+         # Initialize mapper workers
+         for i in range(self.num_mappers):
+             mapper = MapperWorker.remote(f"mapper_{i}")
+             self.mappers.append(mapper)
+
+         # Initialize reducer workers
+         for i in range(self.num_reducers):
+             reducer = ReducerWorker.remote(f"reducer_{i}")
+             self.reducers.append(reducer)
+
+     def _hash_key_to_store(self, key: str) -> int:
+         """Hash a key to determine which store it belongs to."""
+         hash_obj = hashlib.md5(key.encode())
+         return int(hash_obj.hexdigest(), 16) % self.num_stores
+
+     def store_ensemble_weights(self,
+                                ensemble_weights: Dict[str, torch.Tensor],
+                                metadata: Optional[Dict[str, Any]] = None) -> bool:
+         """Store ensemble weights across distributed stores."""
+         storage_futures = []
+
+         for weight_key, weight_tensor in ensemble_weights.items():
+             # Determine primary store
+             primary_store_idx = self._hash_key_to_store(weight_key)
+
+             # Store in primary store
+             primary_store = self.weight_stores[primary_store_idx]
+             future = primary_store.put_weight.remote(weight_key, weight_tensor, metadata)
+             storage_futures.append(future)
+
+             # Replicate to other stores for fault tolerance
+             for replica in range(1, min(self.replication_factor, self.num_stores)):
+                 replica_store_idx = (primary_store_idx + replica) % self.num_stores
+                 replica_store = self.weight_stores[replica_store_idx]
+                 replica_key = f"{weight_key}_replica_{replica}"
+                 replica_future = replica_store.put_weight.remote(replica_key, weight_tensor, metadata)
+                 storage_futures.append(replica_future)
+
+         # Wait for all storage operations to complete
+         ray.get(storage_futures)
+         return True
+
+     def retrieve_ensemble_weights(self, weight_keys: List[str]) -> Dict[str, torch.Tensor]:
+         """Retrieve ensemble weights from distributed stores."""
+         retrieval_futures = {}
+
+         for weight_key in weight_keys:
+             store_idx = self._hash_key_to_store(weight_key)
+             store = self.weight_stores[store_idx]
+             future = store.get_weight.remote(weight_key)
+             retrieval_futures[weight_key] = future
+
+         # Collect results
+         retrieved_weights = {}
+         for weight_key, future in retrieval_futures.items():
+             weight_tensor = ray.get(future)
+             if weight_tensor is not None:
+                 retrieved_weights[weight_key] = weight_tensor
+
+         return retrieved_weights
+
+     def mapreduce_ensemble_aggregation(self,
+                                        ensemble_data: List[Dict[str, Any]],
+                                        aggregation_strategy: str = "hierarchical") -> Dict[str, torch.Tensor]:
+         """
+         Perform MapReduce-style ensemble aggregation.
+
+         Args:
+             ensemble_data: List of ensemble member data
+             aggregation_strategy: Strategy for aggregation ("hierarchical", "flat")
+
+         Returns:
+             Aggregated ensemble weights
+         """
+         # Map phase: distribute data processing
+         map_results = self._map_phase(ensemble_data)
+
+         # Shuffle phase: group by keys
+         grouped_data = self._shuffle_phase(map_results)
+
+         # Reduce phase: aggregate grouped data
+         aggregated_weights = self._reduce_phase(grouped_data, aggregation_strategy)
+
+         return aggregated_weights
+
+     def _map_phase(self, ensemble_data: List[Dict[str, Any]]) -> List[Tuple[str, Any]]:
+         """Map phase: process ensemble data in parallel."""
+         # Partition data across mappers
+         partitions = [[] for _ in range(self.num_mappers)]
+         for i, data_item in enumerate(ensemble_data):
+             partition_idx = i % self.num_mappers
+             partitions[partition_idx].append(data_item)
+
+         # Define map function
+         def ensemble_map_function(item: Dict[str, Any], context: Dict[str, Any]) -> List[Tuple[str, Any]]:
+             """Map function to extract weight information."""
+             results = []
+             if 'weights' in item:
+                 for param_name, weight_tensor in item['weights'].items():
+                     results.append((param_name, {
+                         'weight': weight_tensor,
+                         'source_id': item.get('source_id', 'unknown'),
+                         'accuracy': item.get('accuracy', 0.0),
+                         'timestamp': item.get('timestamp', time.time())
+                     }))
+             return results
+
+         # Execute map operations
+         map_futures = []
+         for i, partition in enumerate(partitions):
+             if partition:  # Only process non-empty partitions
+                 mapper = self.mappers[i]
+                 future = mapper.map_operation.remote(partition, ensemble_map_function)
+                 map_futures.append(future)
+
+         # Collect map results
+         all_map_results = []
+         for future in map_futures:
+             partition_results = ray.get(future)
+             all_map_results.extend(partition_results)
+
+         return all_map_results
+
+     def _shuffle_phase(self, map_results: List[Tuple[str, Any]]) -> Dict[str, List[Any]]:
+         """Shuffle phase: group map results by key."""
+         grouped_data = defaultdict(list)
+
+         for key, value in map_results:
+             grouped_data[key].append(value)
+
+         return dict(grouped_data)
+
+     def _reduce_phase(self,
+                       grouped_data: Dict[str, List[Any]],
+                       aggregation_strategy: str) -> Dict[str, torch.Tensor]:
+         """Reduce phase: aggregate grouped data."""
+         # Partition keys across reducers
+         keys = list(grouped_data.keys())
+         key_partitions = [[] for _ in range(self.num_reducers)]
+
+         for i, key in enumerate(keys):
+             partition_idx = i % self.num_reducers
+             key_partitions[partition_idx].append(key)
+
+         # Define reduce function
+         def ensemble_reduce_function(key: str, values: List[Any], context: Dict[str, Any]) -> torch.Tensor:
+             """Reduce function to aggregate weights."""
+             weight_tensors = [v['weight'] for v in values if 'weight' in v]
+
+             if not weight_tensors:
+                 return torch.empty(0)
+
+             if aggregation_strategy == "hierarchical":
+                 # Weight by accuracy
+                 accuracies = [v.get('accuracy', 1.0) for v in values]
+                 accuracy_weights = torch.softmax(torch.tensor(accuracies), dim=0)
+
+                 weighted_sum = torch.zeros_like(weight_tensors[0])
+                 for weight, acc_weight in zip(weight_tensors, accuracy_weights):
+                     weighted_sum += acc_weight * weight
+
+                 return weighted_sum
+             else:
+                 # Simple averaging
+                 return torch.stack(weight_tensors).mean(dim=0)
+
+         # Execute reduce operations
+         reduce_futures = {}
+         for i, key_partition in enumerate(key_partitions):
+             if key_partition:  # Only process non-empty partitions
+                 reducer = self.reducers[i]
+                 for key in key_partition:
+                     values = grouped_data[key]
+                     future = reducer.reduce_operation.remote(key, values, ensemble_reduce_function)
+                     reduce_futures[key] = future
+
+         # Collect reduce results
+         aggregated_weights = {}
+         for key, future in reduce_futures.items():
+             result_key, aggregated_weight = ray.get(future)
+             if aggregated_weight is not None:
+                 aggregated_weights[result_key] = aggregated_weight
+
+         return aggregated_weights
+
+     def get_cluster_statistics(self) -> Dict[str, Any]:
+         """Get statistics about the distributed cluster."""
+         # Get store statistics
+         store_stat_futures = [store.get_store_stats.remote() for store in self.weight_stores]
+         store_stats = ray.get(store_stat_futures)
+
+         total_weights = sum(stats['num_weights'] for stats in store_stats)
+         total_memory = sum(stats['total_memory_mb'] for stats in store_stats)
+
+         return {
+             'num_stores': self.num_stores,
+             'num_mappers': self.num_mappers,
+             'num_reducers': self.num_reducers,
+             'total_stored_weights': total_weights,
+             'total_memory_usage_mb': total_memory,
+             'store_statistics': store_stats
+         }
+
+     def cleanup(self):
+         """Cleanup distributed resources."""
+         # Ray will automatically clean up remote actors
+         pass
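
To make the map, shuffle, and reduce flow above concrete, here is a minimal usage sketch. It assumes a local Ray runtime and an import path for MapReduceEnsembleManager (the diff does not name the module file); the ensemble members, tensors, and accuracies are illustrative.

import ray
import torch
from hqde import MapReduceEnsembleManager  # import path is an assumption

ray.init()
manager = MapReduceEnsembleManager(num_stores=3, num_mappers=4, num_reducers=2)

# Two illustrative ensemble members, each contributing one parameter tensor.
ensemble_data = [
    {'weights': {'layer1.weight': torch.randn(4, 4)}, 'source_id': 'member_0', 'accuracy': 0.91},
    {'weights': {'layer1.weight': torch.randn(4, 4)}, 'source_id': 'member_1', 'accuracy': 0.87},
]

# "hierarchical" weights each member by softmax over its reported accuracy.
aggregated = manager.mapreduce_ensemble_aggregation(ensemble_data,
                                                    aggregation_strategy="hierarchical")

# Persist the result across the hash-partitioned stores, then read it back.
manager.store_ensemble_weights(aggregated)
restored = manager.retrieve_ensemble_weights(list(aggregated.keys()))
print(restored['layer1.weight'].shape)  # torch.Size([4, 4])

ray.shutdown()

Note that retrieve_ensemble_weights only consults the primary store for each key; the _replica_* copies written for fault tolerance are not read back automatically.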
hqde/py.typed ADDED
File without changes
@@ -0,0 +1,17 @@
+ """
+ Quantum-inspired components for HQDE framework.
+
+ This module provides quantum-inspired algorithms for ensemble learning,
+ including quantum noise injection, entanglement simulation, and quantum
+ annealing approaches for ensemble optimization.
+ """
+
+ from .quantum_aggregator import QuantumEnsembleAggregator
+ from .quantum_noise import QuantumNoiseGenerator
+ from .quantum_optimization import QuantumEnsembleOptimizer
+
+ __all__ = [
+     'QuantumEnsembleAggregator',
+     'QuantumNoiseGenerator',
+     'QuantumEnsembleOptimizer'
+ ]
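
This __init__ only re-exports three classes; their implementations are not part of this diff. As a concept-level sketch of the quantum noise injection the docstring mentions (not the package's actual API), quantum-inspired methods typically perturb ensemble weights with small scaled Gaussian noise:

import torch

def inject_quantum_inspired_noise(weight: torch.Tensor, scale: float = 0.01) -> torch.Tensor:
    # Hypothetical illustration: add zero-mean Gaussian noise scaled by the
    # average weight magnitude to mimic quantum fluctuations.
    noise = torch.randn_like(weight) * scale * weight.abs().mean()
    return weight + noise

w_noisy = inject_quantum_inspired_noise(torch.randn(4, 4))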