emergent-translator 1.1.0 (emergent_translator-1.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emergent_translator/__init__.py +126 -0
- emergent_translator/adaptive_codebook.py +342 -0
- emergent_translator/api_server.py +4988 -0
- emergent_translator/batch_encoder.py +555 -0
- emergent_translator/chunk_collector.py +978 -0
- emergent_translator/chunk_coordinator.py +738 -0
- emergent_translator/claude_compression.py +375 -0
- emergent_translator/cli.py +413 -0
- emergent_translator/client_sdk.py +903 -0
- emergent_translator/code_skeleton.py +448 -0
- emergent_translator/core.py +1081 -0
- emergent_translator/emergent_symbols.py +690 -0
- emergent_translator/format_handlers.py +901 -0
- emergent_translator/gpu_batch_encoder.py +848 -0
- emergent_translator/intelligent_router.py +509 -0
- emergent_translator/metrics.py +436 -0
- emergent_translator/py.typed +0 -0
- emergent_translator-1.1.0.dist-info/METADATA +568 -0
- emergent_translator-1.1.0.dist-info/RECORD +23 -0
- emergent_translator-1.1.0.dist-info/WHEEL +5 -0
- emergent_translator-1.1.0.dist-info/entry_points.txt +2 -0
- emergent_translator-1.1.0.dist-info/licenses/LICENSE +82 -0
- emergent_translator-1.1.0.dist-info/top_level.txt +1 -0

emergent_translator/chunk_coordinator.py
@@ -0,0 +1,738 @@
"""
Distributed Chunk Coordinator for Emergent Language Processing

Enables distributed processing of large files across multiple instances
using emergent language θ symbols as the wire protocol for 87% bandwidth reduction.

Architecture:
┌─────────────┐     ┌─────────────┐     ┌─────────────┐
│   Ingest    │────▶│   Chunk +   │────▶│ Distribute  │
│   (10GB)    │     │ θ-compress  │     │  (1.3GB)    │
└─────────────┘     └─────────────┘     └──────┬──────┘
                                               │
               ┌───────────────────────────────┼───────────────────────────────┐
               ▼                               ▼                               ▼
          ┌─────────┐                     ┌─────────┐                     ┌─────────┐
          │ Worker 1│                     │Worker 50│                     │Worker100│
          │ Process │                     │ Process │                     │ Process │
          └────┬────┘                     └────┬────┘                     └────┬────┘
               │                               │                               │
               └───────────────────────────────┼───────────────────────────────┘
                                               │
                                       ┌───────▼───────┐
                                       │  Reassemble   │
                                       │ θ-decompress  │
                                       └───────────────┘

Usage:
    coordinator = ChunkCoordinator(
        workers=["https://instance1.fly.dev", "https://instance2.fly.dev"],
        chunk_size_mb=100
    )

    job = await coordinator.submit(
        data=large_file_bytes,
        operation="transform"
    )

    result = await coordinator.wait(job.id)
"""

import asyncio
import hashlib
import time
import uuid
import base64
import json
import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from datetime import datetime
import struct

import httpx

logger = logging.getLogger(__name__)


class ChunkStatus(Enum):
    """Status of a chunk in the processing pipeline."""
    PENDING = "pending"
    COMPRESSING = "compressing"
    QUEUED = "queued"
    DISPATCHED = "dispatched"
    PROCESSING = "processing"
    COMPLETED = "completed"
    FAILED = "failed"
    RETRYING = "retrying"


class JobStatus(Enum):
    """Status of a distributed job."""
    CREATED = "created"
    CHUNKING = "chunking"
    DISTRIBUTING = "distributing"
    PROCESSING = "processing"
    COLLECTING = "collecting"
    REASSEMBLING = "reassembling"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


@dataclass
class ChunkMetadata:
    """Metadata for tracking a chunk through the pipeline."""
    chunk_id: str
    job_id: str
    sequence: int  # Order in original file
    offset: int  # Byte offset in original
    size_raw: int  # Size before compression
    size_compressed: int = 0  # Size after θ-compression
    checksum_raw: str = ""  # SHA-256 of raw data
    checksum_compressed: str = ""  # SHA-256 of compressed
    status: ChunkStatus = ChunkStatus.PENDING
    worker_url: Optional[str] = None
    dispatch_time: Optional[float] = None
    complete_time: Optional[float] = None
    retries: int = 0
    error: Optional[str] = None


@dataclass
class Chunk:
    """A chunk of data ready for distributed processing."""
    metadata: ChunkMetadata
    data_raw: Optional[bytes] = None  # Original data (cleared after compression)
    data_compressed: Optional[bytes] = None  # θ-compressed data
    result_compressed: Optional[bytes] = None  # θ-compressed result from worker
    result_raw: Optional[bytes] = None  # Decompressed result

    @property
    def compression_ratio(self) -> float:
        if self.metadata.size_raw == 0:
            return 0.0
        return self.metadata.size_compressed / self.metadata.size_raw


@dataclass
class JobStats:
    """Statistics for a distributed job."""
    total_chunks: int = 0
    completed_chunks: int = 0
    failed_chunks: int = 0
    total_bytes_raw: int = 0
    total_bytes_compressed: int = 0
    total_bytes_transferred: int = 0
    start_time: Optional[float] = None
    end_time: Optional[float] = None

    @property
    def compression_ratio(self) -> float:
        if self.total_bytes_raw == 0:
            return 0.0
        return self.total_bytes_compressed / self.total_bytes_raw

    @property
    def bandwidth_savings_percent(self) -> float:
        return (1 - self.compression_ratio) * 100

    @property
    def duration_seconds(self) -> float:
        if not self.start_time or not self.end_time:
            return 0.0
        return self.end_time - self.start_time

    @property
    def throughput_mbps(self) -> float:
        if self.duration_seconds == 0:
            return 0.0
        return (self.total_bytes_raw / 1024 / 1024) / self.duration_seconds
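
# Worked example of the savings arithmetic above, using the figures from the
# module docstring: shrinking 10 GB of raw input to roughly 1.3 GB gives
# compression_ratio ≈ 0.13, so bandwidth_savings_percent ≈ (1 - 0.13) * 100 ≈ 87.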


@dataclass
class DistributedJob:
    """A distributed processing job."""
    id: str
    operation: str
    status: JobStatus = JobStatus.CREATED
    chunks: List[Chunk] = field(default_factory=list)
    stats: JobStats = field(default_factory=JobStats)
    created_at: datetime = field(default_factory=datetime.now)
    metadata: Dict[str, Any] = field(default_factory=dict)
    result: Optional[bytes] = None
    errors: List[str] = field(default_factory=list)

    def get_chunk(self, chunk_id: str) -> Optional[Chunk]:
        """Get chunk by ID."""
        for chunk in self.chunks:
            if chunk.metadata.chunk_id == chunk_id:
                return chunk
        return None


class WorkerPool:
    """Manages a pool of worker instances."""

    def __init__(self, workers: List[str], auth_token: Optional[str] = None):
        self.workers = workers
        self.auth_token = auth_token or "eudaimonia-translator-demo"
        self._worker_stats: Dict[str, Dict] = {
            url: {"active": 0, "completed": 0, "failed": 0, "avg_latency_ms": 0}
            for url in workers
        }
        self._lock = asyncio.Lock()

    async def get_best_worker(self) -> str:
        """Select the best available worker (least loaded, best performance)."""
        async with self._lock:
            # Simple strategy: least active connections
            best = min(
                self.workers,
                key=lambda w: (
                    self._worker_stats[w]["active"],
                    -self._worker_stats[w]["completed"],
                    self._worker_stats[w]["avg_latency_ms"]
                )
            )
            self._worker_stats[best]["active"] += 1
            return best

    async def release_worker(self, url: str, success: bool, latency_ms: float):
        """Release a worker after task completion."""
        async with self._lock:
            stats = self._worker_stats[url]
            stats["active"] = max(0, stats["active"] - 1)
            if success:
                stats["completed"] += 1
                # Rolling average latency
                n = stats["completed"]
                stats["avg_latency_ms"] = (
                    stats["avg_latency_ms"] * (n - 1) + latency_ms
                ) / n
            else:
                stats["failed"] += 1

    def get_stats(self) -> Dict[str, Dict]:
        """Get worker pool statistics."""
        return dict(self._worker_stats)


class ChunkCoordinator:
    """
    Coordinates distributed processing of large data across multiple instances.

    Uses emergent language θ-compression for efficient wire protocol,
    achieving ~87% bandwidth reduction on structured data.
    """

    def __init__(
        self,
        workers: List[str],
        chunk_size_mb: int = 100,
        max_concurrent: int = 50,
        auth_token: Optional[str] = None,
        compress_endpoint: str = "/translate",
        process_endpoint: str = "/process",
        timeout_seconds: float = 60.0,
        max_retries: int = 3
    ):
        """
        Initialize the chunk coordinator.

        Args:
            workers: List of worker instance URLs
            chunk_size_mb: Target chunk size in MB
            max_concurrent: Maximum concurrent chunk dispatches
            auth_token: Authentication token for workers
            compress_endpoint: Endpoint for θ-compression
            process_endpoint: Endpoint for processing
            timeout_seconds: Request timeout
            max_retries: Max retries per chunk
        """
        self.worker_pool = WorkerPool(workers, auth_token)
        self.chunk_size_bytes = int(chunk_size_mb * 1024 * 1024)
        self.max_concurrent = max_concurrent
        self.auth_token = auth_token or "eudaimonia-translator-demo"
        self.compress_endpoint = compress_endpoint
        self.process_endpoint = process_endpoint
        self.timeout = timeout_seconds
        self.max_retries = max_retries

        # Active jobs
        self._jobs: Dict[str, DistributedJob] = {}
        self._semaphore = asyncio.Semaphore(max_concurrent)

    def _split_into_chunks(self, data: bytes, job_id: str) -> List[Chunk]:
        """Split data into chunks for distribution."""
        chunks = []
        offset = 0
        sequence = 0

        while offset < len(data):
            chunk_data = data[offset:offset + self.chunk_size_bytes]
            chunk_size = len(chunk_data)

            metadata = ChunkMetadata(
                chunk_id=f"{job_id}-{sequence:04d}",
                job_id=job_id,
                sequence=sequence,
                offset=offset,
                size_raw=chunk_size,
                checksum_raw=hashlib.sha256(chunk_data).hexdigest()[:16]
            )

            chunks.append(Chunk(metadata=metadata, data_raw=chunk_data))

            offset += chunk_size
            sequence += 1

        return chunks
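
    # Sizing note (illustrative, not from the original source): with the default
    # chunk_size_mb=100, the 10 GB input pictured in the module docstring splits
    # into ceil(10 * 1024 / 100) = 103 chunks, the final one smaller than the rest.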

    async def _compress_chunk(self, chunk: Chunk, client: httpx.AsyncClient) -> Chunk:
        """Compress a chunk using θ-protocol via local or remote translator."""
        chunk.metadata.status = ChunkStatus.COMPRESSING

        # For prototype, use first worker for compression
        # In production, could have dedicated compression service
        worker = self.worker_pool.workers[0]

        try:
            # Encode raw data as base64 for JSON transport
            data_b64 = base64.b64encode(chunk.data_raw).decode('utf-8')

            response = await client.post(
                f"{worker}{self.compress_endpoint}",
                json={
                    "data": f"CHUNK:{chunk.metadata.chunk_id}:{data_b64}",
                    "source_format": "text",
                    "target_format": "emergent"
                },
                headers={"Authorization": f"Bearer {self.auth_token}"},
                timeout=self.timeout
            )

            if response.status_code == 200:
                result = response.json()
                compressed_b64 = result.get("translated_data", "")
                compressed_bytes = base64.b64decode(compressed_b64)

                chunk.data_compressed = compressed_bytes
                chunk.metadata.size_compressed = len(compressed_bytes)
                chunk.metadata.checksum_compressed = hashlib.sha256(compressed_bytes).hexdigest()[:16]
                chunk.metadata.status = ChunkStatus.QUEUED

                # Clear raw data to free memory
                chunk.data_raw = None

                logger.debug(
                    f"Chunk {chunk.metadata.chunk_id} compressed: "
                    f"{chunk.metadata.size_raw} → {chunk.metadata.size_compressed} "
                    f"({chunk.compression_ratio:.2%})"
                )
            else:
                chunk.metadata.status = ChunkStatus.FAILED
                chunk.metadata.error = f"Compression failed: HTTP {response.status_code}"

        except Exception as e:
            chunk.metadata.status = ChunkStatus.FAILED
            chunk.metadata.error = f"Compression error: {str(e)[:100]}"
            logger.error(f"Chunk {chunk.metadata.chunk_id} compression failed: {e}")

        return chunk

    async def _dispatch_chunk(
        self,
        chunk: Chunk,
        operation: str,
        client: httpx.AsyncClient,
        operation_params: Optional[Dict] = None
    ) -> Chunk:
        """Dispatch a chunk to a worker for processing."""
        async with self._semaphore:
            worker_url = await self.worker_pool.get_best_worker()
            chunk.metadata.worker_url = worker_url
            chunk.metadata.dispatch_time = time.time()
            chunk.metadata.status = ChunkStatus.DISPATCHED

            start_time = time.perf_counter()
            success = False

            try:
                chunk.metadata.status = ChunkStatus.PROCESSING

                # Send compressed chunk to worker
                payload = {
                    "chunk_id": chunk.metadata.chunk_id,
                    "operation": operation,
                    "data": base64.b64encode(chunk.data_compressed).decode('utf-8'),
                    "params": operation_params or {}
                }

                response = await client.post(
                    f"{worker_url}{self.process_endpoint}",
                    json=payload,
                    headers={"Authorization": f"Bearer {self.auth_token}"},
                    timeout=self.timeout
                )

                latency_ms = (time.perf_counter() - start_time) * 1000

                if response.status_code == 200:
                    result = response.json()
                    if result.get("success"):
                        result_b64 = result.get("result", "")
                        chunk.result_compressed = base64.b64decode(result_b64)
                        chunk.metadata.status = ChunkStatus.COMPLETED
                        chunk.metadata.complete_time = time.time()
                        success = True
                    else:
                        chunk.metadata.status = ChunkStatus.FAILED
                        chunk.metadata.error = result.get("error", "Unknown processing error")
                else:
                    chunk.metadata.status = ChunkStatus.FAILED
                    chunk.metadata.error = f"HTTP {response.status_code}"

            except asyncio.TimeoutError:
                chunk.metadata.status = ChunkStatus.FAILED
                chunk.metadata.error = "Timeout"
                latency_ms = self.timeout * 1000

            except Exception as e:
                chunk.metadata.status = ChunkStatus.FAILED
                chunk.metadata.error = str(e)[:100]
                latency_ms = (time.perf_counter() - start_time) * 1000

            finally:
                await self.worker_pool.release_worker(worker_url, success, latency_ms)

            return chunk

    async def _decompress_result(self, chunk: Chunk, client: httpx.AsyncClient) -> Chunk:
        """Decompress a chunk result back to original format."""
        if not chunk.result_compressed:
            return chunk

        worker = self.worker_pool.workers[0]

        try:
            response = await client.post(
                f"{worker}/decompress",
                json={
                    "compressed_data": base64.b64encode(chunk.result_compressed).decode('utf-8')
                },
                headers={"Authorization": f"Bearer {self.auth_token}"},
                timeout=self.timeout
            )

            if response.status_code == 200:
                result = response.json()
                if result.get("success"):
                    decompressed = result.get("decompressed_data", "")
                    # Handle CHUNK: prefix if present
                    if isinstance(decompressed, str) and decompressed.startswith("CHUNK:"):
                        parts = decompressed.split(":", 2)
                        if len(parts) >= 3:
                            decompressed = parts[2]

                    if isinstance(decompressed, str):
                        chunk.result_raw = base64.b64decode(decompressed)
                    else:
                        chunk.result_raw = decompressed

        except Exception as e:
            logger.error(f"Chunk {chunk.metadata.chunk_id} decompression failed: {e}")
            # Keep compressed result, let reassembly handle it

        return chunk

    def _reassemble_results(self, chunks: List[Chunk]) -> bytes:
        """Reassemble chunk results in order."""
        # Sort by sequence number
        sorted_chunks = sorted(chunks, key=lambda c: c.metadata.sequence)

        result_parts = []
        for chunk in sorted_chunks:
            if chunk.result_raw:
                result_parts.append(chunk.result_raw)
            elif chunk.result_compressed:
                # Fallback: include compressed if decompression failed
                result_parts.append(chunk.result_compressed)
            else:
                logger.warning(f"Chunk {chunk.metadata.chunk_id} has no result")

        return b''.join(result_parts)

    async def submit(
        self,
        data: bytes,
        operation: str,
        operation_params: Optional[Dict] = None,
        metadata: Optional[Dict] = None
    ) -> DistributedJob:
        """
        Submit data for distributed processing.

        Args:
            data: Raw data bytes to process
            operation: Operation name to perform on chunks
            operation_params: Optional parameters for the operation
            metadata: Optional job metadata

        Returns:
            DistributedJob with job ID for tracking
        """
        job_id = str(uuid.uuid4())[:8]

        job = DistributedJob(
            id=job_id,
            operation=operation,
            status=JobStatus.CHUNKING,
            metadata=metadata or {}
        )
        job.stats.start_time = time.time()
        job.stats.total_bytes_raw = len(data)

        logger.info(f"Job {job_id}: Starting chunking of {len(data)} bytes")

        # Split into chunks
        job.chunks = self._split_into_chunks(data, job_id)
        job.stats.total_chunks = len(job.chunks)

        logger.info(f"Job {job_id}: Created {len(job.chunks)} chunks")

        self._jobs[job_id] = job

        # Start async processing
        asyncio.create_task(self._process_job(job, operation_params))

        return job

    async def _process_job(self, job: DistributedJob, operation_params: Optional[Dict] = None):
        """Process a job through the full pipeline."""
        try:
            async with httpx.AsyncClient() as client:
                # Phase 1: Compress all chunks
                job.status = JobStatus.DISTRIBUTING
                logger.info(f"Job {job.id}: Compressing {len(job.chunks)} chunks")

                compress_tasks = [
                    self._compress_chunk(chunk, client)
                    for chunk in job.chunks
                ]
                await asyncio.gather(*compress_tasks)

                # Calculate compression stats
                job.stats.total_bytes_compressed = sum(
                    c.metadata.size_compressed for c in job.chunks
                )

                logger.info(
                    f"Job {job.id}: Compression complete - "
                    f"{job.stats.total_bytes_raw} → {job.stats.total_bytes_compressed} "
                    f"({job.stats.bandwidth_savings_percent:.1f}% savings)"
                )

                # Phase 2: Dispatch to workers
                job.status = JobStatus.PROCESSING
                logger.info(f"Job {job.id}: Dispatching to workers")

                # Filter chunks that compressed successfully
                ready_chunks = [
                    c for c in job.chunks
                    if c.metadata.status == ChunkStatus.QUEUED
                ]

                dispatch_tasks = [
                    self._dispatch_chunk(chunk, job.operation, client, operation_params)
                    for chunk in ready_chunks
                ]
                await asyncio.gather(*dispatch_tasks)

                # Track bytes transferred
                job.stats.total_bytes_transferred = (
                    job.stats.total_bytes_compressed * 2  # Round trip
                )

                # Phase 3: Collect and decompress results
                job.status = JobStatus.COLLECTING
                logger.info(f"Job {job.id}: Collecting results")

                completed_chunks = [
                    c for c in job.chunks
                    if c.metadata.status == ChunkStatus.COMPLETED
                ]

                decompress_tasks = [
                    self._decompress_result(chunk, client)
                    for chunk in completed_chunks
                ]
                await asyncio.gather(*decompress_tasks)

                # Phase 4: Reassemble
                job.status = JobStatus.REASSEMBLING
                logger.info(f"Job {job.id}: Reassembling {len(completed_chunks)} chunks")

                job.result = self._reassemble_results(completed_chunks)

                # Final stats
                job.stats.completed_chunks = len(completed_chunks)
                job.stats.failed_chunks = len(job.chunks) - len(completed_chunks)
                job.stats.end_time = time.time()

                if job.stats.failed_chunks == 0:
                    job.status = JobStatus.COMPLETED
                    logger.info(
                        f"Job {job.id}: Completed successfully - "
                        f"{job.stats.throughput_mbps:.2f} MB/s, "
                        f"{job.stats.bandwidth_savings_percent:.1f}% bandwidth saved"
                    )
                else:
                    job.status = JobStatus.COMPLETED  # Partial success
                    job.errors.append(f"{job.stats.failed_chunks} chunks failed")
                    logger.warning(
                        f"Job {job.id}: Completed with {job.stats.failed_chunks} failed chunks"
                    )

        except Exception as e:
            job.status = JobStatus.FAILED
            job.errors.append(str(e))
            job.stats.end_time = time.time()
            logger.error(f"Job {job.id}: Failed - {e}")

    async def wait(self, job_id: str, poll_interval: float = 0.5) -> DistributedJob:
        """Wait for a job to complete."""
        while True:
            job = self._jobs.get(job_id)
            if not job:
                raise ValueError(f"Job {job_id} not found")

            if job.status in (JobStatus.COMPLETED, JobStatus.FAILED, JobStatus.CANCELLED):
                return job

            await asyncio.sleep(poll_interval)

    def get_job(self, job_id: str) -> Optional[DistributedJob]:
        """Get job by ID."""
        return self._jobs.get(job_id)

    def get_job_status(self, job_id: str) -> Dict[str, Any]:
        """Get detailed job status."""
        job = self._jobs.get(job_id)
        if not job:
            return {"error": f"Job {job_id} not found"}

        chunk_statuses = {}
        for chunk in job.chunks:
            status = chunk.metadata.status.value
            chunk_statuses[status] = chunk_statuses.get(status, 0) + 1

        return {
            "job_id": job.id,
            "status": job.status.value,
            "operation": job.operation,
            "chunks": {
                "total": job.stats.total_chunks,
                "completed": job.stats.completed_chunks,
                "failed": job.stats.failed_chunks,
                "by_status": chunk_statuses
            },
            "bytes": {
                "raw": job.stats.total_bytes_raw,
                "compressed": job.stats.total_bytes_compressed,
                "transferred": job.stats.total_bytes_transferred,
                "compression_ratio": f"{job.stats.compression_ratio:.2%}",
                "bandwidth_saved": f"{job.stats.bandwidth_savings_percent:.1f}%"
            },
            "timing": {
                "duration_seconds": job.stats.duration_seconds,
                "throughput_mbps": job.stats.throughput_mbps
            },
            "errors": job.errors
        }

    def list_jobs(self) -> List[Dict[str, Any]]:
        """List all jobs with summary info."""
        return [
            {
                "id": job.id,
                "status": job.status.value,
                "operation": job.operation,
                "chunks": f"{job.stats.completed_chunks}/{job.stats.total_chunks}",
                "created": job.created_at.isoformat()
            }
            for job in self._jobs.values()
        ]


# Convenience function for simple usage
async def distributed_process(
    data: bytes,
    workers: List[str],
    operation: str = "passthrough",
    chunk_size_mb: int = 100,
    **kwargs
) -> Tuple[bytes, Dict[str, Any]]:
    """
    Simple interface for distributed processing.

    Args:
        data: Data to process
        workers: List of worker URLs
        operation: Operation to perform
        chunk_size_mb: Chunk size
        **kwargs: Additional parameters

    Returns:
        Tuple of (result_bytes, stats_dict)
    """
    coordinator = ChunkCoordinator(
        workers=workers,
        chunk_size_mb=chunk_size_mb
    )

    job = await coordinator.submit(data, operation, kwargs.get("params"))
    job = await coordinator.wait(job.id)

    return job.result or b'', coordinator.get_job_status(job.id)


if __name__ == "__main__":
    # Quick test
    async def test():
        # Create coordinator with single test endpoint
        coordinator = ChunkCoordinator(
            workers=["https://emergent-language.fly.dev"],
            chunk_size_mb=1  # Small chunks for testing
        )

        # Create test data (1MB of JSON-like content)
        test_data = json.dumps({
            "records": [
                {"id": i, "data": f"record_{i}" * 100}
                for i in range(1000)
            ]
        }).encode('utf-8')

        print(f"Test data size: {len(test_data)} bytes")

        # Submit job
        job = await coordinator.submit(
            data=test_data,
            operation="passthrough"
        )

        print(f"Job submitted: {job.id}")

        # Wait for completion
        job = await coordinator.wait(job.id)

        # Print results
        status = coordinator.get_job_status(job.id)
        print(f"\nJob Status: {json.dumps(status, indent=2)}")

        if job.result:
            print(f"\nResult size: {len(job.result)} bytes")

    asyncio.run(test())
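
For orientation, a minimal sketch of driving this module end to end through its
distributed_process() helper. The worker URL is a placeholder and "passthrough" is
simply the helper's default operation, so read this as an illustration of the API
above rather than a verified deployment recipe:

    import asyncio

    from emergent_translator.chunk_coordinator import distributed_process

    async def main() -> None:
        payload = b'{"records": []}' * 10_000  # any large byte payload will do
        result, stats = await distributed_process(
            payload,
            workers=["https://your-worker.example.dev"],  # placeholder worker URL
            operation="passthrough",
            chunk_size_mb=10,
        )
        # The stats dict is the same structure returned by get_job_status()
        print(f"result bytes: {len(result)}")
        print(f"bandwidth saved: {stats['bytes']['bandwidth_saved']}")

    asyncio.run(main())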