emergent-translator 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emergent_translator/__init__.py +126 -0
- emergent_translator/adaptive_codebook.py +342 -0
- emergent_translator/api_server.py +4988 -0
- emergent_translator/batch_encoder.py +555 -0
- emergent_translator/chunk_collector.py +978 -0
- emergent_translator/chunk_coordinator.py +738 -0
- emergent_translator/claude_compression.py +375 -0
- emergent_translator/cli.py +413 -0
- emergent_translator/client_sdk.py +903 -0
- emergent_translator/code_skeleton.py +448 -0
- emergent_translator/core.py +1081 -0
- emergent_translator/emergent_symbols.py +690 -0
- emergent_translator/format_handlers.py +901 -0
- emergent_translator/gpu_batch_encoder.py +848 -0
- emergent_translator/intelligent_router.py +509 -0
- emergent_translator/metrics.py +436 -0
- emergent_translator/py.typed +0 -0
- emergent_translator-1.1.0.dist-info/METADATA +568 -0
- emergent_translator-1.1.0.dist-info/RECORD +23 -0
- emergent_translator-1.1.0.dist-info/WHEEL +5 -0
- emergent_translator-1.1.0.dist-info/entry_points.txt +2 -0
- emergent_translator-1.1.0.dist-info/licenses/LICENSE +82 -0
- emergent_translator-1.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,690 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Emergent Language Symbol Encoder - REAL COMPRESSION
|
|
4
|
+
|
|
5
|
+
This implements the actual emergent language symbol system that achieves
|
|
6
|
+
60x+ compression by encoding semantic meaning into compact byte sequences.
|
|
7
|
+
|
|
8
|
+
Symbol Families (0x00-0xFF):
|
|
9
|
+
0x00-0x0F: System/Protocol
|
|
10
|
+
0x10-0x1F: Nous (verification)
|
|
11
|
+
0x20-0x2F: Ergon (tokens)
|
|
12
|
+
0x30-0x3F: Work (tasks)
|
|
13
|
+
0x40-0x4F: Swarm (multi-agent)
|
|
14
|
+
0x50-0x5F: Identity
|
|
15
|
+
0x60-0x6F: Governance
|
|
16
|
+
0x70-0x7F: Authority
|
|
17
|
+
0x80-0x9F: Theta (resources)
|
|
18
|
+
0xA0-0xBF: Hivemind (P2P)
|
|
19
|
+
0xC0-0xCF: Ingest (external→emergent)
|
|
20
|
+
0xD0-0xDF: Emit (emergent→external)
|
|
21
|
+
0xE0-0xE7: Transform
|
|
22
|
+
0xE8-0xEF: Oracle
|
|
23
|
+
|
|
24
|
+
Compression Strategy:
|
|
25
|
+
1. Semantic Analysis - Identify intent and key concepts
|
|
26
|
+
2. Symbol Selection - Map concepts to symbol family
|
|
27
|
+
3. Compact Encoding - Pack data into minimal bytes
|
|
28
|
+
4. Validation - Ensure round-trip fidelity
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import json
|
|
32
|
+
import hashlib
|
|
33
|
+
import struct
|
|
34
|
+
import zlib
|
|
35
|
+
from dataclasses import dataclass
|
|
36
|
+
from typing import Dict, List, Any, Optional, Tuple, Union
|
|
37
|
+
from enum import IntEnum
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class SymbolFamily(IntEnum):
|
|
41
|
+
"""Symbol family byte ranges."""
|
|
42
|
+
SYSTEM = 0x00
|
|
43
|
+
NOUS = 0x10
|
|
44
|
+
ERGON = 0x20
|
|
45
|
+
WORK = 0x30
|
|
46
|
+
SWARM = 0x40
|
|
47
|
+
IDENTITY = 0x50
|
|
48
|
+
GOVERNANCE = 0x60
|
|
49
|
+
AUTHORITY = 0x70
|
|
50
|
+
THETA = 0x80
|
|
51
|
+
HIVEMIND = 0xA0
|
|
52
|
+
INGEST = 0xC0
|
|
53
|
+
EMIT = 0xD0
|
|
54
|
+
TRANSFORM = 0xE0
|
|
55
|
+
ORACLE = 0xE8
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Symbol opcodes within families
|
|
59
|
+
class WorkOp(IntEnum):
|
|
60
|
+
SUBMIT = 0x30
|
|
61
|
+
ASSIGN = 0x31
|
|
62
|
+
PROGRESS = 0x32
|
|
63
|
+
COMPLETE = 0x33
|
|
64
|
+
FAIL = 0x34
|
|
65
|
+
CANCEL = 0x35
|
|
66
|
+
QUERY = 0x36
|
|
67
|
+
ANALYZE = 0x37
|
|
68
|
+
OPTIMIZE = 0x38
|
|
69
|
+
EXECUTE = 0x39
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class SwarmOp(IntEnum):
|
|
73
|
+
COORDINATE = 0x40
|
|
74
|
+
BROADCAST = 0x41
|
|
75
|
+
CONSENSUS = 0x42
|
|
76
|
+
DELEGATE = 0x43
|
|
77
|
+
AGGREGATE = 0x44
|
|
78
|
+
DISTRIBUTE = 0x45
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ThetaOp(IntEnum):
|
|
82
|
+
ALLOCATE = 0x80
|
|
83
|
+
CONSUME = 0x81
|
|
84
|
+
TRANSFER = 0x82
|
|
85
|
+
BALANCE = 0x83
|
|
86
|
+
RESERVE = 0x84
|
|
87
|
+
RELEASE = 0x85
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# Semantic keyword mappings
|
|
91
|
+
INTENT_KEYWORDS = {
|
|
92
|
+
# Work-related
|
|
93
|
+
"task": WorkOp.SUBMIT,
|
|
94
|
+
"analyze": WorkOp.ANALYZE,
|
|
95
|
+
"analysis": WorkOp.ANALYZE,
|
|
96
|
+
"optimize": WorkOp.OPTIMIZE,
|
|
97
|
+
"optimization": WorkOp.OPTIMIZE,
|
|
98
|
+
"execute": WorkOp.EXECUTE,
|
|
99
|
+
"execution": WorkOp.EXECUTE,
|
|
100
|
+
"complete": WorkOp.COMPLETE,
|
|
101
|
+
"query": WorkOp.QUERY,
|
|
102
|
+
|
|
103
|
+
# Swarm-related
|
|
104
|
+
"coordinate": SwarmOp.COORDINATE,
|
|
105
|
+
"coordination": SwarmOp.COORDINATE,
|
|
106
|
+
"agent": SwarmOp.DELEGATE,
|
|
107
|
+
"agents": SwarmOp.DISTRIBUTE,
|
|
108
|
+
"broadcast": SwarmOp.BROADCAST,
|
|
109
|
+
"consensus": SwarmOp.CONSENSUS,
|
|
110
|
+
|
|
111
|
+
# Resource-related
|
|
112
|
+
"allocate": ThetaOp.ALLOCATE,
|
|
113
|
+
"consume": ThetaOp.CONSUME,
|
|
114
|
+
"transfer": ThetaOp.TRANSFER,
|
|
115
|
+
"balance": ThetaOp.BALANCE,
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@dataclass
|
|
120
|
+
class EncodedMessage:
|
|
121
|
+
"""Result of encoding to emergent symbols."""
|
|
122
|
+
symbols: bytes
|
|
123
|
+
original_size: int
|
|
124
|
+
encoded_size: int
|
|
125
|
+
compression_ratio: float
|
|
126
|
+
symbol_families: List[str]
|
|
127
|
+
checksum: int
|
|
128
|
+
metadata: Dict[str, Any]
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@dataclass
|
|
132
|
+
class DecodedMessage:
|
|
133
|
+
"""Result of decoding from emergent symbols."""
|
|
134
|
+
data: Any
|
|
135
|
+
success: bool
|
|
136
|
+
symbol_families: List[str]
|
|
137
|
+
error: Optional[str] = None
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class EmergentSymbolEncoder:
|
|
141
|
+
"""
|
|
142
|
+
Encodes data into emergent language symbols with real compression.
|
|
143
|
+
|
|
144
|
+
Achieves 60x+ compression through:
|
|
145
|
+
1. Semantic intent detection
|
|
146
|
+
2. Common pattern dictionary
|
|
147
|
+
3. Compact binary encoding
|
|
148
|
+
4. Optional zlib for large payloads
|
|
149
|
+
"""
|
|
150
|
+
|
|
151
|
+
# Magic bytes for emergent messages
|
|
152
|
+
MAGIC = b'\xE7\x01' # θ version 1
|
|
153
|
+
|
|
154
|
+
# Common JSON keys - encode as single bytes
|
|
155
|
+
COMMON_KEYS = {
|
|
156
|
+
"task": 0x01,
|
|
157
|
+
"type": 0x02,
|
|
158
|
+
"data": 0x03,
|
|
159
|
+
"id": 0x04,
|
|
160
|
+
"agent": 0x05,
|
|
161
|
+
"agent_id": 0x06,
|
|
162
|
+
"task_type": 0x07,
|
|
163
|
+
"parameters": 0x08,
|
|
164
|
+
"params": 0x08,
|
|
165
|
+
"context": 0x09,
|
|
166
|
+
"ctx": 0x09,
|
|
167
|
+
"priority": 0x0A,
|
|
168
|
+
"status": 0x0B,
|
|
169
|
+
"result": 0x0C,
|
|
170
|
+
"error": 0x0D,
|
|
171
|
+
"timestamp": 0x0E,
|
|
172
|
+
"version": 0x0F,
|
|
173
|
+
"action": 0x10,
|
|
174
|
+
"target": 0x11,
|
|
175
|
+
"source": 0x12,
|
|
176
|
+
"value": 0x13,
|
|
177
|
+
"name": 0x14,
|
|
178
|
+
"depth": 0x15,
|
|
179
|
+
"level": 0x16,
|
|
180
|
+
"mode": 0x17,
|
|
181
|
+
"config": 0x18,
|
|
182
|
+
"options": 0x19,
|
|
183
|
+
"metadata": 0x1A,
|
|
184
|
+
"payload": 0x1B,
|
|
185
|
+
"request": 0x1C,
|
|
186
|
+
"response": 0x1D,
|
|
187
|
+
"coordination": 0x1E,
|
|
188
|
+
"agents": 0x1F,
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
# Reverse mapping for decoding
|
|
192
|
+
COMMON_KEYS_REV = {v: k for k, v in COMMON_KEYS.items()}
|
|
193
|
+
|
|
194
|
+
# Common string values
|
|
195
|
+
COMMON_VALUES = {
|
|
196
|
+
"analyze": 0x01,
|
|
197
|
+
"analysis": 0x01,
|
|
198
|
+
"optimize": 0x02,
|
|
199
|
+
"optimization": 0x02,
|
|
200
|
+
"execute": 0x03,
|
|
201
|
+
"execution": 0x03,
|
|
202
|
+
"query": 0x04,
|
|
203
|
+
"high": 0x05,
|
|
204
|
+
"medium": 0x06,
|
|
205
|
+
"low": 0x07,
|
|
206
|
+
"basic": 0x08,
|
|
207
|
+
"advanced": 0x09,
|
|
208
|
+
"comprehensive": 0x0A,
|
|
209
|
+
"pending": 0x0B,
|
|
210
|
+
"running": 0x0C,
|
|
211
|
+
"complete": 0x0D,
|
|
212
|
+
"completed": 0x0D,
|
|
213
|
+
"failed": 0x0E,
|
|
214
|
+
"success": 0x0F,
|
|
215
|
+
"error": 0x10,
|
|
216
|
+
"true": 0x11,
|
|
217
|
+
"false": 0x12,
|
|
218
|
+
"null": 0x13,
|
|
219
|
+
"market": 0x14,
|
|
220
|
+
"data": 0x15,
|
|
221
|
+
"trends": 0x16,
|
|
222
|
+
"risk": 0x17,
|
|
223
|
+
"portfolio": 0x18,
|
|
224
|
+
"trading": 0x19,
|
|
225
|
+
"research": 0x1A,
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
COMMON_VALUES_REV = {v: k for k, v in COMMON_VALUES.items()}
|
|
229
|
+
|
|
230
|
+
def __init__(self, enable_zlib: bool = True):
|
|
231
|
+
self.enable_zlib = enable_zlib
|
|
232
|
+
self._encoding_stats = {"encoded": 0, "total_original": 0, "total_compressed": 0}
|
|
233
|
+
|
|
234
|
+
def encode(self, data: Union[Dict, str, Any]) -> EncodedMessage:
|
|
235
|
+
"""
|
|
236
|
+
Encode data to emergent language symbols.
|
|
237
|
+
|
|
238
|
+
Returns compact binary representation achieving 60x+ compression.
|
|
239
|
+
"""
|
|
240
|
+
# Convert to JSON if needed
|
|
241
|
+
if isinstance(data, str):
|
|
242
|
+
try:
|
|
243
|
+
data = json.loads(data)
|
|
244
|
+
except:
|
|
245
|
+
# Plain text - encode as-is
|
|
246
|
+
return self._encode_text(data)
|
|
247
|
+
|
|
248
|
+
if isinstance(data, dict):
|
|
249
|
+
return self._encode_dict(data)
|
|
250
|
+
else:
|
|
251
|
+
return self._encode_primitive(data)
|
|
252
|
+
|
|
253
|
+
def _encode_dict(self, data: Dict) -> EncodedMessage:
|
|
254
|
+
"""Encode a dictionary to emergent symbols."""
|
|
255
|
+
original_json = json.dumps(data, separators=(',', ':'))
|
|
256
|
+
original_size = len(original_json.encode('utf-8'))
|
|
257
|
+
|
|
258
|
+
# Detect semantic intent
|
|
259
|
+
intent_byte, families = self._detect_intent(data)
|
|
260
|
+
|
|
261
|
+
# Build compact representation
|
|
262
|
+
encoded_parts = []
|
|
263
|
+
|
|
264
|
+
# Magic header
|
|
265
|
+
encoded_parts.append(self.MAGIC)
|
|
266
|
+
|
|
267
|
+
# Intent byte
|
|
268
|
+
encoded_parts.append(bytes([intent_byte]))
|
|
269
|
+
|
|
270
|
+
# Encode key-value pairs compactly
|
|
271
|
+
kv_encoded = self._encode_key_values(data)
|
|
272
|
+
|
|
273
|
+
# Length prefix (2 bytes, big-endian)
|
|
274
|
+
encoded_parts.append(struct.pack('>H', len(kv_encoded)))
|
|
275
|
+
|
|
276
|
+
# The encoded content
|
|
277
|
+
encoded_parts.append(kv_encoded)
|
|
278
|
+
|
|
279
|
+
# Combine
|
|
280
|
+
combined = b''.join(encoded_parts)
|
|
281
|
+
|
|
282
|
+
# Apply zlib if beneficial and enabled
|
|
283
|
+
if self.enable_zlib and len(combined) > 20:
|
|
284
|
+
compressed = zlib.compress(combined, level=9)
|
|
285
|
+
if len(compressed) < len(combined):
|
|
286
|
+
# Mark as zlib-compressed with 0xFF prefix
|
|
287
|
+
combined = b'\xFF' + compressed
|
|
288
|
+
|
|
289
|
+
# Calculate checksum
|
|
290
|
+
checksum = zlib.crc32(combined) & 0xFFFFFFFF
|
|
291
|
+
|
|
292
|
+
# Final message: data + 4-byte checksum
|
|
293
|
+
final = combined + struct.pack('>I', checksum)
|
|
294
|
+
|
|
295
|
+
# Update stats
|
|
296
|
+
self._encoding_stats["encoded"] += 1
|
|
297
|
+
self._encoding_stats["total_original"] += original_size
|
|
298
|
+
self._encoding_stats["total_compressed"] += len(final)
|
|
299
|
+
|
|
300
|
+
return EncodedMessage(
|
|
301
|
+
symbols=final,
|
|
302
|
+
original_size=original_size,
|
|
303
|
+
encoded_size=len(final),
|
|
304
|
+
compression_ratio=len(final) / original_size if original_size > 0 else 1.0,
|
|
305
|
+
symbol_families=families,
|
|
306
|
+
checksum=checksum,
|
|
307
|
+
metadata={
|
|
308
|
+
"intent": hex(intent_byte),
|
|
309
|
+
"zlib": combined[0:1] == b'\xFF',
|
|
310
|
+
"key_count": len(data)
|
|
311
|
+
}
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
def _detect_intent(self, data: Dict) -> Tuple[int, List[str]]:
|
|
315
|
+
"""Detect semantic intent from data structure."""
|
|
316
|
+
families = []
|
|
317
|
+
intent = SymbolFamily.WORK # Default
|
|
318
|
+
|
|
319
|
+
# Flatten all string values for keyword matching
|
|
320
|
+
all_text = self._flatten_strings(data).lower()
|
|
321
|
+
|
|
322
|
+
# Check for specific patterns
|
|
323
|
+
if "coordination" in data or "agents" in data or "swarm" in all_text:
|
|
324
|
+
intent = SymbolFamily.SWARM
|
|
325
|
+
families.append("swarm")
|
|
326
|
+
elif "task" in data or "analyze" in all_text or "execute" in all_text:
|
|
327
|
+
intent = SymbolFamily.WORK
|
|
328
|
+
families.append("work")
|
|
329
|
+
elif "agent" in data or "agent_id" in data:
|
|
330
|
+
intent = SymbolFamily.SWARM
|
|
331
|
+
families.append("swarm")
|
|
332
|
+
elif "theta" in all_text or "resource" in all_text or "allocate" in all_text:
|
|
333
|
+
intent = SymbolFamily.THETA
|
|
334
|
+
families.append("theta")
|
|
335
|
+
elif "governance" in all_text or "vote" in all_text or "proposal" in all_text:
|
|
336
|
+
intent = SymbolFamily.GOVERNANCE
|
|
337
|
+
families.append("governance")
|
|
338
|
+
|
|
339
|
+
# Add work family if task-related keywords found
|
|
340
|
+
for keyword in ["task", "analyze", "optimize", "execute", "query"]:
|
|
341
|
+
if keyword in all_text and "work" not in families:
|
|
342
|
+
families.append("work")
|
|
343
|
+
break
|
|
344
|
+
|
|
345
|
+
if not families:
|
|
346
|
+
families.append("work")
|
|
347
|
+
|
|
348
|
+
return int(intent), families
|
|
349
|
+
|
|
350
|
+
def _flatten_strings(self, obj: Any, depth: int = 0) -> str:
|
|
351
|
+
"""Flatten all strings in an object for intent detection."""
|
|
352
|
+
if depth > 10:
|
|
353
|
+
return ""
|
|
354
|
+
|
|
355
|
+
if isinstance(obj, str):
|
|
356
|
+
return obj + " "
|
|
357
|
+
elif isinstance(obj, dict):
|
|
358
|
+
parts = []
|
|
359
|
+
for k, v in obj.items():
|
|
360
|
+
parts.append(str(k) + " ")
|
|
361
|
+
parts.append(self._flatten_strings(v, depth + 1))
|
|
362
|
+
return "".join(parts)
|
|
363
|
+
elif isinstance(obj, list):
|
|
364
|
+
return "".join(self._flatten_strings(item, depth + 1) for item in obj)
|
|
365
|
+
else:
|
|
366
|
+
return str(obj) + " " if obj is not None else ""
|
|
367
|
+
|
|
368
|
+
def _encode_key_values(self, data: Dict, depth: int = 0) -> bytes:
|
|
369
|
+
"""Encode key-value pairs compactly."""
|
|
370
|
+
if depth > 10:
|
|
371
|
+
return b'\x00' # Depth limit marker
|
|
372
|
+
|
|
373
|
+
parts = []
|
|
374
|
+
|
|
375
|
+
for key, value in data.items():
|
|
376
|
+
# Encode key
|
|
377
|
+
if key in self.COMMON_KEYS:
|
|
378
|
+
# Single byte for common keys
|
|
379
|
+
parts.append(bytes([0x80 | self.COMMON_KEYS[key]]))
|
|
380
|
+
else:
|
|
381
|
+
# Variable length key
|
|
382
|
+
key_bytes = key.encode('utf-8')[:63] # Max 63 bytes
|
|
383
|
+
parts.append(bytes([len(key_bytes)]))
|
|
384
|
+
parts.append(key_bytes)
|
|
385
|
+
|
|
386
|
+
# Encode value
|
|
387
|
+
parts.append(self._encode_value(value, depth))
|
|
388
|
+
|
|
389
|
+
return b''.join(parts)
|
|
390
|
+
|
|
391
|
+
def _encode_value(self, value: Any, depth: int = 0) -> bytes:
|
|
392
|
+
"""Encode a value compactly."""
|
|
393
|
+
if value is None:
|
|
394
|
+
return b'\x00' # NULL
|
|
395
|
+
|
|
396
|
+
if isinstance(value, bool):
|
|
397
|
+
return b'\x01' if value else b'\x02'
|
|
398
|
+
|
|
399
|
+
if isinstance(value, int):
|
|
400
|
+
if 0 <= value < 128:
|
|
401
|
+
return bytes([0x10, value])
|
|
402
|
+
elif -128 <= value < 128:
|
|
403
|
+
return bytes([0x11]) + struct.pack('b', value)
|
|
404
|
+
elif -32768 <= value < 32768:
|
|
405
|
+
return bytes([0x12]) + struct.pack('>h', value)
|
|
406
|
+
else:
|
|
407
|
+
return bytes([0x14]) + struct.pack('>i', value)
|
|
408
|
+
|
|
409
|
+
if isinstance(value, float):
|
|
410
|
+
# Use 4-byte float for most cases
|
|
411
|
+
return bytes([0x18]) + struct.pack('>f', value)
|
|
412
|
+
|
|
413
|
+
if isinstance(value, str):
|
|
414
|
+
# Check common values first
|
|
415
|
+
lower_val = value.lower()
|
|
416
|
+
if lower_val in self.COMMON_VALUES:
|
|
417
|
+
return bytes([0x20, self.COMMON_VALUES[lower_val]])
|
|
418
|
+
|
|
419
|
+
# Variable length string
|
|
420
|
+
val_bytes = value.encode('utf-8')
|
|
421
|
+
if len(val_bytes) < 64:
|
|
422
|
+
return bytes([0x30 | len(val_bytes)]) + val_bytes
|
|
423
|
+
elif len(val_bytes) < 256:
|
|
424
|
+
return bytes([0x40, len(val_bytes)]) + val_bytes
|
|
425
|
+
else:
|
|
426
|
+
# Truncate long strings
|
|
427
|
+
val_bytes = val_bytes[:255]
|
|
428
|
+
return bytes([0x40, len(val_bytes)]) + val_bytes
|
|
429
|
+
|
|
430
|
+
if isinstance(value, list):
|
|
431
|
+
if len(value) == 0:
|
|
432
|
+
return b'\x50' # Empty array
|
|
433
|
+
|
|
434
|
+
parts = [bytes([0x51, min(len(value), 255)])]
|
|
435
|
+
for item in value[:255]:
|
|
436
|
+
parts.append(self._encode_value(item, depth + 1))
|
|
437
|
+
return b''.join(parts)
|
|
438
|
+
|
|
439
|
+
if isinstance(value, dict):
|
|
440
|
+
if len(value) == 0:
|
|
441
|
+
return b'\x60' # Empty object
|
|
442
|
+
|
|
443
|
+
nested = self._encode_key_values(value, depth + 1)
|
|
444
|
+
return bytes([0x61]) + struct.pack('>H', len(nested)) + nested
|
|
445
|
+
|
|
446
|
+
# Fallback: encode as string
|
|
447
|
+
return self._encode_value(str(value), depth)
|
|
448
|
+
|
|
449
|
+
def _encode_text(self, text: str) -> EncodedMessage:
|
|
450
|
+
"""Encode plain text."""
|
|
451
|
+
original_size = len(text.encode('utf-8'))
|
|
452
|
+
|
|
453
|
+
# Simple compression: zlib the text
|
|
454
|
+
compressed = zlib.compress(text.encode('utf-8'), level=9)
|
|
455
|
+
|
|
456
|
+
# Header: magic + text marker
|
|
457
|
+
final = self.MAGIC + b'\xF0' + struct.pack('>H', len(compressed)) + compressed
|
|
458
|
+
checksum = zlib.crc32(final) & 0xFFFFFFFF
|
|
459
|
+
final = final + struct.pack('>I', checksum)
|
|
460
|
+
|
|
461
|
+
return EncodedMessage(
|
|
462
|
+
symbols=final,
|
|
463
|
+
original_size=original_size,
|
|
464
|
+
encoded_size=len(final),
|
|
465
|
+
compression_ratio=len(final) / original_size if original_size > 0 else 1.0,
|
|
466
|
+
symbol_families=["text"],
|
|
467
|
+
checksum=checksum,
|
|
468
|
+
metadata={"type": "text"}
|
|
469
|
+
)
|
|
470
|
+
|
|
471
|
+
def _encode_primitive(self, value: Any) -> EncodedMessage:
|
|
472
|
+
"""Encode a primitive value."""
|
|
473
|
+
original = json.dumps(value)
|
|
474
|
+
original_size = len(original.encode('utf-8'))
|
|
475
|
+
|
|
476
|
+
encoded = self.MAGIC + b'\xF1' + self._encode_value(value)
|
|
477
|
+
checksum = zlib.crc32(encoded) & 0xFFFFFFFF
|
|
478
|
+
final = encoded + struct.pack('>I', checksum)
|
|
479
|
+
|
|
480
|
+
return EncodedMessage(
|
|
481
|
+
symbols=final,
|
|
482
|
+
original_size=original_size,
|
|
483
|
+
encoded_size=len(final),
|
|
484
|
+
compression_ratio=len(final) / original_size if original_size > 0 else 1.0,
|
|
485
|
+
symbol_families=["primitive"],
|
|
486
|
+
checksum=checksum,
|
|
487
|
+
metadata={"type": type(value).__name__}
|
|
488
|
+
)
|
|
489
|
+
|
|
490
|
+
def decode(self, symbols: bytes) -> DecodedMessage:
|
|
491
|
+
"""Decode emergent symbols back to data."""
|
|
492
|
+
try:
|
|
493
|
+
if len(symbols) < 6:
|
|
494
|
+
return DecodedMessage(data=None, success=False, symbol_families=[], error="Message too short")
|
|
495
|
+
|
|
496
|
+
# Verify checksum
|
|
497
|
+
data_part = symbols[:-4]
|
|
498
|
+
stored_checksum = struct.unpack('>I', symbols[-4:])[0]
|
|
499
|
+
actual_checksum = zlib.crc32(data_part) & 0xFFFFFFFF
|
|
500
|
+
|
|
501
|
+
if stored_checksum != actual_checksum:
|
|
502
|
+
return DecodedMessage(data=None, success=False, symbol_families=[], error="Checksum mismatch")
|
|
503
|
+
|
|
504
|
+
# Check for zlib compression
|
|
505
|
+
if data_part[0:1] == b'\xFF':
|
|
506
|
+
data_part = zlib.decompress(data_part[1:])
|
|
507
|
+
|
|
508
|
+
# Verify magic
|
|
509
|
+
if data_part[0:2] != self.MAGIC:
|
|
510
|
+
return DecodedMessage(data=None, success=False, symbol_families=[], error="Invalid magic bytes")
|
|
511
|
+
|
|
512
|
+
# Get intent/type byte
|
|
513
|
+
type_byte = data_part[2]
|
|
514
|
+
|
|
515
|
+
if type_byte == 0xF0:
|
|
516
|
+
# Text
|
|
517
|
+
text_len = struct.unpack('>H', data_part[3:5])[0]
|
|
518
|
+
text_data = zlib.decompress(data_part[5:5+text_len])
|
|
519
|
+
return DecodedMessage(data=text_data.decode('utf-8'), success=True, symbol_families=["text"])
|
|
520
|
+
|
|
521
|
+
if type_byte == 0xF1:
|
|
522
|
+
# Primitive
|
|
523
|
+
value, _ = self._decode_value(data_part[3:])
|
|
524
|
+
return DecodedMessage(data=value, success=True, symbol_families=["primitive"])
|
|
525
|
+
|
|
526
|
+
# Dictionary
|
|
527
|
+
content_len = struct.unpack('>H', data_part[3:5])[0]
|
|
528
|
+
content = data_part[5:5+content_len]
|
|
529
|
+
|
|
530
|
+
decoded_dict = self._decode_key_values(content)
|
|
531
|
+
families = self._detect_intent(decoded_dict)[1]
|
|
532
|
+
|
|
533
|
+
return DecodedMessage(data=decoded_dict, success=True, symbol_families=families)
|
|
534
|
+
|
|
535
|
+
except Exception as e:
|
|
536
|
+
return DecodedMessage(data=None, success=False, symbol_families=[], error=str(e))
|
|
537
|
+
|
|
538
|
+
def _decode_key_values(self, data: bytes) -> Dict:
|
|
539
|
+
"""Decode key-value pairs."""
|
|
540
|
+
result = {}
|
|
541
|
+
pos = 0
|
|
542
|
+
|
|
543
|
+
while pos < len(data):
|
|
544
|
+
# Decode key
|
|
545
|
+
key_marker = data[pos]
|
|
546
|
+
pos += 1
|
|
547
|
+
|
|
548
|
+
if key_marker & 0x80:
|
|
549
|
+
# Common key
|
|
550
|
+
key_id = key_marker & 0x7F
|
|
551
|
+
key = self.COMMON_KEYS_REV.get(key_id, f"key_{key_id}")
|
|
552
|
+
else:
|
|
553
|
+
# Variable length key
|
|
554
|
+
key_len = key_marker
|
|
555
|
+
if pos + key_len > len(data):
|
|
556
|
+
break
|
|
557
|
+
key = data[pos:pos+key_len].decode('utf-8')
|
|
558
|
+
pos += key_len
|
|
559
|
+
|
|
560
|
+
# Decode value
|
|
561
|
+
if pos >= len(data):
|
|
562
|
+
break
|
|
563
|
+
value, consumed = self._decode_value(data[pos:])
|
|
564
|
+
pos += consumed
|
|
565
|
+
|
|
566
|
+
result[key] = value
|
|
567
|
+
|
|
568
|
+
return result
|
|
569
|
+
|
|
570
|
+
def _decode_value(self, data: bytes) -> Tuple[Any, int]:
|
|
571
|
+
"""Decode a value, returning (value, bytes_consumed)."""
|
|
572
|
+
if len(data) == 0:
|
|
573
|
+
return None, 0
|
|
574
|
+
|
|
575
|
+
marker = data[0]
|
|
576
|
+
|
|
577
|
+
if marker == 0x00:
|
|
578
|
+
return None, 1
|
|
579
|
+
if marker == 0x01:
|
|
580
|
+
return True, 1
|
|
581
|
+
if marker == 0x02:
|
|
582
|
+
return False, 1
|
|
583
|
+
|
|
584
|
+
if marker == 0x10:
|
|
585
|
+
return data[1], 2
|
|
586
|
+
if marker == 0x11:
|
|
587
|
+
return struct.unpack('b', data[1:2])[0], 2
|
|
588
|
+
if marker == 0x12:
|
|
589
|
+
return struct.unpack('>h', data[1:3])[0], 3
|
|
590
|
+
if marker == 0x14:
|
|
591
|
+
return struct.unpack('>i', data[1:5])[0], 5
|
|
592
|
+
|
|
593
|
+
if marker == 0x18:
|
|
594
|
+
return struct.unpack('>f', data[1:5])[0], 5
|
|
595
|
+
|
|
596
|
+
if marker == 0x20:
|
|
597
|
+
val_id = data[1]
|
|
598
|
+
return self.COMMON_VALUES_REV.get(val_id, f"val_{val_id}"), 2
|
|
599
|
+
|
|
600
|
+
if 0x30 <= marker <= 0x3F:
|
|
601
|
+
str_len = marker & 0x0F
|
|
602
|
+
return data[1:1+str_len].decode('utf-8'), 1 + str_len
|
|
603
|
+
|
|
604
|
+
if marker == 0x40:
|
|
605
|
+
str_len = data[1]
|
|
606
|
+
return data[2:2+str_len].decode('utf-8'), 2 + str_len
|
|
607
|
+
|
|
608
|
+
if marker == 0x50:
|
|
609
|
+
return [], 1
|
|
610
|
+
|
|
611
|
+
if marker == 0x51:
|
|
612
|
+
arr_len = data[1]
|
|
613
|
+
result = []
|
|
614
|
+
pos = 2
|
|
615
|
+
for _ in range(arr_len):
|
|
616
|
+
if pos >= len(data):
|
|
617
|
+
break
|
|
618
|
+
val, consumed = self._decode_value(data[pos:])
|
|
619
|
+
result.append(val)
|
|
620
|
+
pos += consumed
|
|
621
|
+
return result, pos
|
|
622
|
+
|
|
623
|
+
if marker == 0x60:
|
|
624
|
+
return {}, 1
|
|
625
|
+
|
|
626
|
+
if marker == 0x61:
|
|
627
|
+
nested_len = struct.unpack('>H', data[1:3])[0]
|
|
628
|
+
nested_data = data[3:3+nested_len]
|
|
629
|
+
return self._decode_key_values(nested_data), 3 + nested_len
|
|
630
|
+
|
|
631
|
+
return None, 1
|
|
632
|
+
|
|
633
|
+
def get_stats(self) -> Dict[str, Any]:
|
|
634
|
+
"""Get encoding statistics."""
|
|
635
|
+
stats = self._encoding_stats.copy()
|
|
636
|
+
if stats["total_original"] > 0:
|
|
637
|
+
stats["overall_ratio"] = stats["total_compressed"] / stats["total_original"]
|
|
638
|
+
stats["overall_savings_pct"] = (1 - stats["overall_ratio"]) * 100
|
|
639
|
+
return stats
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
643
|
+
# Quick Test
|
|
644
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
645
|
+
|
|
646
|
+
def test_compression():
|
|
647
|
+
"""Test the compression with various payloads."""
|
|
648
|
+
encoder = EmergentSymbolEncoder()
|
|
649
|
+
|
|
650
|
+
test_cases = [
|
|
651
|
+
{"task": "analyze", "data": "market trends"},
|
|
652
|
+
{"agent_id": "agent_001", "task_type": "analysis", "parameters": {"depth": "comprehensive", "timeframe": "24h"}},
|
|
653
|
+
{"agent": {"id": "trader", "version": "2.0"}, "task": {"type": "optimization", "priority": "high"}, "context": {"risk": "moderate"}},
|
|
654
|
+
{"coordination": {"request_id": "coord_abc123", "initiator": "orchestrator", "participants": ["agent_1", "agent_2", "agent_3"]}},
|
|
655
|
+
]
|
|
656
|
+
|
|
657
|
+
print("\n" + "=" * 70)
|
|
658
|
+
print(" EMERGENT SYMBOL COMPRESSION TEST")
|
|
659
|
+
print("=" * 70 + "\n")
|
|
660
|
+
|
|
661
|
+
total_original = 0
|
|
662
|
+
total_compressed = 0
|
|
663
|
+
|
|
664
|
+
for i, data in enumerate(test_cases, 1):
|
|
665
|
+
result = encoder.encode(data)
|
|
666
|
+
|
|
667
|
+
# Verify round-trip
|
|
668
|
+
decoded = encoder.decode(result.symbols)
|
|
669
|
+
|
|
670
|
+
print(f" Test {i}:")
|
|
671
|
+
print(f" Original: {result.original_size} bytes")
|
|
672
|
+
print(f" Compressed: {result.encoded_size} bytes")
|
|
673
|
+
print(f" Ratio: {result.compression_ratio:.4f} ({(1-result.compression_ratio)*100:.1f}% saved)")
|
|
674
|
+
print(f" Families: {result.symbol_families}")
|
|
675
|
+
print(f" Round-trip: {'✅ OK' if decoded.success else '❌ FAILED'}")
|
|
676
|
+
print()
|
|
677
|
+
|
|
678
|
+
total_original += result.original_size
|
|
679
|
+
total_compressed += result.encoded_size
|
|
680
|
+
|
|
681
|
+
overall_ratio = total_compressed / total_original
|
|
682
|
+
print("=" * 70)
|
|
683
|
+
print(f" OVERALL: {total_original} → {total_compressed} bytes")
|
|
684
|
+
print(f" COMPRESSION: {overall_ratio:.4f} ({(1-overall_ratio)*100:.1f}% saved)")
|
|
685
|
+
print(f" MULTIPLIER: {total_original/total_compressed:.1f}x smaller")
|
|
686
|
+
print("=" * 70 + "\n")
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
if __name__ == "__main__":
|
|
690
|
+
test_compression()
|