emergent-translator 1.1.0__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emergent_translator-1.1.1/PKG-INFO +248 -0
- emergent_translator-1.1.1/README.md +178 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/pyproject.toml +1 -1
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/__init__.py +1 -1
- emergent_translator-1.1.1/src/emergent_translator.egg-info/PKG-INFO +248 -0
- emergent_translator-1.1.0/PKG-INFO +0 -568
- emergent_translator-1.1.0/README.md +0 -498
- emergent_translator-1.1.0/src/emergent_translator.egg-info/PKG-INFO +0 -568
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/LICENSE +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/setup.cfg +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/adaptive_codebook.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/api_server.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/batch_encoder.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/chunk_collector.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/chunk_coordinator.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/claude_compression.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/cli.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/client_sdk.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/code_skeleton.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/core.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/emergent_symbols.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/format_handlers.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/gpu_batch_encoder.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/intelligent_router.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/metrics.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator/py.typed +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator.egg-info/SOURCES.txt +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator.egg-info/dependency_links.txt +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator.egg-info/entry_points.txt +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator.egg-info/requires.txt +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/src/emergent_translator.egg-info/top_level.txt +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/tests/test_adaptive_codebook.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/tests/test_batch_decoder.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/tests/test_benchmarks.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/tests/test_claude_compression.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/tests/test_code_skeleton.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/tests/test_format_handlers.py +0 -0
- {emergent_translator-1.1.0 → emergent_translator-1.1.1}/tests/test_sdk_api.py +0 -0

@@ -0,0 +1,248 @@
Metadata-Version: 2.4
Name: emergent-translator
Version: 1.1.1
Summary: 60x compression efficiency for AI communication through emergent language translation
Author-email: Emergent Language Team <hello@emergentlanguage.ai>
Maintainer-email: Emergent Language Team <hello@emergentlanguage.ai>
License-Expression: GPL-3.0-or-later
Project-URL: Homepage, https://emergentlanguage.ai
Project-URL: GitHub, https://github.com/maco144/emergent-language
Project-URL: Documentation, https://github.com/maco144/emergent-language/wiki
Project-URL: Repository, https://github.com/maco144/emergent-language.git
Project-URL: Bug Tracker, https://github.com/maco144/emergent-language/issues
Project-URL: Changelog, https://github.com/maco144/emergent-language/releases
Project-URL: API Docs, http://149.28.33.118:8001/docs
Project-URL: Live Demo, http://149.28.33.118:8001
Keywords: ai,compression,emergent-language,api,translation,efficiency,machine-learning,natural-language-processing
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Topic :: Communications
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Operating System :: OS Independent
Requires-Python: >=3.9
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: fastapi>=0.100.0
Requires-Dist: uvicorn[standard]>=0.23.0
Requires-Dist: python-multipart>=0.0.6
Requires-Dist: websockets>=11.0
Requires-Dist: python-dotenv>=1.0.0
Requires-Dist: pydantic>=2.0.0
Requires-Dist: httpx>=0.24.0
Requires-Dist: aiofiles>=23.0.0
Requires-Dist: psutil>=5.9.0
Requires-Dist: openai>=1.0.0
Provides-Extra: dev
Requires-Dist: pytest>=7.0; extra == "dev"
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
Requires-Dist: pytest-cov>=4.0; extra == "dev"
Requires-Dist: black>=23.0; extra == "dev"
Requires-Dist: isort>=5.12; extra == "dev"
Requires-Dist: flake8>=6.0; extra == "dev"
Requires-Dist: mypy>=1.0; extra == "dev"
Requires-Dist: pre-commit>=3.0; extra == "dev"
Requires-Dist: pyyaml>=6.0; extra == "dev"
Provides-Extra: monitoring
Requires-Dist: structlog>=23.0.0; extra == "monitoring"
Requires-Dist: prometheus-fastapi-instrumentator>=6.0.0; extra == "monitoring"
Provides-Extra: formats
Requires-Dist: pyyaml>=6.0; extra == "formats"
Requires-Dist: msgpack>=1.0; extra == "formats"
Requires-Dist: protobuf>=4.0; extra == "formats"
Requires-Dist: pyarrow>=12.0; extra == "formats"
Requires-Dist: tomli>=2.0; python_version < "3.11" and extra == "formats"
Requires-Dist: tomli_w>=1.0; extra == "formats"
Requires-Dist: pymongo>=4.0; extra == "formats"
Requires-Dist: cbor2>=5.0; extra == "formats"
Requires-Dist: openpyxl>=3.1; extra == "formats"
Provides-Extra: examples
Requires-Dist: langchain>=0.0.300; extra == "examples"
Requires-Dist: crewai>=0.1.0; extra == "examples"
Requires-Dist: jupyter>=1.0.0; extra == "examples"
Requires-Dist: matplotlib>=3.0.0; extra == "examples"
Dynamic: license-file

# Emergent Language Translator

**High-performance binary encoding for AI agent communication**

[PyPI](https://pypi.org/project/emergent-translator/) · [License](LICENSE)

Emergent Language Translator compresses structured AI messages into a compact binary format using learned codebooks, common-key dictionaries, and zlib. Batch encoding amortizes header overhead across messages — the more you batch, the better the ratio.

## Quick Start

```bash
pip install emergent-translator
```

```python
from emergent_translator import BatchEncoder

encoder = BatchEncoder()

# Encode a batch of agent messages
messages = [
    {"role": "user", "content": "analyze market trends", "priority": "high"},
    {"role": "assistant", "content": "Starting analysis", "status": "active"},
    {"role": "system", "content": "Agent coordinator online", "version": "1.0"},
]

result = encoder.encode_batch(messages)
print(f"{len(messages)} messages: 226 bytes JSON -> {len(result.payload)} bytes binary")
# 3 messages: 226 bytes JSON -> 141 bytes (38% reduction)

# Perfect round-trip reconstruction
decoded = encoder.decode_batch(result.payload)
assert decoded == messages
```

## Compression Results

The batch encoder uses a binary wire format with common-key/value dictionaries and zlib compression. Efficiency improves with batch size:

| Workload | JSON Size | Encoded Size | Reduction |
|----------|-----------|--------------|-----------|
| 3 agent messages | 226 bytes | 141 bytes | 38% |
| 10 agent messages | 750 bytes | 112 bytes | **85%** |
| 50 agent messages | 4,880 bytes | 286 bytes | **94%** |

Encoding speed: sub-millisecond (0.2ms typical).
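
The table numbers are straightforward to reproduce on your own traffic. A minimal sketch, assuming only the `encode_batch` call and `result.payload` attribute from the Quick Start (byte counts will vary with message content):

```python
import json

from emergent_translator import BatchEncoder

def compression_report(messages: list[dict]) -> tuple[int, int, float]:
    """Measure JSON size vs. encoded size for a batch of messages."""
    json_size = len(json.dumps(messages).encode("utf-8"))
    payload = BatchEncoder().encode_batch(messages).payload
    reduction = 100.0 * (1.0 - len(payload) / json_size)
    return json_size, len(payload), reduction

# Hypothetical workload: 10 similar agent messages
msgs = [{"role": "user", "content": f"task {i}", "priority": "high"} for i in range(10)]
print("%d bytes JSON -> %d bytes binary (%.0f%% reduction)" % compression_report(msgs))
```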

## Binary Format

All payloads start with magic bytes `\xE7\xB0` followed by a version byte:

```
v1/v2: MAGIC(2) + VERSION(1) + COUNT(2) + FLAGS(1) + PAYLOAD + CRC32(4)
v3:    MAGIC(2) + VERSION(1) + COUNT(2) + FLAGS(1) + CB_VERSION(2) + CB_LEN(2) + [CODEBOOK] + PAYLOAD + CRC32(4)
```

Common keys (`role`, `content`, `action`, `status`, `priority`, ...) and values (`user`, `assistant`, `system`, `high`, `low`, ...) are encoded as single-byte tokens. Remaining data is zlib-compressed.
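
The layout above is enough to sanity-check a payload before decoding it. A minimal reader sketch for the v1/v2 header; the spec here does not state field endianness or the range the CRC32 covers, so big-endian integers and a checksum over all preceding bytes are assumptions:

```python
import struct
import zlib

MAGIC = b"\xE7\xB0"

def check_header(payload: bytes) -> tuple[int, int, int]:
    """Validate magic, version, and trailing CRC32; return (version, count, flags).

    Hypothetical reader for the documented layout; assumes big-endian
    fields and a CRC32 computed over everything before the checksum.
    """
    if payload[:2] != MAGIC:
        raise ValueError("not an emergent-translator payload")
    version = payload[2]
    count, flags = struct.unpack(">HB", payload[3:6])
    (crc,) = struct.unpack(">I", payload[-4:])
    if zlib.crc32(payload[:-4]) != crc:
        raise ValueError("CRC32 mismatch: payload corrupted")
    return version, count, flags
```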

## Adaptive Codebooks

The static dictionaries cover common AI communication patterns. For domain-specific traffic, train a codebook that learns your most frequent keys and values:

```python
from emergent_translator import AdaptiveCodebook, BatchEncoder

# Train on observed traffic
codebook = AdaptiveCodebook()
for msg in training_messages:
    codebook.observe(msg)
codebook.rebuild(min_freq=5)

# Encode with learned codebook (v3 format, codebook embedded in payload)
encoder = BatchEncoder()
result = encoder.encode_batch(messages, codebook=codebook.active)
decoded = encoder.decode_batch(result.payload)  # codebook auto-extracted
```

Train a codebook from synthetic data:

```bash
python scripts/train_codebook.py --messages 50000 --benchmark
```

## Multi-Format Support

Parse and serialize 13+ formats, then compress through the batch encoder:

```python
from emergent_translator import detect_format, get_handler, BatchEncoder

fmt = detect_format("data.csv")  # "csv"
parse_fn, serialize_fn = get_handler(fmt)
records = parse_fn(open("data.csv").read())

encoder = BatchEncoder()
result = encoder.encode_batch(records)
```

Supported: JSON, CSV, JSONL, YAML, TOML, INI, XML, MessagePack, Protobuf, Parquet, Arrow, BSON, CBOR, XLSX.
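
Because `get_handler` returns both a parser and a serializer, the snippet above extends naturally to lossless format conversion. A sketch continuing from those variables; the serializer signature and the `"json"` format name are assumptions beyond what the snippet shows:

```python
# Finish the round trip: decode, verify, then re-serialize as JSON.
# Assumes decode_batch returns the original record list and that the
# serializer returned by get_handler accepts a list of records.
decoded_records = encoder.decode_batch(result.payload)
assert decoded_records == records  # lossless round trip

_, to_json = get_handler("json")
print(to_json(decoded_records))  # same data, re-serialized as JSON
```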

## GPU Batch Encoder

For higher throughput, use the GPU-accelerated encoder (falls back to CPU with ThreadPoolExecutor if CuPy is unavailable):

```python
from emergent_translator import GPUBatchEncoder

gpu_encoder = GPUBatchEncoder(num_workers=8)
result = gpu_encoder.encode_batch(messages)
decoded = gpu_encoder.decode_batch(result.payload)
```
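
Whether the GPU path pays off depends on batch size and hardware, so it is worth measuring on your own traffic. A quick throughput comparison, using only the `encode_batch` call shown above on a synthetic batch:

```python
import time

from emergent_translator import BatchEncoder, GPUBatchEncoder

# Hypothetical workload: 10,000 small agent messages
batch = [{"role": "user", "content": f"msg {i}", "priority": "high"} for i in range(10_000)]

for enc in (BatchEncoder(), GPUBatchEncoder(num_workers=8)):
    start = time.perf_counter()
    enc.encode_batch(batch)
    elapsed = time.perf_counter() - start
    print(f"{type(enc).__name__}: {len(batch) / elapsed:,.0f} messages/sec")
```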

## LLM Token Savings

Two complementary modules for reducing token usage with LLMs like Claude:

### Code Skeletonization

Strip Python files to signatures + docstrings. Feed Claude the *structure* without paying for implementation lines:

```python
from emergent_translator import skeletonize_file

result = skeletonize_file("my_module.py", focal=["important_func"])
print(f"{result.original_tokens} -> {result.skeleton_tokens} tokens "
      f"({result.token_reduction_pct:.0f}% reduction)")
```
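
Conceptually, skeletonization keeps each function's signature and docstring and drops the body. A standalone illustration using the standard-library `ast` module (not the package's implementation, which adds the token counting and focal-function handling shown above):

```python
import ast

def skeletonize_source(source: str) -> str:
    """Replace every function body with its docstring plus `...`."""
    tree = ast.parse(source)
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            body: list[ast.stmt] = []
            doc = ast.get_docstring(node, clean=False)
            if doc is not None:
                body.append(ast.Expr(ast.Constant(doc)))   # keep the docstring
            body.append(ast.Expr(ast.Constant(...)))       # stub out the body
            node.body = body
    return ast.unparse(ast.fix_missing_locations(tree))

print(skeletonize_source(open("my_module.py").read()))
```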

### Claude Text Compression

Compress keys and values in structured data flowing through Claude API conversations:

```python
from emergent_translator import ClaudeCompressor

compressor = ClaudeCompressor()
system = compressor.system_prompt_prefix() + "\n\nYour prompt..."
compressed_msgs = compressor.compress_messages(messages)
```

## Project Structure

```
src/emergent_translator/     # pip-installable package
    batch_encoder.py         # v1 batch encoder (encode/decode)
    gpu_batch_encoder.py     # v2 GPU-accelerated encoder
    adaptive_codebook.py     # v3 learned codebooks
    format_handlers.py       # 13+ format parsers
    emergent_symbols.py      # symbol encoder
    api_server.py            # FastAPI server
    cli.py                   # CLI tool
scripts/                     # benchmarks, stress tests, workers
tests/                       # 535 tests
```

## Development

```bash
git clone https://github.com/maco144/emergent-language
cd emergent-language
pip install -e ".[dev,formats]"
python -m pytest tests/ -v
```

## Docker

```bash
docker build -t emergent-translator .
docker run -p 8000:8000 emergent-translator
```

## License

GPL-3.0-or-later. See [LICENSE](LICENSE) for details.

Commercial licensing available — see [COMMERCIAL_LICENSE.md](COMMERCIAL_LICENSE.md).

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "emergent-translator"
-version = "1.1.0"
+version = "1.1.1"
 description = "60x compression efficiency for AI communication through emergent language translation"
 readme = "README.md"
 license = "GPL-3.0-or-later"

@@ -32,7 +32,7 @@ from .format_handlers import (

 from .emergent_symbols import EmergentSymbolEncoder

-__version__ = "1.1.0"
+__version__ = "1.1.1"
 __author__ = "Emergent Language Team"
 __email__ = "hello@emergentlanguage.ai"
 __description__ = "60x compression efficiency for AI communication"