entroplain 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +103 -0
- package/LICENSE +21 -0
- package/README.md +389 -0
- package/dist/entroplain-0.1.0-py3-none-any.whl +0 -0
- package/dist/entroplain-0.1.0.tar.gz +0 -0
- package/dist/hooks.d.ts.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/monitor.d.ts.map +1 -0
- package/dist/types.d.ts.map +1 -0
- package/docs/USAGE.md +302 -0
- package/entroplain/__init__.py +30 -0
- package/entroplain/cli.py +152 -0
- package/entroplain/hooks.py +183 -0
- package/entroplain/monitor.py +272 -0
- package/entroplain/providers.py +626 -0
- package/examples.md +40 -0
- package/package.json +44 -0
- package/pyproject.toml +85 -0
- package/src/hooks.ts +130 -0
- package/src/index.ts +9 -0
- package/src/monitor.ts +252 -0
- package/src/types.ts +58 -0
- package/tests/test_functional.py +303 -0
- package/tests/test_monitor.py +165 -0
- package/tsconfig.json +19 -0
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provider integrations for entropy extraction.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import List, Dict, Any, Optional, AsyncIterator, Iterator
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class TokenWithEntropy:
    """A single generated token annotated with uncertainty information."""

    token: str  # decoded token text as emitted by the provider
    entropy: float  # Shannon entropy (bits) of the token's distribution
    logprob: float  # log-probability of the sampled token
    top_logprobs: List[Dict[str, float]]  # alternative tokens with their logprobs
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BaseProvider(ABC):
    """Abstract interface that every LLM provider implements.

    Concrete providers convert a model's streamed output into
    TokenWithEntropy values, one per token.
    """

    @abstractmethod
    def calculate_entropy(self, response: Any) -> float:
        """Return the entropy derived from a provider response."""
        ...

    @abstractmethod
    def stream_with_entropy(self, *args, **kwargs) -> Iterator[TokenWithEntropy]:
        """Synchronously stream tokens annotated with entropy."""
        ...

    @abstractmethod
    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Asynchronously stream tokens annotated with entropy."""
        ...
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class OpenAIProvider(BaseProvider):
    """
    Provider for OpenAI GPT models.

    Streams chat completions with ``logprobs`` enabled and converts each
    token's top-logprob distribution into a Shannon entropy (bits).

    Usage:
        provider = OpenAIProvider(api_key="sk-...")

        for token in provider.stream_with_entropy(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hello"}]
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the conventional environment variable when no key is given.
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
        # Client is built lazily so importing this module does not require
        # the openai package to be installed.
        self._client = None

    @property
    def client(self):
        """Lazily create and cache the synchronous OpenAI client."""
        if self._client is None:
            from openai import OpenAI
            self._client = OpenAI(api_key=self.api_key)
        return self._client

    @staticmethod
    def _get(obj: Any, key: str, default: Any = None) -> Any:
        """Read *key* from either a plain dict or an SDK object attribute."""
        if isinstance(obj, dict):
            return obj.get(key, default)
        return getattr(obj, key, default)

    def calculate_entropy(self, logprobs_data: Any) -> float:
        """
        Calculate Shannon entropy (bits) from OpenAI logprobs data.

        Accepts either a plain dict (``{"top_logprobs": [{"logprob": ...}]}``)
        or the SDK's token-logprob objects.  The original implementation only
        handled dicts and raised TypeError on SDK objects (``"x" in obj``).
        """
        top_logprobs = self._get(logprobs_data, "top_logprobs")
        if not top_logprobs:
            return 0.0

        import math
        entropy = 0.0
        for lp in top_logprobs:
            prob = math.exp(self._get(lp, "logprob", 0.0))
            # exp() may underflow to 0.0 for very negative logprobs; guard
            # makes the epsilon hack of the original unnecessary.
            if prob > 0:
                entropy -= prob * math.log2(prob)
        return entropy

    def _emit_tokens(self, logprobs_content) -> Iterator[TokenWithEntropy]:
        """Yield a TokenWithEntropy for each entry of a chunk's logprobs content."""
        for content in logprobs_content:
            yield TokenWithEntropy(
                token=self._get(content, "token", ""),
                entropy=self.calculate_entropy(content),
                logprob=self._get(content, "logprob", 0),
                top_logprobs=self._get(content, "top_logprobs", []) or [],
            )

    def stream_with_entropy(
        self,
        model: str = "gpt-4o",
        messages: Optional[List[Dict]] = None,
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """
        Stream tokens with per-token entropy.

        ``logprobs`` is always forced on; ``top_logprobs`` defaults to 5 and
        may be overridden via kwargs.
        """
        # Ensure logprobs are enabled so entropy can be computed.
        kwargs["logprobs"] = True
        kwargs["top_logprobs"] = kwargs.get("top_logprobs", 5)

        response = self.client.chat.completions.create(
            model=model,
            messages=messages or [],
            stream=True,
            **kwargs
        )

        for chunk in response:
            if not chunk.choices:
                continue

            choice = chunk.choices[0]

            if choice.delta and choice.delta.content:
                # In streaming chunks the logprobs ride on the choice,
                # not on the delta.
                logprobs_data = getattr(choice, "logprobs", None)
                content = self._get(logprobs_data, "content") if logprobs_data else None
                if content:
                    yield from self._emit_tokens(content)

    async def astream_with_entropy(
        self,
        model: str = "gpt-4o",
        messages: Optional[List[Dict]] = None,
        **kwargs
    ) -> AsyncIterator[TokenWithEntropy]:
        """
        Async version of stream_with_entropy (mirrors the sync signature).

        The original forwarded ``*args`` positionally into ``create()``,
        which is keyword-only in the OpenAI SDK and could never succeed.
        """
        from openai import AsyncOpenAI

        client = AsyncOpenAI(api_key=self.api_key)
        kwargs["logprobs"] = True
        kwargs["top_logprobs"] = kwargs.get("top_logprobs", 5)

        response = await client.chat.completions.create(
            model=model,
            messages=messages or [],
            stream=True,
            **kwargs
        )

        async for chunk in response:
            if not chunk.choices:
                continue

            choice = chunk.choices[0]

            if choice.delta and choice.delta.content:
                logprobs_data = getattr(choice, "logprobs", None)
                content = self._get(logprobs_data, "content") if logprobs_data else None
                if content:
                    for item in self._emit_tokens(content):
                        yield item
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class AnthropicProvider(BaseProvider):
    """
    Provider for Anthropic Claude models.

    The Anthropic Messages API does not expose per-token logprobs in
    streaming, so streamed tokens carry ``entropy=0.0``.

    Usage:
        provider = AnthropicProvider(api_key="sk-ant-...")

        for token in provider.stream_with_entropy(
            model="claude-sonnet-4-20250514",
            messages=[{"role": "user", "content": "Hello"}]
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the conventional environment variable when no key is given.
        self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        # Client is built lazily so importing this module does not require
        # the anthropic package to be installed.
        self._client = None

    @property
    def client(self):
        """Lazily create and cache the Anthropic client."""
        if self._client is None:
            import anthropic
            self._client = anthropic.Anthropic(api_key=self.api_key)
        return self._client

    def calculate_entropy(self, logprobs_data: Dict) -> float:
        """Calculate Shannon entropy (bits) from a logprobs dict, if any."""
        if not logprobs_data or "top_logprobs" not in logprobs_data:
            return 0.0

        import math
        entropy = 0.0
        for lp in logprobs_data["top_logprobs"]:
            prob = math.exp(lp["logprob"])
            if prob > 0:
                entropy -= prob * math.log2(prob)
        return entropy

    def stream_with_entropy(
        self,
        model: str = "claude-sonnet-4-20250514",
        messages: Optional[List[Dict]] = None,
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream tokens; entropy is always 0.0 (not exposed by the API)."""
        # BUG FIX: the original injected logprobs/top_logprobs kwargs, which
        # the Anthropic Messages API does not accept and the SDK rejects.
        # Strip them in case a caller passed them for another provider.
        kwargs.pop("logprobs", None)
        kwargs.pop("top_logprobs", None)
        # The Messages API requires max_tokens; provide a sane default.
        kwargs.setdefault("max_tokens", 1024)

        with self.client.messages.stream(
            model=model,
            messages=messages or [],
            **kwargs
        ) as stream:
            for event in stream:
                if event.type == "content_block_delta":
                    # Claude doesn't expose per-token logprobs in streaming,
                    # so only the text is forwarded.
                    delta = event.delta
                    if hasattr(delta, "text"):
                        yield TokenWithEntropy(
                            token=delta.text,
                            entropy=0.0,  # not available in streaming
                            logprob=0.0,
                            top_logprobs=[]
                        )

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async version (not implemented for Anthropic)."""
        raise NotImplementedError("Use sync streaming for Anthropic")
        # Unreachable yield makes this an async generator, so callers using
        # `async for` get the NotImplementedError instead of a TypeError.
        yield  # pragma: no cover
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
class GeminiProvider(BaseProvider):
    """
    Provider for Google Gemini models.

    Usage:
        provider = GeminiProvider(api_key="...")

        for token in provider.stream_with_entropy(
            model="gemini-2.0-flash",
            prompt="Hello"
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the conventional environment variable when no key is given.
        self.api_key = api_key or os.environ.get("GOOGLE_API_KEY")
        # The genai module is configured lazily so importing this module does
        # not require the google-generativeai package to be installed.
        self._client = None

    @property
    def client(self):
        """Lazily configure and cache the genai module."""
        if self._client is None:
            import google.generativeai as genai
            genai.configure(api_key=self.api_key)
            self._client = genai
        return self._client

    @staticmethod
    def _get(obj: Any, key: str, default: Any = None) -> Any:
        """Read *key* from either a plain dict or an SDK object attribute."""
        if isinstance(obj, dict):
            return obj.get(key, default)
        return getattr(obj, key, default)

    def calculate_entropy(self, logprobs_data: Dict) -> float:
        """
        Calculate Shannon entropy (bits) from Gemini logprobs data.

        Tolerates both plain dicts and SDK objects at every level; the
        original called ``.get()`` on the SDK's logprobs object handed in
        by stream_with_entropy, which raised AttributeError.
        """
        if not logprobs_data:
            return 0.0

        import math
        entropy = 0.0

        candidates = self._get(logprobs_data, "candidates", []) or []
        if candidates:
            logprobs = self._get(candidates[0], "logprobs", {}) or {}
            for lp in self._get(logprobs, "top_logprobs", []) or []:
                prob = math.exp(self._get(lp, "logprob", 0) or 0)
                if prob > 0:
                    entropy -= prob * math.log2(prob)

        return entropy

    def stream_with_entropy(
        self,
        model: str = "gemini-2.0-flash",
        prompt: str = "",
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream tokens with entropy from a Gemini model."""
        model_instance = self.client.GenerativeModel(model)

        # Ask the API to return per-token logprobs alongside the text.
        generation_config = {
            "response_logprobs": True,
            "logprobs": kwargs.get("top_logprobs", 5)
        }

        response = model_instance.generate_content(
            prompt,
            generation_config=generation_config,
            stream=True
        )

        for chunk in response:
            if chunk.text:
                # Extract logprobs if the SDK surfaced them on the chunk.
                logprobs = getattr(chunk, "logprobs", None)
                entropy = (
                    self.calculate_entropy({"candidates": [{"logprobs": logprobs}]})
                    if logprobs else 0.0
                )

                yield TokenWithEntropy(
                    token=chunk.text,
                    entropy=entropy,
                    logprob=0.0,
                    top_logprobs=[]
                )

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async version of stream_with_entropy (not yet implemented)."""
        raise NotImplementedError("Async streaming not yet implemented")
        # Unreachable yield makes this an async generator, so callers using
        # `async for` get the NotImplementedError instead of a TypeError.
        yield  # pragma: no cover
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
class NVIDIAProvider(BaseProvider):
    """
    Provider for the NVIDIA NIM API (OpenAI-compatible, via raw HTTP/SSE).

    Usage:
        provider = NVIDIAProvider(api_key="nvapi-...")

        for token in provider.stream_with_entropy(
            model="meta/llama-3.1-70b-instruct",
            messages=[{"role": "user", "content": "Hello"}]
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the conventional environment variable when no key is given.
        self.api_key = api_key or os.environ.get("NVIDIA_API_KEY")
        self.base_url = "https://integrate.api.nvidia.com/v1"

    def calculate_entropy(self, logprobs_data: Dict) -> float:
        """Calculate Shannon entropy (bits) from NVIDIA (OpenAI-compatible) logprobs."""
        if not logprobs_data or "top_logprobs" not in logprobs_data:
            return 0.0

        import math
        entropy = 0.0
        for lp in logprobs_data["top_logprobs"]:
            prob = math.exp(lp["logprob"])
            if prob > 0:
                entropy -= prob * math.log2(prob)
        return entropy

    def _tokens_from_chunk(self, chunk: Dict) -> Iterator[TokenWithEntropy]:
        """Yield a TokenWithEntropy for every logprob entry in one SSE chunk."""
        for choice in chunk.get("choices", []):
            if "delta" in choice and "content" in choice["delta"]:
                # BUG FIX: "logprobs" is often present with a null value in
                # OpenAI-style chunks; the original `choice.get("logprobs", {})`
                # then called .get() on None and crashed.
                logprobs = (choice.get("logprobs") or {}).get("content") or []
                for lp_data in logprobs:
                    yield TokenWithEntropy(
                        token=lp_data.get("token", ""),
                        entropy=self.calculate_entropy(lp_data),
                        logprob=lp_data.get("logprob", 0),
                        top_logprobs=lp_data.get("top_logprobs", [])
                    )

    def stream_with_entropy(
        self,
        model: str = "meta/llama-3.1-70b-instruct",
        messages: Optional[List[Dict]] = None,
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream tokens with entropy via the NIM HTTP API (server-sent events)."""
        import requests
        import json

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        # kwargs last so callers may override top_logprobs etc.
        payload = {
            "model": model,
            "messages": messages or [],
            "logprobs": True,
            "top_logprobs": kwargs.get("top_logprobs", 5),
            "stream": True,
            **kwargs
        }

        response = requests.post(
            f"{self.base_url}/chat/completions",
            headers=headers,
            json=payload,
            stream=True
        )
        # Fail fast on auth/validation errors instead of silently trying to
        # parse an error body as an SSE stream.
        response.raise_for_status()

        for line in response.iter_lines():
            if not line:
                continue

            line = line.decode("utf-8")
            if line.startswith("data: "):
                data = line[len("data: "):]
                if data == "[DONE]":
                    break

                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    # Skip malformed/partial SSE payloads.
                    continue
                yield from self._tokens_from_chunk(chunk)

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async version of stream_with_entropy (reads model/messages from kwargs)."""
        import aiohttp
        import json  # hoisted out of the read loop (was re-imported per line)

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": kwargs.get("model", "meta/llama-3.1-70b-instruct"),
            "messages": kwargs.get("messages", []),
            "logprobs": True,
            "top_logprobs": kwargs.get("top_logprobs", 5),
            "stream": True
        }

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.base_url}/chat/completions",
                headers=headers,
                json=payload
            ) as response:
                # Fail fast on HTTP errors, mirroring the sync path.
                response.raise_for_status()
                async for line in response.content:
                    line = line.decode("utf-8").strip()
                    if not line or not line.startswith("data: "):
                        continue

                    data = line[len("data: "):]
                    if data == "[DONE]":
                        break

                    try:
                        chunk = json.loads(data)
                    except json.JSONDecodeError:
                        continue
                    for token in self._tokens_from_chunk(chunk):
                        yield token
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
class OllamaProvider(BaseProvider):
    """
    Provider for Ollama (local models).

    Ollama's /api/generate endpoint does not expose logits or logprobs, so
    streamed tokens carry ``entropy=0.0``; calculate_entropy is provided for
    callers who obtain logits by other means.

    Usage:
        provider = OllamaProvider()

        for token in provider.stream_with_entropy(
            model="llama3.1",
            prompt="Hello"
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, host: str = "http://localhost:11434"):
        # Base URL of the local Ollama server.
        self.host = host

    def calculate_entropy(self, logits: List[float]) -> float:
        """
        Compute the Shannon entropy (bits) of softmax(logits).

        Returns 0.0 for an empty list; the original raised ValueError
        from max() on empty input.
        """
        import math

        if not logits:
            return 0.0

        # Numerically stable softmax: subtract the max before exponentiating.
        max_logit = max(logits)
        exp_logits = [math.exp(l - max_logit) for l in logits]
        sum_exp = sum(exp_logits)
        probs = [e / sum_exp for e in exp_logits]

        # Shannon entropy over the resulting distribution.
        return -sum(p * math.log2(p) for p in probs if p > 0)

    def stream_with_entropy(
        self,
        model: str = "llama3.1",
        prompt: str = "",
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream tokens from Ollama (entropy is always 0.0 — no logits exposed)."""
        import requests
        import json

        response = requests.post(
            f"{self.host}/api/generate",
            json={
                "model": model,
                "prompt": prompt,
                "stream": True,
                "options": kwargs.get("options", {})
            },
            stream=True
        )

        for line in response.iter_lines():
            if not line:
                continue

            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                # Skip malformed stream lines instead of aborting the stream.
                continue
            token = data.get("response", "")

            if token:
                # Note: Ollama doesn't expose logits by default; for real
                # entropy you'd need to modify Ollama or use llama.cpp directly.
                yield TokenWithEntropy(
                    token=token,
                    entropy=0.0,  # would need logits
                    logprob=0.0,
                    top_logprobs=[]
                )

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async version of stream_with_entropy (model/prompt via kwargs)."""
        import aiohttp
        import json

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.host}/api/generate",
                json={
                    "model": kwargs.get("model", "llama3.1"),
                    "prompt": kwargs.get("prompt", ""),
                    "stream": True
                }
            ) as response:
                async for line in response.content:
                    line = line.decode("utf-8").strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip malformed stream lines, mirroring the sync path.
                        continue
                    token = data.get("response", "")

                    if token:
                        yield TokenWithEntropy(
                            token=token,
                            entropy=0.0,
                            logprob=0.0,
                            top_logprobs=[]
                        )
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
class LlamaCppProvider(BaseProvider):
    """
    Provider for llama.cpp (direct Python bindings).

    This provides full access to logits for accurate entropy calculation.

    Usage:
        provider = LlamaCppProvider(model_path="./llama-3.1.gguf")

        for token in provider.stream_with_entropy(prompt="Hello"):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, model_path: str):
        # Path to the GGUF model file.
        self.model_path = model_path
        # Model is loaded lazily (loading a GGUF can take seconds).
        self._model = None

    @property
    def model(self):
        """Lazily load and cache the llama.cpp model."""
        if self._model is None:
            from llama_cpp import Llama
            self._model = Llama(
                model_path=self.model_path,
                logits_all=True,  # required for entropy tracking
                verbose=False
            )
        return self._model

    def calculate_entropy(self, logits: List[float]) -> float:
        """
        Calculate Shannon entropy (bits) of softmax(logits).

        Returns 0.0 for empty input; the original raised on an empty array
        and also imported math without using it.
        """
        import numpy as np

        logits = np.asarray(logits, dtype=float)
        if logits.size == 0:
            return 0.0

        # Numerically stable softmax.
        exp_logits = np.exp(logits - np.max(logits))
        probs = exp_logits / exp_logits.sum()

        # Shannon entropy over the nonzero probabilities.
        nonzero = probs[probs > 0]
        return float(-np.sum(nonzero * np.log2(nonzero)))

    def stream_with_entropy(
        self,
        prompt: str = "",
        max_tokens: int = 512,
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream tokens with entropy from llama.cpp."""
        generator = self.model.create_completion(
            prompt=prompt,
            max_tokens=max_tokens,
            stream=True,
            **kwargs
        )

        for chunk in generator:
            for choice in chunk.get("choices", []):
                token = choice.get("text", "")
                if token:
                    # HACK: reads the raw logits of the last token through the
                    # private _ctx attribute; requires logits_all=True and may
                    # break across llama-cpp-python versions — verify on upgrade.
                    logits = self.model._ctx.get_logits()
                    entropy = self.calculate_entropy(logits)

                    yield TokenWithEntropy(
                        token=token,
                        entropy=entropy,
                        logprob=0.0,
                        top_logprobs=[]
                    )

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """
        Async wrapper over the blocking sync stream.

        NOTE: generation still runs on the event-loop thread; asyncio.sleep(0)
        only yields control between tokens.  (The original also created an
        unused event-loop reference via the deprecated get_event_loop().)
        """
        import asyncio

        for token in self.stream_with_entropy(*args, **kwargs):
            yield token
            await asyncio.sleep(0)  # cooperative yield between tokens
|
package/examples.md
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
examples:
|
|
2
|
+
- |
|
|
3
|
+
# Python
|
|
4
|
+
from entroplain import EntropyMonitor
|
|
5
|
+
|
|
6
|
+
monitor = EntropyMonitor()
|
|
7
|
+
monitor.track("Hello", 0.5)
|
|
8
|
+
|
|
9
|
+
if monitor.should_exit():
|
|
10
|
+
print("Reasoning complete!")
|
|
11
|
+
|
|
12
|
+
- |
|
|
13
|
+
# CLI
|
|
14
|
+
entroplain analyze "What is 2+2?" --model gpt-4o
|
|
15
|
+
|
|
16
|
+
- |
|
|
17
|
+
# With NVIDIA NIM
|
|
18
|
+
from entroplain import NVIDIAProvider, EntropyMonitor
|
|
19
|
+
|
|
20
|
+
provider = NVIDIAProvider()
|
|
21
|
+
monitor = EntropyMonitor()
|
|
22
|
+
|
|
23
|
+
for token in provider.stream_with_entropy(
|
|
24
|
+
model="meta/llama-3.1-70b-instruct",
|
|
25
|
+
messages=[{"role": "user", "content": "Hello"}]
|
|
26
|
+
):
|
|
27
|
+
monitor.track(token.token, token.entropy)
|
|
28
|
+
if monitor.should_exit():
|
|
29
|
+
break
|
|
30
|
+
|
|
31
|
+
- |
|
|
32
|
+
# Agent hook
|
|
33
|
+
from entroplain.hooks import EntropyHook
|
|
34
|
+
|
|
35
|
+
hook = EntropyHook(config={"entropy_threshold": 0.15})
|
|
36
|
+
|
|
37
|
+
for token in agent.generate():
|
|
38
|
+
result = hook.on_token(token, entropy)
|
|
39
|
+
if result["should_exit"]:
|
|
40
|
+
break
|