entroplain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,626 @@
1
+ """
2
+ Provider integrations for entropy extraction.
3
+ """
4
+
5
+ import os
6
+ from abc import ABC, abstractmethod
7
+ from typing import List, Dict, Any, Optional, AsyncIterator, Iterator
8
+ from dataclasses import dataclass
9
+
10
+
11
@dataclass
class TokenWithEntropy:
    """A single streamed token together with its uncertainty metrics.

    Providers that cannot supply logprobs (e.g. Anthropic or Ollama
    streaming) fill entropy/logprob with 0.0 and top_logprobs with [].
    """
    # The decoded token text as emitted by the provider.
    token: str
    # Shannon entropy (bits) estimated from the token's top-logprob alternatives.
    entropy: float
    # Log-probability of the sampled token (0.0 when unavailable).
    logprob: float
    # Alternative candidates considered, as provider-shaped logprob records.
    top_logprobs: List[Dict[str, float]]
18
+
19
+
20
class BaseProvider(ABC):
    """Abstract interface shared by all LLM provider integrations.

    Concrete subclasses implement entropy calculation for their native
    logprob format plus synchronous and asynchronous token streaming.
    """

    @abstractmethod
    def calculate_entropy(self, response: Any) -> float:
        """Return the Shannon entropy derived from a provider response."""

    @abstractmethod
    def stream_with_entropy(self, *args, **kwargs) -> Iterator[TokenWithEntropy]:
        """Yield TokenWithEntropy items synchronously."""

    @abstractmethod
    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Yield TokenWithEntropy items asynchronously."""
37
+
38
+
39
class OpenAIProvider(BaseProvider):
    """
    Provider for OpenAI GPT models.

    Usage:
        provider = OpenAIProvider(api_key="sk-...")

        for token in provider.stream_with_entropy(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hello"}]
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the conventional environment variable when no key is given.
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
        self._client = None  # created lazily so importing this module never requires openai

    @property
    def client(self):
        """Lazily instantiate the OpenAI client on first access."""
        if self._client is None:
            from openai import OpenAI
            self._client = OpenAI(api_key=self.api_key)
        return self._client

    @staticmethod
    def _field(obj: Any, key: str, default: Any = None) -> Any:
        """Read *key* from either a mapping or an attribute-style SDK object.

        The openai SDK returns pydantic models (attribute access) while raw
        JSON / tests supply plain dicts; the original used dict-style
        `.get()` / `[...]` access, which raises on SDK objects.
        """
        if isinstance(obj, dict):
            return obj.get(key, default)
        return getattr(obj, key, default)

    def calculate_entropy(self, logprobs_data: Any) -> float:
        """Return Shannon entropy (bits) over the reported top-logprob alternatives.

        This is an approximation: only the returned top-k alternatives are
        summed, not the full vocabulary distribution.
        """
        import math

        top = self._field(logprobs_data, "top_logprobs") if logprobs_data else None
        if not top:
            return 0.0

        entropy = 0.0
        for lp in top:
            prob = math.exp(self._field(lp, "logprob", 0.0))
            if prob > 0:
                # prob > 0 is guaranteed by the guard, so no epsilon is needed;
                # the original's log2(prob + 1e-10) slightly biased the estimate.
                entropy -= prob * math.log2(prob)
        return entropy

    def _tokens_from_choice(self, choice: Any) -> Iterator[TokenWithEntropy]:
        """Yield TokenWithEntropy items carried by one streamed choice, if any."""
        delta = self._field(choice, "delta")
        if not (delta and self._field(delta, "content")):
            return
        logprobs_data = self._field(choice, "logprobs")
        content_items = self._field(logprobs_data, "content") if logprobs_data else None
        for content in content_items or []:
            yield TokenWithEntropy(
                token=self._field(content, "token", ""),
                entropy=self.calculate_entropy(content),
                logprob=self._field(content, "logprob", 0),
                top_logprobs=self._field(content, "top_logprobs", []) or [],
            )

    def stream_with_entropy(
        self,
        model: str = "gpt-4o",
        messages: List[Dict] = None,
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream tokens with per-token entropy from a chat completion.

        logprobs is forced on (entropy cannot be computed without it);
        top_logprobs defaults to 5 but respects a caller-supplied value.
        """
        kwargs["logprobs"] = True
        kwargs.setdefault("top_logprobs", 5)

        response = self.client.chat.completions.create(
            model=model,
            messages=messages or [],
            stream=True,
            **kwargs
        )

        for chunk in response:
            if not chunk.choices:
                continue
            yield from self._tokens_from_choice(chunk.choices[0])

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async version of stream_with_entropy.

        Creates a one-shot AsyncOpenAI client and closes it when the stream
        ends (the original leaked the client's HTTP session).
        """
        from openai import AsyncOpenAI

        client = AsyncOpenAI(api_key=self.api_key)
        kwargs["logprobs"] = True
        kwargs.setdefault("top_logprobs", 5)

        try:
            response = await client.chat.completions.create(
                *args,
                stream=True,
                **kwargs
            )
            async for chunk in response:
                if not chunk.choices:
                    continue
                for token in self._tokens_from_choice(chunk.choices[0]):
                    yield token
        finally:
            await client.close()
144
+
145
+
146
class AnthropicProvider(BaseProvider):
    """
    Provider for Anthropic Claude models.

    Usage:
        provider = AnthropicProvider(api_key="sk-ant-...")

        for token in provider.stream_with_entropy(
            model="claude-sonnet-4-20250514",
            messages=[{"role": "user", "content": "Hello"}]
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the conventional environment variable when no key is given.
        self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        self._client = None  # created lazily so importing this module never requires anthropic

    @property
    def client(self):
        """Lazily instantiate the Anthropic client on first access."""
        if self._client is None:
            import anthropic
            self._client = anthropic.Anthropic(api_key=self.api_key)
        return self._client

    def calculate_entropy(self, logprobs_data: Dict) -> float:
        """Return Shannon entropy (bits) from a dict carrying 'top_logprobs'.

        Approximates entropy using only the provided top-k alternatives.
        """
        if not logprobs_data or "top_logprobs" not in logprobs_data:
            return 0.0

        import math
        entropy = 0.0
        for lp in logprobs_data["top_logprobs"]:
            prob = math.exp(lp["logprob"])
            if prob > 0:
                # prob > 0 is guaranteed here, so no epsilon term is needed.
                entropy -= prob * math.log2(prob)
        return entropy

    def stream_with_entropy(
        self,
        model: str = "claude-sonnet-4-20250514",
        messages: List[Dict] = None,
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream text deltas as TokenWithEntropy items.

        The Anthropic Messages API does not accept logprobs parameters
        (the original injected logprobs/top_logprobs kwargs, which the API
        rejects), so none are sent and entropy is always reported as 0.0.
        """
        # max_tokens is a required parameter of the Messages API —
        # supply a default when the caller did not.
        kwargs.setdefault("max_tokens", 1024)

        with self.client.messages.stream(
            model=model,
            messages=messages or [],
            **kwargs
        ) as stream:
            for event in stream:
                if event.type == "content_block_delta":
                    delta = event.delta
                    # Per-token logprobs are not exposed in streaming,
                    # so entropy/logprob are reported as 0.0.
                    if hasattr(delta, "text"):
                        yield TokenWithEntropy(
                            token=delta.text,
                            entropy=0.0,
                            logprob=0.0,
                            top_logprobs=[]
                        )

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async streaming is not implemented for Anthropic."""
        raise NotImplementedError("Use sync streaming for Anthropic")
215
+
216
+
217
class GeminiProvider(BaseProvider):
    """
    Provider for Google Gemini models.

    Usage:
        provider = GeminiProvider(api_key="...")

        for token in provider.stream_with_entropy(
            model="gemini-2.0-flash",
            prompt="Hello"
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the conventional environment variable when no key is given.
        self.api_key = api_key or os.environ.get("GOOGLE_API_KEY")
        self._client = None  # configured lazily on first use

    @property
    def client(self):
        """Lazily import and configure the google.generativeai module."""
        if self._client is None:
            import google.generativeai as genai
            genai.configure(api_key=self.api_key)
            self._client = genai
        return self._client

    @staticmethod
    def _field(obj: Any, key: str, default: Any = None) -> Any:
        """Read *key* from a mapping or an attribute-style SDK object.

        stream_with_entropy wraps the SDK's logprobs object (attribute
        access) in a plain dict; the original then called `.get()` on that
        object, which raises AttributeError. Support both shapes.
        """
        if isinstance(obj, dict):
            return obj.get(key, default)
        return getattr(obj, key, default)

    def calculate_entropy(self, logprobs_data: Dict) -> float:
        """Return Shannon entropy (bits) from Gemini-shaped logprob data.

        Expects {"candidates": [{"logprobs": ...}]} where the inner value
        is either a dict or an SDK object carrying 'top_logprobs'.
        """
        if not logprobs_data:
            return 0.0

        import math
        entropy = 0.0

        candidates = logprobs_data.get("candidates", [])
        if candidates:
            # 'logprobs' may be present but None — guard before field access.
            logprobs = candidates[0].get("logprobs") or {}
            for lp in self._field(logprobs, "top_logprobs", []) or []:
                prob = math.exp(self._field(lp, "logprob", 0))
                if prob > 0:
                    # prob > 0 is guaranteed here, so no epsilon term is needed.
                    entropy -= prob * math.log2(prob)

        return entropy

    def stream_with_entropy(
        self,
        model: str = "gemini-2.0-flash",
        prompt: str = "",
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream text chunks with (approximate) entropy from Gemini."""
        model_instance = self.client.GenerativeModel(model)

        # Ask the API to return logprobs alongside the generated text.
        generation_config = {
            "response_logprobs": True,
            "logprobs": kwargs.get("top_logprobs", 5)
        }

        response = model_instance.generate_content(
            prompt,
            generation_config=generation_config,
            stream=True
        )

        for chunk in response:
            # chunk.text raises ValueError when the chunk carries no text
            # part (e.g. a safety block); treat such chunks as empty.
            try:
                text = chunk.text
            except ValueError:
                continue
            if not text:
                continue

            logprobs = getattr(chunk, "logprobs", None)
            entropy = (
                self.calculate_entropy({"candidates": [{"logprobs": logprobs}]})
                if logprobs else 0.0
            )

            yield TokenWithEntropy(
                token=text,
                entropy=entropy,
                logprob=0.0,
                top_logprobs=[]
            )

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async streaming is not yet implemented for Gemini."""
        raise NotImplementedError("Async streaming not yet implemented")
299
+
300
+
301
class NVIDIAProvider(BaseProvider):
    """
    Provider for the NVIDIA NIM API (OpenAI-compatible SSE endpoint).

    Usage:
        provider = NVIDIAProvider(api_key="nvapi-...")

        for token in provider.stream_with_entropy(
            model="meta/llama-3.1-70b-instruct",
            messages=[{"role": "user", "content": "Hello"}]
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the conventional environment variable when no key is given.
        self.api_key = api_key or os.environ.get("NVIDIA_API_KEY")
        self.base_url = "https://integrate.api.nvidia.com/v1"

    def calculate_entropy(self, logprobs_data: Dict) -> float:
        """Return Shannon entropy (bits) from OpenAI-compatible logprob data.

        Approximates entropy using only the provided top-k alternatives.
        """
        if not logprobs_data or "top_logprobs" not in logprobs_data:
            return 0.0

        import math
        entropy = 0.0
        for lp in logprobs_data["top_logprobs"]:
            prob = math.exp(lp["logprob"])
            if prob > 0:
                # prob > 0 is guaranteed here, so no epsilon term is needed.
                entropy -= prob * math.log2(prob)
        return entropy

    def _headers(self) -> Dict[str, str]:
        """Auth/content headers shared by the sync and async requests."""
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

    def _tokens_from_chunk(self, chunk: Dict) -> Iterator[TokenWithEntropy]:
        """Yield TokenWithEntropy items found in one parsed SSE chunk."""
        for choice in chunk.get("choices", []):
            delta = choice.get("delta") or {}
            if "content" not in delta:
                continue
            # 'logprobs' may be present but null in the SSE JSON — the
            # original's choice.get("logprobs", {}) then crashed on None.
            lp_items = (choice.get("logprobs") or {}).get("content") or []
            for lp_data in lp_items:
                yield TokenWithEntropy(
                    token=lp_data.get("token", ""),
                    entropy=self.calculate_entropy(lp_data),
                    logprob=lp_data.get("logprob", 0),
                    top_logprobs=lp_data.get("top_logprobs", [])
                )

    def stream_with_entropy(
        self,
        model: str = "meta/llama-3.1-70b-instruct",
        messages: List[Dict] = None,
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream tokens with entropy over the NIM HTTP SSE endpoint."""
        import requests
        import json

        payload = {
            "model": model,
            "messages": messages or [],
            "logprobs": True,
            "top_logprobs": kwargs.get("top_logprobs", 5),
            "stream": True,
            **kwargs
        }

        response = requests.post(
            f"{self.base_url}/chat/completions",
            headers=self._headers(),
            json=payload,
            stream=True,
            # Bound the connect / per-read wait so a dead endpoint cannot hang forever.
            timeout=(10, 60)
        )
        # Surface auth/quota/model errors instead of silently yielding nothing.
        response.raise_for_status()

        for line in response.iter_lines():
            if not line:
                continue

            line = line.decode("utf-8")
            if not line.startswith("data: "):
                continue
            data = line[6:]
            if data == "[DONE]":
                break

            try:
                chunk = json.loads(data)
            except json.JSONDecodeError:
                continue
            yield from self._tokens_from_chunk(chunk)

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async version of stream_with_entropy (aiohttp SSE client)."""
        import aiohttp
        import json

        payload = {
            "model": kwargs.get("model", "meta/llama-3.1-70b-instruct"),
            "messages": kwargs.get("messages", []),
            "logprobs": True,
            "top_logprobs": kwargs.get("top_logprobs", 5),
            "stream": True
        }

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.base_url}/chat/completions",
                headers=self._headers(),
                json=payload
            ) as response:
                response.raise_for_status()
                async for raw in response.content:
                    line = raw.decode("utf-8").strip()
                    if not line.startswith("data: "):
                        continue

                    data = line[6:]
                    if data == "[DONE]":
                        break

                    try:
                        chunk = json.loads(data)
                    except json.JSONDecodeError:
                        continue
                    for token in self._tokens_from_chunk(chunk):
                        yield token
439
+
440
+
441
class OllamaProvider(BaseProvider):
    """
    Provider for Ollama (local models).

    Usage:
        provider = OllamaProvider()

        for token in provider.stream_with_entropy(
            model="llama3.1",
            prompt="Hello"
        ):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, host: str = "http://localhost:11434"):
        # Base URL of the local Ollama server.
        self.host = host

    def calculate_entropy(self, logits: List[float]) -> float:
        """Return Shannon entropy (bits) of a softmax over raw logits.

        Returns 0.0 for an empty logit list (the original raised
        ValueError from max() on empty input).
        """
        import math

        if not logits:
            return 0.0

        # Numerically stable softmax: subtract the max logit first.
        max_logit = max(logits)
        exp_logits = [math.exp(l - max_logit) for l in logits]
        sum_exp = sum(exp_logits)
        probs = [e / sum_exp for e in exp_logits]

        # Shannon entropy in bits.
        entropy = 0.0
        for p in probs:
            if p > 0:
                entropy -= p * math.log2(p)

        return entropy

    def stream_with_entropy(
        self,
        model: str = "llama3.1",
        prompt: str = "",
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream generated tokens from Ollama's /api/generate endpoint.

        Ollama does not expose logits/logprobs by default, so entropy is
        always reported as 0.0; use LlamaCppProvider for real entropy.
        """
        import requests
        import json

        response = requests.post(
            f"{self.host}/api/generate",
            json={
                "model": model,
                "prompt": prompt,
                "stream": True,
                "options": kwargs.get("options", {})
            },
            stream=True
        )
        # Surface "model not found" / server errors instead of yielding nothing.
        response.raise_for_status()

        for line in response.iter_lines():
            if not line:
                continue

            data = json.loads(line)
            token = data.get("response", "")

            if token:
                yield TokenWithEntropy(
                    token=token,
                    entropy=0.0,  # would need logits, which Ollama does not expose
                    logprob=0.0,
                    top_logprobs=[]
                )

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async version of stream_with_entropy."""
        import aiohttp
        import json

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.host}/api/generate",
                json={
                    "model": kwargs.get("model", "llama3.1"),
                    "prompt": kwargs.get("prompt", ""),
                    "stream": True
                }
            ) as response:
                response.raise_for_status()
                async for raw in response.content:
                    line = raw.decode("utf-8").strip()
                    if not line:
                        continue

                    data = json.loads(line)
                    token = data.get("response", "")

                    if token:
                        yield TokenWithEntropy(
                            token=token,
                            entropy=0.0,
                            logprob=0.0,
                            top_logprobs=[]
                        )
543
+
544
+
545
class LlamaCppProvider(BaseProvider):
    """
    Provider for llama.cpp (direct Python bindings).

    This provides full access to logits for accurate entropy calculation.

    Usage:
        provider = LlamaCppProvider(model_path="./llama-3.1.gguf")

        for token in provider.stream_with_entropy(prompt="Hello"):
            print(f"{token.token} (entropy: {token.entropy:.3f})")
    """

    def __init__(self, model_path: str):
        self.model_path = model_path
        self._model = None  # loaded lazily: model load is expensive

    @property
    def model(self):
        """Lazily load the GGUF model with full-logit tracking enabled."""
        if self._model is None:
            from llama_cpp import Llama
            self._model = Llama(
                model_path=self.model_path,
                logits_all=True,  # required so logits are retained for entropy
                verbose=False
            )
        return self._model

    def calculate_entropy(self, logits: List[float]) -> float:
        """Return Shannon entropy (bits) of a softmax over raw logits.

        Returns 0.0 for empty input (np.max on an empty array raises).
        The original also imported math here without using it.
        """
        import numpy as np

        arr = np.asarray(logits, dtype=float)
        if arr.size == 0:
            return 0.0

        # Numerically stable softmax.
        exp_logits = np.exp(arr - np.max(arr))
        probs = exp_logits / exp_logits.sum()

        # Shannon entropy over the non-zero probabilities.
        nz = probs[probs > 0]
        return float(-np.sum(nz * np.log2(nz)))

    def stream_with_entropy(
        self,
        prompt: str = "",
        max_tokens: int = 512,
        **kwargs
    ) -> Iterator[TokenWithEntropy]:
        """Stream tokens with entropy computed from the model's raw logits."""
        generator = self.model.create_completion(
            prompt=prompt,
            max_tokens=max_tokens,
            stream=True,
            **kwargs
        )

        for chunk in generator:
            for choice in chunk.get("choices", []):
                token = choice.get("text", "")
                if not token:
                    continue
                # NOTE(review): _ctx.get_logits() is a private llama-cpp-python
                # API and presumably returns the logits of the most recent
                # token — confirm against the installed binding version.
                logits = self.model._ctx.get_logits()
                yield TokenWithEntropy(
                    token=token,
                    entropy=self.calculate_entropy(logits),
                    logprob=0.0,
                    top_logprobs=[]
                )

    async def astream_with_entropy(self, *args, **kwargs) -> AsyncIterator[TokenWithEntropy]:
        """Async wrapper around the synchronous stream.

        The underlying llama.cpp call is blocking; this only yields control
        to the event loop between tokens — it does NOT offload work to a
        thread pool (which the original docstring incorrectly claimed, while
        also fetching an event loop it never used).
        """
        import asyncio

        for token in self.stream_with_entropy(*args, **kwargs):
            yield token
            await asyncio.sleep(0)  # let other tasks run between tokens
package/examples.md ADDED
@@ -0,0 +1,40 @@
1
+ examples:
2
+ - |
3
+ # Python
4
+ from entroplain import EntropyMonitor
5
+
6
+ monitor = EntropyMonitor()
7
+ monitor.track("Hello", 0.5)
8
+
9
+ if monitor.should_exit():
10
+ print("Reasoning complete!")
11
+
12
+ - |
13
+ # CLI
14
+ entroplain analyze "What is 2+2?" --model gpt-4o
15
+
16
+ - |
17
+ # With NVIDIA NIM
18
+ from entroplain import NVIDIAProvider, EntropyMonitor
19
+
20
+ provider = NVIDIAProvider()
21
+ monitor = EntropyMonitor()
22
+
23
+ for token in provider.stream_with_entropy(
24
+ model="meta/llama-3.1-70b-instruct",
25
+ messages=[{"role": "user", "content": "Hello"}]
26
+ ):
27
+ monitor.track(token.token, token.entropy)
28
+ if monitor.should_exit():
29
+ break
30
+
31
+ - |
32
+ # Agent hook
33
+ from entroplain.hooks import EntropyHook
34
+
35
+ hook = EntropyHook(config={"entropy_threshold": 0.15})
36
+
37
+ for token in agent.generate():
38
+ result = hook.on_token(token, entropy)
39
+ if result["should_exit"]:
40
+ break