flashlite-0.1.2-py3-none-any.whl → flashlite-0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flashlite/_spinner.py ADDED
@@ -0,0 +1,91 @@
1
+ """Terminal spinner for user-visible progress during async waits.
2
+
3
+ Provides a lightweight, non-blocking spinner that renders to stderr
4
+ when — and only when — the output is an interactive terminal. Multiple
5
+ concurrent ``Spinner`` instances (e.g. from ``complete_many``) are
6
+ gracefully collapsed so only one animation is visible at a time.
7
+ """
8
+
9
+ import asyncio
10
+ import sys
11
+ import time
12
+
13
+ # Braille-dot frames — smooth and compact.
14
+ _FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
15
+ _INTERVAL = 0.08 # seconds between frame updates
16
+
17
+ # ANSI helpers
18
+ _CYAN = "\033[36m"
19
+ _DIM = "\033[2m"
20
+ _RESET = "\033[0m"
21
+ _CLEAR_LINE = "\r\033[K"
22
+
23
+ # Module-level guard — only one spinner renders at a time.
24
+ _active: bool = False
25
+
26
+
27
+ class Spinner:
28
+ """Async context manager that shows a terminal spinner on stderr.
29
+
30
+ The spinner only appears when stderr is a TTY **and** no other
31
+ ``Spinner`` is already active, making it safe for concurrent use
32
+ inside ``complete_many``.
33
+
34
+ Args:
35
+ message: Text displayed next to the spinner.
36
+ delay: Grace period (seconds) before the spinner appears.
37
+ If the wrapped operation finishes within this window the
38
+ spinner is never rendered, avoiding flicker for fast calls.
39
+ """
40
+
41
+ def __init__(self, message: str = "Working...", *, delay: float = 0.3) -> None:
42
+ self.message = message
43
+ self.delay = delay
44
+ self._task: asyncio.Task[None] | None = None
45
+ self._owns_active = False
46
+ self._start: float = 0.0
47
+
48
+ # -- internal -----------------------------------------------------
49
+
50
+ async def _render(self) -> None:
51
+ """Background coroutine that draws frames until cancelled."""
52
+ await asyncio.sleep(self.delay)
53
+ idx = 0
54
+ while True:
55
+ elapsed = time.monotonic() - self._start
56
+ frame = _FRAMES[idx % len(_FRAMES)]
57
+ sys.stderr.write(
58
+ f"{_CLEAR_LINE}{_CYAN}{frame}{_RESET} {self.message} "
59
+ f"{_DIM}({elapsed:.1f}s){_RESET}"
60
+ )
61
+ sys.stderr.flush()
62
+ idx += 1
63
+ await asyncio.sleep(_INTERVAL)
64
+
65
+ @staticmethod
66
+ def _clear() -> None:
67
+ sys.stderr.write(_CLEAR_LINE)
68
+ sys.stderr.flush()
69
+
70
+ # -- context manager ----------------------------------------------
71
+
72
+ async def __aenter__(self) -> "Spinner":
73
+ global _active # noqa: PLW0603
74
+ if sys.stderr.isatty() and not _active:
75
+ _active = True
76
+ self._owns_active = True
77
+ self._start = time.monotonic()
78
+ self._task = asyncio.create_task(self._render())
79
+ return self
80
+
81
+ async def __aexit__(self, *_: object) -> None:
82
+ global _active # noqa: PLW0603
83
+ if self._task is not None:
84
+ self._task.cancel()
85
+ try:
86
+ await self._task
87
+ except asyncio.CancelledError:
88
+ pass
89
+ self._clear()
90
+ if self._owns_active:
91
+ _active = False
flashlite/client.py CHANGED
@@ -7,6 +7,7 @@ from typing import Any, TypeVar, overload
7
7
 
8
8
  from pydantic import BaseModel
9
9
 
10
+ from ._spinner import Spinner
10
11
  from .cache import CacheBackend, MemoryCache
11
12
  from .config import FlashliteConfig, load_env_files
12
13
  from .conversation import ContextManager, Conversation
@@ -223,7 +224,8 @@ class Flashlite:
223
224
  if self._config.log_requests:
224
225
  logger.info(f"Completion request: model={request.model}")
225
226
 
226
- response = await core_complete(request)
227
+ async with Spinner(f"Waiting for {request.model}...", delay=0.2):
228
+ response = await core_complete(request)
227
229
 
228
230
  if self._config.log_requests:
229
231
  logger.info(
@@ -395,10 +397,12 @@ class Flashlite:
395
397
  else:
396
398
  extra_kwargs["tools"] = tools_to_openai(tools)
397
399
 
398
- # Build request
400
+ # Build request (template/variables stored for middleware traceability)
399
401
  request = CompletionRequest(
400
402
  model=resolved_model,
401
403
  messages=final_messages,
404
+ template=template,
405
+ variables=variables,
402
406
  temperature=temperature,
403
407
  max_tokens=max_tokens,
404
408
  max_completion_tokens=max_completion_tokens,
@@ -1,67 +1,134 @@
1
1
  """Multi-agent conversation support for agent-to-agent interactions."""
2
2
 
3
+ import logging
4
+ import re
5
+ import time
3
6
  from dataclasses import dataclass, field
4
- from typing import TYPE_CHECKING, Any
7
+ from typing import TYPE_CHECKING, Any, TypeVar, overload
5
8
 
9
+ from pydantic import BaseModel
10
+
11
+ from ..core.messages import assistant_message, system_message, user_message
12
+ from ..structured import (
13
+ StructuredOutputError,
14
+ format_validation_error_for_retry,
15
+ schema_to_prompt,
16
+ validate_response,
17
+ )
6
18
  from ..types import CompletionResponse
7
19
 
8
20
  if TYPE_CHECKING:
9
21
  from ..client import Flashlite
10
22
 
23
+ T = TypeVar("T", bound=BaseModel)
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ _INVALID_NAME_CHARS = re.compile(r"[\s<|\\/>]+")
28
+
29
+
30
+ def _sanitize_name(name: str) -> str:
31
+ """Sanitize a display name for use in the OpenAI message ``name`` field.
32
+
33
+ The API requires names to match ``^[^\\s<|\\\\/>]+$``. This helper
34
+ replaces any run of invalid characters with ``_`` and strips leading/
35
+ trailing underscores so that human-friendly display names like
36
+ ``"Character Voice"`` become ``"Character_Voice"``.
37
+ """
38
+ return _INVALID_NAME_CHARS.sub("_", name).strip("_")
39
+
11
40
 
12
41
  @dataclass
13
42
  class Agent:
14
43
  """
15
44
  An agent with a name, persona, and optional model override.
16
45
 
46
+ Agents can define their system prompt either as a raw string or as a
47
+ Jinja template (rendered at speak-time via the client's TemplateEngine).
48
+ Agents can also have private context that only they see.
49
+
17
50
  Attributes:
18
- name: Display name for the agent (used in transcript and message attribution)
19
- system_prompt: The agent's personality, instructions, and behavior guidelines
51
+ name: Display name (used in transcript and message attribution)
52
+ system_prompt: The agent's personality/instructions (raw string)
20
53
  model: Optional model override (uses MultiAgentChat default if None)
54
+ system_template: Jinja template name (alternative to system_prompt)
55
+ system_variables: Variables for template rendering
56
+ private_context: Static context only this agent sees (injected as system message)
21
57
 
22
- Example:
23
- agent = Agent(
24
- name="Scientist",
25
- system_prompt="You are a curious scientist who loves experiments.",
26
- model="gpt-4o", # Optional: use specific model for this agent
27
- )
58
+ Examples:
59
+ # Raw system prompt
60
+ Agent(name="Scientist", system_prompt="You are a curious scientist.")
61
+
62
+ # Jinja template
63
+ Agent(name="Analyst", system_template="analyst_persona",
64
+ system_variables={"domain": "finance"})
65
+
66
+ # With private context
67
+ Agent(name="Judge", system_prompt="You are a debate judge.",
68
+ private_context="Score on: clarity (1-5), evidence (1-5).")
28
69
  """
29
70
 
30
71
  name: str
31
- system_prompt: str
72
+ system_prompt: str | None = None
32
73
  model: str | None = None
74
+ # Jinja template support (alternative to system_prompt)
75
+ system_template: str | None = None
76
+ system_variables: dict[str, Any] | None = None
77
+ # Private context only this agent sees
78
+ private_context: str | None = None
79
+
80
+ def __post_init__(self) -> None:
81
+ if not self.system_prompt and not self.system_template:
82
+ raise ValueError(
83
+ f"Agent '{self.name}' must have either system_prompt or system_template"
84
+ )
85
+ if self.system_prompt and self.system_template:
86
+ raise ValueError(
87
+ f"Agent '{self.name}' cannot have both system_prompt and system_template"
88
+ )
33
89
 
34
90
 
35
91
  @dataclass
36
92
  class ChatMessage:
37
- """A message in the multi-agent conversation."""
93
+ """A message in the multi-agent conversation.
94
+
95
+ Attributes:
96
+ agent_name: Who sent this message
97
+ content: The message content
98
+ metadata: Additional metadata (tokens, latency, model, etc.)
99
+ visible_to: If set, only these agents can see this message.
100
+ None means all agents can see it.
101
+ """
38
102
 
39
103
  agent_name: str
40
104
  content: str
41
105
  metadata: dict[str, Any] = field(default_factory=dict)
106
+ visible_to: list[str] | None = None
42
107
 
43
108
 
44
109
  class MultiAgentChat:
45
110
  """
46
111
  Manages conversations between multiple AI agents.
47
112
 
48
- This class enables agent-to-agent conversations where multiple AI agents
49
- can discuss, debate, or collaborate. Each agent maintains its own persona
50
- and sees the conversation from its perspective.
113
+ Integrates with flashlite's templating, logging, structured outputs,
114
+ and observability features.
51
115
 
52
116
  Key features:
53
117
  - Multiple agents with different personas and optionally different models
118
+ - Jinja template support for agent system prompts
119
+ - Per-message visibility control (private whispers to specific agents)
120
+ - Structured output support via Pydantic models (per-turn, flexible)
54
121
  - Automatic context building from each agent's perspective
122
+ - Conversation-level logging and per-agent stats
55
123
  - Round-robin or directed turn-taking
56
- - Full conversation transcript with metadata
57
- - Support for injecting external messages (moderator, user input)
58
124
 
59
125
  How it works:
60
- - Each agent has a system prompt defining their persona
126
+ - Each agent has a system prompt (raw or Jinja template) defining their persona
61
127
  - When an agent speaks, they see:
62
128
  - Their own previous messages as "assistant" role
63
- - Other agents' messages as "user" role with name attribution
64
- - This creates natural back-and-forth conversation
129
+ - Other agents' messages as "user" role with the ``name`` field for attribution
130
+ - Only messages they are allowed to see (filtered by ``visible_to``)
131
+ - Private context on an agent is injected as a system message only they see
65
132
 
66
133
  Example:
67
134
  client = Flashlite(default_model="gpt-4o-mini")
@@ -80,15 +147,24 @@ class MultiAgentChat:
80
147
  # Start with a topic
81
148
  chat.add_message("Moderator", "Discuss: Will AI help or hurt jobs?")
82
149
 
150
+ # Whisper private info to one agent
151
+ chat.add_message("Moderator", "Secret: focus on healthcare jobs.",
152
+ visible_to=["Optimist"])
153
+
83
154
  # Have agents take turns
84
- await chat.speak("Optimist") # Optimist responds
85
- await chat.speak("Skeptic") # Skeptic responds to Optimist
86
- await chat.speak("Optimist") # Continue the debate
155
+ await chat.speak("Optimist")
156
+ await chat.speak("Skeptic")
157
+
158
+ # Structured output from a judge
159
+ class Score(BaseModel):
160
+ winner: str
161
+ reasoning: str
162
+
163
+ result = await chat.speak("Judge", response_model=Score)
87
164
 
88
- # Or use round-robin for structured turns
165
+ # Round-robin for structured turns
89
166
  await chat.round_robin(rounds=2)
90
167
 
91
- # Get formatted transcript
92
168
  print(chat.format_transcript())
93
169
  """
94
170
 
@@ -109,6 +185,8 @@ class MultiAgentChat:
109
185
  self._agents: dict[str, Agent] = {}
110
186
  self._transcript: list[ChatMessage] = []
111
187
 
188
+ # -- Agent management ------------------------------------------------
189
+
112
190
  def add_agent(self, agent: Agent) -> "MultiAgentChat":
113
191
  """
114
192
  Add an agent to the chat.
@@ -120,8 +198,8 @@ class MultiAgentChat:
120
198
  Self for method chaining
121
199
 
122
200
  Example:
123
- chat.add_agent(Agent("Alice", "You are helpful."))
124
- .add_agent(Agent("Bob", "You are curious."))
201
+ chat.add_agent(Agent("Alice", system_prompt="You are helpful."))
202
+ .add_agent(Agent("Bob", system_prompt="You are curious."))
125
203
  """
126
204
  self._agents[agent.name] = agent
127
205
  return self
@@ -141,60 +219,117 @@ class MultiAgentChat:
141
219
  return True
142
220
  return False
143
221
 
222
+ # -- Message injection -----------------------------------------------
223
+
144
224
  def add_message(
145
225
  self,
146
226
  agent_name: str,
147
227
  content: str,
148
228
  metadata: dict[str, Any] | None = None,
229
+ visible_to: list[str] | None = None,
149
230
  ) -> "MultiAgentChat":
150
231
  """
151
232
  Manually add a message to the transcript.
152
233
 
153
- Useful for:
154
- - Injecting moderator or facilitator prompts
155
- - Adding user input to the conversation
156
- - Simulating agent messages for testing
234
+ Useful for injecting moderator prompts, user input, or private
235
+ whispers to specific agents.
157
236
 
158
237
  Args:
159
238
  agent_name: Name to attribute the message to
160
239
  content: Message content
161
240
  metadata: Optional metadata to attach
241
+ visible_to: If set, only these agents can see this message.
242
+ None means all agents see it.
162
243
 
163
244
  Returns:
164
245
  Self for method chaining
246
+
247
+ Examples:
248
+ # Public message everyone sees
249
+ chat.add_message("Moderator", "New topic: climate change.")
250
+
251
+ # Private whisper only the Adversary sees
252
+ chat.add_message("GameMaster", "Secret: the key is in the library.",
253
+ visible_to=["Adversary"])
165
254
  """
166
255
  self._transcript.append(
167
256
  ChatMessage(
168
257
  agent_name=agent_name,
169
258
  content=content,
170
259
  metadata=metadata or {},
260
+ visible_to=visible_to,
171
261
  )
172
262
  )
263
+ logger.debug(
264
+ "Message injected from '%s'%s",
265
+ agent_name,
266
+ f" (visible_to={visible_to})" if visible_to else "",
267
+ )
173
268
  return self
174
269
 
270
+ # -- Speaking --------------------------------------------------------
271
+
272
+ @overload
175
273
  async def speak(
176
274
  self,
177
275
  agent_name: str,
276
+ *,
277
+ additional_context: str | None = ...,
278
+ response_model: None = ...,
279
+ structured_retries: int = ...,
280
+ visible_to: list[str] | None = ...,
281
+ **kwargs: Any,
282
+ ) -> str: ...
283
+
284
+ @overload
285
+ async def speak(
286
+ self,
287
+ agent_name: str,
288
+ *,
289
+ additional_context: str | None = ...,
290
+ response_model: type[T] = ...,
291
+ structured_retries: int = ...,
292
+ visible_to: list[str] | None = ...,
293
+ **kwargs: Any,
294
+ ) -> T: ...
295
+
296
+ async def speak(
297
+ self,
298
+ agent_name: str,
299
+ *,
178
300
  additional_context: str | None = None,
301
+ response_model: type[T] | None = None,
302
+ structured_retries: int = 1,
303
+ visible_to: list[str] | None = None,
179
304
  **kwargs: Any,
180
- ) -> str:
305
+ ) -> str | T:
181
306
  """
182
307
  Have an agent respond to the conversation.
183
308
 
184
309
  The agent sees the full conversation history from their perspective:
185
310
  - Their own previous messages appear as "assistant" messages
186
311
  - Other agents' messages appear as "user" messages with name attribution
312
+ - Messages with ``visible_to`` set are filtered by visibility
187
313
 
188
314
  Args:
189
315
  agent_name: Name of the agent to speak
190
316
  additional_context: Optional extra context/instruction for this turn
317
+ response_model: Pydantic model class for structured output parsing.
318
+ When provided, returns a validated model instance.
319
+ Can change per call for flexible per-turn schemas.
320
+ structured_retries: Number of retries for structured output validation
321
+ visible_to: If set, only these agents see this agent's response.
322
+ None means all agents see it.
191
323
  **kwargs: Additional kwargs passed to client.complete()
192
324
 
193
325
  Returns:
194
- The agent's response content
326
+ The agent's response content (str), or a validated Pydantic model
327
+ instance if response_model is provided.
195
328
 
196
329
  Raises:
197
330
  ValueError: If agent_name is not found
331
+ StructuredOutputError: If structured output validation fails
332
+ after all retries are exhausted
198
333
  """
199
334
  if agent_name not in self._agents:
200
335
  raise ValueError(
@@ -202,21 +337,32 @@ class MultiAgentChat:
202
337
  )
203
338
 
204
339
  agent = self._agents[agent_name]
340
+ start_time = time.perf_counter()
205
341
 
206
342
  # Build messages from this agent's perspective
207
343
  messages = self._build_messages_for(agent)
208
344
 
209
345
  # Add any additional context as a user message
210
346
  if additional_context:
211
- messages.append({"role": "user", "content": additional_context})
347
+ messages.append(user_message(additional_context))
212
348
 
213
- # Make completion
349
+ # Handle structured output: inject schema into system prompt
350
+ extra_kwargs = dict(kwargs)
351
+ if response_model is not None:
352
+ messages, extra_kwargs = self._inject_schema(
353
+ messages, extra_kwargs, response_model, agent
354
+ )
355
+
356
+ # Make completion (without response_model so we get CompletionResponse
357
+ # and can store raw content in the transcript)
214
358
  response: CompletionResponse = await self._client.complete(
215
359
  model=agent.model or self._default_model,
216
360
  messages=messages,
217
- **kwargs,
361
+ **extra_kwargs,
218
362
  )
219
363
 
364
+ latency_ms = (time.perf_counter() - start_time) * 1000
365
+
220
366
  # Record in transcript with metadata
221
367
  self._transcript.append(
222
368
  ChatMessage(
@@ -225,35 +371,175 @@ class MultiAgentChat:
225
371
  metadata={
226
372
  "model": response.model,
227
373
  "tokens": response.usage.total_tokens if response.usage else None,
374
+ "input_tokens": (response.usage.input_tokens if response.usage else None),
375
+ "output_tokens": (response.usage.output_tokens if response.usage else None),
376
+ "latency_ms": round(latency_ms, 1),
228
377
  },
378
+ visible_to=visible_to,
229
379
  )
230
380
  )
231
381
 
382
+ logger.info(
383
+ "%s spoke (model=%s, tokens=%s, %.1fms)%s",
384
+ agent_name,
385
+ response.model,
386
+ response.usage.total_tokens if response.usage else "N/A",
387
+ latency_ms,
388
+ f" [visible_to={visible_to}]" if visible_to else "",
389
+ )
390
+
391
+ # Validate structured output if requested
392
+ if response_model is not None:
393
+ return await self._validate_structured(
394
+ response=response,
395
+ response_model=response_model,
396
+ messages=messages,
397
+ extra_kwargs=extra_kwargs,
398
+ agent=agent,
399
+ structured_retries=structured_retries,
400
+ visible_to=visible_to,
401
+ )
402
+
232
403
  return response.content
233
404
 
234
- def _build_messages_for(self, agent: Agent) -> list[dict[str, str]]:
405
+ # -- Internal helpers ------------------------------------------------
406
+
407
+ def _inject_schema(
408
+ self,
409
+ messages: list[dict[str, Any]],
410
+ extra_kwargs: dict[str, Any],
411
+ response_model: type[BaseModel],
412
+ agent: Agent,
413
+ ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
414
+ """Inject structured output schema into the system prompt and kwargs."""
415
+ schema_prompt = schema_to_prompt(response_model)
416
+
417
+ # Append schema to the system message
418
+ if messages and messages[0].get("role") == "system":
419
+ messages[0] = {
420
+ **messages[0],
421
+ "content": messages[0]["content"] + "\n\n" + schema_prompt,
422
+ }
423
+ else:
424
+ messages.insert(0, system_message(schema_prompt))
425
+
426
+ # Enable JSON mode for supported providers
427
+ if "response_format" not in extra_kwargs:
428
+ resolved_model = (agent.model or self._default_model or "").lower()
429
+ if any(
430
+ p in resolved_model for p in ["gpt-4", "gpt-3.5", "claude", "gemini", "mistral"]
431
+ ):
432
+ extra_kwargs["response_format"] = {"type": "json_object"}
433
+
434
+ return messages, extra_kwargs
435
+
436
+ async def _validate_structured(
437
+ self,
438
+ response: CompletionResponse,
439
+ response_model: type[T],
440
+ messages: list[dict[str, Any]],
441
+ extra_kwargs: dict[str, Any],
442
+ agent: Agent,
443
+ structured_retries: int,
444
+ visible_to: list[str] | None,
445
+ ) -> T:
446
+ """Validate structured output with retry support."""
447
+ last_error: StructuredOutputError | None = None
448
+ current_messages = list(messages)
449
+
450
+ for attempt in range(structured_retries + 1):
451
+ try:
452
+ return validate_response(response, response_model)
453
+ except StructuredOutputError as e:
454
+ last_error = e
455
+ logger.warning(
456
+ "%s structured output validation failed (attempt %d): %s",
457
+ agent.name,
458
+ attempt + 1,
459
+ e,
460
+ )
461
+ if attempt < structured_retries:
462
+ # Ask the model to fix its response
463
+ error_feedback = format_validation_error_for_retry(e)
464
+ current_messages.append(assistant_message(response.content))
465
+ current_messages.append(user_message(error_feedback))
466
+
467
+ response = await self._client.complete(
468
+ model=agent.model or self._default_model,
469
+ messages=current_messages,
470
+ **extra_kwargs,
471
+ )
472
+ # Update transcript with corrected response
473
+ self._transcript[-1] = ChatMessage(
474
+ agent_name=agent.name,
475
+ content=response.content,
476
+ metadata=self._transcript[-1].metadata,
477
+ visible_to=visible_to,
478
+ )
479
+
480
+ raise last_error # type: ignore[misc]
481
+
482
+ def _resolve_system_prompt(self, agent: Agent) -> str:
483
+ """
484
+ Resolve an agent's system prompt from raw string or Jinja template.
485
+
486
+ Args:
487
+ agent: The agent to resolve the prompt for
488
+
489
+ Returns:
490
+ The rendered system prompt string
491
+
492
+ Raises:
493
+ ValueError: If template engine is not configured
494
+ """
495
+ if agent.system_template:
496
+ engine = self._client.template_engine
497
+ if engine is None:
498
+ raise ValueError(
499
+ f"Agent '{agent.name}' uses system_template but no template "
500
+ "engine is configured. Pass template_dir to the Flashlite client "
501
+ "or call client.register_template()."
502
+ )
503
+ return engine.render(agent.system_template, agent.system_variables)
504
+ return agent.system_prompt or ""
505
+
506
+ def _build_messages_for(self, agent: Agent) -> list[dict[str, Any]]:
235
507
  """
236
508
  Build the message history from a specific agent's perspective.
237
509
 
238
- The agent's own messages become "assistant" role (what they said).
239
- Other agents' messages become "user" role with speaker attribution.
510
+ - System prompt (from raw string or Jinja template)
511
+ - Private context (if any, as an additional system message)
512
+ - Transcript messages filtered by visibility:
513
+ - Agent's own messages become "assistant" role with ``name`` field
514
+ - Other agents' messages become "user" role with ``name`` field
240
515
  """
241
- messages: list[dict[str, str]] = []
516
+ messages: list[dict[str, Any]] = []
242
517
 
243
518
  # System prompt for this agent
244
- messages.append({"role": "system", "content": agent.system_prompt})
519
+ prompt = self._resolve_system_prompt(agent)
520
+ messages.append(system_message(prompt))
521
+
522
+ # Private context (static, only this agent sees)
523
+ if agent.private_context:
524
+ messages.append(system_message(agent.private_context))
245
525
 
246
- # Add conversation history
526
+ # Conversation history, filtered by visibility
247
527
  for msg in self._transcript:
528
+ # Check visibility
529
+ if msg.visible_to is not None and agent.name not in msg.visible_to:
530
+ continue
531
+
248
532
  if msg.agent_name == agent.name:
249
533
  # Agent's own previous messages
250
- messages.append({"role": "assistant", "content": msg.content})
534
+ messages.append(assistant_message(msg.content, name=_sanitize_name(agent.name)))
251
535
  else:
252
- # Other agents'/sources' messages - prefix with speaker name
253
- messages.append({"role": "user", "content": f"[{msg.agent_name}]: {msg.content}"})
536
+ # Other agents'/sources' messages with name attribution
537
+ messages.append(user_message(msg.content, name=_sanitize_name(msg.agent_name)))
254
538
 
255
539
  return messages
256
540
 
541
+ # -- Batch speaking --------------------------------------------------
542
+
257
543
  async def round_robin(
258
544
  self,
259
545
  rounds: int = 1,
@@ -271,13 +557,20 @@ class MultiAgentChat:
271
557
  Returns:
272
558
  List of all responses in order
273
559
  """
274
- responses = []
560
+ responses: list[str] = []
275
561
  agent_names = list(self._agents.keys())
276
562
 
277
- for _ in range(rounds):
563
+ for round_num in range(1, rounds + 1):
564
+ logger.info(
565
+ "Round %d/%d started (agents: %s)",
566
+ round_num,
567
+ rounds,
568
+ ", ".join(agent_names),
569
+ )
278
570
  for name in agent_names:
279
571
  response = await self.speak(name, **kwargs)
280
572
  responses.append(response)
573
+ logger.info("Round %d/%d complete", round_num, rounds)
281
574
 
282
575
  return responses
283
576
 
@@ -296,12 +589,14 @@ class MultiAgentChat:
296
589
  Returns:
297
590
  List of responses in order
298
591
  """
299
- responses = []
592
+ responses: list[str] = []
300
593
  for name in agent_sequence:
301
594
  response = await self.speak(name, **kwargs)
302
595
  responses.append(response)
303
596
  return responses
304
597
 
598
+ # -- Transcript access -----------------------------------------------
599
+
305
600
  @property
306
601
  def transcript(self) -> list[ChatMessage]:
307
602
  """Get a copy of the conversation transcript."""
@@ -317,30 +612,88 @@ class MultiAgentChat:
317
612
  """Get list of agent names."""
318
613
  return list(self._agents.keys())
319
614
 
320
- def format_transcript(self, include_metadata: bool = False) -> str:
615
+ @property
616
+ def stats(self) -> dict[str, Any]:
617
+ """
618
+ Get per-agent statistics from the conversation.
619
+
620
+ Returns a dict with total and per-agent breakdowns of tokens,
621
+ latency, and message counts.
622
+ """
623
+ agent_stats: dict[str, dict[str, Any]] = {}
624
+ total_tokens = 0
625
+ total_messages = 0
626
+
627
+ for msg in self._transcript:
628
+ name = msg.agent_name
629
+ if name not in agent_stats:
630
+ agent_stats[name] = {
631
+ "messages": 0,
632
+ "total_tokens": 0,
633
+ "input_tokens": 0,
634
+ "output_tokens": 0,
635
+ "total_latency_ms": 0.0,
636
+ }
637
+ stats = agent_stats[name]
638
+ stats["messages"] += 1
639
+ total_messages += 1
640
+
641
+ tokens = msg.metadata.get("tokens")
642
+ if tokens is not None:
643
+ stats["total_tokens"] += tokens
644
+ total_tokens += tokens
645
+
646
+ input_t = msg.metadata.get("input_tokens")
647
+ if input_t is not None:
648
+ stats["input_tokens"] += input_t
649
+
650
+ output_t = msg.metadata.get("output_tokens")
651
+ if output_t is not None:
652
+ stats["output_tokens"] += output_t
653
+
654
+ latency = msg.metadata.get("latency_ms")
655
+ if latency is not None:
656
+ stats["total_latency_ms"] += latency
657
+
658
+ return {
659
+ "total_messages": total_messages,
660
+ "total_tokens": total_tokens,
661
+ "by_agent": agent_stats,
662
+ }
663
+
664
+ def format_transcript(
665
+ self,
666
+ include_metadata: bool = False,
667
+ include_private: bool = False,
668
+ ) -> str:
321
669
  """
322
670
  Format the transcript as a readable string.
323
671
 
324
672
  Args:
325
673
  include_metadata: Whether to include metadata like tokens used
674
+ include_private: Whether to show visibility annotations
326
675
 
327
676
  Returns:
328
677
  Formatted transcript string
329
678
  """
330
- lines = []
679
+ lines: list[str] = []
331
680
  for msg in self._transcript:
332
- lines.append(f"[{msg.agent_name}]:")
681
+ header = f"[{msg.agent_name}]"
682
+ if include_private and msg.visible_to is not None:
683
+ header += f" (visible_to: {', '.join(msg.visible_to)})"
684
+ header += ":"
685
+ lines.append(header)
333
686
  # Indent content for readability
334
687
  for line in msg.content.split("\n"):
335
688
  lines.append(f" {line}")
336
689
  if include_metadata and msg.metadata:
337
- meta_str = ", ".join(f"{k}={v}" for k, v in msg.metadata.items() if v)
690
+ meta_str = ", ".join(f"{k}={v}" for k, v in msg.metadata.items() if v is not None)
338
691
  if meta_str:
339
692
  lines.append(f" ({meta_str})")
340
693
  lines.append("")
341
694
  return "\n".join(lines)
342
695
 
343
- def get_messages_for(self, agent_name: str) -> list[dict[str, str]]:
696
+ def get_messages_for(self, agent_name: str) -> list[dict[str, Any]]:
344
697
  """
345
698
  Get the messages list as a specific agent would see it.
346
699
 
@@ -5,6 +5,7 @@ import logging
5
5
  import time
6
6
  from dataclasses import dataclass, field
7
7
 
8
+ from .._spinner import Spinner
8
9
  from ..types import CompletionRequest, CompletionResponse, RateLimitConfig, RateLimitError
9
10
  from .base import CompletionHandler, Middleware
10
11
 
@@ -52,6 +53,18 @@ class TokenBucket:
52
53
  Raises:
53
54
  RateLimitError: If timeout exceeded
54
55
  """
56
+ # Clamp to capacity so a single oversized request can never deadlock.
57
+ # The request still pays the refill-wait cost for `capacity` tokens,
58
+ # which preserves rate-limiting backpressure.
59
+ effective = min(tokens, self.capacity)
60
+ if effective < tokens:
61
+ logger.warning(
62
+ "Requested %d tokens exceeds bucket capacity %d — "
63
+ "clamping to capacity to avoid deadlock",
64
+ int(tokens),
65
+ int(self.capacity),
66
+ )
67
+
55
68
  start_time = time.monotonic()
56
69
  deadline = start_time + timeout if timeout else None
57
70
 
@@ -59,12 +72,12 @@ class TokenBucket:
59
72
  while True:
60
73
  self._refill()
61
74
 
62
- if self.tokens >= tokens:
63
- self.tokens -= tokens
75
+ if self.tokens >= effective:
76
+ self.tokens -= effective
64
77
  return time.monotonic() - start_time
65
78
 
66
79
  # Calculate wait time for enough tokens
67
- tokens_needed = tokens - self.tokens
80
+ tokens_needed = effective - self.tokens
68
81
  wait_time = tokens_needed / self.rate
69
82
 
70
83
  # Check timeout
@@ -146,21 +159,31 @@ class RateLimitMiddleware(Middleware):
146
159
 
147
160
  # Acquire RPM token before making request
148
161
  if self._rpm_bucket:
149
- wait_time = await self._rpm_bucket.acquire()
162
+ async with Spinner("Waiting on rate limit (RPM)...", delay=0.4):
163
+ wait_time = await self._rpm_bucket.acquire()
150
164
  if wait_time > 0.1: # Only log significant waits
151
- logger.debug(f"Rate limit: waited {wait_time:.2f}s for RPM token")
165
+ logger.info(
166
+ "⏳ Rate limit backpressure: waited %.2fs for RPM capacity", wait_time
167
+ )
152
168
 
153
169
  # Make the request
154
170
  response = await next_handler(request)
155
171
 
156
172
  # For TPM limiting, consume tokens based on actual usage
157
- # This is post-hoc - we can't know token count before the request
173
+ # This is post-hoc — we can't know token count before the request
158
174
  if self._tpm_bucket and response.usage:
159
175
  total_tokens = response.usage.total_tokens
160
176
  if total_tokens > 0:
161
- # Don't block on TPM - just record the usage
162
177
  # This creates backpressure for subsequent requests
163
- await self._tpm_bucket.acquire(tokens=float(total_tokens))
178
+ async with Spinner("Waiting on rate limit (TPM)...", delay=0.4):
179
+ wait_time = await self._tpm_bucket.acquire(tokens=float(total_tokens))
180
+ if wait_time > 0.1:
181
+ logger.info(
182
+ "⏳ Rate limit backpressure: waited %.2fs for TPM capacity "
183
+ "(%d tokens used)",
184
+ wait_time,
185
+ total_tokens,
186
+ )
164
187
 
165
188
  return response
166
189
 
@@ -354,11 +354,23 @@ class InspectLogger:
354
354
  sample_id = self._sample_count
355
355
  self._sample_count += 1
356
356
 
357
- # Convert messages to Inspect format
358
- input_messages = [
359
- {"role": msg.get("role", "user"), "content": msg.get("content", "")}
360
- for msg in request.messages
361
- ]
357
+ # Convert messages to Inspect format (preserve name field for multi-agent)
358
+ input_messages = []
359
+ for msg in request.messages:
360
+ inspect_msg: dict[str, Any] = {
361
+ "role": msg.get("role", "user"),
362
+ "content": msg.get("content", ""),
363
+ }
364
+ if msg.get("name"):
365
+ inspect_msg["name"] = msg["name"]
366
+ input_messages.append(inspect_msg)
367
+
368
+ # Build metadata, including template info for traceability
369
+ entry_metadata = dict(metadata or {})
370
+ if request.template is not None:
371
+ entry_metadata["template"] = request.template
372
+ if request.variables is not None:
373
+ entry_metadata["variables"] = request.variables
362
374
 
363
375
  entry = InspectLogEntry(
364
376
  eval_id=self._eval_id,
@@ -373,7 +385,7 @@ class InspectLogger:
373
385
  "total": response.usage.total_tokens if response.usage else 0,
374
386
  },
375
387
  timestamp=datetime.now(UTC).isoformat(),
376
- metadata=metadata or {},
388
+ metadata=entry_metadata,
377
389
  )
378
390
 
379
391
  json_str = json.dumps(entry.to_dict())
@@ -175,6 +175,10 @@ class StructuredLogger:
175
175
 
176
176
  # Build parameters dict
177
177
  params: dict[str, Any] = {}
178
+ if request.template is not None:
179
+ params["template"] = request.template
180
+ if request.variables is not None:
181
+ params["variables"] = request.variables
178
182
  if request.temperature is not None:
179
183
  params["temperature"] = request.temperature
180
184
  if request.max_tokens is not None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flashlite
3
- Version: 0.1.2
3
+ Version: 0.2.1
4
4
  Summary: Batteries-included wrapper for litellm with rate limiting, retries, templating, and more
5
5
  Author-email: ndalton12 <niall.dalton12@gmail.com>
6
6
  License-File: LICENSE.md
@@ -1,5 +1,6 @@
1
1
  flashlite/__init__.py,sha256=RlXjsK7zvZXStMvfz4FGqBxTWHev9VkyHYy-35TuTuM,3585
2
- flashlite/client.py,sha256=zQH_eLWZxnkX9acwI-y9c3uxeGybA-C0I9UPU6HrzvI,25081
2
+ flashlite/_spinner.py,sha256=9KHXD1MW33P2VM-sUe7NZJYz48auJXLspwy6unjMjSE,3019
3
+ flashlite/client.py,sha256=1UYWpWEfbrZe4mkz2-qaztjUM38TJS6swKbZ_OSgThw,25309
3
4
  flashlite/config.py,sha256=3RMEIAejBPlBG_VOgD8mpZKEDNZvK0k0cVv3vMM9kW8,4818
4
5
  flashlite/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
6
  flashlite/types.py,sha256=OqtgtnZaEWRYfaI6esqzv4-HKwu76Y-RDu3-Mhsae9w,6498
@@ -10,7 +11,7 @@ flashlite/cache/memory.py,sha256=_A4F7NTR9da2KDQW7fcKnUWrC-W_JpaYmb3d6rovX3w,441
10
11
  flashlite/conversation/__init__.py,sha256=zSgC4G697mx3T5bKn8WUEkSaSkMQQeHJsfyLdRUM30w,694
11
12
  flashlite/conversation/context.py,sha256=NQMLi5_WiN1zDYaPZTO9uJG_dJ3JJiVmAFfGAPM4X6c,10164
12
13
  flashlite/conversation/manager.py,sha256=dSQDgtzNt_6T8S1sHSAXKcS3DoBQ2vI9Ig1PZKaTh48,11644
13
- flashlite/conversation/multi_agent.py,sha256=t1jZD1VS3NOcAJtjQTMtvjEZCVTlFGy3SOxE_jjAtuo,11591
14
+ flashlite/conversation/multi_agent.py,sha256=uxS90wuAAKWQugQXcM_hdNlkhzph3XtKydqvcrz6WpM,25175
14
15
  flashlite/core/__init__.py,sha256=nWbMMPED_HsD62hkIYv45DDR6zX2_cDWCMPDTNfqSu4,315
15
16
  flashlite/core/completion.py,sha256=NTtAJzJ3ba0N0xVs8lCN5htme0SWEMxYroGjI63crw4,3847
16
17
  flashlite/core/messages.py,sha256=-EUtEjFjSNY1Lzfrynb9xtYw4FZRKnfFoYQqgsUcQZQ,3848
@@ -18,12 +19,12 @@ flashlite/middleware/__init__.py,sha256=T8Z4uSqjkuAcf5u5FuUBNfKyL5sqp4Iw4sov_xiU
18
19
  flashlite/middleware/base.py,sha256=LC_IL96jWWPdE0o_PBGPvSylmyLmob20LBVvGkfUS3g,2691
19
20
  flashlite/middleware/cache.py,sha256=R1YwAZBg5YJGTiqgNWdkl7VSN1xpmqmupTSBQnpyH-s,4032
20
21
  flashlite/middleware/logging.py,sha256=D3x8X1l1LN1Um_qOWuELyO8Fgo9WulFJTIx6s94Ure4,4919
21
- flashlite/middleware/rate_limit.py,sha256=nf0-Ul0CGnX0VRKtxB2dfoplkBin3P2cMLrbks76lcg,7059
22
+ flashlite/middleware/rate_limit.py,sha256=a0L0tnnX60ouJ7rLIoHs7JNX59Q5kqlf0kQgzP4FMlw,8091
22
23
  flashlite/middleware/retry.py,sha256=_3Lz9Gmes2sNk6rO10WamH6yrwJy8TQi-esIl8NIMag,4832
23
24
  flashlite/observability/__init__.py,sha256=VHdYteU9KmVkgSHrkA-Ssz6_qoi9uL-2JFDhSH5sgwI,949
24
25
  flashlite/observability/callbacks.py,sha256=yz1oZh7f7WVxvKmt7XyHbj4WDC2xnvM3SJiTSxfAkoQ,4897
25
- flashlite/observability/inspect_compat.py,sha256=IrsdEiV-qn_wOlgAvWLcIJ_7WxU0Bpq7DcHaS_KWXPw,16366
26
- flashlite/observability/logging.py,sha256=UxBH2RN8rNcGZHYgC_QYiuEpaIRXEQFs1OjiKjxbuf0,9273
26
+ flashlite/observability/inspect_compat.py,sha256=S2D2h_w_qD7xsd6cPMwt3-kbt76NYWbR35h2BKR5m24,16913
27
+ flashlite/observability/logging.py,sha256=qH0ky22nePzjVQIPPhsALcA4VIf7mkP_wMwg69fYM2s,9458
27
28
  flashlite/observability/metrics.py,sha256=blRx5N3uN4ilnPpxBe7k_uDhYV3GmQWXoKPLVxnk8_s,7466
28
29
  flashlite/structured/__init__.py,sha256=9k5bwkzFo_JD3WZ1Tm4iyZqoZ1A51EIINI8N1H2_2ew,750
29
30
  flashlite/structured/outputs.py,sha256=Q_isfrtKJGybBadGMKmfo5UJ5vMaUQRCRgFpjGWZOF8,5070
@@ -35,7 +36,7 @@ flashlite/templating/registry.py,sha256=wp8RaibHKNyu5q4tCdOXJ0B4tey7bv-c0qb9h1a7
35
36
  flashlite/tools/__init__.py,sha256=zpQ5KyvZwZaVvaulnpMmL_JjCnMfD08nD_foI95TjVg,1791
36
37
  flashlite/tools/definitions.py,sha256=cqyk6GR1qeMkTPFqsadnJc-YkCG15QVafiaf-OjGYNU,11519
37
38
  flashlite/tools/execution.py,sha256=iQC7V3R5Tx19suISnnuaDpjpgl8wURwOHmKZbsHL16s,10814
38
- flashlite-0.1.2.dist-info/METADATA,sha256=vWQl0DuuE16hbq9n1lLRL8ASCgxwBrHZsuibi8YD-u4,4293
39
- flashlite-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
40
- flashlite-0.1.2.dist-info/licenses/LICENSE.md,sha256=z2KZcyoH16ayjxlbeBM01uD-bXn1WTcKFab5ZKBhfJE,1068
41
- flashlite-0.1.2.dist-info/RECORD,,
39
+ flashlite-0.2.1.dist-info/METADATA,sha256=hP_D4Tgs1v6LqFyM5kYn0PIju0JSEOPokkhWEGcX5LE,4293
40
+ flashlite-0.2.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
41
+ flashlite-0.2.1.dist-info/licenses/LICENSE.md,sha256=z2KZcyoH16ayjxlbeBM01uD-bXn1WTcKFab5ZKBhfJE,1068
42
+ flashlite-0.2.1.dist-info/RECORD,,