flashlite 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flashlite/_spinner.py ADDED
@@ -0,0 +1,91 @@
1
+ """Terminal spinner for user-visible progress during async waits.
2
+
3
+ Provides a lightweight, non-blocking spinner that renders to stderr
4
+ when — and only when — the output is an interactive terminal. Multiple
5
+ concurrent ``Spinner`` instances (e.g. from ``complete_many``) are
6
+ gracefully collapsed so only one animation is visible at a time.
7
+ """
8
+
9
+ import asyncio
10
+ import sys
11
+ import time
12
+
13
+ # Braille-dot frames — smooth and compact.
14
+ _FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
15
+ _INTERVAL = 0.08 # seconds between frame updates
16
+
17
+ # ANSI helpers
18
+ _CYAN = "\033[36m"
19
+ _DIM = "\033[2m"
20
+ _RESET = "\033[0m"
21
+ _CLEAR_LINE = "\r\033[K"
22
+
23
+ # Module-level guard — only one spinner renders at a time.
24
+ _active: bool = False
25
+
26
+
27
+ class Spinner:
28
+ """Async context manager that shows a terminal spinner on stderr.
29
+
30
+ The spinner only appears when stderr is a TTY **and** no other
31
+ ``Spinner`` is already active, making it safe for concurrent use
32
+ inside ``complete_many``.
33
+
34
+ Args:
35
+ message: Text displayed next to the spinner.
36
+ delay: Grace period (seconds) before the spinner appears.
37
+ If the wrapped operation finishes within this window the
38
+ spinner is never rendered, avoiding flicker for fast calls.
39
+ """
40
+
41
+ def __init__(self, message: str = "Working...", *, delay: float = 0.3) -> None:
42
+ self.message = message
43
+ self.delay = delay
44
+ self._task: asyncio.Task[None] | None = None
45
+ self._owns_active = False
46
+ self._start: float = 0.0
47
+
48
+ # -- internal -----------------------------------------------------
49
+
50
+ async def _render(self) -> None:
51
+ """Background coroutine that draws frames until cancelled."""
52
+ await asyncio.sleep(self.delay)
53
+ idx = 0
54
+ while True:
55
+ elapsed = time.monotonic() - self._start
56
+ frame = _FRAMES[idx % len(_FRAMES)]
57
+ sys.stderr.write(
58
+ f"{_CLEAR_LINE}{_CYAN}{frame}{_RESET} {self.message} "
59
+ f"{_DIM}({elapsed:.1f}s){_RESET}"
60
+ )
61
+ sys.stderr.flush()
62
+ idx += 1
63
+ await asyncio.sleep(_INTERVAL)
64
+
65
+ @staticmethod
66
+ def _clear() -> None:
67
+ sys.stderr.write(_CLEAR_LINE)
68
+ sys.stderr.flush()
69
+
70
+ # -- context manager ----------------------------------------------
71
+
72
+ async def __aenter__(self) -> "Spinner":
73
+ global _active # noqa: PLW0603
74
+ if sys.stderr.isatty() and not _active:
75
+ _active = True
76
+ self._owns_active = True
77
+ self._start = time.monotonic()
78
+ self._task = asyncio.create_task(self._render())
79
+ return self
80
+
81
+ async def __aexit__(self, *_: object) -> None:
82
+ global _active # noqa: PLW0603
83
+ if self._task is not None:
84
+ self._task.cancel()
85
+ try:
86
+ await self._task
87
+ except asyncio.CancelledError:
88
+ pass
89
+ self._clear()
90
+ if self._owns_active:
91
+ _active = False
flashlite/client.py CHANGED
@@ -7,6 +7,7 @@ from typing import Any, TypeVar, overload
7
7
 
8
8
  from pydantic import BaseModel
9
9
 
10
+ from ._spinner import Spinner
10
11
  from .cache import CacheBackend, MemoryCache
11
12
  from .config import FlashliteConfig, load_env_files
12
13
  from .conversation import ContextManager, Conversation
@@ -223,7 +224,8 @@ class Flashlite:
223
224
  if self._config.log_requests:
224
225
  logger.info(f"Completion request: model={request.model}")
225
226
 
226
- response = await core_complete(request)
227
+ async with Spinner(f"Waiting for {request.model}...", delay=0.2):
228
+ response = await core_complete(request)
227
229
 
228
230
  if self._config.log_requests:
229
231
  logger.info(
@@ -395,10 +397,12 @@ class Flashlite:
395
397
  else:
396
398
  extra_kwargs["tools"] = tools_to_openai(tools)
397
399
 
398
- # Build request
400
+ # Build request (template/variables stored for middleware traceability)
399
401
  request = CompletionRequest(
400
402
  model=resolved_model,
401
403
  messages=final_messages,
404
+ template=template,
405
+ variables=variables,
402
406
  temperature=temperature,
403
407
  max_tokens=max_tokens,
404
408
  max_completion_tokens=max_completion_tokens,
@@ -1,67 +1,134 @@
1
1
  """Multi-agent conversation support for agent-to-agent interactions."""
2
2
 
3
+ import logging
4
+ import re
5
+ import time
3
6
  from dataclasses import dataclass, field
4
- from typing import TYPE_CHECKING, Any
7
+ from typing import TYPE_CHECKING, Any, TypeVar, overload
5
8
 
9
+ from pydantic import BaseModel
10
+
11
+ from ..core.messages import assistant_message, system_message, user_message
12
+ from ..structured import (
13
+ StructuredOutputError,
14
+ format_validation_error_for_retry,
15
+ schema_to_prompt,
16
+ validate_response,
17
+ )
6
18
  from ..types import CompletionResponse
7
19
 
8
20
  if TYPE_CHECKING:
9
21
  from ..client import Flashlite
10
22
 
23
+ T = TypeVar("T", bound=BaseModel)
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ _INVALID_NAME_CHARS = re.compile(r"[\s<|\\/>]+")
28
+
29
+
30
+ def _sanitize_name(name: str) -> str:
31
+ """Sanitize a display name for use in the OpenAI message ``name`` field.
32
+
33
+ The API requires names to match ``^[^\\s<|\\\\/>]+$``. This helper
34
+ replaces any run of invalid characters with ``_`` and strips leading/
35
+ trailing underscores so that human-friendly display names like
36
+ ``"Character Voice"`` become ``"Character_Voice"``.
37
+ """
38
+ return _INVALID_NAME_CHARS.sub("_", name).strip("_")
39
+
11
40
 
12
41
  @dataclass
13
42
  class Agent:
14
43
  """
15
44
  An agent with a name, persona, and optional model override.
16
45
 
46
+ Agents can define their system prompt either as a raw string or as a
47
+ Jinja template (rendered at speak-time via the client's TemplateEngine).
48
+ Agents can also have private context that only they see.
49
+
17
50
  Attributes:
18
- name: Display name for the agent (used in transcript and message attribution)
19
- system_prompt: The agent's personality, instructions, and behavior guidelines
51
+ name: Display name (used in transcript and message attribution)
52
+ system_prompt: The agent's personality/instructions (raw string)
20
53
  model: Optional model override (uses MultiAgentChat default if None)
54
+ system_template: Jinja template name (alternative to system_prompt)
55
+ system_variables: Variables for template rendering
56
+ private_context: Static context only this agent sees (injected as system message)
21
57
 
22
- Example:
23
- agent = Agent(
24
- name="Scientist",
25
- system_prompt="You are a curious scientist who loves experiments.",
26
- model="gpt-4o", # Optional: use specific model for this agent
27
- )
58
+ Examples:
59
+ # Raw system prompt
60
+ Agent(name="Scientist", system_prompt="You are a curious scientist.")
61
+
62
+ # Jinja template
63
+ Agent(name="Analyst", system_template="analyst_persona",
64
+ system_variables={"domain": "finance"})
65
+
66
+ # With private context
67
+ Agent(name="Judge", system_prompt="You are a debate judge.",
68
+ private_context="Score on: clarity (1-5), evidence (1-5).")
28
69
  """
29
70
 
30
71
  name: str
31
- system_prompt: str
72
+ system_prompt: str | None = None
32
73
  model: str | None = None
74
+ # Jinja template support (alternative to system_prompt)
75
+ system_template: str | None = None
76
+ system_variables: dict[str, Any] | None = None
77
+ # Private context only this agent sees
78
+ private_context: str | None = None
79
+
80
+ def __post_init__(self) -> None:
81
+ if not self.system_prompt and not self.system_template:
82
+ raise ValueError(
83
+ f"Agent '{self.name}' must have either system_prompt or system_template"
84
+ )
85
+ if self.system_prompt and self.system_template:
86
+ raise ValueError(
87
+ f"Agent '{self.name}' cannot have both system_prompt and system_template"
88
+ )
33
89
 
34
90
 
35
91
@dataclass
class ChatMessage:
    """A message in the multi-agent conversation.

    Attributes:
        agent_name: Who sent this message
        content: The message content
        metadata: Additional metadata (tokens, latency, model, etc.)
        visible_to: If set, only these agents can see this message.
            None means all agents can see it.
    """

    # Speaker's display name — a registered agent or an injected source
    # such as "Moderator".
    agent_name: str
    # Raw message text produced by the model or injected by the caller.
    content: str
    # Per-message bookkeeping (model, token counts, latency_ms, ...).
    metadata: dict[str, Any] = field(default_factory=dict)
    # Visibility allow-list; None means every agent sees the message.
    visible_to: list[str] | None = None
42
107
 
43
108
 
44
109
  class MultiAgentChat:
45
110
  """
46
111
  Manages conversations between multiple AI agents.
47
112
 
48
- This class enables agent-to-agent conversations where multiple AI agents
49
- can discuss, debate, or collaborate. Each agent maintains its own persona
50
- and sees the conversation from its perspective.
113
+ Integrates with flashlite's templating, logging, structured outputs,
114
+ and observability features.
51
115
 
52
116
  Key features:
53
117
  - Multiple agents with different personas and optionally different models
118
+ - Jinja template support for agent system prompts
119
+ - Per-message visibility control (private whispers to specific agents)
120
+ - Structured output support via Pydantic models (per-turn, flexible)
54
121
  - Automatic context building from each agent's perspective
122
+ - Conversation-level logging and per-agent stats
55
123
  - Round-robin or directed turn-taking
56
- - Full conversation transcript with metadata
57
- - Support for injecting external messages (moderator, user input)
58
124
 
59
125
  How it works:
60
- - Each agent has a system prompt defining their persona
126
+ - Each agent has a system prompt (raw or Jinja template) defining their persona
61
127
  - When an agent speaks, they see:
62
128
  - Their own previous messages as "assistant" role
63
- - Other agents' messages as "user" role with name attribution
64
- - This creates natural back-and-forth conversation
129
+ - Other agents' messages as "user" role with the ``name`` field for attribution
130
+ - Only messages they are allowed to see (filtered by ``visible_to``)
131
+ - Private context on an agent is injected as a system message only they see
65
132
 
66
133
  Example:
67
134
  client = Flashlite(default_model="gpt-4o-mini")
@@ -80,15 +147,24 @@ class MultiAgentChat:
80
147
  # Start with a topic
81
148
  chat.add_message("Moderator", "Discuss: Will AI help or hurt jobs?")
82
149
 
150
+ # Whisper private info to one agent
151
+ chat.add_message("Moderator", "Secret: focus on healthcare jobs.",
152
+ visible_to=["Optimist"])
153
+
83
154
  # Have agents take turns
84
- await chat.speak("Optimist") # Optimist responds
85
- await chat.speak("Skeptic") # Skeptic responds to Optimist
86
- await chat.speak("Optimist") # Continue the debate
155
+ await chat.speak("Optimist")
156
+ await chat.speak("Skeptic")
157
+
158
+ # Structured output from a judge
159
+ class Score(BaseModel):
160
+ winner: str
161
+ reasoning: str
162
+
163
+ result = await chat.speak("Judge", response_model=Score)
87
164
 
88
- # Or use round-robin for structured turns
165
+ # Round-robin for structured turns
89
166
  await chat.round_robin(rounds=2)
90
167
 
91
- # Get formatted transcript
92
168
  print(chat.format_transcript())
93
169
  """
94
170
 
@@ -109,6 +185,8 @@ class MultiAgentChat:
109
185
  self._agents: dict[str, Agent] = {}
110
186
  self._transcript: list[ChatMessage] = []
111
187
 
188
+ # -- Agent management ------------------------------------------------
189
+
112
190
  def add_agent(self, agent: Agent) -> "MultiAgentChat":
113
191
  """
114
192
  Add an agent to the chat.
@@ -120,8 +198,8 @@ class MultiAgentChat:
120
198
  Self for method chaining
121
199
 
122
200
  Example:
123
- chat.add_agent(Agent("Alice", "You are helpful."))
124
- .add_agent(Agent("Bob", "You are curious."))
201
+ chat.add_agent(Agent("Alice", system_prompt="You are helpful."))
202
+ .add_agent(Agent("Bob", system_prompt="You are curious."))
125
203
  """
126
204
  self._agents[agent.name] = agent
127
205
  return self
@@ -141,82 +219,151 @@ class MultiAgentChat:
141
219
  return True
142
220
  return False
143
221
 
222
+ # -- Message injection -----------------------------------------------
223
+
144
224
  def add_message(
145
225
  self,
146
226
  agent_name: str,
147
227
  content: str,
148
228
  metadata: dict[str, Any] | None = None,
229
+ visible_to: list[str] | None = None,
149
230
  ) -> "MultiAgentChat":
150
231
  """
151
232
  Manually add a message to the transcript.
152
233
 
153
- Useful for:
154
- - Injecting moderator or facilitator prompts
155
- - Adding user input to the conversation
156
- - Simulating agent messages for testing
234
+ Useful for injecting moderator prompts, user input, or private
235
+ whispers to specific agents.
157
236
 
158
237
  Args:
159
238
  agent_name: Name to attribute the message to
160
239
  content: Message content
161
240
  metadata: Optional metadata to attach
241
+ visible_to: If set, only these agents can see this message.
242
+ None means all agents see it.
162
243
 
163
244
  Returns:
164
245
  Self for method chaining
246
+
247
+ Examples:
248
+ # Public message everyone sees
249
+ chat.add_message("Moderator", "New topic: climate change.")
250
+
251
+ # Private whisper only the Adversary sees
252
+ chat.add_message("GameMaster", "Secret: the key is in the library.",
253
+ visible_to=["Adversary"])
165
254
  """
166
255
  self._transcript.append(
167
256
  ChatMessage(
168
257
  agent_name=agent_name,
169
258
  content=content,
170
259
  metadata=metadata or {},
260
+ visible_to=visible_to,
171
261
  )
172
262
  )
263
+ logger.debug(
264
+ "Message injected from '%s'%s",
265
+ agent_name,
266
+ f" (visible_to={visible_to})" if visible_to else "",
267
+ )
173
268
  return self
174
269
 
270
+ # -- Speaking --------------------------------------------------------
271
+
272
+ @overload
175
273
  async def speak(
176
274
  self,
177
275
  agent_name: str,
276
+ *,
277
+ additional_context: str | None = ...,
278
+ response_model: None = ...,
279
+ structured_retries: int = ...,
280
+ visible_to: list[str] | None = ...,
281
+ **kwargs: Any,
282
+ ) -> str: ...
283
+
284
+ @overload
285
+ async def speak(
286
+ self,
287
+ agent_name: str,
288
+ *,
289
+ additional_context: str | None = ...,
290
+ response_model: type[T] = ...,
291
+ structured_retries: int = ...,
292
+ visible_to: list[str] | None = ...,
293
+ **kwargs: Any,
294
+ ) -> T: ...
295
+
296
+ async def speak(
297
+ self,
298
+ agent_name: str,
299
+ *,
178
300
  additional_context: str | None = None,
301
+ response_model: type[T] | None = None,
302
+ structured_retries: int = 1,
303
+ visible_to: list[str] | None = None,
179
304
  **kwargs: Any,
180
- ) -> str:
305
+ ) -> str | T:
181
306
  """
182
307
  Have an agent respond to the conversation.
183
308
 
184
309
  The agent sees the full conversation history from their perspective:
185
310
  - Their own previous messages appear as "assistant" messages
186
311
  - Other agents' messages appear as "user" messages with name attribution
312
+ - Messages with ``visible_to`` set are filtered by visibility
187
313
 
188
314
  Args:
189
315
  agent_name: Name of the agent to speak
190
316
  additional_context: Optional extra context/instruction for this turn
317
+ response_model: Pydantic model class for structured output parsing.
318
+ When provided, returns a validated model instance.
319
+ Can change per call for flexible per-turn schemas.
320
+ structured_retries: Number of retries for structured output validation
321
+ visible_to: If set, only these agents see this agent's response.
322
+ None means all agents see it.
191
323
  **kwargs: Additional kwargs passed to client.complete()
192
324
 
193
325
  Returns:
194
- The agent's response content
326
+ The agent's response content (str), or a validated Pydantic model
327
+ instance if response_model is provided.
195
328
 
196
329
  Raises:
197
330
  ValueError: If agent_name is not found
331
+ StructuredOutputError: If structured output validation fails
332
+ after all retries are exhausted
198
333
  """
199
334
  if agent_name not in self._agents:
200
335
  raise ValueError(
201
- f"Unknown agent: {agent_name}. Available agents: {list(self._agents.keys())}"
336
+ f"Unknown agent: {agent_name}. "
337
+ f"Available agents: {list(self._agents.keys())}"
202
338
  )
203
339
 
204
340
  agent = self._agents[agent_name]
341
+ start_time = time.perf_counter()
205
342
 
206
343
  # Build messages from this agent's perspective
207
344
  messages = self._build_messages_for(agent)
208
345
 
209
346
  # Add any additional context as a user message
210
347
  if additional_context:
211
- messages.append({"role": "user", "content": additional_context})
348
+ messages.append(user_message(additional_context))
212
349
 
213
- # Make completion
350
+ # Handle structured output: inject schema into system prompt
351
+ extra_kwargs = dict(kwargs)
352
+ if response_model is not None:
353
+ messages, extra_kwargs = self._inject_schema(
354
+ messages, extra_kwargs, response_model, agent
355
+ )
356
+
357
+ # Make completion (without response_model so we get CompletionResponse
358
+ # and can store raw content in the transcript)
214
359
  response: CompletionResponse = await self._client.complete(
215
360
  model=agent.model or self._default_model,
216
361
  messages=messages,
217
- **kwargs,
362
+ **extra_kwargs,
218
363
  )
219
364
 
365
+ latency_ms = (time.perf_counter() - start_time) * 1000
366
+
220
367
  # Record in transcript with metadata
221
368
  self._transcript.append(
222
369
  ChatMessage(
@@ -225,35 +372,184 @@ class MultiAgentChat:
225
372
  metadata={
226
373
  "model": response.model,
227
374
  "tokens": response.usage.total_tokens if response.usage else None,
375
+ "input_tokens": (
376
+ response.usage.input_tokens if response.usage else None
377
+ ),
378
+ "output_tokens": (
379
+ response.usage.output_tokens if response.usage else None
380
+ ),
381
+ "latency_ms": round(latency_ms, 1),
228
382
  },
383
+ visible_to=visible_to,
229
384
  )
230
385
  )
231
386
 
387
+ logger.info(
388
+ "%s spoke (model=%s, tokens=%s, %.1fms)%s",
389
+ agent_name,
390
+ response.model,
391
+ response.usage.total_tokens if response.usage else "N/A",
392
+ latency_ms,
393
+ f" [visible_to={visible_to}]" if visible_to else "",
394
+ )
395
+
396
+ # Validate structured output if requested
397
+ if response_model is not None:
398
+ return self._validate_structured(
399
+ response=response,
400
+ response_model=response_model,
401
+ messages=messages,
402
+ extra_kwargs=extra_kwargs,
403
+ agent=agent,
404
+ structured_retries=structured_retries,
405
+ visible_to=visible_to,
406
+ )
407
+
232
408
  return response.content
233
409
 
234
- def _build_messages_for(self, agent: Agent) -> list[dict[str, str]]:
410
+ # -- Internal helpers ------------------------------------------------
411
+
412
+ def _inject_schema(
413
+ self,
414
+ messages: list[dict[str, Any]],
415
+ extra_kwargs: dict[str, Any],
416
+ response_model: type[BaseModel],
417
+ agent: Agent,
418
+ ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
419
+ """Inject structured output schema into the system prompt and kwargs."""
420
+ schema_prompt = schema_to_prompt(response_model)
421
+
422
+ # Append schema to the system message
423
+ if messages and messages[0].get("role") == "system":
424
+ messages[0] = {
425
+ **messages[0],
426
+ "content": messages[0]["content"] + "\n\n" + schema_prompt,
427
+ }
428
+ else:
429
+ messages.insert(0, system_message(schema_prompt))
430
+
431
+ # Enable JSON mode for supported providers
432
+ if "response_format" not in extra_kwargs:
433
+ resolved_model = (agent.model or self._default_model or "").lower()
434
+ if any(
435
+ p in resolved_model
436
+ for p in ["gpt-4", "gpt-3.5", "claude", "gemini", "mistral"]
437
+ ):
438
+ extra_kwargs["response_format"] = {"type": "json_object"}
439
+
440
+ return messages, extra_kwargs
441
+
442
    async def _validate_structured(
        self,
        response: CompletionResponse,
        response_model: type[T],
        messages: list[dict[str, Any]],
        extra_kwargs: dict[str, Any],
        agent: Agent,
        structured_retries: int,
        visible_to: list[str] | None,
    ) -> T:
        """Validate structured output with retry support.

        Tries to parse ``response`` into ``response_model``; on failure the
        validation error is fed back to the model (assistant turn carrying
        the bad output, then a corrective user turn) and the completion is
        re-requested, up to ``structured_retries`` additional attempts. The
        last transcript entry is replaced with each corrected response.

        Raises:
            StructuredOutputError: If validation still fails after all
                retries are exhausted.
        """
        last_error: StructuredOutputError | None = None
        # Copy so retry turns don't mutate the caller's message list.
        current_messages = list(messages)

        for attempt in range(structured_retries + 1):
            try:
                return validate_response(response, response_model)
            except StructuredOutputError as e:
                last_error = e
                logger.warning(
                    "%s structured output validation failed (attempt %d): %s",
                    agent.name,
                    attempt + 1,
                    e,
                )
                if attempt < structured_retries:
                    # Ask the model to fix its response
                    error_feedback = format_validation_error_for_retry(e)
                    current_messages.append(assistant_message(response.content))
                    current_messages.append(user_message(error_feedback))

                    response = await self._client.complete(
                        model=agent.model or self._default_model,
                        messages=current_messages,
                        **extra_kwargs,
                    )
                    # Update transcript with corrected response.
                    # NOTE(review): the replacement reuses the original
                    # attempt's metadata (tokens/latency), so stats will
                    # under-count retry cost — confirm this is intended.
                    self._transcript[-1] = ChatMessage(
                        agent_name=agent.name,
                        content=response.content,
                        metadata=self._transcript[-1].metadata,
                        visible_to=visible_to,
                    )

        # Reached only after the final attempt failed.
        raise last_error  # type: ignore[misc]
487
+
488
+ def _resolve_system_prompt(self, agent: Agent) -> str:
489
+ """
490
+ Resolve an agent's system prompt from raw string or Jinja template.
491
+
492
+ Args:
493
+ agent: The agent to resolve the prompt for
494
+
495
+ Returns:
496
+ The rendered system prompt string
497
+
498
+ Raises:
499
+ ValueError: If template engine is not configured
500
+ """
501
+ if agent.system_template:
502
+ engine = self._client.template_engine
503
+ if engine is None:
504
+ raise ValueError(
505
+ f"Agent '{agent.name}' uses system_template but no template "
506
+ "engine is configured. Pass template_dir to the Flashlite client "
507
+ "or call client.register_template()."
508
+ )
509
+ return engine.render(agent.system_template, agent.system_variables)
510
+ return agent.system_prompt or ""
511
+
512
+ def _build_messages_for(self, agent: Agent) -> list[dict[str, Any]]:
235
513
  """
236
514
  Build the message history from a specific agent's perspective.
237
515
 
238
- The agent's own messages become "assistant" role (what they said).
239
- Other agents' messages become "user" role with speaker attribution.
516
+ - System prompt (from raw string or Jinja template)
517
+ - Private context (if any, as an additional system message)
518
+ - Transcript messages filtered by visibility:
519
+ - Agent's own messages become "assistant" role with ``name`` field
520
+ - Other agents' messages become "user" role with ``name`` field
240
521
  """
241
- messages: list[dict[str, str]] = []
522
+ messages: list[dict[str, Any]] = []
242
523
 
243
524
  # System prompt for this agent
244
- messages.append({"role": "system", "content": agent.system_prompt})
525
+ prompt = self._resolve_system_prompt(agent)
526
+ messages.append(system_message(prompt))
527
+
528
+ # Private context (static, only this agent sees)
529
+ if agent.private_context:
530
+ messages.append(system_message(agent.private_context))
245
531
 
246
- # Add conversation history
532
+ # Conversation history, filtered by visibility
247
533
  for msg in self._transcript:
534
+ # Check visibility
535
+ if msg.visible_to is not None and agent.name not in msg.visible_to:
536
+ continue
537
+
248
538
  if msg.agent_name == agent.name:
249
539
  # Agent's own previous messages
250
- messages.append({"role": "assistant", "content": msg.content})
540
+ messages.append(
541
+ assistant_message(msg.content, name=_sanitize_name(agent.name))
542
+ )
251
543
  else:
252
- # Other agents'/sources' messages - prefix with speaker name
253
- messages.append({"role": "user", "content": f"[{msg.agent_name}]: {msg.content}"})
544
+ # Other agents'/sources' messages with name attribution
545
+ messages.append(
546
+ user_message(msg.content, name=_sanitize_name(msg.agent_name))
547
+ )
254
548
 
255
549
  return messages
256
550
 
551
+ # -- Batch speaking --------------------------------------------------
552
+
257
553
  async def round_robin(
258
554
  self,
259
555
  rounds: int = 1,
@@ -271,13 +567,20 @@ class MultiAgentChat:
271
567
  Returns:
272
568
  List of all responses in order
273
569
  """
274
- responses = []
570
+ responses: list[str] = []
275
571
  agent_names = list(self._agents.keys())
276
572
 
277
- for _ in range(rounds):
573
+ for round_num in range(1, rounds + 1):
574
+ logger.info(
575
+ "Round %d/%d started (agents: %s)",
576
+ round_num,
577
+ rounds,
578
+ ", ".join(agent_names),
579
+ )
278
580
  for name in agent_names:
279
581
  response = await self.speak(name, **kwargs)
280
582
  responses.append(response)
583
+ logger.info("Round %d/%d complete", round_num, rounds)
281
584
 
282
585
  return responses
283
586
 
@@ -296,12 +599,14 @@ class MultiAgentChat:
296
599
  Returns:
297
600
  List of responses in order
298
601
  """
299
- responses = []
602
+ responses: list[str] = []
300
603
  for name in agent_sequence:
301
604
  response = await self.speak(name, **kwargs)
302
605
  responses.append(response)
303
606
  return responses
304
607
 
608
+ # -- Transcript access -----------------------------------------------
609
+
305
610
  @property
306
611
  def transcript(self) -> list[ChatMessage]:
307
612
  """Get a copy of the conversation transcript."""
@@ -317,30 +622,90 @@ class MultiAgentChat:
317
622
  """Get list of agent names."""
318
623
  return list(self._agents.keys())
319
624
 
320
- def format_transcript(self, include_metadata: bool = False) -> str:
625
+ @property
626
+ def stats(self) -> dict[str, Any]:
627
+ """
628
+ Get per-agent statistics from the conversation.
629
+
630
+ Returns a dict with total and per-agent breakdowns of tokens,
631
+ latency, and message counts.
632
+ """
633
+ agent_stats: dict[str, dict[str, Any]] = {}
634
+ total_tokens = 0
635
+ total_messages = 0
636
+
637
+ for msg in self._transcript:
638
+ name = msg.agent_name
639
+ if name not in agent_stats:
640
+ agent_stats[name] = {
641
+ "messages": 0,
642
+ "total_tokens": 0,
643
+ "input_tokens": 0,
644
+ "output_tokens": 0,
645
+ "total_latency_ms": 0.0,
646
+ }
647
+ stats = agent_stats[name]
648
+ stats["messages"] += 1
649
+ total_messages += 1
650
+
651
+ tokens = msg.metadata.get("tokens")
652
+ if tokens is not None:
653
+ stats["total_tokens"] += tokens
654
+ total_tokens += tokens
655
+
656
+ input_t = msg.metadata.get("input_tokens")
657
+ if input_t is not None:
658
+ stats["input_tokens"] += input_t
659
+
660
+ output_t = msg.metadata.get("output_tokens")
661
+ if output_t is not None:
662
+ stats["output_tokens"] += output_t
663
+
664
+ latency = msg.metadata.get("latency_ms")
665
+ if latency is not None:
666
+ stats["total_latency_ms"] += latency
667
+
668
+ return {
669
+ "total_messages": total_messages,
670
+ "total_tokens": total_tokens,
671
+ "by_agent": agent_stats,
672
+ }
673
+
674
    def format_transcript(
        self,
        include_metadata: bool = False,
        include_private: bool = False,
    ) -> str:
        """
        Format the transcript as a readable string.

        Args:
            include_metadata: Whether to include metadata like tokens used
            include_private: Whether to show visibility annotations

        Returns:
            Formatted transcript string
        """
        lines: list[str] = []
        for msg in self._transcript:
            # Header: "[Name]" plus an optional visibility annotation.
            header = f"[{msg.agent_name}]"
            if include_private and msg.visible_to is not None:
                header += f" (visible_to: {', '.join(msg.visible_to)})"
            header += ":"
            lines.append(header)
            # Indent content for readability
            for line in msg.content.split("\n"):
                lines.append(f" {line}")
            if include_metadata and msg.metadata:
                # `is not None` keeps legitimate zero values in the output.
                meta_str = ", ".join(
                    f"{k}={v}" for k, v in msg.metadata.items() if v is not None
                )
                if meta_str:
                    lines.append(f" ({meta_str})")
            # Blank separator line between messages.
            lines.append("")
        return "\n".join(lines)
342
707
 
343
- def get_messages_for(self, agent_name: str) -> list[dict[str, str]]:
708
+ def get_messages_for(self, agent_name: str) -> list[dict[str, Any]]:
344
709
  """
345
710
  Get the messages list as a specific agent would see it.
346
711
 
@@ -374,5 +739,6 @@ class MultiAgentChat:
374
739
 
375
740
  def __repr__(self) -> str:
376
741
  return (
377
- f"MultiAgentChat(agents={list(self._agents.keys())}, messages={len(self._transcript)})"
742
+ f"MultiAgentChat(agents={list(self._agents.keys())}, "
743
+ f"messages={len(self._transcript)})"
378
744
  )
@@ -5,6 +5,7 @@ import logging
5
5
  import time
6
6
  from dataclasses import dataclass, field
7
7
 
8
+ from .._spinner import Spinner
8
9
  from ..types import CompletionRequest, CompletionResponse, RateLimitConfig, RateLimitError
9
10
  from .base import CompletionHandler, Middleware
10
11
 
@@ -52,6 +53,18 @@ class TokenBucket:
52
53
  Raises:
53
54
  RateLimitError: If timeout exceeded
54
55
  """
56
+ # Clamp to capacity so a single oversized request can never deadlock.
57
+ # The request still pays the refill-wait cost for `capacity` tokens,
58
+ # which preserves rate-limiting backpressure.
59
+ effective = min(tokens, self.capacity)
60
+ if effective < tokens:
61
+ logger.warning(
62
+ "Requested %d tokens exceeds bucket capacity %d — "
63
+ "clamping to capacity to avoid deadlock",
64
+ int(tokens),
65
+ int(self.capacity),
66
+ )
67
+
55
68
  start_time = time.monotonic()
56
69
  deadline = start_time + timeout if timeout else None
57
70
 
@@ -59,12 +72,12 @@ class TokenBucket:
59
72
  while True:
60
73
  self._refill()
61
74
 
62
- if self.tokens >= tokens:
63
- self.tokens -= tokens
75
+ if self.tokens >= effective:
76
+ self.tokens -= effective
64
77
  return time.monotonic() - start_time
65
78
 
66
79
  # Calculate wait time for enough tokens
67
- tokens_needed = tokens - self.tokens
80
+ tokens_needed = effective - self.tokens
68
81
  wait_time = tokens_needed / self.rate
69
82
 
70
83
  # Check timeout
@@ -146,21 +159,31 @@ class RateLimitMiddleware(Middleware):
146
159
 
147
160
  # Acquire RPM token before making request
148
161
  if self._rpm_bucket:
149
- wait_time = await self._rpm_bucket.acquire()
162
+ async with Spinner("Waiting on rate limit (RPM)...", delay=0.4):
163
+ wait_time = await self._rpm_bucket.acquire()
150
164
  if wait_time > 0.1: # Only log significant waits
151
- logger.debug(f"Rate limit: waited {wait_time:.2f}s for RPM token")
165
+ logger.info(
166
+ "⏳ Rate limit backpressure: waited %.2fs for RPM capacity", wait_time
167
+ )
152
168
 
153
169
  # Make the request
154
170
  response = await next_handler(request)
155
171
 
156
172
  # For TPM limiting, consume tokens based on actual usage
157
- # This is post-hoc - we can't know token count before the request
173
+ # This is post-hoc — we can't know token count before the request
158
174
  if self._tpm_bucket and response.usage:
159
175
  total_tokens = response.usage.total_tokens
160
176
  if total_tokens > 0:
161
- # Don't block on TPM - just record the usage
162
177
  # This creates backpressure for subsequent requests
163
- await self._tpm_bucket.acquire(tokens=float(total_tokens))
178
+ async with Spinner("Waiting on rate limit (TPM)...", delay=0.4):
179
+ wait_time = await self._tpm_bucket.acquire(tokens=float(total_tokens))
180
+ if wait_time > 0.1:
181
+ logger.info(
182
+ "⏳ Rate limit backpressure: waited %.2fs for TPM capacity "
183
+ "(%d tokens used)",
184
+ wait_time,
185
+ total_tokens,
186
+ )
164
187
 
165
188
  return response
166
189
 
@@ -354,11 +354,23 @@ class InspectLogger:
354
354
  sample_id = self._sample_count
355
355
  self._sample_count += 1
356
356
 
357
- # Convert messages to Inspect format
358
- input_messages = [
359
- {"role": msg.get("role", "user"), "content": msg.get("content", "")}
360
- for msg in request.messages
361
- ]
357
+ # Convert messages to Inspect format (preserve name field for multi-agent)
358
+ input_messages = []
359
+ for msg in request.messages:
360
+ inspect_msg: dict[str, Any] = {
361
+ "role": msg.get("role", "user"),
362
+ "content": msg.get("content", ""),
363
+ }
364
+ if msg.get("name"):
365
+ inspect_msg["name"] = msg["name"]
366
+ input_messages.append(inspect_msg)
367
+
368
+ # Build metadata, including template info for traceability
369
+ entry_metadata = dict(metadata or {})
370
+ if request.template is not None:
371
+ entry_metadata["template"] = request.template
372
+ if request.variables is not None:
373
+ entry_metadata["variables"] = request.variables
362
374
 
363
375
  entry = InspectLogEntry(
364
376
  eval_id=self._eval_id,
@@ -373,7 +385,7 @@ class InspectLogger:
373
385
  "total": response.usage.total_tokens if response.usage else 0,
374
386
  },
375
387
  timestamp=datetime.now(UTC).isoformat(),
376
- metadata=metadata or {},
388
+ metadata=entry_metadata,
377
389
  )
378
390
 
379
391
  json_str = json.dumps(entry.to_dict())
@@ -175,6 +175,10 @@ class StructuredLogger:
175
175
 
176
176
  # Build parameters dict
177
177
  params: dict[str, Any] = {}
178
+ if request.template is not None:
179
+ params["template"] = request.template
180
+ if request.variables is not None:
181
+ params["variables"] = request.variables
178
182
  if request.temperature is not None:
179
183
  params["temperature"] = request.temperature
180
184
  if request.max_tokens is not None:
flashlite/types.py CHANGED
@@ -58,7 +58,7 @@ class CompletionRequest:
58
58
  """A request to complete a chat conversation."""
59
59
 
60
60
  model: str
61
- messages: Messages = {}
61
+ messages: Messages = field(default_factory=list)
62
62
  template: str | None = None
63
63
  variables: dict[str, Any] | None = None
64
64
  temperature: float | None = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flashlite
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: Batteries-included wrapper for litellm with rate limiting, retries, templating, and more
5
5
  Author-email: ndalton12 <niall.dalton12@gmail.com>
6
6
  License-File: LICENSE.md
@@ -1,8 +1,9 @@
1
1
  flashlite/__init__.py,sha256=RlXjsK7zvZXStMvfz4FGqBxTWHev9VkyHYy-35TuTuM,3585
2
- flashlite/client.py,sha256=zQH_eLWZxnkX9acwI-y9c3uxeGybA-C0I9UPU6HrzvI,25081
2
+ flashlite/_spinner.py,sha256=9KHXD1MW33P2VM-sUe7NZJYz48auJXLspwy6unjMjSE,3019
3
+ flashlite/client.py,sha256=1UYWpWEfbrZe4mkz2-qaztjUM38TJS6swKbZ_OSgThw,25309
3
4
  flashlite/config.py,sha256=3RMEIAejBPlBG_VOgD8mpZKEDNZvK0k0cVv3vMM9kW8,4818
4
5
  flashlite/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- flashlite/types.py,sha256=mrqhUlnnYV8-FBSKxbxc2ZPy-aLH1hgwToCwqqlipYU,6473
6
+ flashlite/types.py,sha256=OqtgtnZaEWRYfaI6esqzv4-HKwu76Y-RDu3-Mhsae9w,6498
6
7
  flashlite/cache/__init__.py,sha256=T8O7oiZ0U181_tacJzfK6IGEAt1m3NdaIlBjq9wmB44,325
7
8
  flashlite/cache/base.py,sha256=IaDAI4EzewhJe0quh2JQK9-BxQxGxUDwrsd9BCaHFFc,5663
8
9
  flashlite/cache/disk.py,sha256=pGPI7eJW6RqVCQC4laTYhQr0iU-AkjA4aFFYt-wg8ls,8777
@@ -10,7 +11,7 @@ flashlite/cache/memory.py,sha256=_A4F7NTR9da2KDQW7fcKnUWrC-W_JpaYmb3d6rovX3w,441
10
11
  flashlite/conversation/__init__.py,sha256=zSgC4G697mx3T5bKn8WUEkSaSkMQQeHJsfyLdRUM30w,694
11
12
  flashlite/conversation/context.py,sha256=NQMLi5_WiN1zDYaPZTO9uJG_dJ3JJiVmAFfGAPM4X6c,10164
12
13
  flashlite/conversation/manager.py,sha256=dSQDgtzNt_6T8S1sHSAXKcS3DoBQ2vI9Ig1PZKaTh48,11644
13
- flashlite/conversation/multi_agent.py,sha256=t1jZD1VS3NOcAJtjQTMtvjEZCVTlFGy3SOxE_jjAtuo,11591
14
+ flashlite/conversation/multi_agent.py,sha256=tt5gNzUXqnbvp84_aWtcoCIlShPWSCzVa_Jt48Xuvy8,25427
14
15
  flashlite/core/__init__.py,sha256=nWbMMPED_HsD62hkIYv45DDR6zX2_cDWCMPDTNfqSu4,315
15
16
  flashlite/core/completion.py,sha256=NTtAJzJ3ba0N0xVs8lCN5htme0SWEMxYroGjI63crw4,3847
16
17
  flashlite/core/messages.py,sha256=-EUtEjFjSNY1Lzfrynb9xtYw4FZRKnfFoYQqgsUcQZQ,3848
@@ -18,12 +19,12 @@ flashlite/middleware/__init__.py,sha256=T8Z4uSqjkuAcf5u5FuUBNfKyL5sqp4Iw4sov_xiU
18
19
  flashlite/middleware/base.py,sha256=LC_IL96jWWPdE0o_PBGPvSylmyLmob20LBVvGkfUS3g,2691
19
20
  flashlite/middleware/cache.py,sha256=R1YwAZBg5YJGTiqgNWdkl7VSN1xpmqmupTSBQnpyH-s,4032
20
21
  flashlite/middleware/logging.py,sha256=D3x8X1l1LN1Um_qOWuELyO8Fgo9WulFJTIx6s94Ure4,4919
21
- flashlite/middleware/rate_limit.py,sha256=nf0-Ul0CGnX0VRKtxB2dfoplkBin3P2cMLrbks76lcg,7059
22
+ flashlite/middleware/rate_limit.py,sha256=a0L0tnnX60ouJ7rLIoHs7JNX59Q5kqlf0kQgzP4FMlw,8091
22
23
  flashlite/middleware/retry.py,sha256=_3Lz9Gmes2sNk6rO10WamH6yrwJy8TQi-esIl8NIMag,4832
23
24
  flashlite/observability/__init__.py,sha256=VHdYteU9KmVkgSHrkA-Ssz6_qoi9uL-2JFDhSH5sgwI,949
24
25
  flashlite/observability/callbacks.py,sha256=yz1oZh7f7WVxvKmt7XyHbj4WDC2xnvM3SJiTSxfAkoQ,4897
25
- flashlite/observability/inspect_compat.py,sha256=IrsdEiV-qn_wOlgAvWLcIJ_7WxU0Bpq7DcHaS_KWXPw,16366
26
- flashlite/observability/logging.py,sha256=UxBH2RN8rNcGZHYgC_QYiuEpaIRXEQFs1OjiKjxbuf0,9273
26
+ flashlite/observability/inspect_compat.py,sha256=S2D2h_w_qD7xsd6cPMwt3-kbt76NYWbR35h2BKR5m24,16913
27
+ flashlite/observability/logging.py,sha256=qH0ky22nePzjVQIPPhsALcA4VIf7mkP_wMwg69fYM2s,9458
27
28
  flashlite/observability/metrics.py,sha256=blRx5N3uN4ilnPpxBe7k_uDhYV3GmQWXoKPLVxnk8_s,7466
28
29
  flashlite/structured/__init__.py,sha256=9k5bwkzFo_JD3WZ1Tm4iyZqoZ1A51EIINI8N1H2_2ew,750
29
30
  flashlite/structured/outputs.py,sha256=Q_isfrtKJGybBadGMKmfo5UJ5vMaUQRCRgFpjGWZOF8,5070
@@ -35,7 +36,7 @@ flashlite/templating/registry.py,sha256=wp8RaibHKNyu5q4tCdOXJ0B4tey7bv-c0qb9h1a7
35
36
  flashlite/tools/__init__.py,sha256=zpQ5KyvZwZaVvaulnpMmL_JjCnMfD08nD_foI95TjVg,1791
36
37
  flashlite/tools/definitions.py,sha256=cqyk6GR1qeMkTPFqsadnJc-YkCG15QVafiaf-OjGYNU,11519
37
38
  flashlite/tools/execution.py,sha256=iQC7V3R5Tx19suISnnuaDpjpgl8wURwOHmKZbsHL16s,10814
38
- flashlite-0.1.1.dist-info/METADATA,sha256=2BbX7EfrpyvJh-SjiM0noo-gzyDd8bmNchpfLpjLx4A,4293
39
- flashlite-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
40
- flashlite-0.1.1.dist-info/licenses/LICENSE.md,sha256=z2KZcyoH16ayjxlbeBM01uD-bXn1WTcKFab5ZKBhfJE,1068
41
- flashlite-0.1.1.dist-info/RECORD,,
39
+ flashlite-0.2.0.dist-info/METADATA,sha256=a1YS4nT7UJJD98ibIlguAfWdhUc-2SDN9xQ9jBmjjSA,4293
40
+ flashlite-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
41
+ flashlite-0.2.0.dist-info/licenses/LICENSE.md,sha256=z2KZcyoH16ayjxlbeBM01uD-bXn1WTcKFab5ZKBhfJE,1068
42
+ flashlite-0.2.0.dist-info/RECORD,,