flashlite-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. flashlite/__init__.py +169 -0
  2. flashlite/cache/__init__.py +14 -0
  3. flashlite/cache/base.py +194 -0
  4. flashlite/cache/disk.py +285 -0
  5. flashlite/cache/memory.py +157 -0
  6. flashlite/client.py +671 -0
  7. flashlite/config.py +154 -0
  8. flashlite/conversation/__init__.py +30 -0
  9. flashlite/conversation/context.py +319 -0
  10. flashlite/conversation/manager.py +385 -0
  11. flashlite/conversation/multi_agent.py +378 -0
  12. flashlite/core/__init__.py +13 -0
  13. flashlite/core/completion.py +145 -0
  14. flashlite/core/messages.py +130 -0
  15. flashlite/middleware/__init__.py +18 -0
  16. flashlite/middleware/base.py +90 -0
  17. flashlite/middleware/cache.py +121 -0
  18. flashlite/middleware/logging.py +159 -0
  19. flashlite/middleware/rate_limit.py +211 -0
  20. flashlite/middleware/retry.py +149 -0
  21. flashlite/observability/__init__.py +34 -0
  22. flashlite/observability/callbacks.py +155 -0
  23. flashlite/observability/inspect_compat.py +266 -0
  24. flashlite/observability/logging.py +293 -0
  25. flashlite/observability/metrics.py +221 -0
  26. flashlite/py.typed +0 -0
  27. flashlite/structured/__init__.py +31 -0
  28. flashlite/structured/outputs.py +189 -0
  29. flashlite/structured/schema.py +165 -0
  30. flashlite/templating/__init__.py +11 -0
  31. flashlite/templating/engine.py +217 -0
  32. flashlite/templating/filters.py +143 -0
  33. flashlite/templating/registry.py +165 -0
  34. flashlite/tools/__init__.py +74 -0
  35. flashlite/tools/definitions.py +382 -0
  36. flashlite/tools/execution.py +353 -0
  37. flashlite/types.py +233 -0
  38. flashlite-0.1.0.dist-info/METADATA +173 -0
  39. flashlite-0.1.0.dist-info/RECORD +41 -0
  40. flashlite-0.1.0.dist-info/WHEEL +4 -0
  41. flashlite-0.1.0.dist-info/licenses/LICENSE.md +21 -0
flashlite/client.py ADDED
@@ -0,0 +1,671 @@
+ """Main Flashlite client class."""
+
+ import asyncio
+ import logging
+ from pathlib import Path
+ from typing import Any, TypeVar, overload
+
+ from pydantic import BaseModel
+
+ from .cache import CacheBackend, MemoryCache
+ from .config import FlashliteConfig, load_env_files
+ from .conversation import ContextManager, Conversation
+ from .core.completion import complete as core_complete
+ from .core.messages import format_messages
+ from .middleware.base import Middleware, MiddlewareChain
+ from .middleware.cache import CacheMiddleware
+ from .middleware.logging import LoggingMiddleware
+ from .middleware.rate_limit import ConcurrencyLimiter, RateLimitMiddleware
+ from .middleware.retry import RetryMiddleware
+ from .observability.callbacks import CallbackManager
+ from .observability.logging import StructuredLogger
+ from .observability.metrics import CostTracker
+ from .structured import (
+     StructuredOutputError,
+     format_validation_error_for_retry,
+     schema_to_prompt,
+     validate_response,
+ )
+ from .templating.engine import TemplateEngine
+ from .tools import ToolDefinition, tools_to_anthropic, tools_to_openai
+ from .types import (
+     CompletionRequest,
+     CompletionResponse,
+     Messages,
+     RateLimitConfig,
+     RetryConfig,
+     ThinkingConfig,
+ )
+
+ T = TypeVar("T", bound=BaseModel)
+
+ logger = logging.getLogger(__name__)
+
+
+ class Flashlite:
+     """
+     Batteries-included LLM client wrapping litellm.
+
+     Features:
+     - Automatic retries with exponential backoff
+     - Rate limiting (RPM and TPM)
+     - Jinja templating for prompts
+     - Async-first with sync wrappers
+     - Full passthrough of provider kwargs
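+
+     Example (illustrative; the model id is a placeholder, not a package default):
+         client = Flashlite(default_model="gpt-4o", track_costs=True)
+         response = await client.complete(messages="Hello!")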
+     """
+
+     def __init__(
+         self,
+         # Environment
+         env_file: str | Path | None = None,
+         env_files: list[str | Path] | None = None,
+         # Configuration
+         config: FlashliteConfig | None = None,
+         default_model: str | None = None,
+         # Middleware configs
+         retry: RetryConfig | None = None,
+         rate_limit: RateLimitConfig | None = None,
+         # Caching (disabled by default)
+         cache: CacheBackend | None = None,
+         cache_ttl: float | None = None,
+         # Templating
+         template_dir: str | Path | None = None,
+         # Logging & Observability
+         log_requests: bool = False,
+         log_level: str = "INFO",
+         structured_logger: StructuredLogger | None = None,
+         # Cost tracking
+         track_costs: bool = False,
+         budget_limit: float | None = None,
+         # Callbacks
+         callbacks: CallbackManager | None = None,
+         # Defaults
+         default_kwargs: dict[str, Any] | None = None,
+         timeout: float = 600.0,
+     ):
+         """
+         Initialize the Flashlite client.
+
+         Args:
+             env_file: Path to .env file to load
+             env_files: Multiple .env files to load (later files override earlier ones)
+             config: Full configuration object (overrides individual params)
+             default_model: Default model to use if not specified per-request
+             retry: Retry configuration
+             rate_limit: Rate limiting configuration
+             cache: Cache backend (None = disabled, pass MemoryCache or DiskCache to enable)
+             cache_ttl: Default TTL for cached entries (seconds)
+             template_dir: Directory containing prompt templates
+             log_requests: Whether to log requests/responses
+             log_level: Logging level
+             structured_logger: Structured logger for detailed logging to files
+             track_costs: Whether to track token costs
+             budget_limit: Maximum budget in USD (requires track_costs=True)
+             callbacks: Callback manager for event hooks
+             default_kwargs: Default kwargs for all completions
+             timeout: Request timeout in seconds
+
+         Note:
+             Caching is disabled by default. When it is enabled with temperature > 0
+             or with reasoning models, a warning is emitted since responses may vary.
+         """
+         # Load environment files
+         load_env_files(env_file, env_files)
+
+         # Build configuration
+         if config:
+             self._config = config
+         else:
+             # Start with env-based config, then override with explicit params
+             self._config = FlashliteConfig.from_env()
+
+             if default_model:
+                 self._config.default_model = default_model
+             if retry:
+                 self._config.retry = retry
+             if rate_limit:
+                 self._config.rate_limit = rate_limit
+             if template_dir:
+                 self._config.template_dir = template_dir
+             if log_requests:
+                 self._config.log_requests = log_requests
+             if log_level != "INFO":
+                 self._config.log_level = log_level
+             if default_kwargs:
+                 self._config.default_kwargs = default_kwargs
+             if timeout != 600.0:
+                 self._config.timeout = timeout
+
+         # Setup logging
+         logging.basicConfig(level=getattr(logging, self._config.log_level))
+
+         # Store observability components
+         self._cache = cache
+         self._cache_ttl = cache_ttl
+         self._structured_logger = structured_logger
+         self._callbacks = callbacks
+
+         # Cost tracking
+         self._cost_tracker: CostTracker | None = None
+         if track_costs or budget_limit is not None:
+             self._cost_tracker = CostTracker(budget_limit=budget_limit)
+
+         # Emit info about caching status
+         if cache is None:
+             logger.info(
+                 "Caching is disabled. To enable, pass cache=MemoryCache() or "
+                 "cache=DiskCache('./cache.db') to the Flashlite client."
+             )
+
+         # Initialize template engine
+         self._template_engine: TemplateEngine | None = None
+         if self._config.template_dir:
+             self._template_engine = TemplateEngine(self._config.template_dir)
+
+         # Build middleware chain
+         self._middleware = self._build_middleware()
+
+     def _build_middleware(self) -> list[Middleware]:
+         """Build the middleware stack.
+
+         Middleware order (outermost to innermost):
+         1. Logging - tracks timing and emits events
+         2. Retry - handles transient failures
+         3. Cache - returns cached responses (skips inner middleware on hit)
+         4. Rate limiting - controls request rate
+         """
+         middleware: list[Middleware] = []
+
+         # Logging middleware (outermost - captures full timing)
+         if (
+             self._config.log_requests
+             or self._structured_logger
+             or self._cost_tracker
+             or self._callbacks
+         ):
+             middleware.append(
+                 LoggingMiddleware(
+                     structured_logger=self._structured_logger,
+                     cost_tracker=self._cost_tracker,
+                     callbacks=self._callbacks,
+                     log_level=self._config.log_level,
+                 )
+             )
+
+         # Retry middleware
+         middleware.append(RetryMiddleware(self._config.retry))
+
+         # Cache middleware (after retry - retries apply to cache misses)
+         if self._cache is not None:
+             middleware.append(
+                 CacheMiddleware(
+                     backend=self._cache,
+                     ttl=self._cache_ttl,
+                     warn_non_deterministic=True,
+                 )
+             )
+
+         # Rate limiting middleware (innermost - rate limiting is per-request)
+         if (
+             self._config.rate_limit.requests_per_minute
+             or self._config.rate_limit.tokens_per_minute
+         ):
+             middleware.append(RateLimitMiddleware(self._config.rate_limit))
+
+         return middleware
+
+     def _get_chain(self) -> MiddlewareChain:
+         """Get the middleware chain with the core completion handler."""
+         return MiddlewareChain(self._middleware, self._core_complete)
+
+     async def _core_complete(self, request: CompletionRequest) -> CompletionResponse:
+         """Core completion handler - calls litellm."""
+         if self._config.log_requests:
+             logger.info(f"Completion request: model={request.model}")
+
+         response = await core_complete(request)
+
+         if self._config.log_requests:
+             logger.info(
+                 f"Completion response: model={response.model}, "
+                 f"tokens={response.usage.total_tokens if response.usage else 'N/A'}"
+             )
+
+         return response
+
+     @overload
+     async def complete(
+         self,
+         model: str | None = None,
+         messages: Messages | str | None = None,
+         *,
+         response_model: None = None,
+         template: str | None = None,
+         variables: dict[str, Any] | None = None,
+         system: str | None = None,
+         user: str | None = None,
+         temperature: float | None = None,
+         max_tokens: int | None = None,
+         max_completion_tokens: int | None = None,
+         top_p: float | None = None,
+         stop: str | list[str] | None = None,
+         reasoning_effort: str | None = None,
+         thinking: ThinkingConfig | None = None,
+         tools: list[ToolDefinition | Any] | None = None,
+         structured_retries: int = 1,
+         **kwargs: Any,
+     ) -> CompletionResponse: ...
+
+     @overload
+     async def complete(
+         self,
+         model: str | None = None,
+         messages: Messages | str | None = None,
+         *,
+         response_model: type[T],
+         template: str | None = None,
+         variables: dict[str, Any] | None = None,
+         system: str | None = None,
+         user: str | None = None,
+         temperature: float | None = None,
+         max_tokens: int | None = None,
+         max_completion_tokens: int | None = None,
+         top_p: float | None = None,
+         stop: str | list[str] | None = None,
+         reasoning_effort: str | None = None,
+         thinking: ThinkingConfig | None = None,
+         tools: list[ToolDefinition | Any] | None = None,
+         structured_retries: int = 1,
+         **kwargs: Any,
+     ) -> T: ...
+
+     async def complete(
+         self,
+         model: str | None = None,
+         messages: Messages | str | None = None,
+         *,
+         # Structured outputs
+         response_model: type[T] | None = None,
+         # Template support
+         template: str | None = None,
+         variables: dict[str, Any] | None = None,
+         # Message building
+         system: str | None = None,
+         user: str | None = None,
+         # Common params
+         temperature: float | None = None,
+         max_tokens: int | None = None,
+         max_completion_tokens: int | None = None,
+         top_p: float | None = None,
+         stop: str | list[str] | None = None,
+         # OpenAI reasoning model params (o1, o3)
+         reasoning_effort: str | None = None,
+         # Anthropic extended thinking params (Claude)
+         thinking: ThinkingConfig | None = None,
+         # Tool/function calling
+         tools: list[ToolDefinition | Any] | None = None,
+         # Structured output retries
+         structured_retries: int = 1,
+         # Additional kwargs passed through to litellm
+         **kwargs: Any,
+     ) -> CompletionResponse | T:
+         """
+         Make a completion request.
+
+         Args:
+             model: Model identifier (uses default if not specified)
+             messages: Messages list, or single string (becomes user message)
+             response_model: Pydantic model class for structured output parsing.
+                 When provided, returns a validated instance of the model.
+             template: Template name to render (from template_dir or registered)
+             variables: Variables for template rendering
+             system: System prompt (prepended to messages)
+             user: User message (appended to messages)
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens in response
+             max_completion_tokens: Max completion tokens (for reasoning models)
+             top_p: Nucleus sampling parameter
+             stop: Stop sequences
+             reasoning_effort: OpenAI reasoning effort level (low/medium/high) for o1/o3
+             thinking: Anthropic extended thinking config for Claude models.
+                 Use the thinking_enabled(budget_tokens) helper or pass a dict directly.
+             tools: List of tools for function calling. Accepts @tool decorated functions
+                 or ToolDefinition objects. Auto-converts to provider format.
+             structured_retries: Number of retries for structured output validation (default: 1)
+             **kwargs: Additional provider-specific parameters
+
+         Returns:
+             CompletionResponse if no response_model, or validated model instance
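+
+         Example (illustrative; the model id and prompt are placeholders):
+             response = await client.complete(
+                 model="gpt-4o",
+                 messages="Summarize flashlite in one sentence.",
+                 temperature=0.0,
+             )
+             print(response.content)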
+         """
+         # Resolve model
+         resolved_model = model or self._config.default_model
+         if not resolved_model:
+             raise ValueError("No model specified and no default_model configured")
+
+         # Build messages
+         if template:
+             if not self._template_engine:
+                 raise ValueError("Template specified but no template_dir configured")
+             rendered = self._template_engine.render(template, variables)
+             # Template renders to user message content
+             final_messages = format_messages(messages=rendered, system=system)
+         else:
+             final_messages = format_messages(messages=messages, system=system, user=user)
+
+         if not final_messages:
+             raise ValueError("No messages provided")
+
+         # Handle structured outputs
+         effective_system = system
+         if response_model is not None:
+             # Inject JSON schema into system prompt
+             schema_prompt = schema_to_prompt(response_model)
+             if effective_system:
+                 effective_system = f"{effective_system}\n\n{schema_prompt}"
+             else:
+                 effective_system = schema_prompt
+
+             # Rebuild messages with schema in system prompt
+             if template:
+                 rendered = self._template_engine.render(template, variables)  # type: ignore
+                 final_messages = format_messages(messages=rendered, system=effective_system)
+             else:
+                 final_messages = format_messages(
+                     messages=messages, system=effective_system, user=user
+                 )
+
+             # Enable JSON mode for supported providers
+             if "response_format" not in kwargs:
+                 # Check if model supports JSON mode
+                 model_lower = resolved_model.lower()
+                 if any(
+                     p in model_lower
+                     for p in ["gpt-4", "gpt-3.5", "claude", "gemini", "mistral"]
+                 ):
+                     kwargs["response_format"] = {"type": "json_object"}
+
+         # Build extra kwargs (merge defaults with per-request)
+         extra_kwargs = {**self._config.default_kwargs, **kwargs}
+
+         # Handle tools - convert to provider format if provided
+         if tools is not None and "tools" not in extra_kwargs:
+             model_lower = resolved_model.lower()
+             if "claude" in model_lower or "anthropic" in model_lower:
+                 extra_kwargs["tools"] = tools_to_anthropic(tools)
+             else:
+                 extra_kwargs["tools"] = tools_to_openai(tools)
+
+         # Build request
+         request = CompletionRequest(
+             model=resolved_model,
+             messages=final_messages,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             max_completion_tokens=max_completion_tokens,
+             top_p=top_p,
+             stop=stop,
+             reasoning_effort=reasoning_effort,  # type: ignore
+             thinking=thinking,
+             extra_kwargs=extra_kwargs,
+         )
+
+         # Execute through middleware chain
+         chain = self._get_chain()
+         response = await chain(request)
+
+         # If no response model, return raw response
+         if response_model is None:
+             return response
+
+         # Validate structured output with retries
+         last_error: StructuredOutputError | None = None
+         current_messages = list(final_messages)
+
+         for attempt in range(structured_retries + 1):
+             try:
+                 return validate_response(response, response_model)
+             except StructuredOutputError as e:
+                 last_error = e
+                 logger.warning(
+                     f"Structured output validation failed (attempt {attempt + 1}): {e}"
+                 )
+
+                 # If we have retries left, ask the model to fix it
+                 if attempt < structured_retries:
+                     # Add the failed response and error feedback
+                     error_feedback = format_validation_error_for_retry(e)
+                     current_messages.append({"role": "assistant", "content": response.content})
+                     current_messages.append({"role": "user", "content": error_feedback})
+
+                     # Make another request
+                     retry_request = CompletionRequest(
+                         model=resolved_model,
+                         messages=current_messages,
+                         temperature=temperature,
+                         max_tokens=max_tokens,
+                         max_completion_tokens=max_completion_tokens,
+                         top_p=top_p,
+                         stop=stop,
+                         reasoning_effort=reasoning_effort,  # type: ignore
+                         thinking=thinking,
+                         extra_kwargs=extra_kwargs,
+                     )
+                     response = await chain(retry_request)
+
+         # All retries exhausted
+         raise last_error  # type: ignore
+
+     def complete_sync(
+         self,
+         model: str | None = None,
+         messages: Messages | str | None = None,
+         **kwargs: Any,
+     ) -> CompletionResponse:
+         """
+         Synchronous version of complete().
+
+         See complete() for full parameter documentation.
+         """
+         try:
+             loop = asyncio.get_running_loop()
+         except RuntimeError:
+             loop = None
+
+         if loop and loop.is_running():
+             # We're in an async context - run the coroutine on a separate thread
+             import concurrent.futures
+
+             with concurrent.futures.ThreadPoolExecutor() as executor:
+                 future = executor.submit(
+                     asyncio.run,
+                     self.complete(model=model, messages=messages, **kwargs),
+                 )
+                 return future.result()
+         else:
+             return asyncio.run(self.complete(model=model, messages=messages, **kwargs))
+
+     async def complete_many(
+         self,
+         requests: list[dict[str, Any]],
+         max_concurrency: int = 10,
+     ) -> list[CompletionResponse | T]:
+         """
+         Execute multiple completion requests in parallel with concurrency control.
+
+         Each request can use a different model, making this suitable for
+         multi-agent scenarios where agents may use different models.
+
+         Note: This is NOT the same as the OpenAI/Anthropic "Batch API", which
+         processes requests asynchronously over hours. This executes requests
+         immediately, in parallel, with controlled concurrency.
+
+         Args:
+             requests: List of kwargs dicts for complete(). Each can specify
+                 its own model, messages, response_model, etc.
+             max_concurrency: Maximum concurrent requests (default: 10)
+
+         Returns:
+             List of responses in the same order as the requests. The type of each
+             depends on whether response_model was specified in that request.
+
+         Example:
+             # Different models in the same batch
+             responses = await client.complete_many([
+                 {"model": "gpt-4o", "messages": "Hello from GPT-4"},
+                 {"model": "claude-sonnet-4-20250514", "messages": "Hello from Claude"},
+                 {"model": "gpt-4o-mini", "messages": "Hello from mini"},
+             ])
+
+             # With structured outputs
+             responses = await client.complete_many([
+                 {"messages": "Analyze: good", "response_model": Sentiment},
+                 {"messages": "Analyze: bad", "response_model": Sentiment},
+             ])
+         """
+         limiter = ConcurrencyLimiter(max_concurrency)
+
+         async def process_one(req_kwargs: dict[str, Any]) -> CompletionResponse | T:
+             async with limiter:
+                 return await self.complete(**req_kwargs)
+
+         tasks = [process_one(req) for req in requests]
+         return await asyncio.gather(*tasks)
+
+     # Template management
+     def register_template(self, name: str, template: str) -> None:
+         """Register an in-memory template."""
+         if not self._template_engine:
+             self._template_engine = TemplateEngine()
+         self._template_engine.register(name, template)
+
+     def render_template(
+         self,
+         template: str,
+         variables: dict[str, Any] | None = None,
+     ) -> str:
+         """Render a template without making a completion."""
+         if not self._template_engine:
+             self._template_engine = TemplateEngine()
+         return self._template_engine.render(template, variables)
+
+     # Conversation management
+     def conversation(
+         self,
+         system: str | None = None,
+         model: str | None = None,
+         max_turns: int | None = None,
+     ) -> Conversation:
+         """
+         Create a new conversation for multi-turn interactions.
+
+         Args:
+             system: System prompt for the conversation
+             model: Default model (overrides client default)
+             max_turns: Maximum turns to keep in history (None = unlimited)
+
+         Returns:
+             A new Conversation instance bound to this client
+
+         Example:
+             conv = client.conversation(system="You are helpful.")
+             response1 = await conv.say("What is Python?")
+             response2 = await conv.say("How do I install it?")
+
+             # Fork for exploration
+             branch = conv.fork()
+             alt = await branch.say("What about JavaScript?")
+         """
+         return Conversation(
+             client=self,
+             system=system,
+             model=model,
+             max_turns=max_turns,
+         )
+
+     def context_manager(
+         self,
+         model: str | None = None,
+         max_response_tokens: int = 4096,
+         auto_truncate: bool = True,
+     ) -> ContextManager:
+         """
+         Create a context manager for manual context window control.
+
+         Args:
+             model: Model to get limits for (uses default if not specified)
+             max_response_tokens: Expected max tokens in response
+             auto_truncate: Whether to auto-truncate when preparing messages
+
+         Returns:
+             A ContextManager for the specified model
+         """
+         effective_model = model or self._config.default_model
+         if not effective_model:
+             raise ValueError("No model specified and no default_model configured")
+         return ContextManager(
+             model=effective_model,
+             max_response_tokens=max_response_tokens,
+             auto_truncate=auto_truncate,
+         )
+
+     # Properties
+     @property
+     def config(self) -> FlashliteConfig:
+         """Get the current configuration."""
+         return self._config
+
+     @property
+     def template_engine(self) -> TemplateEngine | None:
+         """Get the template engine."""
+         return self._template_engine
+
+     @property
+     def cache(self) -> CacheBackend | None:
+         """Get the cache backend (None if caching disabled)."""
+         return self._cache
+
+     @property
+     def cost_tracker(self) -> CostTracker | None:
+         """Get the cost tracker (None if cost tracking disabled)."""
+         return self._cost_tracker
+
+     @property
+     def total_cost(self) -> float:
+         """Get total cost in USD (0.0 if cost tracking disabled)."""
+         return self._cost_tracker.total_cost if self._cost_tracker else 0.0
+
+     @property
+     def total_tokens(self) -> int:
+         """Get total tokens used (0 if cost tracking disabled)."""
+         return self._cost_tracker.total_tokens if self._cost_tracker else 0
+
+     def get_cost_report(self) -> dict[str, Any] | None:
+         """Get a detailed cost report (None if cost tracking disabled)."""
+         return self._cost_tracker.get_report() if self._cost_tracker else None
+
+     async def clear_cache(self) -> int:
+         """
+         Clear all cached entries.
+
+         Returns:
+             Number of entries cleared (0 if caching disabled)
+         """
+         if self._cache is not None:
+             return await self._cache.clear()
+         return 0
+
+     async def cache_stats(self) -> dict[str, Any] | None:
+         """
+         Get cache statistics.
+
+         Returns:
+             Cache stats dict, or None if caching disabled
+         """
+         if self._cache is not None:
+             if isinstance(self._cache, MemoryCache):
+                 return self._cache.stats()
+             # For disk cache or other backends
+             return {
+                 "size": await self._cache.size(),
+             }
+         return None
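
A minimal usage sketch based on the client API in the diff above (the model id, the Sentiment schema, and the prompts are illustrative assumptions; Flashlite is assumed to be re-exported from the package root per flashlite/__init__.py):

    import asyncio

    from pydantic import BaseModel

    from flashlite import Flashlite  # assumes the root package re-exports the client


    class Sentiment(BaseModel):
        # Hypothetical schema used only to demonstrate response_model validation.
        label: str
        confidence: float


    async def main() -> None:
        # Caching stays off unless a backend is passed explicitly.
        client = Flashlite(default_model="gpt-4o", track_costs=True)

        # A bare string becomes a single user message.
        response = await client.complete(messages="Say hello in five words.")
        print(response.content)

        # With response_model, complete() returns a validated Sentiment instance,
        # retrying once on validation failure (structured_retries defaults to 1).
        result = await client.complete(
            messages="Analyze the sentiment: this library is great.",
            response_model=Sentiment,
        )
        print(result.label, result.confidence, client.total_cost)


    asyncio.run(main())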