PyPI - jaf-py - Versions diffs - 2.6.1__py3-none-any.whl → 2.6.3__py3-none-any.whl - Mend

jaf-py 2.6.1__py3-none-any.whl → 2.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

jaf/__init__.py +1 -1
jaf/core/engine.py +29 -0
jaf/core/tracing.py +11 -11
jaf/core/types.py +6 -0
jaf/providers/model.py +58 -6
jaf/server/server.py +1 -1
{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/METADATA +2 -2
{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/RECORD +12 -12
{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/WHEEL +0 -0
{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/entry_points.txt +0 -0
{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/licenses/LICENSE +0 -0
{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/top_level.txt +0 -0

jaf/__init__.py CHANGED Viewed

@@ -201,7 +201,7 @@ def generate_run_id() -> RunId:
     return create_run_id(str(uuid.uuid4()))
-__version__ = "2.6.1"
+__version__ = "2.6.3"
 __all__ = [
     # Core types and functions
     "TraceId",

jaf/core/engine.py CHANGED Viewed

@@ -395,6 +395,17 @@ async def _store_conversation_history(state: RunState[Ctx], config: RunConfig[Ct
         )
     messages_to_store = list(state.messages)
+    if config.before_memory_store:
+        if asyncio.iscoroutinefunction(config.before_memory_store):
+            messages_to_store = await config.before_memory_store(messages_to_store, state)
+        else:
+            result = config.before_memory_store(messages_to_store, state)
+            if asyncio.iscoroutine(result):
+                messages_to_store = await result
+            else:
+                messages_to_store = result
     if (
         config.memory.compression_threshold
         and len(messages_to_store) > config.memory.compression_threshold
@@ -681,8 +692,19 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
                 aggregated_text = ""
                 # Working array of partial tool calls
                 partial_tool_calls: List[Dict[str, Any]] = []
+                # Capture usage and model from streaming chunks
+                stream_usage: Optional[Dict[str, int]] = None
+                stream_model: Optional[str] = None
                 async for chunk in get_stream(state, current_agent, config):  # type: ignore[arg-type]
+                    # Extract usage and model from raw chunk if available
+                    raw_chunk = getattr(chunk, "raw", None)
+                    if raw_chunk:
+                        if not stream_usage and "usage" in raw_chunk and raw_chunk["usage"]:
+                            stream_usage = raw_chunk["usage"]
+                        if not stream_model and "model" in raw_chunk and raw_chunk["model"]:
+                            stream_model = raw_chunk["model"]
                     # Text deltas
                     delta_text = getattr(chunk, "delta", None)
                     if delta_text:
@@ -792,6 +814,13 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
                 llm_response = {
                     "message": {"content": aggregated_text or None, "tool_calls": final_tool_calls}
                 }
+                # Preserve usage and model from streaming if captured
+                if stream_usage:
+                    llm_response["usage"] = stream_usage
+                if stream_model:
+                    llm_response["model"] = stream_model
             except Exception:
                 # Fallback to non-streaming on error
                 assistant_event_streamed = False

jaf/core/tracing.py CHANGED Viewed

@@ -469,7 +469,7 @@ class LangfuseTraceCollector:
             public_key=public_key,
             secret_key=secret_key,
             host=host,
-            release="jaf-py-v2.6.1",
+            release="jaf-py-v2.6.3",
             httpx_client=client,
         )
         self._httpx_client = client
@@ -911,18 +911,18 @@ class LangfuseTraceCollector:
                     print(f"[LANGFUSE] Ending generation for LLM call")
                     # End the generation
                     generation = self.active_spans[span_id]
-                    choice = self._get_event_data(event, "choice", {})
-                    # Extract usage from the event data
+                    choice = self._get_event_data(event, "choice", {})
                     usage = self._get_event_data(event, "usage", {})
-                    # Extract model information from choice data or event data
-                    model = choice.get("model", "unknown")
-                    if model == "unknown":
-                        # Try to get model from the choice response structure
-                        if isinstance(choice, dict):
-                            model = choice.get("model") or choice.get("id", "unknown")
+                    model = self._get_event_data(event, "model", "unknown")
+                    # Also try to get model from the choice if not at top level
+                    if model == "unknown" and isinstance(choice, dict):
+                        model = choice.get("model", "unknown")
+                    print(f"[LANGFUSE] Extracted - model: '{model}', usage: {usage}")
                     # Convert to Langfuse v2 format - let Langfuse handle cost calculation automatically
                     langfuse_usage = None
                     if usage:

jaf/core/types.py CHANGED Viewed

@@ -1115,6 +1115,12 @@ class RunConfig(Generic[Ctx]):
             Union[ModelCompletionResponse, Awaitable[ModelCompletionResponse]],
         ]
     ] = None  # Callback after LLM call - can process response
+    before_memory_store: Optional[
+        Callable[
+            [List[Message], RunState[Ctx]],
+            Union[List[Message], Awaitable[List[Message]]],
+        ]
+    ] = None
     max_empty_response_retries: int = 3  # Maximum retries when LLM returns empty response
     empty_response_retry_delay: float = (
         1.0  # Initial delay in seconds before retrying empty response (uses exponential backoff)

jaf/providers/model.py CHANGED Viewed

@@ -706,21 +706,37 @@ def make_litellm_sdk_provider(
                     for tc in choice.message.tool_calls
                 ]
-            # Extract usage data
-            usage_data = None
+            # Extract usage data - ALWAYS return a dict with defaults for Langfuse cost tracking
+            # Initialize with zeros as defensive default (matches AzureDirectProvider pattern)
+            usage_data = {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0,
+            }
+            actual_model = getattr(response, "model", model_name)
             if response.usage:
                 usage_data = {
                     "prompt_tokens": response.usage.prompt_tokens,
                     "completion_tokens": response.usage.completion_tokens,
                     "total_tokens": response.usage.total_tokens,
                 }
+            message_content = {
+                "content": choice.message.content,
+                "tool_calls": tool_calls,
+                # CRITICAL: Embed usage and model here so trace collector can find them
+                "_usage": usage_data,
+                "_model": actual_model,
+            }
             return {
                 "id": response.id,
                 "created": response.created,
-                "model": response.model,
+                "model": actual_model,
                 "system_fingerprint": getattr(response, "system_fingerprint", None),
-                "message": {"content": choice.message.content, "tool_calls": tool_calls},
+                "message": message_content,
                 "usage": usage_data,
                 "prompt": messages,
             }
@@ -769,6 +785,7 @@ def make_litellm_sdk_provider(
                 "model": model_name,
                 "messages": messages,
                 "stream": True,
+                "stream_options": {"include_usage": True},  # Request usage data in streaming
                 **self.litellm_kwargs,
             }
@@ -803,15 +820,31 @@ def make_litellm_sdk_provider(
             # Stream using litellm
             stream = await litellm.acompletion(**request_params)
+            accumulated_usage: Optional[Dict[str, int]] = None
+            response_model: Optional[str] = None
             async for chunk in stream:
                 try:
                     # Best-effort extraction of raw for debugging
                     try:
                         raw_obj = chunk.model_dump() if hasattr(chunk, "model_dump") else None
-                    except Exception:
+                        # Capture usage from chunk if present
+                        if raw_obj and "usage" in raw_obj and raw_obj["usage"]:
+                            accumulated_usage = raw_obj["usage"]
+                        # Capture model from chunk if present
+                        if raw_obj and "model" in raw_obj and raw_obj["model"]:
+                            response_model = raw_obj["model"]
+                    except Exception as e:
                         raw_obj = None
+                    if raw_obj and "usage" in raw_obj and raw_obj["usage"]:
+                        # Yield this chunk so engine.py can capture usage from raw
+                        yield CompletionStreamChunk(delta="", raw=raw_obj)
                     choice = None
                     if getattr(chunk, "choices", None):
                         choice = chunk.choices[0]
@@ -826,6 +859,12 @@ def make_litellm_sdk_provider(
                     if delta is not None:
                         content_delta = getattr(delta, "content", None)
                         if content_delta:
+                            # Include accumulated usage and model in raw_obj for engine
+                            if raw_obj and (accumulated_usage or response_model):
+                                if accumulated_usage:
+                                    raw_obj["usage"] = accumulated_usage
+                                if response_model:
+                                    raw_obj["model"] = response_model
                             yield CompletionStreamChunk(delta=content_delta, raw=raw_obj)
                         # Tool call deltas
@@ -841,6 +880,13 @@ def make_litellm_sdk_provider(
                                         getattr(fn, "arguments", None) if fn is not None else None
                                     )
+                                    # Include accumulated usage and model in raw_obj
+                                    if raw_obj and (accumulated_usage or response_model):
+                                        if accumulated_usage:
+                                            raw_obj["usage"] = accumulated_usage
+                                        if response_model:
+                                            raw_obj["model"] = response_model
                                     yield CompletionStreamChunk(
                                         tool_call_delta=ToolCallDelta(
                                             index=idx,
@@ -857,6 +903,12 @@ def make_litellm_sdk_provider(
                     # Completion ended
                     if finish_reason:
+                        # Include accumulated usage and model in final chunk
+                        if raw_obj and (accumulated_usage or response_model):
+                            if accumulated_usage:
+                                raw_obj["usage"] = accumulated_usage
+                            if response_model:
+                                raw_obj["model"] = response_model
                         yield CompletionStreamChunk(
                             is_done=True, finish_reason=finish_reason, raw=raw_obj
                         )

jaf/server/server.py CHANGED Viewed

@@ -220,7 +220,7 @@ def _convert_core_message_to_http(core_msg: Message) -> HttpMessage:
         content=content,
         attachments=attachments,
         tool_call_id=core_msg.tool_call_id,
-        tool_calls=core_msg.tool_calls,
+        tool_calls=[asdict(tc) for tc in core_msg.tool_calls] if core_msg.tool_calls else None,
     )

{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: jaf-py
-Version: 2.6.1
+Version: 2.6.3
 Summary: A purely functional agent framework with immutable state and composable tools - Python implementation
 Author: JAF Contributors
 Maintainer: JAF Contributors
@@ -82,7 +82,7 @@ Dynamic: license-file
 <!-- ![JAF Banner](docs/cover.png) -->
-[![Version](https://img.shields.io/badge/version-2.6.1-blue.svg)](https://github.com/xynehq/jaf-py)
+[![Version](https://img.shields.io/badge/version-2.6.3-blue.svg)](https://github.com/xynehq/jaf-py)
 [![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/)
 [![Docs](https://img.shields.io/badge/Docs-Live-brightgreen)](https://xynehq.github.io/jaf-py/)

{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-jaf/__init__.py,sha256=sFcfeb0oKFTS3TfOhCxV0-UnpRwvA1AAcvdD6_I_zPM,8652
+jaf/__init__.py,sha256=48U83mM6oMabMj8ZmPfEPn8YXnDKTvbN5ofHSJoJ5Lk,8652
 jaf/cli.py,sha256=EDMMA5uX0e3TUIedLdyP3p4Qy-aXADvpht3VgJPJagU,8299
 jaf/exceptions.py,sha256=FdLIw7bdCNtBYfqRyJBkRT4Z1vWuvkzrMqFiMAzjL8Y,9158
 jaf/a2a/__init__.py,sha256=r4W-WHZNjoxR8EQ0x41_rY3fl12OH5qcSn0KycXaKKU,7752
@@ -43,7 +43,7 @@ jaf/core/agent_tool.py,sha256=gZje8_gZSaWCecySg2ZBK07RcD8bc2hxHsR4z87oKJE,12075
 jaf/core/analytics.py,sha256=ypdhllyOThXZB-TY_eR1t1n2qrnAVN7Ljb8PaOtJft0,23267
 jaf/core/checkpoint.py,sha256=O7mfi7gFOAUgJ3zHzgJsr11uzn-BU-Vj1iKyKjcirMk,8398
 jaf/core/composition.py,sha256=Tj0-FRTVWygmAfsBLld7pnZK4nrGMMBx2YYJW_KQPoo,25393
-jaf/core/engine.py,sha256=tr1qHrBYLLmFLVuNNwqXb0g6EAuuqRlSw_VDb1DCF-k,69833
+jaf/core/engine.py,sha256=JqAPOll50FyU1kUelRCHu2_zGmkoO-a9edBJXefu_xs,71219
 jaf/core/errors.py,sha256=iDw00o3WH0gHcenRcTj3QEbbloZVpgwnPij6mtaJJk4,5710
 jaf/core/guardrails.py,sha256=oPB7MpD3xWiCWoyaS-xQQp-glaPON7GNVrIL0h1Jefs,26931
 jaf/core/handoff.py,sha256=M7TQfd7BXuer1ZeRJ51nLsI55KifbM6faNtmA2Nsj3I,6196
@@ -56,8 +56,8 @@ jaf/core/state.py,sha256=fdWDc2DQ-o_g_8E4ibg2QM0Vad_XUique3a5iYBwGZo,9516
 jaf/core/streaming.py,sha256=5ntOtJrZVCHuGsygquyCLG2J5yuSxE6DN5OM-BrQiGw,16818
 jaf/core/tool_results.py,sha256=L9U3JDQAjAH5YR7iMpSxfVky2Nxo6FYQs4WE05RATaQ,11283
 jaf/core/tools.py,sha256=rHxzAfGVGpYk3YJKmrq3AQLW0oE3ACkiJBOwle2bLdc,15146
-jaf/core/tracing.py,sha256=dPX1aHBk-auHcgMHttOZjLdo7aqB6GXmFLoGb3_bSoo,57320
-jaf/core/types.py,sha256=1QolTM3IYnQArbYAZkCIfj868j9vyEZTThqw37q7EyU,35395
+jaf/core/tracing.py,sha256=4as-pBYrg2nYvP5kj0IOLopDeXCl2tXUPXA6tQrpYHA,57324
+jaf/core/types.py,sha256=MwHSXSamOz3QDjTEaOQzNqOMU1JxwFbHg8Fd9Xzw33Y,35576
 jaf/core/workflows.py,sha256=0825AoD1QwEiGAs5IRlWHmaKrjurx6xF7oDJR6POBsg,25651
 jaf/memory/__init__.py,sha256=YfANOg5vUFSPVG7gpBE4_lYkV5X3_U6Yj9v1_QexfN0,1396
 jaf/memory/approval_storage.py,sha256=DcwtERcoIMH7B-abK9hqND3Moz4zSETsPlgJNkvqcaM,10573
@@ -75,10 +75,10 @@ jaf/policies/handoff.py,sha256=3lPegkSV_2LUf6jEZnj68_g3XUGFB_Fsj1C_6Svr2Kg,8128
 jaf/policies/validation.py,sha256=-zhB5ysH0Y4JnstHzo3I8tt-PFB9FSHBwSUALITBxw4,11016
 jaf/providers/__init__.py,sha256=PfIQkCtXb_yiTEjqs5msGv5-a6De2ujFCEaDGJEe_TQ,2100
 jaf/providers/mcp.py,sha256=fGfrlYx5g7ZX1fBUkPmAYSePKrCc4pG_HKngV_QCdRU,13148
-jaf/providers/model.py,sha256=MiPWEZl8MYAXLD010oX_qMCT7AkpGXIHLr9sTK4-xJM,45728
+jaf/providers/model.py,sha256=4RSjBUpmpkU4JePwjbVd3WlXdBDoU1w_n1VLVQSPL9Q,48591
 jaf/server/__init__.py,sha256=cYqdruJCJ3W1AMmmxMjAnDlj9gh3XbHhtegjq4nYRNY,391
 jaf/server/main.py,sha256=usdCRZfDP3GWQchh1o2tHd4KqTTFyQQCD9w4khd9rSo,2113
-jaf/server/server.py,sha256=eVxc4w7XHwLFid_3X8lLp9EugUqeLLtVxS6Ikh485Io,51476
+jaf/server/server.py,sha256=ZhZ2gmY10eQNaKUlE7ecMkrwMkYkAh-QgKdUJ2q7ktM,51532
 jaf/server/types.py,sha256=MsbADzpxVLlaVh0-VfgwbDybk1ZSavN5KSpPEamDEwE,14174
 jaf/utils/__init__.py,sha256=s3rsFFqSjsgRfnXrQFhcXXUc99HVFYizlfVbbkOYQDo,1229
 jaf/utils/attachments.py,sha256=SvZxEO7aCwl97bIJH3YtEYiuhBB6YcaBCp4UkXrWc4w,13179
@@ -89,9 +89,9 @@ jaf/visualization/functional_core.py,sha256=0Xs2R8ELADKNIgokcbjuxmWwxEyCH1yXIEdG
 jaf/visualization/graphviz.py,sha256=EwWVIRv8Z7gTiO5Spvcm-z_UUQ1oWNPRgdE33ZzFwx8,11569
 jaf/visualization/imperative_shell.py,sha256=N5lWzOLMIU_iCoy3n5WCg49eec8VxV8f7JIG6_wNtVw,2506
 jaf/visualization/types.py,sha256=90G8oClsFa_APqTuMrTW6KjD0oG9I4kVur773dXNW0E,1393
-jaf_py-2.6.1.dist-info/licenses/LICENSE,sha256=LXUQBJxdyr-7C4bk9cQBwvsF_xwA-UVstDTKabpcjlI,1063
-jaf_py-2.6.1.dist-info/METADATA,sha256=_tsvDUvHWYLcfhzn2CFTGlIIn-XvLpUMiF4NaxCqCKI,27743
-jaf_py-2.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-jaf_py-2.6.1.dist-info/entry_points.txt,sha256=OtIJeNJpb24kgGrqRx9szGgDx1vL9ayq8uHErmu7U5w,41
-jaf_py-2.6.1.dist-info/top_level.txt,sha256=Xu1RZbGaM4_yQX7bpalo881hg7N_dybaOW282F15ruE,4
-jaf_py-2.6.1.dist-info/RECORD,,
+jaf_py-2.6.3.dist-info/licenses/LICENSE,sha256=LXUQBJxdyr-7C4bk9cQBwvsF_xwA-UVstDTKabpcjlI,1063
+jaf_py-2.6.3.dist-info/METADATA,sha256=FxmX-n2tzG4xRB7ZDnzs_Veo1v_wHLN5SO0fBe0mJAM,27743
+jaf_py-2.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+jaf_py-2.6.3.dist-info/entry_points.txt,sha256=OtIJeNJpb24kgGrqRx9szGgDx1vL9ayq8uHErmu7U5w,41
+jaf_py-2.6.3.dist-info/top_level.txt,sha256=Xu1RZbGaM4_yQX7bpalo881hg7N_dybaOW282F15ruE,4
+jaf_py-2.6.3.dist-info/RECORD,,

{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/top_level.txt RENAMED Viewed

File without changes

jaf-py 2.6.1__py3-none-any.whl → 2.6.3__py3-none-any.whl

jaf-py 2.6.1__py3-none-any.whl → 2.6.3__py3-none-any.whl