jaf-py: jaf_py-2.6.1-py3-none-any.whl → jaf_py-2.6.3-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- jaf/__init__.py +1 -1
- jaf/core/engine.py +29 -0
- jaf/core/tracing.py +11 -11
- jaf/core/types.py +6 -0
- jaf/providers/model.py +58 -6
- jaf/server/server.py +1 -1
- {jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/METADATA +2 -2
- {jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/RECORD +12 -12
- {jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/WHEEL +0 -0
- {jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/entry_points.txt +0 -0
- {jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/licenses/LICENSE +0 -0
- {jaf_py-2.6.1.dist-info → jaf_py-2.6.3.dist-info}/top_level.txt +0 -0
jaf/__init__.py
CHANGED
jaf/core/engine.py
CHANGED
|
@@ -395,6 +395,17 @@ async def _store_conversation_history(state: RunState[Ctx], config: RunConfig[Ct
|
|
|
395
395
|
)
|
|
396
396
|
|
|
397
397
|
messages_to_store = list(state.messages)
|
|
398
|
+
|
|
399
|
+
if config.before_memory_store:
|
|
400
|
+
if asyncio.iscoroutinefunction(config.before_memory_store):
|
|
401
|
+
messages_to_store = await config.before_memory_store(messages_to_store, state)
|
|
402
|
+
else:
|
|
403
|
+
result = config.before_memory_store(messages_to_store, state)
|
|
404
|
+
if asyncio.iscoroutine(result):
|
|
405
|
+
messages_to_store = await result
|
|
406
|
+
else:
|
|
407
|
+
messages_to_store = result
|
|
408
|
+
|
|
398
409
|
if (
|
|
399
410
|
config.memory.compression_threshold
|
|
400
411
|
and len(messages_to_store) > config.memory.compression_threshold
|
|
@@ -681,8 +692,19 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
|
|
|
681
692
|
aggregated_text = ""
|
|
682
693
|
# Working array of partial tool calls
|
|
683
694
|
partial_tool_calls: List[Dict[str, Any]] = []
|
|
695
|
+
# Capture usage and model from streaming chunks
|
|
696
|
+
stream_usage: Optional[Dict[str, int]] = None
|
|
697
|
+
stream_model: Optional[str] = None
|
|
684
698
|
|
|
685
699
|
async for chunk in get_stream(state, current_agent, config): # type: ignore[arg-type]
|
|
700
|
+
# Extract usage and model from raw chunk if available
|
|
701
|
+
raw_chunk = getattr(chunk, "raw", None)
|
|
702
|
+
if raw_chunk:
|
|
703
|
+
if not stream_usage and "usage" in raw_chunk and raw_chunk["usage"]:
|
|
704
|
+
stream_usage = raw_chunk["usage"]
|
|
705
|
+
if not stream_model and "model" in raw_chunk and raw_chunk["model"]:
|
|
706
|
+
stream_model = raw_chunk["model"]
|
|
707
|
+
|
|
686
708
|
# Text deltas
|
|
687
709
|
delta_text = getattr(chunk, "delta", None)
|
|
688
710
|
if delta_text:
|
|
@@ -792,6 +814,13 @@ async def _run_internal(state: RunState[Ctx], config: RunConfig[Ctx]) -> RunResu
|
|
|
792
814
|
llm_response = {
|
|
793
815
|
"message": {"content": aggregated_text or None, "tool_calls": final_tool_calls}
|
|
794
816
|
}
|
|
817
|
+
|
|
818
|
+
# Preserve usage and model from streaming if captured
|
|
819
|
+
if stream_usage:
|
|
820
|
+
llm_response["usage"] = stream_usage
|
|
821
|
+
if stream_model:
|
|
822
|
+
llm_response["model"] = stream_model
|
|
823
|
+
|
|
795
824
|
except Exception:
|
|
796
825
|
# Fallback to non-streaming on error
|
|
797
826
|
assistant_event_streamed = False
|
jaf/core/tracing.py
CHANGED
|
@@ -469,7 +469,7 @@ class LangfuseTraceCollector:
|
|
|
469
469
|
public_key=public_key,
|
|
470
470
|
secret_key=secret_key,
|
|
471
471
|
host=host,
|
|
472
|
-
release="jaf-py-v2.6.
|
|
472
|
+
release="jaf-py-v2.6.3",
|
|
473
473
|
httpx_client=client,
|
|
474
474
|
)
|
|
475
475
|
self._httpx_client = client
|
|
@@ -911,18 +911,18 @@ class LangfuseTraceCollector:
|
|
|
911
911
|
print(f"[LANGFUSE] Ending generation for LLM call")
|
|
912
912
|
# End the generation
|
|
913
913
|
generation = self.active_spans[span_id]
|
|
914
|
-
choice = self._get_event_data(event, "choice", {})
|
|
915
914
|
|
|
916
|
-
|
|
915
|
+
|
|
916
|
+
choice = self._get_event_data(event, "choice", {})
|
|
917
917
|
usage = self._get_event_data(event, "usage", {})
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
if model == "unknown":
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
918
|
+
model = self._get_event_data(event, "model", "unknown")
|
|
919
|
+
|
|
920
|
+
# Also try to get model from the choice if not at top level
|
|
921
|
+
if model == "unknown" and isinstance(choice, dict):
|
|
922
|
+
model = choice.get("model", "unknown")
|
|
923
|
+
|
|
924
|
+
print(f"[LANGFUSE] Extracted - model: '{model}', usage: {usage}")
|
|
925
|
+
|
|
926
926
|
# Convert to Langfuse v2 format - let Langfuse handle cost calculation automatically
|
|
927
927
|
langfuse_usage = None
|
|
928
928
|
if usage:
|
jaf/core/types.py
CHANGED
|
@@ -1115,6 +1115,12 @@ class RunConfig(Generic[Ctx]):
|
|
|
1115
1115
|
Union[ModelCompletionResponse, Awaitable[ModelCompletionResponse]],
|
|
1116
1116
|
]
|
|
1117
1117
|
] = None # Callback after LLM call - can process response
|
|
1118
|
+
before_memory_store: Optional[
|
|
1119
|
+
Callable[
|
|
1120
|
+
[List[Message], RunState[Ctx]],
|
|
1121
|
+
Union[List[Message], Awaitable[List[Message]]],
|
|
1122
|
+
]
|
|
1123
|
+
] = None
|
|
1118
1124
|
max_empty_response_retries: int = 3 # Maximum retries when LLM returns empty response
|
|
1119
1125
|
empty_response_retry_delay: float = (
|
|
1120
1126
|
1.0 # Initial delay in seconds before retrying empty response (uses exponential backoff)
|
jaf/providers/model.py
CHANGED
|
@@ -706,21 +706,37 @@ def make_litellm_sdk_provider(
|
|
|
706
706
|
for tc in choice.message.tool_calls
|
|
707
707
|
]
|
|
708
708
|
|
|
709
|
-
# Extract usage data
|
|
710
|
-
|
|
709
|
+
# Extract usage data - ALWAYS return a dict with defaults for Langfuse cost tracking
|
|
710
|
+
# Initialize with zeros as defensive default (matches AzureDirectProvider pattern)
|
|
711
|
+
usage_data = {
|
|
712
|
+
"prompt_tokens": 0,
|
|
713
|
+
"completion_tokens": 0,
|
|
714
|
+
"total_tokens": 0,
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
actual_model = getattr(response, "model", model_name)
|
|
718
|
+
|
|
711
719
|
if response.usage:
|
|
712
720
|
usage_data = {
|
|
713
721
|
"prompt_tokens": response.usage.prompt_tokens,
|
|
714
722
|
"completion_tokens": response.usage.completion_tokens,
|
|
715
723
|
"total_tokens": response.usage.total_tokens,
|
|
716
724
|
}
|
|
717
|
-
|
|
725
|
+
|
|
726
|
+
message_content = {
|
|
727
|
+
"content": choice.message.content,
|
|
728
|
+
"tool_calls": tool_calls,
|
|
729
|
+
# CRITICAL: Embed usage and model here so trace collector can find them
|
|
730
|
+
"_usage": usage_data,
|
|
731
|
+
"_model": actual_model,
|
|
732
|
+
}
|
|
733
|
+
|
|
718
734
|
return {
|
|
719
735
|
"id": response.id,
|
|
720
736
|
"created": response.created,
|
|
721
|
-
"model":
|
|
737
|
+
"model": actual_model,
|
|
722
738
|
"system_fingerprint": getattr(response, "system_fingerprint", None),
|
|
723
|
-
"message":
|
|
739
|
+
"message": message_content,
|
|
724
740
|
"usage": usage_data,
|
|
725
741
|
"prompt": messages,
|
|
726
742
|
}
|
|
@@ -769,6 +785,7 @@ def make_litellm_sdk_provider(
|
|
|
769
785
|
"model": model_name,
|
|
770
786
|
"messages": messages,
|
|
771
787
|
"stream": True,
|
|
788
|
+
"stream_options": {"include_usage": True}, # Request usage data in streaming
|
|
772
789
|
**self.litellm_kwargs,
|
|
773
790
|
}
|
|
774
791
|
|
|
@@ -803,15 +820,31 @@ def make_litellm_sdk_provider(
|
|
|
803
820
|
|
|
804
821
|
# Stream using litellm
|
|
805
822
|
stream = await litellm.acompletion(**request_params)
|
|
823
|
+
|
|
824
|
+
accumulated_usage: Optional[Dict[str, int]] = None
|
|
825
|
+
response_model: Optional[str] = None
|
|
806
826
|
|
|
807
827
|
async for chunk in stream:
|
|
808
828
|
try:
|
|
809
829
|
# Best-effort extraction of raw for debugging
|
|
810
830
|
try:
|
|
811
831
|
raw_obj = chunk.model_dump() if hasattr(chunk, "model_dump") else None
|
|
812
|
-
|
|
832
|
+
|
|
833
|
+
# Capture usage from chunk if present
|
|
834
|
+
if raw_obj and "usage" in raw_obj and raw_obj["usage"]:
|
|
835
|
+
accumulated_usage = raw_obj["usage"]
|
|
836
|
+
|
|
837
|
+
# Capture model from chunk if present
|
|
838
|
+
if raw_obj and "model" in raw_obj and raw_obj["model"]:
|
|
839
|
+
response_model = raw_obj["model"]
|
|
840
|
+
|
|
841
|
+
except Exception as e:
|
|
813
842
|
raw_obj = None
|
|
814
843
|
|
|
844
|
+
if raw_obj and "usage" in raw_obj and raw_obj["usage"]:
|
|
845
|
+
# Yield this chunk so engine.py can capture usage from raw
|
|
846
|
+
yield CompletionStreamChunk(delta="", raw=raw_obj)
|
|
847
|
+
|
|
815
848
|
choice = None
|
|
816
849
|
if getattr(chunk, "choices", None):
|
|
817
850
|
choice = chunk.choices[0]
|
|
@@ -826,6 +859,12 @@ def make_litellm_sdk_provider(
|
|
|
826
859
|
if delta is not None:
|
|
827
860
|
content_delta = getattr(delta, "content", None)
|
|
828
861
|
if content_delta:
|
|
862
|
+
# Include accumulated usage and model in raw_obj for engine
|
|
863
|
+
if raw_obj and (accumulated_usage or response_model):
|
|
864
|
+
if accumulated_usage:
|
|
865
|
+
raw_obj["usage"] = accumulated_usage
|
|
866
|
+
if response_model:
|
|
867
|
+
raw_obj["model"] = response_model
|
|
829
868
|
yield CompletionStreamChunk(delta=content_delta, raw=raw_obj)
|
|
830
869
|
|
|
831
870
|
# Tool call deltas
|
|
@@ -841,6 +880,13 @@ def make_litellm_sdk_provider(
|
|
|
841
880
|
getattr(fn, "arguments", None) if fn is not None else None
|
|
842
881
|
)
|
|
843
882
|
|
|
883
|
+
# Include accumulated usage and model in raw_obj
|
|
884
|
+
if raw_obj and (accumulated_usage or response_model):
|
|
885
|
+
if accumulated_usage:
|
|
886
|
+
raw_obj["usage"] = accumulated_usage
|
|
887
|
+
if response_model:
|
|
888
|
+
raw_obj["model"] = response_model
|
|
889
|
+
|
|
844
890
|
yield CompletionStreamChunk(
|
|
845
891
|
tool_call_delta=ToolCallDelta(
|
|
846
892
|
index=idx,
|
|
@@ -857,6 +903,12 @@ def make_litellm_sdk_provider(
|
|
|
857
903
|
|
|
858
904
|
# Completion ended
|
|
859
905
|
if finish_reason:
|
|
906
|
+
# Include accumulated usage and model in final chunk
|
|
907
|
+
if raw_obj and (accumulated_usage or response_model):
|
|
908
|
+
if accumulated_usage:
|
|
909
|
+
raw_obj["usage"] = accumulated_usage
|
|
910
|
+
if response_model:
|
|
911
|
+
raw_obj["model"] = response_model
|
|
860
912
|
yield CompletionStreamChunk(
|
|
861
913
|
is_done=True, finish_reason=finish_reason, raw=raw_obj
|
|
862
914
|
)
|
jaf/server/server.py
CHANGED
|
@@ -220,7 +220,7 @@ def _convert_core_message_to_http(core_msg: Message) -> HttpMessage:
|
|
|
220
220
|
content=content,
|
|
221
221
|
attachments=attachments,
|
|
222
222
|
tool_call_id=core_msg.tool_call_id,
|
|
223
|
-
tool_calls=core_msg.tool_calls,
|
|
223
|
+
tool_calls=[asdict(tc) for tc in core_msg.tool_calls] if core_msg.tool_calls else None,
|
|
224
224
|
)
|
|
225
225
|
|
|
226
226
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: jaf-py
|
|
3
|
-
Version: 2.6.1
|
|
3
|
+
Version: 2.6.3
|
|
4
4
|
Summary: A purely functional agent framework with immutable state and composable tools - Python implementation
|
|
5
5
|
Author: JAF Contributors
|
|
6
6
|
Maintainer: JAF Contributors
|
|
@@ -82,7 +82,7 @@ Dynamic: license-file
|
|
|
82
82
|
|
|
83
83
|
<!--  -->
|
|
84
84
|
|
|
85
|
-
[](https://github.com/xynehq/jaf-py)
|
|
86
86
|
[](https://www.python.org/)
|
|
87
87
|
[](https://xynehq.github.io/jaf-py/)
|
|
88
88
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
jaf/__init__.py,sha256=
|
|
1
|
+
jaf/__init__.py,sha256=48U83mM6oMabMj8ZmPfEPn8YXnDKTvbN5ofHSJoJ5Lk,8652
|
|
2
2
|
jaf/cli.py,sha256=EDMMA5uX0e3TUIedLdyP3p4Qy-aXADvpht3VgJPJagU,8299
|
|
3
3
|
jaf/exceptions.py,sha256=FdLIw7bdCNtBYfqRyJBkRT4Z1vWuvkzrMqFiMAzjL8Y,9158
|
|
4
4
|
jaf/a2a/__init__.py,sha256=r4W-WHZNjoxR8EQ0x41_rY3fl12OH5qcSn0KycXaKKU,7752
|
|
@@ -43,7 +43,7 @@ jaf/core/agent_tool.py,sha256=gZje8_gZSaWCecySg2ZBK07RcD8bc2hxHsR4z87oKJE,12075
|
|
|
43
43
|
jaf/core/analytics.py,sha256=ypdhllyOThXZB-TY_eR1t1n2qrnAVN7Ljb8PaOtJft0,23267
|
|
44
44
|
jaf/core/checkpoint.py,sha256=O7mfi7gFOAUgJ3zHzgJsr11uzn-BU-Vj1iKyKjcirMk,8398
|
|
45
45
|
jaf/core/composition.py,sha256=Tj0-FRTVWygmAfsBLld7pnZK4nrGMMBx2YYJW_KQPoo,25393
|
|
46
|
-
jaf/core/engine.py,sha256=
|
|
46
|
+
jaf/core/engine.py,sha256=JqAPOll50FyU1kUelRCHu2_zGmkoO-a9edBJXefu_xs,71219
|
|
47
47
|
jaf/core/errors.py,sha256=iDw00o3WH0gHcenRcTj3QEbbloZVpgwnPij6mtaJJk4,5710
|
|
48
48
|
jaf/core/guardrails.py,sha256=oPB7MpD3xWiCWoyaS-xQQp-glaPON7GNVrIL0h1Jefs,26931
|
|
49
49
|
jaf/core/handoff.py,sha256=M7TQfd7BXuer1ZeRJ51nLsI55KifbM6faNtmA2Nsj3I,6196
|
|
@@ -56,8 +56,8 @@ jaf/core/state.py,sha256=fdWDc2DQ-o_g_8E4ibg2QM0Vad_XUique3a5iYBwGZo,9516
|
|
|
56
56
|
jaf/core/streaming.py,sha256=5ntOtJrZVCHuGsygquyCLG2J5yuSxE6DN5OM-BrQiGw,16818
|
|
57
57
|
jaf/core/tool_results.py,sha256=L9U3JDQAjAH5YR7iMpSxfVky2Nxo6FYQs4WE05RATaQ,11283
|
|
58
58
|
jaf/core/tools.py,sha256=rHxzAfGVGpYk3YJKmrq3AQLW0oE3ACkiJBOwle2bLdc,15146
|
|
59
|
-
jaf/core/tracing.py,sha256=
|
|
60
|
-
jaf/core/types.py,sha256=
|
|
59
|
+
jaf/core/tracing.py,sha256=4as-pBYrg2nYvP5kj0IOLopDeXCl2tXUPXA6tQrpYHA,57324
|
|
60
|
+
jaf/core/types.py,sha256=MwHSXSamOz3QDjTEaOQzNqOMU1JxwFbHg8Fd9Xzw33Y,35576
|
|
61
61
|
jaf/core/workflows.py,sha256=0825AoD1QwEiGAs5IRlWHmaKrjurx6xF7oDJR6POBsg,25651
|
|
62
62
|
jaf/memory/__init__.py,sha256=YfANOg5vUFSPVG7gpBE4_lYkV5X3_U6Yj9v1_QexfN0,1396
|
|
63
63
|
jaf/memory/approval_storage.py,sha256=DcwtERcoIMH7B-abK9hqND3Moz4zSETsPlgJNkvqcaM,10573
|
|
@@ -75,10 +75,10 @@ jaf/policies/handoff.py,sha256=3lPegkSV_2LUf6jEZnj68_g3XUGFB_Fsj1C_6Svr2Kg,8128
|
|
|
75
75
|
jaf/policies/validation.py,sha256=-zhB5ysH0Y4JnstHzo3I8tt-PFB9FSHBwSUALITBxw4,11016
|
|
76
76
|
jaf/providers/__init__.py,sha256=PfIQkCtXb_yiTEjqs5msGv5-a6De2ujFCEaDGJEe_TQ,2100
|
|
77
77
|
jaf/providers/mcp.py,sha256=fGfrlYx5g7ZX1fBUkPmAYSePKrCc4pG_HKngV_QCdRU,13148
|
|
78
|
-
jaf/providers/model.py,sha256=
|
|
78
|
+
jaf/providers/model.py,sha256=4RSjBUpmpkU4JePwjbVd3WlXdBDoU1w_n1VLVQSPL9Q,48591
|
|
79
79
|
jaf/server/__init__.py,sha256=cYqdruJCJ3W1AMmmxMjAnDlj9gh3XbHhtegjq4nYRNY,391
|
|
80
80
|
jaf/server/main.py,sha256=usdCRZfDP3GWQchh1o2tHd4KqTTFyQQCD9w4khd9rSo,2113
|
|
81
|
-
jaf/server/server.py,sha256=
|
|
81
|
+
jaf/server/server.py,sha256=ZhZ2gmY10eQNaKUlE7ecMkrwMkYkAh-QgKdUJ2q7ktM,51532
|
|
82
82
|
jaf/server/types.py,sha256=MsbADzpxVLlaVh0-VfgwbDybk1ZSavN5KSpPEamDEwE,14174
|
|
83
83
|
jaf/utils/__init__.py,sha256=s3rsFFqSjsgRfnXrQFhcXXUc99HVFYizlfVbbkOYQDo,1229
|
|
84
84
|
jaf/utils/attachments.py,sha256=SvZxEO7aCwl97bIJH3YtEYiuhBB6YcaBCp4UkXrWc4w,13179
|
|
@@ -89,9 +89,9 @@ jaf/visualization/functional_core.py,sha256=0Xs2R8ELADKNIgokcbjuxmWwxEyCH1yXIEdG
|
|
|
89
89
|
jaf/visualization/graphviz.py,sha256=EwWVIRv8Z7gTiO5Spvcm-z_UUQ1oWNPRgdE33ZzFwx8,11569
|
|
90
90
|
jaf/visualization/imperative_shell.py,sha256=N5lWzOLMIU_iCoy3n5WCg49eec8VxV8f7JIG6_wNtVw,2506
|
|
91
91
|
jaf/visualization/types.py,sha256=90G8oClsFa_APqTuMrTW6KjD0oG9I4kVur773dXNW0E,1393
|
|
92
|
-
jaf_py-2.6.
|
|
93
|
-
jaf_py-2.6.
|
|
94
|
-
jaf_py-2.6.
|
|
95
|
-
jaf_py-2.6.
|
|
96
|
-
jaf_py-2.6.
|
|
97
|
-
jaf_py-2.6.
|
|
92
|
+
jaf_py-2.6.3.dist-info/licenses/LICENSE,sha256=LXUQBJxdyr-7C4bk9cQBwvsF_xwA-UVstDTKabpcjlI,1063
|
|
93
|
+
jaf_py-2.6.3.dist-info/METADATA,sha256=FxmX-n2tzG4xRB7ZDnzs_Veo1v_wHLN5SO0fBe0mJAM,27743
|
|
94
|
+
jaf_py-2.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
95
|
+
jaf_py-2.6.3.dist-info/entry_points.txt,sha256=OtIJeNJpb24kgGrqRx9szGgDx1vL9ayq8uHErmu7U5w,41
|
|
96
|
+
jaf_py-2.6.3.dist-info/top_level.txt,sha256=Xu1RZbGaM4_yQX7bpalo881hg7N_dybaOW282F15ruE,4
|
|
97
|
+
jaf_py-2.6.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|