nvidia-nat 1.3.0rc1__py3-none-any.whl → 1.4.0a20251008__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nat/agent/prompt_optimizer/register.py +2 -2
- nat/agent/react_agent/register.py +9 -1
- nat/agent/rewoo_agent/register.py +8 -1
- nat/authentication/oauth2/oauth2_auth_code_flow_provider.py +31 -18
- nat/builder/context.py +22 -6
- nat/cli/commands/mcp/mcp.py +6 -6
- nat/cli/commands/workflow/templates/config.yml.j2 +14 -12
- nat/cli/commands/workflow/templates/register.py.j2 +2 -2
- nat/cli/commands/workflow/templates/workflow.py.j2 +35 -21
- nat/cli/commands/workflow/workflow_commands.py +54 -10
- nat/cli/main.py +3 -0
- nat/data_models/api_server.py +65 -57
- nat/data_models/span.py +41 -3
- nat/experimental/test_time_compute/functions/execute_score_select_function.py +1 -1
- nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py +2 -2
- nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py +5 -35
- nat/front_ends/fastapi/message_validator.py +3 -1
- nat/observability/exporter/span_exporter.py +34 -14
- nat/profiler/decorators/framework_wrapper.py +1 -1
- nat/profiler/forecasting/models/linear_model.py +1 -1
- nat/profiler/forecasting/models/random_forest_regressor.py +1 -1
- nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py +1 -1
- nat/profiler/inference_optimization/experimental/prefix_span_analysis.py +1 -1
- nat/runtime/runner.py +103 -6
- nat/runtime/session.py +26 -0
- nat/tool/memory_tools/get_memory_tool.py +1 -1
- nat/utils/decorators.py +210 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/METADATA +1 -3
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/RECORD +34 -33
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/WHEEL +0 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/entry_points.txt +0 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/licenses/LICENSE-3rd-party.txt +0 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/licenses/LICENSE.md +0 -0
- {nvidia_nat-1.3.0rc1.dist-info → nvidia_nat-1.4.0a20251008.dist-info}/top_level.txt +0 -0
nat/data_models/api_server.py
CHANGED
@@ -36,6 +36,15 @@ from nat.utils.type_converter import GlobalTypeConverter
 FINISH_REASONS = frozenset({'stop', 'length', 'tool_calls', 'content_filter', 'function_call'})
 
 
+class UserMessageContentRoleType(str, Enum):
+    """
+    Enum representing chat message roles in API requests and responses.
+    """
+    USER = "user"
+    ASSISTANT = "assistant"
+    SYSTEM = "system"
+
+
 class Request(BaseModel):
     """
     Request is a data model that represents HTTP request attributes.
@@ -108,7 +117,7 @@ UserContent = typing.Annotated[TextContent | ImageContent | AudioContent, Discri
 
 class Message(BaseModel):
     content: str | list[UserContent]
-    role:
+    role: UserMessageContentRoleType
 
 
 class ChatRequest(BaseModel):
@@ -164,7 +173,7 @@ class ChatRequest(BaseModel):
                     max_tokens: int | None = None,
                     top_p: float | None = None) -> "ChatRequest":
 
-        return ChatRequest(messages=[Message(content=data, role=
+        return ChatRequest(messages=[Message(content=data, role=UserMessageContentRoleType.USER)],
                            model=model,
                            temperature=temperature,
                            max_tokens=max_tokens,
@@ -178,7 +187,7 @@ class ChatRequest(BaseModel):
                     max_tokens: int | None = None,
                     top_p: float | None = None) -> "ChatRequest":
 
-        return ChatRequest(messages=[Message(content=content, role=
+        return ChatRequest(messages=[Message(content=content, role=UserMessageContentRoleType.USER)],
                            model=model,
                            temperature=temperature,
                            max_tokens=max_tokens,
@@ -187,29 +196,40 @@ class ChatRequest(BaseModel):
 
 class ChoiceMessage(BaseModel):
     content: str | None = None
-    role:
+    role: UserMessageContentRoleType | None = None
 
 
 class ChoiceDelta(BaseModel):
     """Delta object for streaming responses (OpenAI-compatible)"""
     content: str | None = None
-    role:
+    role: UserMessageContentRoleType | None = None
 
 
-class
+class ChoiceBase(BaseModel):
+    """Base choice model with common fields for both streaming and non-streaming responses"""
     model_config = ConfigDict(extra="allow")
-
-    message: ChoiceMessage | None = None
-    delta: ChoiceDelta | None = None
     finish_reason: typing.Literal['stop', 'length', 'tool_calls', 'content_filter', 'function_call'] | None = None
     index: int
-
+
+
+class ChatResponseChoice(ChoiceBase):
+    """Choice model for non-streaming responses - contains message field"""
+    message: ChoiceMessage
+
+
+class ChatResponseChunkChoice(ChoiceBase):
+    """Choice model for streaming responses - contains delta field"""
+    delta: ChoiceDelta
+
+
+# Backward compatibility alias
+Choice = ChatResponseChoice
 
 
 class Usage(BaseModel):
-    prompt_tokens: int
-    completion_tokens: int
-    total_tokens: int
+    prompt_tokens: int | None = None
+    completion_tokens: int | None = None
+    total_tokens: int | None = None
 
 
 class ResponseSerializable(abc.ABC):
@@ -245,10 +265,10 @@ class ChatResponse(ResponseBaseModelOutput):
     model_config = ConfigDict(extra="allow")
     id: str
     object: str = "chat.completion"
-    model: str = ""
+    model: str = "unknown-model"
     created: datetime.datetime
-    choices: list[
-    usage: Usage
+    choices: list[ChatResponseChoice]
+    usage: Usage
     system_fingerprint: str | None = None
     service_tier: typing.Literal["scale", "default"] | None = None
 
@@ -264,14 +284,14 @@ class ChatResponse(ResponseBaseModelOutput):
                     object_: str | None = None,
                     model: str | None = None,
                     created: datetime.datetime | None = None,
-                    usage: Usage
+                    usage: Usage) -> "ChatResponse":
 
         if id_ is None:
             id_ = str(uuid.uuid4())
         if object_ is None:
             object_ = "chat.completion"
         if model is None:
-            model = ""
+            model = "unknown-model"
         if created is None:
             created = datetime.datetime.now(datetime.UTC)
 
@@ -279,7 +299,12 @@ class ChatResponse(ResponseBaseModelOutput):
                             object=object_,
                             model=model,
                             created=created,
-                            choices=[
+                            choices=[
+                                ChatResponseChoice(index=0,
+                                                   message=ChoiceMessage(content=data,
+                                                                         role=UserMessageContentRoleType.ASSISTANT),
+                                                   finish_reason="stop")
+                            ],
                             usage=usage)
 
 
@@ -293,9 +318,9 @@ class ChatResponseChunk(ResponseBaseModelOutput):
     model_config = ConfigDict(extra="allow")
 
     id: str
-    choices: list[
+    choices: list[ChatResponseChunkChoice]
     created: datetime.datetime
-    model: str = ""
+    model: str = "unknown-model"
     object: str = "chat.completion.chunk"
     system_fingerprint: str | None = None
     service_tier: typing.Literal["scale", "default"] | None = None
@@ -319,12 +344,18 @@ class ChatResponseChunk(ResponseBaseModelOutput):
         if created is None:
             created = datetime.datetime.now(datetime.UTC)
         if model is None:
-            model = ""
+            model = "unknown-model"
         if object_ is None:
             object_ = "chat.completion.chunk"
 
         return ChatResponseChunk(id=id_,
-                                 choices=[
+                                 choices=[
+                                     ChatResponseChunkChoice(index=0,
+                                                             delta=ChoiceDelta(
+                                                                 content=data,
+                                                                 role=UserMessageContentRoleType.ASSISTANT),
+                                                             finish_reason="stop")
+                                 ],
                                  created=created,
                                  model=model,
                                  object=object_)
@@ -335,7 +366,7 @@ class ChatResponseChunk(ResponseBaseModelOutput):
                                 id_: str | None = None,
                                 created: datetime.datetime | None = None,
                                 model: str | None = None,
-                                role:
+                                role: UserMessageContentRoleType | None = None,
                                 finish_reason: str | None = None,
                                 usage: Usage | None = None,
                                 system_fingerprint: str | None = None) -> "ChatResponseChunk":
@@ -345,7 +376,7 @@ class ChatResponseChunk(ResponseBaseModelOutput):
         if created is None:
             created = datetime.datetime.now(datetime.UTC)
         if model is None:
-            model = ""
+            model = "unknown-model"
 
         delta = ChoiceDelta(content=content, role=role) if content is not None or role is not None else ChoiceDelta()
 
@@ -353,7 +384,14 @@ class ChatResponseChunk(ResponseBaseModelOutput):
 
         return ChatResponseChunk(
             id=id_,
-            choices=[
+            choices=[
+                ChatResponseChunkChoice(
+                    index=0,
+                    delta=delta,
+                    finish_reason=typing.cast(
+                        typing.Literal['stop', 'length', 'tool_calls', 'content_filter', 'function_call'] | None,
+                        final_finish_reason))
+            ],
             created=created,
             model=model,
             object="chat.completion.chunk",
@@ -398,11 +436,6 @@ class GenerateResponse(BaseModel):
     value: str | None = "default"
 
 
-class UserMessageContentRoleType(str, Enum):
-    USER = "user"
-    ASSISTANT = "assistant"
-
-
 class WebSocketMessageType(str, Enum):
     """
     WebSocketMessageType is an Enum that represents WebSocket Message types.
@@ -622,7 +655,7 @@ GlobalTypeConverter.register_converter(_nat_chat_request_to_string)
 
 
 def _string_to_nat_chat_request(data: str) -> ChatRequest:
-    return ChatRequest.from_string(data, model="")
+    return ChatRequest.from_string(data, model="unknown-model")
 
 
 GlobalTypeConverter.register_converter(_string_to_nat_chat_request)
@@ -654,22 +687,12 @@ def _string_to_nat_chat_response(data: str) -> ChatResponse:
 GlobalTypeConverter.register_converter(_string_to_nat_chat_response)
 
 
-def _chat_response_to_chat_response_chunk(data: ChatResponse) -> ChatResponseChunk:
-    # Preserve original message structure for backward compatibility
-    return ChatResponseChunk(id=data.id, choices=data.choices, created=data.created, model=data.model)
-
-
-GlobalTypeConverter.register_converter(_chat_response_to_chat_response_chunk)
-
-
 # ======== ChatResponseChunk Converters ========
 def _chat_response_chunk_to_string(data: ChatResponseChunk) -> str:
     if data.choices and len(data.choices) > 0:
         choice = data.choices[0]
         if choice.delta and choice.delta.content:
             return choice.delta.content
-        if choice.message and choice.message.content:
-            return choice.message.content
     return ""
 
 
@@ -685,21 +708,6 @@ def _string_to_nat_chat_response_chunk(data: str) -> ChatResponseChunk:
 
 GlobalTypeConverter.register_converter(_string_to_nat_chat_response_chunk)
 
-
-# ======== AINodeMessageChunk Converters ========
-def _ai_message_chunk_to_nat_chat_response_chunk(data) -> ChatResponseChunk:
-    '''Converts LangChain/LangGraph AINodeMessageChunk to ChatResponseChunk'''
-    content = ""
-    if hasattr(data, 'content') and data.content is not None:
-        content = str(data.content)
-    elif hasattr(data, 'text') and data.text is not None:
-        content = str(data.text)
-    elif hasattr(data, 'message') and data.message is not None:
-        content = str(data.message)
-
-    return ChatResponseChunk.create_streaming_chunk(content=content, role="assistant", finish_reason=None)
-
-
 # Compatibility aliases with previous releases
 AIQChatRequest = ChatRequest
 AIQChoiceMessage = ChoiceMessage
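For orientation, a minimal sketch (not part of the diff) of how the reworked choice models compose; it assumes the classes are imported from nat.data_models.api_server as changed above:

    import datetime
    import uuid

    from nat.data_models.api_server import (ChatResponse, ChatResponseChoice, ChoiceMessage, Usage,
                                            UserMessageContentRoleType)

    # Non-streaming responses now carry ChatResponseChoice (required `message`);
    # streaming chunks carry ChatResponseChunkChoice (required `delta`).
    response = ChatResponse(
        id=str(uuid.uuid4()),
        created=datetime.datetime.now(datetime.UTC),
        model="unknown-model",  # new default used when no model name is supplied
        choices=[
            ChatResponseChoice(index=0,
                               message=ChoiceMessage(content="Hello!",
                                                     role=UserMessageContentRoleType.ASSISTANT),
                               finish_reason="stop")
        ],
        usage=Usage(prompt_tokens=5, completion_tokens=2, total_tokens=7))

    assert response.choices[0].message.content == "Hello!"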
nat/data_models/span.py
CHANGED
@@ -128,10 +128,48 @@ class SpanStatus(BaseModel):
     message: str | None = Field(default=None, description="The status message of the span.")
 
 
+def _generate_nonzero_trace_id() -> int:
+    """Generate a non-zero 128-bit trace ID."""
+    return uuid.uuid4().int
+
+
+def _generate_nonzero_span_id() -> int:
+    """Generate a non-zero 64-bit span ID."""
+    return uuid.uuid4().int >> 64
+
+
 class SpanContext(BaseModel):
-    trace_id: int = Field(default_factory=
-
-
+    trace_id: int = Field(default_factory=_generate_nonzero_trace_id,
+                          description="The OTel-syle 128-bit trace ID of the span.")
+    span_id: int = Field(default_factory=_generate_nonzero_span_id,
+                         description="The OTel-syle 64-bit span ID of the span.")
+
+    @field_validator("trace_id", mode="before")
+    @classmethod
+    def _validate_trace_id(cls, v: int | str | None) -> int:
+        """Regenerate if trace_id is None; raise an exception if trace_id is invalid;"""
+        if isinstance(v, str):
+            v = uuid.UUID(v).int
+        if isinstance(v, type(None)):
+            v = _generate_nonzero_trace_id()
+        if v <= 0 or v >> 128:
+            raise ValueError(f"Invalid trace_id: must be a non-zero 128-bit integer, got {v}")
+        return v
+
+    @field_validator("span_id", mode="before")
+    @classmethod
+    def _validate_span_id(cls, v: int | str | None) -> int:
+        """Regenerate if span_id is None; raise an exception if span_id is invalid;"""
+        if isinstance(v, str):
+            try:
+                v = int(v, 16)
+            except ValueError:
+                raise ValueError(f"span_id unable to be parsed: {v}")
+        if isinstance(v, type(None)):
+            v = _generate_nonzero_span_id()
+        if v <= 0 or v >> 64:
+            raise ValueError(f"Invalid span_id: must be a non-zero 64-bit integer, got {v}")
+        return v
 
 
 class Span(BaseModel):
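A short sketch (illustrative, not from the diff) of how the new SpanContext validators behave, assuming the class is imported from nat.data_models.span:

    import uuid

    from nat.data_models.span import SpanContext

    # With no arguments, non-zero 128-bit trace and 64-bit span IDs are generated.
    ctx = SpanContext()
    assert 0 < ctx.trace_id < 2**128
    assert 0 < ctx.span_id < 2**64

    # The before-validators accept a UUID string for trace_id and a hex string for span_id.
    ctx2 = SpanContext(trace_id=str(uuid.uuid4()), span_id="1f2e3d4c5b6a7988")
    assert isinstance(ctx2.trace_id, int) and isinstance(ctx2.span_id, int)

    # Zero or out-of-range values are rejected by the validators.
    try:
        SpanContext(trace_id=0)
    except ValueError:  # pydantic's ValidationError subclasses ValueError
        pass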
nat/experimental/test_time_compute/functions/execute_score_select_function.py
CHANGED
@@ -46,7 +46,7 @@ async def execute_score_select_function(config: ExecuteScoreSelectFunctionConfig
 
     from pydantic import BaseModel
 
-    executable_fn: Function = builder.get_function(name=config.augmented_fn)
+    executable_fn: Function = await builder.get_function(name=config.augmented_fn)
 
     if config.scorer:
         scorer = await builder.get_ttc_strategy(strategy_name=config.scorer,
nat/experimental/test_time_compute/functions/ttc_tool_wrapper_function.py
CHANGED
@@ -98,8 +98,8 @@ async def register_ttc_tool_wrapper_function(
 
     augmented_function_desc = config.tool_description
 
-    fn_input_schema: BaseModel = augmented_function.input_schema
-    fn_output_schema: BaseModel = augmented_function.single_output_schema
+    fn_input_schema: type[BaseModel] = augmented_function.input_schema
+    fn_output_schema: type[BaseModel] | type[None] = augmented_function.single_output_schema
 
     runnable_llm = input_llm.with_structured_output(schema=fn_input_schema)
 
nat/front_ends/fastapi/fastapi_front_end_plugin_worker.py
CHANGED
@@ -689,10 +689,13 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
 
         async def post_openai_api_compatible(response: Response, request: Request, payload: request_type):
             # Check if streaming is requested
+
+            response.headers["Content-Type"] = "application/json"
             stream_requested = getattr(payload, 'stream', False)
 
             async with session_manager.session(http_connection=request):
                 if stream_requested:
+
                     # Return streaming response
                     return StreamingResponse(headers={"Content-Type": "text/event-stream; charset=utf-8"},
                                              content=generate_streaming_response_as_str(
@@ -703,40 +706,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
                                                  result_type=ChatResponseChunk,
                                                  output_type=ChatResponseChunk))
 
-
-                try:
-                    response.headers["Content-Type"] = "application/json"
-                    return await generate_single_response(payload, session_manager, result_type=ChatResponse)
-                except ValueError as e:
-                    if "Cannot get a single output value for streaming workflows" in str(e):
-                        # Workflow only supports streaming, but client requested non-streaming
-                        # Fall back to streaming and collect the result
-                        chunks = []
-                        async for chunk_str in generate_streaming_response_as_str(
-                                payload,
-                                session_manager=session_manager,
-                                streaming=True,
-                                step_adaptor=self.get_step_adaptor(),
-                                result_type=ChatResponseChunk,
-                                output_type=ChatResponseChunk):
-                            if chunk_str.startswith("data: ") and not chunk_str.startswith("data: [DONE]"):
-                                chunk_data = chunk_str[6:].strip()  # Remove "data: " prefix
-                                if chunk_data:
-                                    try:
-                                        chunk_json = ChatResponseChunk.model_validate_json(chunk_data)
-                                        if (chunk_json.choices and len(chunk_json.choices) > 0
-                                                and chunk_json.choices[0].delta
-                                                and chunk_json.choices[0].delta.content is not None):
-                                            chunks.append(chunk_json.choices[0].delta.content)
-                                    except Exception:
-                                        continue
-
-                        # Create a single response from collected chunks
-                        content = "".join(chunks)
-                        single_response = ChatResponse.from_string(content)
-                        response.headers["Content-Type"] = "application/json"
-                        return single_response
-                    raise
+                return await generate_single_response(payload, session_manager, result_type=ChatResponse)
 
         return post_openai_api_compatible
 
@@ -1128,7 +1098,7 @@ class FastApiFrontEndPluginWorker(FastApiFrontEndPluginWorkerBase):
            if configured_group.config.type != "mcp_client":
                continue
 
-            from nat.plugins.mcp.
+            from nat.plugins.mcp.client_config import MCPClientConfig
 
            config = configured_group.config
            assert isinstance(config, MCPClientConfig)
nat/front_ends/fastapi/message_validator.py
CHANGED
@@ -139,8 +139,10 @@ class MessageValidator:
                text_content: str = str(data_model.payload)
                validated_message_content = SystemResponseContent(text=text_content)
 
-            elif
+            elif isinstance(data_model, ChatResponse):
                validated_message_content = SystemResponseContent(text=data_model.choices[0].message.content)
+            elif isinstance(data_model, ChatResponseChunk):
+                validated_message_content = SystemResponseContent(text=data_model.choices[0].delta.content)
 
            elif (isinstance(data_model, ResponseIntermediateStep)):
                validated_message_content = SystemIntermediateStepContent(name=data_model.name,
nat/observability/exporter/span_exporter.py
CHANGED
@@ -126,6 +126,7 @@ class SpanExporter(ProcessingExporter[InputSpanT, OutputSpanT], SerializeMixin):
 
         parent_span = None
         span_ctx = None
+        workflow_trace_id = self._context_state.workflow_trace_id.get()
 
         # Look up the parent span to establish hierarchy
         # event.parent_id is the UUID of the last START step with a different UUID from current step
@@ -141,6 +142,9 @@ class SpanExporter(ProcessingExporter[InputSpanT, OutputSpanT], SerializeMixin):
         parent_span = parent_span.model_copy() if isinstance(parent_span, Span) else None
         if parent_span and parent_span.context:
             span_ctx = SpanContext(trace_id=parent_span.context.trace_id)
+        # No parent: adopt workflow trace id if available to keep all spans in the same trace
+        if span_ctx is None and workflow_trace_id:
+            span_ctx = SpanContext(trace_id=workflow_trace_id)
 
         # Extract start/end times from the step
         # By convention, `span_event_timestamp` is the time we started, `event_timestamp` is the time we ended.
@@ -154,23 +158,39 @@ class SpanExporter(ProcessingExporter[InputSpanT, OutputSpanT], SerializeMixin):
         else:
             sub_span_name = f"{event.payload.event_type}"
 
+        # Prefer parent/context trace id for attribute, else workflow trace id
+        _attr_trace_id = None
+        if span_ctx is not None:
+            _attr_trace_id = span_ctx.trace_id
+        elif parent_span and parent_span.context:
+            _attr_trace_id = parent_span.context.trace_id
+        elif workflow_trace_id:
+            _attr_trace_id = workflow_trace_id
+
+        attributes = {
+            f"{self._span_prefix}.event_type":
+                event.payload.event_type.value,
+            f"{self._span_prefix}.function.id":
+                event.function_ancestry.function_id if event.function_ancestry else "unknown",
+            f"{self._span_prefix}.function.name":
+                event.function_ancestry.function_name if event.function_ancestry else "unknown",
+            f"{self._span_prefix}.subspan.name":
+                event.payload.name or "",
+            f"{self._span_prefix}.event_timestamp":
+                event.event_timestamp,
+            f"{self._span_prefix}.framework":
+                event.payload.framework.value if event.payload.framework else "unknown",
+            f"{self._span_prefix}.conversation.id":
+                self._context_state.conversation_id.get() or "unknown",
+            f"{self._span_prefix}.workflow.run_id":
+                self._context_state.workflow_run_id.get() or "unknown",
+            f"{self._span_prefix}.workflow.trace_id": (f"{_attr_trace_id:032x}" if _attr_trace_id else "unknown"),
+        }
+
         sub_span = Span(name=sub_span_name,
                         parent=parent_span,
                         context=span_ctx,
-                        attributes=
-                            f"{self._span_prefix}.event_type":
-                                event.payload.event_type.value,
-                            f"{self._span_prefix}.function.id":
-                                event.function_ancestry.function_id if event.function_ancestry else "unknown",
-                            f"{self._span_prefix}.function.name":
-                                event.function_ancestry.function_name if event.function_ancestry else "unknown",
-                            f"{self._span_prefix}.subspan.name":
-                                event.payload.name or "",
-                            f"{self._span_prefix}.event_timestamp":
-                                event.event_timestamp,
-                            f"{self._span_prefix}.framework":
-                                event.payload.framework.value if event.payload.framework else "unknown",
-                        },
+                        attributes=attributes,
                         start_time=start_ns)
 
         span_kind = event_type_to_span_kind(event.event_type)
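The new workflow.trace_id attribute is emitted as a zero-padded 32-character hex string; a small illustration of that formatting (not from the diff):

    import uuid

    # A 128-bit trace ID rendered the way the exporter formats the attribute above.
    trace_id = uuid.uuid4().int
    attr_value = f"{trace_id:032x}"

    assert len(attr_value) == 32
    assert int(attr_value, 16) == trace_id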
nat/profiler/decorators/framework_wrapper.py
CHANGED
@@ -123,7 +123,7 @@ def set_framework_profiler_handler(
     except ImportError as e:
         logger.warning(
             "ADK profiler not available. " +
-            "Install NAT with ADK extras: pip install
+            "Install NAT with ADK extras: pip install \"nvidia-nat[adk]\". Error: %s",
             e)
     else:
         handler = ADKProfilerHandler()
nat/profiler/forecasting/models/linear_model.py
CHANGED
@@ -36,7 +36,7 @@ class LinearModel(ForecastingBaseModel):
     except ImportError:
         logger.error(
             "scikit-learn is not installed. Please install scikit-learn to use the LinearModel "
-            "profiling model or install
+            "profiling model or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
 
         raise
 
nat/profiler/forecasting/models/random_forest_regressor.py
CHANGED
@@ -36,7 +36,7 @@ class RandomForestModel(ForecastingBaseModel):
     except ImportError:
         logger.error(
             "scikit-learn is not installed. Please install scikit-learn to use the RandomForest "
-            "profiling model or install
+            "profiling model or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
 
         raise
 
nat/profiler/inference_optimization/bottleneck_analysis/nested_stack_analysis.py
CHANGED
@@ -304,7 +304,7 @@ def save_gantt_chart(all_nodes: list[CallNode], output_path: str) -> None:
         import matplotlib.pyplot as plt
     except ImportError:
         logger.error("matplotlib is not installed. Please install matplotlib to use generate plots for the profiler "
-                     "or install
+                     "or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
 
         raise
 
nat/profiler/inference_optimization/experimental/prefix_span_analysis.py
CHANGED
@@ -212,7 +212,7 @@ def run_prefixspan(sequences_map: dict[int, list[PrefixCallNode]],
         from prefixspan import PrefixSpan
     except ImportError:
         logger.error("prefixspan is not installed. Please install prefixspan to run the prefix analysis in the "
-                     "profiler or install
+                     "profiler or install \"nvidia-nat[profiler]\" to install all necessary profiling packages.")
 
         raise
 