letta-nightly 0.5.0.dev20241021104213__py3-none-any.whl → 0.5.0.dev20241022104124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic; see the advisory details below.

letta/llm_api/helpers.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import copy
2
2
  import json
3
3
  import warnings
4
+ from collections import OrderedDict
4
5
  from typing import Any, List, Union
5
6
 
6
7
  import requests
@@ -10,6 +11,30 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
10
11
  from letta.utils import json_dumps, printd
11
12
 
12
13
 
14
def convert_to_structured_output(openai_function: dict) -> dict:
    """Convert an OpenAI function-call schema into a strict structured-output schema.

    See: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas

    Args:
        openai_function: A function definition in the OpenAI tool/function format,
            containing "name", "description", and "parameters" keys.

    Returns:
        A new dict with "strict": True, "additionalProperties": False, and every
        property listed in "required" (all three are mandated by OpenAI's
        structured-outputs strict mode).

    Raises:
        KeyError: If the input is missing "name", "description", or a property's
            "type"/"description" — assumes well-formed OpenAI schemas; TODO confirm
            all callers guarantee a description on every parameter.
    """
    structured_output = {
        "name": openai_function["name"],
        "description": openai_function["description"],
        # strict mode is required for guaranteed schema adherence
        "strict": True,
        "parameters": {"type": "object", "properties": {}, "additionalProperties": False, "required": []},
    }

    for param, details in openai_function["parameters"]["properties"].items():
        structured_output["parameters"]["properties"][param] = {
            "type": details["type"],
            "description": details["description"],
        }

        # NOTE: this check must run once per parameter (inside the loop);
        # hoisting it out would only ever inspect the last-iterated parameter.
        if "enum" in details:
            structured_output["parameters"]["properties"][param]["enum"] = details["enum"]

    # Structured outputs (strict mode) require every property to be marked required.
    structured_output["parameters"]["required"] = list(structured_output["parameters"]["properties"].keys())

    return structured_output
36
+
37
+
13
38
  def make_post_request(url: str, headers: dict[str, str], data: dict[str, Any]) -> dict[str, Any]:
14
39
  printd(f"Sending request to {url}")
15
40
  try:
@@ -78,33 +103,34 @@ def add_inner_thoughts_to_functions(
78
103
  inner_thoughts_key: str,
79
104
  inner_thoughts_description: str,
80
105
  inner_thoughts_required: bool = True,
81
- # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
82
106
  ) -> List[dict]:
83
- """Add an inner_thoughts kwarg to every function in the provided list"""
84
- # return copies
107
+ """Add an inner_thoughts kwarg to every function in the provided list, ensuring it's the first parameter"""
85
108
  new_functions = []
86
-
87
- # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
88
109
  for function_object in functions:
89
- function_params = function_object["parameters"]["properties"]
90
- required_params = list(function_object["parameters"]["required"])
110
+ new_function_object = copy.deepcopy(function_object)
91
111
 
92
- # if the inner thoughts arg doesn't exist, add it
93
- if inner_thoughts_key not in function_params:
94
- function_params[inner_thoughts_key] = {
95
- "type": "string",
96
- "description": inner_thoughts_description,
97
- }
112
+ # Create a new OrderedDict with inner_thoughts as the first item
113
+ new_properties = OrderedDict()
114
+ new_properties[inner_thoughts_key] = {
115
+ "type": "string",
116
+ "description": inner_thoughts_description,
117
+ }
98
118
 
99
- # make sure it's tagged as required
100
- new_function_object = copy.deepcopy(function_object)
101
- if inner_thoughts_required and inner_thoughts_key not in required_params:
102
- required_params.append(inner_thoughts_key)
103
- new_function_object["parameters"]["required"] = required_params
119
+ # Add the rest of the properties
120
+ new_properties.update(function_object["parameters"]["properties"])
121
+
122
+ # Cast OrderedDict back to a regular dict
123
+ new_function_object["parameters"]["properties"] = dict(new_properties)
124
+
125
+ # Update required parameters if necessary
126
+ if inner_thoughts_required:
127
+ required_params = new_function_object["parameters"].get("required", [])
128
+ if inner_thoughts_key not in required_params:
129
+ required_params.insert(0, inner_thoughts_key)
130
+ new_function_object["parameters"]["required"] = required_params
104
131
 
105
132
  new_functions.append(new_function_object)
106
133
 
107
- # return a list of copies
108
134
  return new_functions
109
135
 
110
136
 
letta/llm_api/openai.py CHANGED
@@ -9,7 +9,11 @@ from httpx_sse._exceptions import SSEError
9
9
 
10
10
  from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
11
11
  from letta.errors import LLMError
12
- from letta.llm_api.helpers import add_inner_thoughts_to_functions, make_post_request
12
+ from letta.llm_api.helpers import (
13
+ add_inner_thoughts_to_functions,
14
+ convert_to_structured_output,
15
+ make_post_request,
16
+ )
13
17
  from letta.local_llm.constants import (
14
18
  INNER_THOUGHTS_KWARG,
15
19
  INNER_THOUGHTS_KWARG_DESCRIPTION,
@@ -112,7 +116,7 @@ def build_openai_chat_completions_request(
112
116
  use_tool_naming: bool,
113
117
  max_tokens: Optional[int],
114
118
  ) -> ChatCompletionRequest:
115
- if llm_config.put_inner_thoughts_in_kwargs:
119
+ if functions and llm_config.put_inner_thoughts_in_kwargs:
116
120
  functions = add_inner_thoughts_to_functions(
117
121
  functions=functions,
118
122
  inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -154,8 +158,8 @@ def build_openai_chat_completions_request(
154
158
  )
155
159
  # https://platform.openai.com/docs/guides/text-generation/json-mode
156
160
  # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
157
- if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
158
- data.response_format = {"type": "json_object"}
161
+ # if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
162
+ # data.response_format = {"type": "json_object"}
159
163
 
160
164
  if "inference.memgpt.ai" in llm_config.model_endpoint:
161
165
  # override user id for inference.memgpt.ai
@@ -362,6 +366,8 @@ def openai_chat_completions_process_stream(
362
366
  chat_completion_response.usage.completion_tokens = n_chunks
363
367
  chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks
364
368
 
369
+ assert len(chat_completion_response.choices) > 0, chat_completion_response
370
+
365
371
  # printd(chat_completion_response)
366
372
  return chat_completion_response
367
373
 
@@ -461,6 +467,13 @@ def openai_chat_completions_request_stream(
461
467
  data.pop("tools")
462
468
  data.pop("tool_choice", None) # extra safe, should exist always (default="auto")
463
469
 
470
+ if "tools" in data:
471
+ for tool in data["tools"]:
472
+ # tool["strict"] = True
473
+ tool["function"] = convert_to_structured_output(tool["function"])
474
+
475
+ # print(f"\n\n\n\nData[tools]: {json.dumps(data['tools'], indent=2)}")
476
+
464
477
  printd(f"Sending request to {url}")
465
478
  try:
466
479
  return _sse_post(url=url, data=data, headers=headers)
@@ -8,6 +8,7 @@ from typing import AsyncGenerator, Literal, Optional, Union
8
8
 
9
9
  from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
10
10
  from letta.interface import AgentInterface
11
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
11
12
  from letta.schemas.enums import MessageStreamStatus
12
13
  from letta.schemas.letta_message import (
13
14
  AssistantMessage,
@@ -23,9 +24,14 @@ from letta.schemas.letta_message import (
23
24
  from letta.schemas.message import Message
24
25
  from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
25
26
  from letta.streaming_interface import AgentChunkStreamingInterface
27
+ from letta.streaming_utils import (
28
+ FunctionArgumentsStreamHandler,
29
+ JSONInnerThoughtsExtractor,
30
+ )
26
31
  from letta.utils import is_utc_datetime
27
32
 
28
33
 
34
+ # TODO strip from code / deprecate
29
35
  class QueuingInterface(AgentInterface):
30
36
  """Messages are queued inside an internal buffer and manually flushed"""
31
37
 
@@ -248,58 +254,6 @@ class QueuingInterface(AgentInterface):
248
254
  self._queue_push(message_api=new_message, message_obj=msg_obj)
249
255
 
250
256
 
251
class FunctionArgumentsStreamHandler:
    """State machine that can process a stream of JSON chunks from function-call arguments.

    Incrementally scans streamed fragments of a stringified JSON object, looking for
    the value of ``json_key`` (e.g. the assistant "message" kwarg), and returns the
    plaintext pieces of that value as they arrive while suppressing everything else.
    """

    def __init__(self, json_key=DEFAULT_MESSAGE_TOOL_KWARG):
        # json_key: the JSON key whose string value should be streamed back as plaintext
        self.json_key = json_key
        self.reset()

    def reset(self):
        """Return the state machine to its initial state (before any chunk was seen)."""
        self.in_message = False  # currently inside the value for json_key
        self.key_buffer = ""  # accumulates key characters until json_key is matched
        self.accumulating = False  # currently reading a key (between '{'/',' and ':')
        self.message_started = False  # opening quote of the value has been consumed

    def process_json_chunk(self, chunk: str) -> Optional[str]:
        """Process a chunk from the function arguments and return the plaintext version.

        Returns None for any chunk that is JSON scaffolding (braces, keys, quotes);
        returns the chunk's text when it is part of the json_key value.
        NOTE(review): assumes the stream tokenizes roughly one JSON token per chunk
        (as OpenAI tool-call deltas do) — a '{' or '"' split across chunks would not
        be recognized; confirm against the upstream tokenizer.
        """

        # Use strip to handle only leading and trailing whitespace in control structures
        if self.accumulating:
            clean_chunk = chunk.strip()
            if self.json_key in self.key_buffer:
                # Key matched; the ':' signals the value is next, so switch modes.
                if ":" in clean_chunk:
                    self.in_message = True
                    self.accumulating = False
                return None
            self.key_buffer += clean_chunk
            return None

        if self.in_message:
            # A lone closing quote ends the value once it has started.
            if chunk.strip() == '"' and self.message_started:
                self.in_message = False
                self.message_started = False
                return None
            # A lone opening quote starts the value; emit nothing yet.
            if not self.message_started and chunk.strip() == '"':
                self.message_started = True
                return None
            if self.message_started:
                # Value text may carry its own terminating quote; strip it off.
                if chunk.strip().endswith('"'):
                    self.in_message = False
                    return chunk.rstrip('"\n')
                return chunk

        # Top-level scaffolding: object open starts key accumulation, object close resets.
        if chunk.strip() == "{":
            self.key_buffer = ""
            self.accumulating = True
            return None
        if chunk.strip() == "}":
            self.in_message = False
            self.message_started = False
            return None
        return None
301
-
302
-
303
257
  class StreamingServerInterface(AgentChunkStreamingInterface):
304
258
  """Maintain a generator that is a proxy for self.process_chunk()
305
259
 
@@ -316,9 +270,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
316
270
  def __init__(
317
271
  self,
318
272
  multi_step=True,
273
+ # Related to if we want to try and pass back the AssistantMessage as a special case function
319
274
  use_assistant_message=False,
320
275
  assistant_message_function_name=DEFAULT_MESSAGE_TOOL,
321
276
  assistant_message_function_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
277
+ # Related to if we expect inner_thoughts to be in the kwargs
278
+ inner_thoughts_in_kwargs=True,
279
+ inner_thoughts_kwarg=INNER_THOUGHTS_KWARG,
322
280
  ):
323
281
  # If streaming mode, ignores base interface calls like .assistant_message, etc
324
282
  self.streaming_mode = False
@@ -346,6 +304,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
346
304
  self.assistant_message_function_name = assistant_message_function_name
347
305
  self.assistant_message_function_kwarg = assistant_message_function_kwarg
348
306
 
307
+ # Support for inner_thoughts_in_kwargs
308
+ self.inner_thoughts_in_kwargs = inner_thoughts_in_kwargs
309
+ self.inner_thoughts_kwarg = inner_thoughts_kwarg
310
+ # A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
311
+ self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=inner_thoughts_kwarg, wait_for_first_key=True)
312
+ # Two buffers used to make sure that the 'name' comes after the inner thoughts stream (if inner_thoughts_in_kwargs)
313
+ self.function_name_buffer = None
314
+ self.function_args_buffer = None
315
+
349
316
  # extra prints
350
317
  self.debug = False
351
318
  self.timeout = 30
@@ -365,16 +332,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
365
332
  # Reset the event until a new item is pushed
366
333
  self._event.clear()
367
334
 
368
- # while self._active:
369
- # # Wait until there is an item in the deque or the stream is deactivated
370
- # await self._event.wait()
371
-
372
- # while self._chunks:
373
- # yield self._chunks.popleft()
374
-
375
- # # Reset the event until a new item is pushed
376
- # self._event.clear()
377
-
378
335
  def get_generator(self) -> AsyncGenerator:
379
336
  """Get the generator that yields processed chunks."""
380
337
  if not self._active:
@@ -419,18 +376,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
419
376
  if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
420
377
  self._push_to_buffer(self.multi_step_gen_indicator)
421
378
 
422
- # self._active = False
423
- # self._event.set() # Unblock the generator if it's waiting to allow it to complete
424
-
425
- # if not self.multi_step:
426
- # # end the stream
427
- # self._active = False
428
- # self._event.set() # Unblock the generator if it's waiting to allow it to complete
429
- # else:
430
- # # signal that a new step has started in the stream
431
- # self._chunks.append(self.multi_step_indicator)
432
- # self._event.set() # Signal that new data is available
433
-
434
379
  def step_complete(self):
435
380
  """Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
436
381
  if not self.multi_step:
@@ -443,8 +388,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
443
388
 
444
389
  def step_yield(self):
445
390
  """If multi_step, this is the true 'stream_end' function."""
446
- # if self.multi_step:
447
- # end the stream
448
391
  self._active = False
449
392
  self._event.set() # Unblock the generator if it's waiting to allow it to complete
450
393
 
@@ -479,8 +422,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
479
422
  elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
480
423
  tool_call = message_delta.tool_calls[0]
481
424
 
425
+ # TODO(charles) merge into logic for internal_monologue
482
426
  # special case for trapping `send_message`
483
427
  if self.use_assistant_message and tool_call.function:
428
+ if self.inner_thoughts_in_kwargs:
429
+ raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
484
430
 
485
431
  # If we just received a chunk with the message in it, we either enter "send_message" mode, or we do standard FunctionCallMessage passthrough mode
486
432
 
@@ -538,6 +484,181 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
538
484
  ),
539
485
  )
540
486
 
487
+ elif self.inner_thoughts_in_kwargs and tool_call.function:
488
+ if self.use_assistant_message:
489
+ raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
490
+
491
+ processed_chunk = None
492
+
493
+ if tool_call.function.name:
494
+ # If we're waiting for the first key, then we should hold back the name
495
+ # ie add it to a buffer instead of returning it as a chunk
496
+ if self.function_name_buffer is None:
497
+ self.function_name_buffer = tool_call.function.name
498
+ else:
499
+ self.function_name_buffer += tool_call.function.name
500
+
501
+ if tool_call.function.arguments:
502
+ updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
503
+
504
+ # If we have inner thoughts, we should output them as a chunk
505
+ if updates_inner_thoughts:
506
+ processed_chunk = InternalMonologue(
507
+ id=message_id,
508
+ date=message_date,
509
+ internal_monologue=updates_inner_thoughts,
510
+ )
511
+ # Additionally inner thoughts may stream back with a chunk of main JSON
512
+ # In that case, since we can only return a chunk at a time, we should buffer it
513
+ if updates_main_json:
514
+ if self.function_args_buffer is None:
515
+ self.function_args_buffer = updates_main_json
516
+ else:
517
+ self.function_args_buffer += updates_main_json
518
+
519
+ # If we have main_json, we should output a FunctionCallMessage
520
+ elif updates_main_json:
521
+ # If there's something in the function_name buffer, we should release it first
522
+ # NOTE: we could output it as part of a chunk that has both name and args,
523
+ # however the frontend may expect name first, then args, so to be
524
+ # safe we'll output name first in a separate chunk
525
+ if self.function_name_buffer:
526
+ processed_chunk = FunctionCallMessage(
527
+ id=message_id,
528
+ date=message_date,
529
+ function_call=FunctionCallDelta(name=self.function_name_buffer, arguments=None),
530
+ )
531
+ # Clear the buffer
532
+ self.function_name_buffer = None
533
+ # Since we're clearing the name buffer, we should store
534
+ # any updates to the arguments inside a separate buffer
535
+ if updates_main_json:
536
+ # Add any main_json updates to the arguments buffer
537
+ if self.function_args_buffer is None:
538
+ self.function_args_buffer = updates_main_json
539
+ else:
540
+ self.function_args_buffer += updates_main_json
541
+
542
+ # If there was nothing in the name buffer, we can proceed to
543
+ # output the arguments chunk as a FunctionCallMessage
544
+ else:
545
+ # There may be a buffer from a previous chunk, for example
546
+ # if the previous chunk had arguments but we needed to flush name
547
+ if self.function_args_buffer:
548
+ # In this case, we should release the buffer + new data at once
549
+ combined_chunk = self.function_args_buffer + updates_main_json
550
+ processed_chunk = FunctionCallMessage(
551
+ id=message_id,
552
+ date=message_date,
553
+ function_call=FunctionCallDelta(name=None, arguments=combined_chunk),
554
+ )
555
+ # clear buffer
556
+ self.function_args_buffer = None
557
+ else:
558
+ # If there's no buffer to clear, just output a new chunk with new data
559
+ processed_chunk = FunctionCallMessage(
560
+ id=message_id,
561
+ date=message_date,
562
+ function_call=FunctionCallDelta(name=None, arguments=updates_main_json),
563
+ )
564
+
565
+ # # If there's something in the main_json buffer, we should add if to the arguments and release it together
566
+ # tool_call_delta = {}
567
+ # if tool_call.id:
568
+ # tool_call_delta["id"] = tool_call.id
569
+ # if tool_call.function:
570
+ # if tool_call.function.arguments:
571
+ # # tool_call_delta["arguments"] = tool_call.function.arguments
572
+ # # NOTE: using the stripped one
573
+ # tool_call_delta["arguments"] = updates_main_json
574
+ # # We use the buffered name
575
+ # if self.function_name_buffer:
576
+ # tool_call_delta["name"] = self.function_name_buffer
577
+ # # if tool_call.function.name:
578
+ # # tool_call_delta["name"] = tool_call.function.name
579
+
580
+ # processed_chunk = FunctionCallMessage(
581
+ # id=message_id,
582
+ # date=message_date,
583
+ # function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
584
+ # )
585
+
586
+ else:
587
+ processed_chunk = None
588
+
589
+ return processed_chunk
590
+
591
+ # # NOTE: this is a simplified version of the parsing code that:
592
+ # # (1) assumes that the inner_thoughts key will always come first
593
+ # # (2) assumes that there's no extra spaces in the stringified JSON
594
+ # # i.e., the prefix will look exactly like: "{\"variable\":\"}"
595
+ # if tool_call.function.arguments:
596
+ # self.function_args_buffer += tool_call.function.arguments
597
+
598
+ # # prefix_str = f'{{"\\"{self.inner_thoughts_kwarg}\\":\\"}}'
599
+ # prefix_str = f'{{"{self.inner_thoughts_kwarg}":'
600
+ # if self.function_args_buffer.startswith(prefix_str):
601
+ # print(f"Found prefix!!!: {self.function_args_buffer}")
602
+ # else:
603
+ # print(f"No prefix found: {self.function_args_buffer}")
604
+
605
+ # tool_call_delta = {}
606
+ # if tool_call.id:
607
+ # tool_call_delta["id"] = tool_call.id
608
+ # if tool_call.function:
609
+ # if tool_call.function.arguments:
610
+ # tool_call_delta["arguments"] = tool_call.function.arguments
611
+ # if tool_call.function.name:
612
+ # tool_call_delta["name"] = tool_call.function.name
613
+
614
+ # processed_chunk = FunctionCallMessage(
615
+ # id=message_id,
616
+ # date=message_date,
617
+ # function_call=FunctionCallDelta(name=tool_call_delta.get("name"), arguments=tool_call_delta.get("arguments")),
618
+ # )
619
+
620
+ # elif False and self.inner_thoughts_in_kwargs and tool_call.function:
621
+ # if self.use_assistant_message:
622
+ # raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
623
+
624
+ # if tool_call.function.arguments:
625
+
626
+ # Maintain a state machine to track if we're reading a key vs reading a value
627
+ # Technically we can we pre-key, post-key, pre-value, post-value
628
+
629
+ # for c in tool_call.function.arguments:
630
+ # if self.function_chunks_parsing_state == FunctionChunksParsingState.PRE_KEY:
631
+ # if c == '"':
632
+ # self.function_chunks_parsing_state = FunctionChunksParsingState.READING_KEY
633
+ # elif self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
634
+ # if c == '"':
635
+ # self.function_chunks_parsing_state = FunctionChunksParsingState.POST_KEY
636
+
637
+ # If we're reading a key:
638
+ # if self.function_chunks_parsing_state == FunctionChunksParsingState.READING_KEY:
639
+
640
+ # We need to buffer the function arguments until we get complete keys
641
+ # We are reading stringified-JSON, so we need to check for keys in data that looks like:
642
+ # "arguments":"{\""
643
+ # "arguments":"inner"
644
+ # "arguments":"_th"
645
+ # "arguments":"ought"
646
+ # "arguments":"s"
647
+ # "arguments":"\":\""
648
+
649
+ # Once we get a complete key, check if the key matches
650
+
651
+ # If it does match, start processing the value (stringified-JSON string
652
+ # And with each new chunk, output it as a chunk of type InternalMonologue
653
+
654
+ # If the key doesn't match, then flush the buffer as a single FunctionCallMessage chunk
655
+
656
+ # If we're reading a value
657
+
658
+ # If we're reading the inner thoughts value, we output chunks of type InternalMonologue
659
+
660
+ # Otherwise, do simple chunks of FunctionCallMessage
661
+
541
662
  else:
542
663
 
543
664
  tool_call_delta = {}
@@ -563,7 +684,14 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
563
684
  # skip if there's a finish
564
685
  return None
565
686
  else:
566
- raise ValueError(f"Couldn't find delta in chunk: {chunk}")
687
+ # Example case that would trigger here:
688
+ # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
689
+ # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
690
+ # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
691
+ # model='gpt-4o-mini-2024-07-18'
692
+ # object='chat.completion.chunk'
693
+ warnings.warn(f"Couldn't find delta in chunk: {chunk}")
694
+ return None
567
695
 
568
696
  return processed_chunk
569
697
 
@@ -663,6 +791,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
663
791
  # "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
664
792
  # "id": str(msg_obj.id) if msg_obj is not None else None,
665
793
  # }
794
+ assert msg_obj is not None, "Internal monologue requires msg_obj references for metadata"
666
795
  processed_chunk = InternalMonologue(
667
796
  id=msg_obj.id,
668
797
  date=msg_obj.created_at,
@@ -676,18 +805,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
676
805
  def assistant_message(self, msg: str, msg_obj: Optional[Message] = None):
677
806
  """Letta uses send_message"""
678
807
 
679
- # if not self.streaming_mode and self.send_message_special_case:
680
-
681
- # # create a fake "chunk" of a stream
682
- # processed_chunk = {
683
- # "assistant_message": msg,
684
- # "date": msg_obj.created_at.isoformat() if msg_obj is not None else get_utc_time().isoformat(),
685
- # "id": str(msg_obj.id) if msg_obj is not None else None,
686
- # }
687
-
688
- # self._chunks.append(processed_chunk)
689
- # self._event.set() # Signal that new data is available
690
-
808
+ # NOTE: this is a no-op, we handle this special case in function_message instead
691
809
  return
692
810
 
693
811
  def function_message(self, msg: str, msg_obj: Optional[Message] = None):
@@ -699,6 +817,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
699
817
  if msg.startswith("Running "):
700
818
  if not self.streaming_mode:
701
819
  # create a fake "chunk" of a stream
820
+ assert msg_obj.tool_calls is not None and len(msg_obj.tool_calls) > 0, "Function call required for function_message"
702
821
  function_call = msg_obj.tool_calls[0]
703
822
 
704
823
  if self.nonstreaming_legacy_mode:
@@ -784,13 +903,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
784
903
  return
785
904
  else:
786
905
  return
787
- # msg = msg.replace("Running ", "")
788
- # new_message = {"function_call": msg}
789
906
 
790
907
  elif msg.startswith("Ran "):
791
908
  return
792
- # msg = msg.replace("Ran ", "Function call returned: ")
793
- # new_message = {"function_call": msg}
794
909
 
795
910
  elif msg.startswith("Success: "):
796
911
  msg = msg.replace("Success: ", "")
@@ -821,10 +936,4 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
821
936
  raise ValueError(msg)
822
937
  new_message = {"function_message": msg}
823
938
 
824
- # add extra metadata
825
- # if msg_obj is not None:
826
- # new_message["id"] = str(msg_obj.id)
827
- # assert is_utc_datetime(msg_obj.created_at), msg_obj.created_at
828
- # new_message["date"] = msg_obj.created_at.isoformat()
829
-
830
939
  self._push_to_buffer(new_message)
@@ -430,9 +430,6 @@ async def send_message_to_agent(
430
430
  # Get the generator object off of the agent's streaming interface
431
431
  # This will be attached to the POST SSE request used under-the-hood
432
432
  letta_agent = server._get_or_load_agent(agent_id=agent_id)
433
- streaming_interface = letta_agent.interface
434
- if not isinstance(streaming_interface, StreamingServerInterface):
435
- raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
436
433
 
437
434
  # Disable token streaming if not OpenAI
438
435
  # TODO: cleanup this logic
@@ -441,6 +438,12 @@ async def send_message_to_agent(
441
438
  print("Warning: token streaming is only supported for OpenAI models. Setting to False.")
442
439
  stream_tokens = False
443
440
 
441
+ # Create a new interface per request
442
+ letta_agent.interface = StreamingServerInterface()
443
+ streaming_interface = letta_agent.interface
444
+ if not isinstance(streaming_interface, StreamingServerInterface):
445
+ raise ValueError(f"Agent has wrong type of interface: {type(streaming_interface)}")
446
+
444
447
  # Enable token-streaming within the request if desired
445
448
  streaming_interface.streaming_mode = stream_tokens
446
449
  # "chatcompletion mode" does some remapping and ignores inner thoughts
@@ -454,6 +457,11 @@ async def send_message_to_agent(
454
457
  streaming_interface.assistant_message_function_name = assistant_message_function_name
455
458
  streaming_interface.assistant_message_function_kwarg = assistant_message_function_kwarg
456
459
 
460
+ # Related to JSON buffer reader
461
+ streaming_interface.inner_thoughts_in_kwargs = (
462
+ llm_config.put_inner_thoughts_in_kwargs if llm_config.put_inner_thoughts_in_kwargs is not None else False
463
+ )
464
+
457
465
  # Offload the synchronous message_func to a separate thread
458
466
  streaming_interface.stream_start()
459
467
  task = asyncio.create_task(
@@ -0,0 +1,270 @@
1
+ from typing import Optional
2
+
3
+ from letta.constants import DEFAULT_MESSAGE_TOOL_KWARG
4
+
5
+
6
+ class JSONInnerThoughtsExtractor:
7
+ """
8
+ A class to process incoming JSON fragments and extract 'inner_thoughts' separately from the main JSON.
9
+
10
+ This handler processes JSON fragments incrementally, parsing out the value associated with a specified key (default is 'inner_thoughts'). It maintains two separate buffers:
11
+
12
+ - `main_json`: Accumulates the JSON data excluding the 'inner_thoughts' key-value pair.
13
+ - `inner_thoughts`: Accumulates the value associated with the 'inner_thoughts' key.
14
+
15
+ **Parameters:**
16
+
17
+ - `inner_thoughts_key` (str): The key to extract from the JSON (default is 'inner_thoughts').
18
+ - `wait_for_first_key` (bool): If `True`, holds back main JSON output until after the 'inner_thoughts' value is processed.
19
+
20
+ **Functionality:**
21
+
22
+ - **Stateful Parsing:** Maintains parsing state across fragments.
23
+ - **String Handling:** Correctly processes strings, escape sequences, and quotation marks.
24
+ - **Selective Extraction:** Identifies and extracts the value of the specified key.
25
+ - **Fragment Processing:** Handles data that arrives in chunks.
26
+
27
+ **Usage:**
28
+
29
+ ```python
30
+ extractor = JSONInnerThoughtsExtractor(wait_for_first_key=True)
31
+ for fragment in fragments:
32
+ updates_main_json, updates_inner_thoughts = extractor.process_fragment(fragment)
33
+ ```
34
+
35
+ """
36
+
37
+ def __init__(self, inner_thoughts_key="inner_thoughts", wait_for_first_key=False):
38
+ self.inner_thoughts_key = inner_thoughts_key
39
+ self.wait_for_first_key = wait_for_first_key
40
+ self.main_buffer = ""
41
+ self.inner_thoughts_buffer = ""
42
+ self.state = "start" # Possible states: start, key, colon, value, comma_or_end, end
43
+ self.in_string = False
44
+ self.escaped = False
45
+ self.current_key = ""
46
+ self.is_inner_thoughts_value = False
47
+ self.inner_thoughts_processed = False
48
+ self.hold_main_json = wait_for_first_key
49
+ self.main_json_held_buffer = ""
50
+
51
+ def process_fragment(self, fragment):
52
+ updates_main_json = ""
53
+ updates_inner_thoughts = ""
54
+ i = 0
55
+ while i < len(fragment):
56
+ c = fragment[i]
57
+ if self.escaped:
58
+ self.escaped = False
59
+ if self.in_string:
60
+ if self.state == "key":
61
+ self.current_key += c
62
+ elif self.state == "value":
63
+ if self.is_inner_thoughts_value:
64
+ updates_inner_thoughts += c
65
+ self.inner_thoughts_buffer += c
66
+ else:
67
+ if self.hold_main_json:
68
+ self.main_json_held_buffer += c
69
+ else:
70
+ updates_main_json += c
71
+ self.main_buffer += c
72
+ else:
73
+ if not self.is_inner_thoughts_value:
74
+ if self.hold_main_json:
75
+ self.main_json_held_buffer += c
76
+ else:
77
+ updates_main_json += c
78
+ self.main_buffer += c
79
+ elif c == "\\":
80
+ self.escaped = True
81
+ if self.in_string:
82
+ if self.state == "key":
83
+ self.current_key += c
84
+ elif self.state == "value":
85
+ if self.is_inner_thoughts_value:
86
+ updates_inner_thoughts += c
87
+ self.inner_thoughts_buffer += c
88
+ else:
89
+ if self.hold_main_json:
90
+ self.main_json_held_buffer += c
91
+ else:
92
+ updates_main_json += c
93
+ self.main_buffer += c
94
+ else:
95
+ if not self.is_inner_thoughts_value:
96
+ if self.hold_main_json:
97
+ self.main_json_held_buffer += c
98
+ else:
99
+ updates_main_json += c
100
+ self.main_buffer += c
101
+ elif c == '"':
102
+ if not self.escaped:
103
+ self.in_string = not self.in_string
104
+ if self.in_string:
105
+ if self.state in ["start", "comma_or_end"]:
106
+ self.state = "key"
107
+ self.current_key = ""
108
+ # Release held main_json when starting to process the next key
109
+ if self.wait_for_first_key and self.hold_main_json and self.inner_thoughts_processed:
110
+ updates_main_json += self.main_json_held_buffer
111
+ self.main_buffer += self.main_json_held_buffer
112
+ self.main_json_held_buffer = ""
113
+ self.hold_main_json = False
114
+ else:
115
+ if self.state == "key":
116
+ self.state = "colon"
117
+ elif self.state == "value":
118
+ # End of value
119
+ if self.is_inner_thoughts_value:
120
+ self.inner_thoughts_processed = True
121
+ # Do not release held main_json here
122
+ else:
123
+ if self.hold_main_json:
124
+ self.main_json_held_buffer += '"'
125
+ else:
126
+ updates_main_json += '"'
127
+ self.main_buffer += '"'
128
+ self.state = "comma_or_end"
129
+ else:
130
+ self.escaped = False
131
+ if self.in_string:
132
+ if self.state == "key":
133
+ self.current_key += '"'
134
+ elif self.state == "value":
135
+ if self.is_inner_thoughts_value:
136
+ updates_inner_thoughts += '"'
137
+ self.inner_thoughts_buffer += '"'
138
+ else:
139
+ if self.hold_main_json:
140
+ self.main_json_held_buffer += '"'
141
+ else:
142
+ updates_main_json += '"'
143
+ self.main_buffer += '"'
144
+ elif self.in_string:
145
+ if self.state == "key":
146
+ self.current_key += c
147
+ elif self.state == "value":
148
+ if self.is_inner_thoughts_value:
149
+ updates_inner_thoughts += c
150
+ self.inner_thoughts_buffer += c
151
+ else:
152
+ if self.hold_main_json:
153
+ self.main_json_held_buffer += c
154
+ else:
155
+ updates_main_json += c
156
+ self.main_buffer += c
157
+ else:
158
+ if c == ":" and self.state == "colon":
159
+ self.state = "value"
160
+ self.is_inner_thoughts_value = self.current_key == self.inner_thoughts_key
161
+ if self.is_inner_thoughts_value:
162
+ pass # Do not include 'inner_thoughts' key in main_json
163
+ else:
164
+ key_colon = f'"{self.current_key}":'
165
+ if self.hold_main_json:
166
+ self.main_json_held_buffer += key_colon + '"'
167
+ else:
168
+ updates_main_json += key_colon + '"'
169
+ self.main_buffer += key_colon + '"'
170
+ elif c == "," and self.state == "comma_or_end":
171
+ if self.is_inner_thoughts_value:
172
+ # Inner thoughts value ended
173
+ self.is_inner_thoughts_value = False
174
+ self.state = "start"
175
+ # Do not release held main_json here
176
+ else:
177
+ if self.hold_main_json:
178
+ self.main_json_held_buffer += c
179
+ else:
180
+ updates_main_json += c
181
+ self.main_buffer += c
182
+ self.state = "start"
183
+ elif c == "{":
184
+ if not self.is_inner_thoughts_value:
185
+ if self.hold_main_json:
186
+ self.main_json_held_buffer += c
187
+ else:
188
+ updates_main_json += c
189
+ self.main_buffer += c
190
+ elif c == "}":
191
+ self.state = "end"
192
+ if self.hold_main_json:
193
+ self.main_json_held_buffer += c
194
+ else:
195
+ updates_main_json += c
196
+ self.main_buffer += c
197
+ else:
198
+ if self.state == "value":
199
+ if self.is_inner_thoughts_value:
200
+ updates_inner_thoughts += c
201
+ self.inner_thoughts_buffer += c
202
+ else:
203
+ if self.hold_main_json:
204
+ self.main_json_held_buffer += c
205
+ else:
206
+ updates_main_json += c
207
+ self.main_buffer += c
208
+ i += 1
209
+
210
+ return updates_main_json, updates_inner_thoughts
211
+
212
+ @property
213
+ def main_json(self):
214
+ return self.main_buffer
215
+
216
+ @property
217
+ def inner_thoughts(self):
218
+ return self.inner_thoughts_buffer
219
+
220
+
221
+ class FunctionArgumentsStreamHandler:
222
+ """State machine that can process a stream of"""
223
+
224
+ def __init__(self, json_key=DEFAULT_MESSAGE_TOOL_KWARG):
225
+ self.json_key = json_key
226
+ self.reset()
227
+
228
+ def reset(self):
229
+ self.in_message = False
230
+ self.key_buffer = ""
231
+ self.accumulating = False
232
+ self.message_started = False
233
+
234
+ def process_json_chunk(self, chunk: str) -> Optional[str]:
235
+ """Process a chunk from the function arguments and return the plaintext version"""
236
+
237
+ # Use strip to handle only leading and trailing whitespace in control structures
238
+ if self.accumulating:
239
+ clean_chunk = chunk.strip()
240
+ if self.json_key in self.key_buffer:
241
+ if ":" in clean_chunk:
242
+ self.in_message = True
243
+ self.accumulating = False
244
+ return None
245
+ self.key_buffer += clean_chunk
246
+ return None
247
+
248
+ if self.in_message:
249
+ if chunk.strip() == '"' and self.message_started:
250
+ self.in_message = False
251
+ self.message_started = False
252
+ return None
253
+ if not self.message_started and chunk.strip() == '"':
254
+ self.message_started = True
255
+ return None
256
+ if self.message_started:
257
+ if chunk.strip().endswith('"'):
258
+ self.in_message = False
259
+ return chunk.rstrip('"\n')
260
+ return chunk
261
+
262
+ if chunk.strip() == "{":
263
+ self.key_buffer = ""
264
+ self.accumulating = True
265
+ return None
266
+ if chunk.strip() == "}":
267
+ self.in_message = False
268
+ self.message_started = False
269
+ return None
270
+ return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: letta-nightly
3
- Version: 0.5.0.dev20241021104213
3
+ Version: 0.5.0.dev20241022104124
4
4
  Summary: Create LLM agents with long-term memory and custom tools
5
5
  License: Apache License
6
6
  Author: Letta Team
@@ -41,10 +41,10 @@ letta/llm_api/azure_openai.py,sha256=C-fuuholudcLJDWjqnXJwpXsfmGWfNugEVWyj6YCrpg
41
41
  letta/llm_api/azure_openai_constants.py,sha256=oXtKrgBFHf744gyt5l1thILXgyi8NDNUrKEa2GGGpjw,278
42
42
  letta/llm_api/cohere.py,sha256=vDRd-SUGp1t_JUIdwC3RkIhwMl0OY7n-tAU9uPORYkY,14826
43
43
  letta/llm_api/google_ai.py,sha256=3xZ074nSOCC22c15yerA5ngWzh0ex4wxeI-6faNbHPE,17708
44
- letta/llm_api/helpers.py,sha256=8aG6LzB0T3NFlnab-RR2tj0ARUTMBHSd0icCur5-RCk,8813
44
+ letta/llm_api/helpers.py,sha256=sGCmNA1U_7-AhRFgvT668jdp_xyzSliKQYbTvRR6O7c,9812
45
45
  letta/llm_api/llm_api_tools.py,sha256=GEBO7Dlt7xtAQud1sVsigKZKPpLOZOt2IWL8LwcNV4o,14869
46
46
  letta/llm_api/mistral.py,sha256=fHdfD9ug-rQIk2qn8tRKay1U6w9maF11ryhKi91FfXM,1593
47
- letta/llm_api/openai.py,sha256=faJLzgx94cxz32VSeSxLDHeeSiKkb5WCM3BA_MNylkI,22821
47
+ letta/llm_api/openai.py,sha256=_kztTpd7IR12bRjvMT1n8dvy5bINvgJ3od65HBQunVk,23212
48
48
  letta/local_llm/README.md,sha256=hFJyw5B0TU2jrh9nb0zGZMgdH-Ei1dSRfhvPQG_NSoU,168
49
49
  letta/local_llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
50
  letta/local_llm/chat_completion_proxy.py,sha256=SiohxsjGTku4vOryOZx7I0t0xoO_sUuhXgoe62fKq3c,12995
@@ -151,7 +151,7 @@ letta/server/rest_api/app.py,sha256=JNmDnvp9fP--hJPtPpEWgQT-14O1YOceZbWELr2vedA,
151
151
  letta/server/rest_api/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
152
152
  letta/server/rest_api/auth/index.py,sha256=fQBGyVylGSRfEMLQ17cZzrHd5Y1xiVylvPqH5Rl-lXQ,1378
153
153
  letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
154
- letta/server/rest_api/interface.py,sha256=Mub9iOQFJh9HSwbc5X6OwHCdtwJYCBzhOjpSx9c5Lss,36181
154
+ letta/server/rest_api/interface.py,sha256=Km1nJSN8Ogyj5P-DmDRyR5fEov-G0o9KEIXAlsqqb5A,44525
155
155
  letta/server/rest_api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
156
  letta/server/rest_api/routers/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
157
157
  letta/server/rest_api/routers/openai/assistants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -161,7 +161,7 @@ letta/server/rest_api/routers/openai/assistants/threads.py,sha256=WXVGBaBvSNPB7Z
161
161
  letta/server/rest_api/routers/openai/chat_completions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
162
  letta/server/rest_api/routers/openai/chat_completions/chat_completions.py,sha256=-uye6cm4SnoQGwxhr1N1FrSXOlnO2Hvbfj6k8JSc45k,4918
163
163
  letta/server/rest_api/routers/v1/__init__.py,sha256=sqlVZa-u9DJwdRsp0_8YUGrac9DHguIB4wETlEDRylA,666
164
- letta/server/rest_api/routers/v1/agents.py,sha256=Yoktva6_pSCRztUdZNZXdbnrp9L5OKnP5E1mZkbUAGw,25066
164
+ letta/server/rest_api/routers/v1/agents.py,sha256=BY4rQOcwsi_WiWh6DwrO8Vz6Nu2JgMBKSHxiBqlaaYY,25397
165
165
  letta/server/rest_api/routers/v1/blocks.py,sha256=0WekE_yBD2U3jYgPxI0DCFjACWavCAlvm_Ybw5SZBnw,2583
166
166
  letta/server/rest_api/routers/v1/health.py,sha256=pKCuVESlVOhGIb4VC4K-H82eZqfghmT6kvj2iOkkKuc,401
167
167
  letta/server/rest_api/routers/v1/jobs.py,sha256=a-j0v-5A0un0pVCOHpfeWnzpOWkVDQO6ti42k_qAlZY,2272
@@ -187,10 +187,11 @@ letta/server/ws_api/protocol.py,sha256=M_-gM5iuDBwa1cuN2IGNCG5GxMJwU2d3XW93XALv9
187
187
  letta/server/ws_api/server.py,sha256=C2Kv48PCwl46DQFb0ZP30s86KJLQ6dZk2AhWQEZn9pY,6004
188
188
  letta/settings.py,sha256=gNdH-Ty6f-Nfz2j9ZMZFRQHac2KzgsxLZNt5l_TiAyo,3301
189
189
  letta/streaming_interface.py,sha256=_FPUWy58j50evHcpXyd7zB1wWqeCc71NCFeWh_TBvnw,15736
190
+ letta/streaming_utils.py,sha256=329fsvj1ZN0r0LpQtmMPZ2vSxkDBIUUwvGHZFkjm2I8,11745
190
191
  letta/system.py,sha256=buKYPqG5n2x41hVmWpu6JUpyd7vTWED9Km2_M7dLrvk,6960
191
192
  letta/utils.py,sha256=SXLEYhyp3gHyIjrxNIKNZZ5ittKo3KOj6zxgC_Trex0,31012
192
- letta_nightly-0.5.0.dev20241021104213.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
193
- letta_nightly-0.5.0.dev20241021104213.dist-info/METADATA,sha256=HzdXSUkW_tN8xukhNa0g1wcCtIoZAlc9S4vaAHbmdDs,10620
194
- letta_nightly-0.5.0.dev20241021104213.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
195
- letta_nightly-0.5.0.dev20241021104213.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
196
- letta_nightly-0.5.0.dev20241021104213.dist-info/RECORD,,
193
+ letta_nightly-0.5.0.dev20241022104124.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
194
+ letta_nightly-0.5.0.dev20241022104124.dist-info/METADATA,sha256=v5hk_4eSmZRsN51JSel2I1mXzkM4XCHiAJrBF7fPC-Y,10620
195
+ letta_nightly-0.5.0.dev20241022104124.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
196
+ letta_nightly-0.5.0.dev20241022104124.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
197
+ letta_nightly-0.5.0.dev20241022104124.dist-info/RECORD,,