langroid 0.1.59__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langroid/agent/chat_agent.py
@@ -393,11 +393,13 @@ class ChatAgent(Agent):
             )
 
         if output_len < self.config.llm.min_output_tokens:
-            raise ValueError(
+            logger.warning(
                 f"""
                 Tried to shorten prompt history for chat mode
                 but the feasible output length {output_len} is still
                 less than the minimum output length {self.config.llm.min_output_tokens}.
+                Your chat history is too long for this model,
+                and the response may be truncated.
                 """
             )
         with StreamingIfAllowed(self.llm):
langroid/language_models/base.py
@@ -1,5 +1,6 @@
 import asyncio
 import json
+import logging
 from abc import ABC, abstractmethod
 from enum import Enum
 from typing import Any, Dict, List, Optional, Tuple, Type, Union
@@ -9,6 +10,7 @@ from pydantic import BaseModel, BaseSettings
 
 from langroid.cachedb.momento_cachedb import MomentoCacheConfig
 from langroid.cachedb.redis_cachedb import RedisCacheConfig
+from langroid.language_models.config import Llama2FormatterConfig, PromptFormatterConfig
 from langroid.mytypes import Document
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.json import top_level_json_field
@@ -20,9 +22,60 @@ from langroid.prompts.templates import (
 from langroid.utils.configuration import settings
 from langroid.utils.output.printing import show_if_debug
 
+logger = logging.getLogger(__name__)
+
+
+class LocalModelConfig(BaseModel):
+    """
+    Configuration for local model available via
+    an OpenAI-compatible API.
+
+    Support local LLM endpoint that spoofs the OpenAI API.
+    Examples of libraries that enable this are:
+    - llama-cpp-python (LCP)
+    - text-generation-webui (TGW) (from oobabooga or "ooba" for short)
+    Typically these allow spinning up a server that listens on
+    http://localhost:8000/v1
+    and we can continue using our OpenAI-API-based python code, except we
+    set openai.api_base to this URL.
+
+    These endpoints usually support both /completions and /chat/completions requests.
+    Supporting /chat/completions is more complex because each family of local model
+    has its own (finicky) formatting for turns and roles in a chat.
+    The TGW lib has an extensive set of templates for various model families,
+    and the template is auto-detected from the model, at least for common models,
+    so we can directly use the /chat/completions endpoint, and it works well (at
+    least on llama2 models). However, when in doubt,
+    we can always do our own formatting of the chat history and use the /completions
+    endpoint instead. This is what we do for LCP models. In this case,
+    we need to set `use_completion_for_chat` to True.
+    With a model served via TGW, for chats we can set this to either True or False
+    (in which case we rely on the TGW templates correctly formatting the chat history).
+
+    Both of the above libs assume a single model is available at the endpoint.
+    As far as I know, they do not support run-time switching of models.
+    There is another library that we can potentially integrate, `localAI`,
+    which does have model switching, and can be very useful, e.g.
+    when we want different agents to use different models.
+
+    All of the above considerations are outside of this interface, however.
+    All we care about here is the endpoint url.
+    """
+
+    # OPENAI_LOCAL.* env vars can be used to set these in .env file or environment
+
+    api_base: str = "http://localhost:8000/v1"
+    model: str = "local"  # usually not needed
+    model_type: str = "llama2"
+    formatter: None | PromptFormatterConfig = Llama2FormatterConfig()
+    context_length: int = 2048  # default for llama-cpp-python
+    use_chat_for_completion: bool = False
+    use_completion_for_chat: bool = True
+
 
 class LLMConfig(BaseSettings):
     type: str = "openai"
+    local: None | LocalModelConfig = None
     timeout: int = 20  # timeout for API requests
     chat_model: Optional[str] = None
     completion_model: Optional[str] = None
@@ -32,6 +85,7 @@ class LLMConfig(BaseSettings):
     # if input length + max_output_tokens > context length of model,
     # we will try shortening requested output
     min_output_tokens: int = 64
+    use_completion_for_chat: bool = False  # use completion model for chat?
    use_chat_for_completion: bool = True  # use chat model for completion?
    stream: bool = False  # stream output from API?
    cache_config: None | RedisCacheConfig | MomentoCacheConfig = None
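
(Illustrative sketch, not part of the diff: the two modes described in the LocalModelConfig docstring above. Field names come from this release; the import path for LocalModelConfig/LLMConfig is inferred from these hunks, and the URLs/ports are placeholders.)

    from langroid.language_models.base import LLMConfig, LocalModelConfig

    # text-generation-webui: its chat templates do the formatting, so we can keep
    # hitting /chat/completions and skip our own formatter.
    tgw_local = LocalModelConfig(
        api_base="http://localhost:5000/v1",
        use_completion_for_chat=False,
    )

    # llama-cpp-python: format the chat history ourselves (Llama2FormatterConfig
    # is the default `formatter`) and send a single prompt to /completions instead.
    lcp_local = LocalModelConfig(
        api_base="http://localhost:8000/v1",
        use_completion_for_chat=True,
    )

    config = LLMConfig(local=lcp_local)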
@@ -223,6 +277,73 @@ class LanguageModel(ABC):
         ).get(config.type, openai)
         return cls(config)  # type: ignore
 
+    @staticmethod
+    def user_assistant_pairs(lst: List[str]) -> List[Tuple[str, str]]:
+        """
+        Given an even-length sequence of strings, split into a sequence of pairs
+
+        Args:
+            lst (List[str]): sequence of strings
+
+        Returns:
+            List[Tuple[str,str]]: sequence of pairs of strings
+        """
+        evens = lst[::2]
+        odds = lst[1::2]
+        return list(zip(evens, odds))
+
+    @staticmethod
+    def get_chat_history_components(
+        messages: List[LLMMessage],
+    ) -> Tuple[str, List[Tuple[str, str]], str]:
+        """
+        From the chat history, extract system prompt, user-assistant turns, and
+        final user msg.
+
+        Args:
+            messages (List[LLMMessage]): List of messages in the chat history
+
+        Returns:
+            Tuple[str, List[Tuple[str,str]], str]:
+                system prompt, user-assistant turns, final user msg
+
+        """
+        # Handle various degenerate cases
+        messages = [m for m in messages]  # copy
+        DUMMY_SYS_PROMPT = "You are a helpful assistant."
+        DUMMY_USER_PROMPT = "Follow the instructions above."
+        if len(messages) == 0 or messages[0].role != Role.SYSTEM:
+            logger.warning("No system msg, creating dummy system prompt")
+            messages.insert(0, LLMMessage(content=DUMMY_SYS_PROMPT, role=Role.SYSTEM))
+        system_prompt = messages[0].content
+
+        # now we have messages = [Sys,...]
+        if len(messages) == 1:
+            logger.warning(
+                "Got only system message in chat history, creating dummy user prompt"
+            )
+            messages.append(LLMMessage(content=DUMMY_USER_PROMPT, role=Role.USER))
+
+        # now we have messages = [Sys, msg, ...]
+
+        if messages[1].role != Role.USER:
+            messages.insert(1, LLMMessage(content=DUMMY_USER_PROMPT, role=Role.USER))
+
+        # now we have messages = [Sys, user, ...]
+        if messages[-1].role != Role.USER:
+            logger.warning(
+                "Last message in chat history is not a user message,"
+                " creating dummy user prompt"
+            )
+            messages.append(LLMMessage(content=DUMMY_USER_PROMPT, role=Role.USER))
+
+        # now we have messages = [Sys, user, ..., user]
+        # so we omit the first and last elements and make pairs of user-asst messages
+        conversation = [m.content for m in messages[1:-1]]
+        user_prompt = messages[-1].content
+        pairs = LanguageModel.user_assistant_pairs(conversation)
+        return system_prompt, pairs, user_prompt
+
     @abstractmethod
     def set_stream(self, stream: bool) -> bool:
         """Enable or disable streaming output from API.

langroid/language_models/config.py (new file)
@@ -0,0 +1,13 @@
+from pydantic import BaseSettings
+
+
+class PromptFormatterConfig(BaseSettings):
+    type: str = "llama2"
+
+    class Config:
+        env_prefix = "FORMAT_"
+        case_sensitive = False
+
+
+class Llama2FormatterConfig(PromptFormatterConfig):
+    use_bos_eos: bool = False
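
(Since PromptFormatterConfig is a pydantic BaseSettings, the env_prefix above lets its fields be set from the environment; a small assumed example, the env var name is inferred from pydantic's prefix+field convention.)

    import os

    from langroid.language_models.config import PromptFormatterConfig

    os.environ["FORMAT_TYPE"] = "llama2"  # matched via env_prefix="FORMAT_" (case-insensitive)
    cfg = PromptFormatterConfig()         # cfg.type == "llama2", read from the environment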

langroid/language_models/openai_gpt.py
@@ -22,6 +22,9 @@ from langroid.language_models.base import (
     LLMTokenUsage,
     Role,
 )
+from langroid.language_models.prompt_formatter.base import (
+    PromptFormatter,
+)
 from langroid.language_models.utils import (
     async_retry_with_exponential_backoff,
     retry_with_exponential_backoff,
@@ -50,45 +53,26 @@ class OpenAICompletionModel(str, Enum):
     LOCAL = "local"  # dummy for any local model
 
 
-class LocalModelConfig(BaseModel):
-    """
-    Configuration for local model available via
-    an OpenAI-compatible API.
-    """
-
-    # OPENAI_LOCAL.API_BASE env var can be used to set this
-    api_base: str = "http://localhost:8000/v1"
-    # OPENAI_LOCAL.CONTEXT_LENGTH env var can be used to set this
-    context_length: int = 2048  # default for llama-cpp-python
-
-
 class OpenAIGPTConfig(LLMConfig):
     type: str = "openai"
-    # This allows local configs to be set via OPENAI_LOCAL.* env vars
-    local: LocalModelConfig = LocalModelConfig()
     api_base: str | None = None  # used for local or other non-OpenAI models
     max_output_tokens: int = 1024
     min_output_tokens: int = 64
     timeout: int = 20
     temperature: float = 0.2
-    chat_model: OpenAIChatModel = OpenAIChatModel.GPT4
-    completion_model: OpenAICompletionModel = OpenAICompletionModel.GPT4
+    chat_model: str | OpenAIChatModel = OpenAIChatModel.GPT4
+    completion_model: str | OpenAICompletionModel = OpenAICompletionModel.GPT4
     context_length: Dict[str, int] = {
         OpenAIChatModel.GPT3_5_TURBO: 4096,
         OpenAIChatModel.GPT4: 8192,
         OpenAIChatModel.GPT4_NOFUNC: 8192,
         OpenAICompletionModel.TEXT_DA_VINCI_003: 4096,
-        # 2048 is default in llama-cpp-python, but can be set
-        # via cmd line, e.g.
-        # python3 -m llama-cpp.server --n_ctx 4096
-        OpenAICompletionModel.LOCAL: 2048,
     }
     cost_per_1k_tokens: Dict[str, Tuple[float, float]] = {
         # (input/prompt cost, output/completion cost)
         OpenAIChatModel.GPT3_5_TURBO: (0.0015, 0.002),
         OpenAIChatModel.GPT4: (0.03, 0.06),  # 8K context
         OpenAIChatModel.GPT4_NOFUNC: (0.03, 0.06),
-        OpenAIChatModel.LOCAL: (0.0, 0.0),
     }
 
     # all of the non-dict vars above can be set via env vars,
@@ -122,10 +106,16 @@ class OpenAIGPT(LanguageModel):
         if settings.nofunc:
             self.chat_model = OpenAIChatModel.GPT4_NOFUNC
         self.api_base: str | None = None
-        if config.chat_model == OpenAIChatModel.LOCAL:
+        if config.local:
+            self.config.chat_model = config.local.model
+            self.config.use_completion_for_chat = config.local.use_completion_for_chat
+            self.config.use_chat_for_completion = config.local.use_chat_for_completion
             self.api_key = "sx-xxx"
             self.api_base = config.local.api_base
-            config.context_length = {OpenAIChatModel.LOCAL: config.local.context_length}
+            config.context_length = {config.local.model: config.local.context_length}
+            config.cost_per_1k_tokens = {
+                config.local.model: (0.0, 0.0),
+            }
         else:
             # TODO: get rid of this and add `api_key` to the OpenAIGPTConfig
             # so we can get it from the OPENAI_API_KEY env var
@@ -433,6 +423,24 @@ class OpenAIGPT(LanguageModel):
         functions: Optional[List[LLMFunctionSpec]] = None,
         function_call: str | Dict[str, str] = "auto",
     ) -> LLMResponse:
+        if self.config.use_completion_for_chat:
+            # only makes sense for local models
+            if self.config.local is None or self.config.local.formatter is None:
+                raise ValueError(
+                    """
+                    `formatter` must be specified in config to use completion for chat.
+                    """
+                )
+            formatter = PromptFormatter.create(self.config.local.formatter)
+            if isinstance(messages, str):
+                messages = [
+                    LLMMessage(
+                        role=Role.SYSTEM, content="You are a helpful assistant."
+                    ),
+                    LLMMessage(role=Role.USER, content=messages),
+                ]
+            prompt = formatter.format(messages)
+            return self.generate(prompt=prompt, max_tokens=max_tokens)
         try:
             return self._chat(messages, max_tokens, functions, function_call)
         except Exception as e:
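
(A hedged end-to-end sketch of the new chat-via-completion path above; only the class and field names come from this diff, the import paths are inferred from its hunks, and the values are illustrative.)

    from langroid.language_models.base import LocalModelConfig
    from langroid.language_models.config import Llama2FormatterConfig
    from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig

    cfg = OpenAIGPTConfig(
        local=LocalModelConfig(
            api_base="http://localhost:8000/v1",  # e.g. a llama-cpp-python server
            context_length=4096,                  # match the server's --n_ctx
            use_completion_for_chat=True,         # route chat() through /completions
            formatter=Llama2FormatterConfig(),
        ),
    )
    llm = OpenAIGPT(cfg)
    # A plain-string message is wrapped as [system, user]; the formatter collapses
    # the chat history into one llama2-style prompt, which goes to generate().
    response = llm.chat("What is a neutron star?", max_tokens=128)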

langroid/language_models/prompt_formatter/base.py (new file)
@@ -0,0 +1,42 @@
+import logging
+from abc import ABC, abstractmethod
+from typing import List
+
+from langroid.language_models.base import LLMMessage
+from langroid.language_models.config import PromptFormatterConfig
+
+logger = logging.getLogger(__name__)
+
+
+class PromptFormatter(ABC):
+    """
+    Abstract base class for a prompt formatter
+    """
+
+    def __init__(self, config: PromptFormatterConfig):
+        self.config = config
+
+    @staticmethod
+    def create(config: PromptFormatterConfig) -> "PromptFormatter":
+        from langroid.language_models.prompt_formatter.llama2_formatter import (
+            Llama2Formatter,
+        )
+
+        formatter_class = dict(llama2=Llama2Formatter).get(config.type, Llama2Formatter)
+        return formatter_class(config)
+
+    @abstractmethod
+    def format(self, messages: List[LLMMessage]) -> str:
+        """
+        Convert sequence of messages (system, user, assistant, user, assistant...user)
+        to a single prompt formatted according to the specific format type,
+        to be used in a /completions endpoint.
+
+        Args:
+            messages (List[LLMMessage]): chat history as a sequence of messages
+
+        Returns:
+            (str): formatted version of chat history
+
+        """
+        pass
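
(The ABC above is meant to be subclassed; a minimal hypothetical formatter, shown only to illustrate the contract and the LanguageModel.get_chat_history_components helper added earlier in this diff. SimpleFormatter and SimpleFormatterConfig are invented names, not part of the package.)

    from typing import List

    from langroid.language_models.base import LanguageModel, LLMMessage
    from langroid.language_models.config import PromptFormatterConfig
    from langroid.language_models.prompt_formatter.base import PromptFormatter


    class SimpleFormatterConfig(PromptFormatterConfig):  # hypothetical
        type: str = "simple"


    class SimpleFormatter(PromptFormatter):  # hypothetical
        def format(self, messages: List[LLMMessage]) -> str:
            # Split the history into (system, turns, final user msg), then emit a
            # plain "System/User/Assistant" transcript ending with an open turn.
            sys_msg, turns, user_msg = LanguageModel.get_chat_history_components(messages)
            lines = [f"System: {sys_msg}"]
            for user, assistant in turns:
                lines += [f"User: {user}", f"Assistant: {assistant}"]
            lines += [f"User: {user_msg}", "Assistant:"]
            return "\n".join(lines)

Note that PromptFormatter.create() above only dispatches on type="llama2", so a custom formatter like this would have to be instantiated directly.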

langroid/language_models/prompt_formatter/llama2_formatter.py (new file)
@@ -0,0 +1,75 @@
+import logging
+from typing import List, Tuple
+
+from langroid.language_models.base import LanguageModel, LLMMessage
+from langroid.language_models.config import Llama2FormatterConfig
+from langroid.language_models.prompt_formatter.base import PromptFormatter
+
+logger = logging.getLogger(__name__)
+
+
+BOS: str = "<s>"
+EOS: str = "</s>"
+B_INST: str = "[INST]"
+E_INST: str = "[/INST]"
+B_SYS: str = "<<SYS>>\n"
+E_SYS: str = "\n<</SYS>>\n\n"
+SPECIAL_TAGS: List[str] = [B_INST, E_INST, BOS, EOS, "<<SYS>>", "<</SYS>>"]
+
+
+class Llama2Formatter(PromptFormatter):
+    def __int__(self, config: Llama2FormatterConfig) -> None:
+        super().__init__(config)
+        self.config: Llama2FormatterConfig = config
+
+    def format(self, messages: List[LLMMessage]) -> str:
+        sys_msg, chat_msgs, user_msg = LanguageModel.get_chat_history_components(
+            messages
+        )
+        return self._get_prompt_from_components(sys_msg, chat_msgs, user_msg)
+
+    def _get_prompt_from_components(
+        self,
+        system_prompt: str,
+        chat_history: List[Tuple[str, str]],
+        user_message: str,
+    ) -> str:
+        """
+        For llama2 models, convert chat history into a single
+        prompt for Llama2 models, for use in the /completions endpoint
+        (as opposed to the /chat/completions endpoint).
+        See:
+        https://www.reddit.com/r/LocalLLaMA/comments/155po2p/get_llama_2_prompt_format_right/
+        https://github.com/facebookresearch/llama/blob/main/llama/generation.py#L44
+
+        Args:
+            system_prompt (str): system prompt, typically specifying role/task.
+            chat_history (List[Tuple[str,str]]): List of (user, assistant) pairs
+            user_message (str): user message, at the end of the chat, i.e. the message
+                for which we want to generate a response.
+
+        Returns:
+            str: Prompt for Llama2 models
+
+        Typical structure of the formatted prompt:
+        Note important that the first [INST], [/INST] surrounds the system prompt,
+        together with the first user message. A lot of libs seem to miss this detail.
+
+        <s>[INST] <<SYS>>
+        You are are a helpful... bla bla.. assistant
+        <</SYS>>
+
+        Hi there! [/INST] Hello! How can I help you today? </s><s>[INST]
+        What is a neutron star? [/INST] A neutron star is a ... </s><s>
+        [INST] Okay cool, thank you! [/INST] You're welcome! </s><s>
+        [INST] Ah, I have one more question.. [/INST]
+        """
+        bos = BOS if self.config.use_bos_eos else ""
+        eos = EOS if self.config.use_bos_eos else ""
+        text = f"{bos}{B_INST} {B_SYS}{system_prompt}{E_SYS}"
+        for user_input, response in chat_history:
+            text += (
+                f"{user_input.strip()} {E_INST} {response.strip()} {eos}{bos} {B_INST} "
+            )
+        text += f"{user_message.strip()} {E_INST}"
+        return text
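
(A hedged illustration of what Llama2Formatter.format produces for a short history. Role.ASSISTANT is assumed from the existing Role enum, and the whitespace in the comment is approximate; with use_bos_eos at its False default, no <s>/</s> markers are emitted.)

    from langroid.language_models.base import LLMMessage, Role
    from langroid.language_models.config import Llama2FormatterConfig
    from langroid.language_models.prompt_formatter.llama2_formatter import Llama2Formatter

    fmt = Llama2Formatter(Llama2FormatterConfig())
    prompt = fmt.format(
        [
            LLMMessage(role=Role.SYSTEM, content="You are a helpful assistant."),
            LLMMessage(role=Role.USER, content="Hi there"),
            LLMMessage(role=Role.ASSISTANT, content="Hello! How can I help you today?"),
            LLMMessage(role=Role.USER, content="What is a neutron star?"),
        ]
    )
    # prompt is roughly:
    # [INST] <<SYS>>
    # You are a helpful assistant.
    # <</SYS>>
    #
    # Hi there [/INST] Hello! How can I help you today? [INST] What is a neutron star? [/INST]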

langroid-0.1.60.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.1.59
+Version: 0.1.60
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani

langroid-0.1.60.dist-info/RECORD
@@ -1,7 +1,7 @@
 langroid/__init__.py,sha256=sEKJ_5WJBAMZApevfeE3gxLK-eotVzJMJlT83G0rAko,30
 langroid/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/base.py,sha256=bnqa_PZsw1_RWDv1w67g1rMrhbGTdt_mTPWcZ_uAZIk,26530
-langroid/agent/chat_agent.py,sha256=Sma0-5XPHDzBOcduthwwlWBmkBgqpk8gGzStF8rcrps,22643
+langroid/agent/chat_agent.py,sha256=eTXkF8ENugOhr39uoaat8yGqtDpkZGcugM36gveSEek,22755
 langroid/agent/chat_document.py,sha256=apaYj38sDu7ALCnsA8tJwoj3Z8zLNmIsNPd4-IujnGk,6153
 langroid/agent/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/junk,sha256=LxfuuW7Cijsg0szAzT81OjWWv1PMNI-6w_-DspVIO2s,339
@@ -33,8 +33,12 @@ langroid/embedding_models/clustering.py,sha256=tZWElUqXl9Etqla0FAa7og96iDKgjqWju
 langroid/embedding_models/models.py,sha256=1xcv9hqmCTsbUbS8v7XeZRsf25Tu79JUoSipIYpvNoo,2765
 langroid/language_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/language_models/azure_openai.py,sha256=9NLr9s9l7JlCHSuMooxYLLgs1d04IwE_bO7r22bhrg8,3458
-langroid/language_models/base.py,sha256=CHSMWJd9kFwMsI38pLmFcPtgkBUUQ3a47sj77kD8-bw,14743
-langroid/language_models/openai_gpt.py,sha256=uTIa30d-ilo4VNlBdUdr9iE3fZo3szz-goFWiKQykNM,22953
+langroid/language_models/base.py,sha256=zHCZIEmIk-sFMq7GWooZe8qq4GjaJ3YRhTzTC4irgGM,19931
+langroid/language_models/config.py,sha256=PXcmEUq52GCDj2sekt8F9E1flWyyNjP2S0LTRs7T6Kg,269
+langroid/language_models/openai_gpt.py,sha256=f9oegEQ8jeQ6emS-Oh5LE6lbuzRsrsOns_QGvdMuQKk,23486
+langroid/language_models/prompt_formatter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langroid/language_models/prompt_formatter/base.py,sha256=2y_GcwhstvB5ih3haS7l5Fv79jVnFJ_vEw1jqWJzB9k,1247
+langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=2et_OIaDbFRf5fzBUki3E4_Di9xH-HwTxt9MMNINoXs,2892
 langroid/language_models/utils.py,sha256=rmnSn-sJ3aKl_wBdeLPkck0Li4Ed6zkCxZYYl7n1V34,4668
 langroid/mytypes.py,sha256=YA42IJcooJnTxAwk-B4FmZ1hqzIIF1ZZKcpUKzBTGGo,1537
 langroid/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -78,7 +82,7 @@ langroid/vector_store/base.py,sha256=QZx3NUNwf2I0r3A7iuoUHIRGbqt_pFGD0hq1R-Yg8iM
 langroid/vector_store/chromadb.py,sha256=s5pQkKjaMP-Tt5A8M10EInFzttaALPbJAq7q4gf0TKg,5235
 langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
 langroid/vector_store/qdrantdb.py,sha256=KRvIIj1IZG2zFqejofMnRs2hT86B-27LgBEnuczdqOU,9072
-langroid-0.1.59.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.1.59.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
-langroid-0.1.59.dist-info/METADATA,sha256=GyR3K0U7ocxuI_I19-hD3mnsuAC3HNt1x8ebD3fAEPE,35745
-langroid-0.1.59.dist-info/RECORD,,
+langroid-0.1.60.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.1.60.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
+langroid-0.1.60.dist-info/METADATA,sha256=-DEq-l5sI_DEuNmYzEQ58cbxak_MNtwoVS-Dw1lreBE,35745
+langroid-0.1.60.dist-info/RECORD,,