langroid 0.1.85__py3-none-any.whl → 0.1.219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. langroid/__init__.py +95 -0
  2. langroid/agent/__init__.py +40 -0
  3. langroid/agent/base.py +222 -91
  4. langroid/agent/batch.py +264 -0
  5. langroid/agent/callbacks/chainlit.py +608 -0
  6. langroid/agent/chat_agent.py +247 -101
  7. langroid/agent/chat_document.py +41 -4
  8. langroid/agent/openai_assistant.py +842 -0
  9. langroid/agent/special/__init__.py +50 -0
  10. langroid/agent/special/doc_chat_agent.py +837 -141
  11. langroid/agent/special/lance_doc_chat_agent.py +258 -0
  12. langroid/agent/special/lance_rag/__init__.py +9 -0
  13. langroid/agent/special/lance_rag/critic_agent.py +136 -0
  14. langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
  15. langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
  16. langroid/agent/special/lance_tools.py +44 -0
  17. langroid/agent/special/neo4j/__init__.py +0 -0
  18. langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
  19. langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
  20. langroid/agent/special/neo4j/utils/__init__.py +0 -0
  21. langroid/agent/special/neo4j/utils/system_message.py +46 -0
  22. langroid/agent/special/relevance_extractor_agent.py +127 -0
  23. langroid/agent/special/retriever_agent.py +32 -198
  24. langroid/agent/special/sql/__init__.py +11 -0
  25. langroid/agent/special/sql/sql_chat_agent.py +47 -23
  26. langroid/agent/special/sql/utils/__init__.py +22 -0
  27. langroid/agent/special/sql/utils/description_extractors.py +95 -46
  28. langroid/agent/special/sql/utils/populate_metadata.py +28 -21
  29. langroid/agent/special/table_chat_agent.py +43 -9
  30. langroid/agent/task.py +475 -122
  31. langroid/agent/tool_message.py +75 -13
  32. langroid/agent/tools/__init__.py +13 -0
  33. langroid/agent/tools/duckduckgo_search_tool.py +66 -0
  34. langroid/agent/tools/google_search_tool.py +11 -0
  35. langroid/agent/tools/metaphor_search_tool.py +67 -0
  36. langroid/agent/tools/recipient_tool.py +16 -29
  37. langroid/agent/tools/run_python_code.py +60 -0
  38. langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
  39. langroid/agent/tools/segment_extract_tool.py +36 -0
  40. langroid/cachedb/__init__.py +9 -0
  41. langroid/cachedb/base.py +22 -2
  42. langroid/cachedb/momento_cachedb.py +26 -2
  43. langroid/cachedb/redis_cachedb.py +78 -11
  44. langroid/embedding_models/__init__.py +34 -0
  45. langroid/embedding_models/base.py +21 -2
  46. langroid/embedding_models/models.py +120 -18
  47. langroid/embedding_models/protoc/embeddings.proto +19 -0
  48. langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
  49. langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
  50. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
  51. langroid/embedding_models/remote_embeds.py +153 -0
  52. langroid/language_models/__init__.py +45 -0
  53. langroid/language_models/azure_openai.py +80 -27
  54. langroid/language_models/base.py +117 -12
  55. langroid/language_models/config.py +5 -0
  56. langroid/language_models/openai_assistants.py +3 -0
  57. langroid/language_models/openai_gpt.py +558 -174
  58. langroid/language_models/prompt_formatter/__init__.py +15 -0
  59. langroid/language_models/prompt_formatter/base.py +4 -6
  60. langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
  61. langroid/language_models/utils.py +18 -21
  62. langroid/mytypes.py +25 -8
  63. langroid/parsing/__init__.py +46 -0
  64. langroid/parsing/document_parser.py +260 -63
  65. langroid/parsing/image_text.py +32 -0
  66. langroid/parsing/parse_json.py +143 -0
  67. langroid/parsing/parser.py +122 -59
  68. langroid/parsing/repo_loader.py +114 -52
  69. langroid/parsing/search.py +68 -63
  70. langroid/parsing/spider.py +3 -2
  71. langroid/parsing/table_loader.py +44 -0
  72. langroid/parsing/url_loader.py +59 -11
  73. langroid/parsing/urls.py +85 -37
  74. langroid/parsing/utils.py +298 -4
  75. langroid/parsing/web_search.py +73 -0
  76. langroid/prompts/__init__.py +11 -0
  77. langroid/prompts/chat-gpt4-system-prompt.md +68 -0
  78. langroid/prompts/prompts_config.py +1 -1
  79. langroid/utils/__init__.py +17 -0
  80. langroid/utils/algorithms/__init__.py +3 -0
  81. langroid/utils/algorithms/graph.py +103 -0
  82. langroid/utils/configuration.py +36 -5
  83. langroid/utils/constants.py +4 -0
  84. langroid/utils/globals.py +2 -2
  85. langroid/utils/logging.py +2 -5
  86. langroid/utils/output/__init__.py +21 -0
  87. langroid/utils/output/printing.py +47 -1
  88. langroid/utils/output/status.py +33 -0
  89. langroid/utils/pandas_utils.py +30 -0
  90. langroid/utils/pydantic_utils.py +616 -2
  91. langroid/utils/system.py +98 -0
  92. langroid/vector_store/__init__.py +40 -0
  93. langroid/vector_store/base.py +203 -6
  94. langroid/vector_store/chromadb.py +59 -32
  95. langroid/vector_store/lancedb.py +463 -0
  96. langroid/vector_store/meilisearch.py +10 -7
  97. langroid/vector_store/momento.py +262 -0
  98. langroid/vector_store/qdrantdb.py +104 -22
  99. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/METADATA +329 -149
  100. langroid-0.1.219.dist-info/RECORD +127 -0
  101. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/WHEEL +1 -1
  102. langroid/agent/special/recipient_validator_agent.py +0 -157
  103. langroid/parsing/json.py +0 -64
  104. langroid/utils/web/selenium_login.py +0 -36
  105. langroid-0.1.85.dist-info/RECORD +0 -94
  106. /langroid/{scripts → agent/callbacks}/__init__.py +0 -0
  107. {langroid-0.1.85.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
@@ -0,0 +1,15 @@
1
+ from .base import PromptFormatter
2
+ from .llama2_formatter import Llama2Formatter
3
+ from ..config import PromptFormatterConfig, Llama2FormatterConfig
4
+
5
+ from . import base
6
+ from . import llama2_formatter
7
+
8
+ __all__ = [
9
+ "PromptFormatter",
10
+ "Llama2Formatter",
11
+ "PromptFormatterConfig",
12
+ "Llama2FormatterConfig",
13
+ "base",
14
+ "llama2_formatter",
15
+ ]
@@ -17,13 +17,11 @@ class PromptFormatter(ABC):
17
17
  self.config = config
18
18
 
19
19
  @staticmethod
20
- def create(config: PromptFormatterConfig) -> "PromptFormatter":
21
- from langroid.language_models.prompt_formatter.llama2_formatter import (
22
- Llama2Formatter,
23
- )
20
+ def create(formatter: str) -> "PromptFormatter":
21
+ from langroid.language_models.config import HFPromptFormatterConfig
22
+ from langroid.language_models.prompt_formatter.hf_formatter import HFFormatter
24
23
 
25
- formatter_class = dict(llama2=Llama2Formatter).get(config.type, Llama2Formatter)
26
- return formatter_class(config)
24
+ return HFFormatter(HFPromptFormatterConfig(model_name=formatter))
27
25
 
28
26
  @abstractmethod
29
27
  def format(self, messages: List[LLMMessage]) -> str:
@@ -0,0 +1,135 @@
1
+ """
2
+ Prompt formatter based on HuggingFace `AutoTokenizer.apply_chat_template` method
3
+ from their Transformers library. It searches the hub for a model matching the
4
+ specified name, and uses the first one it finds. We assume that all matching
5
+ models will have the same tokenizer, so we just use the first one.
6
+ """
7
+
8
+ import logging
9
+ import re
10
+ from typing import Any, List, Set, Tuple, Type
11
+
12
+ from jinja2.exceptions import TemplateError
13
+
14
+ from langroid.language_models.base import LanguageModel, LLMMessage, Role
15
+ from langroid.language_models.config import HFPromptFormatterConfig
16
+ from langroid.language_models.prompt_formatter.base import PromptFormatter
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
def try_import_hf_modules() -> Tuple[Type[Any], Type[Any], Type[Any]]:
    """
    Import the HuggingFace classes needed for chat-template prompt formatting.

    The imports are done inside this function, so the optional packages are
    only needed when the function is actually called.

    Returns:
        The tuple of classes `(AutoTokenizer, HfApi, ModelFilter)`, where
        `AutoTokenizer` is imported from `transformers`, and `HfApi` and
        `ModelFilter` are imported from `huggingface_hub`.
    Raises:
        ImportError: If any of these names cannot be imported, e.g. because
            the `transformers` or `huggingface_hub` package is not installed.
    """
    try:
        from huggingface_hub import HfApi, ModelFilter
        from transformers import AutoTokenizer
    except ImportError as e:
        # chain the original error so the user can see which import failed
        raise ImportError(
            """
            You are trying to use some/all of:
            HuggingFace transformers.AutoTokenizer,
            huggingface_hub.HfApi,
            huggingface_hub.ModelFilter,
            but these are not installed
            by default with Langroid. Please install langroid using the
            `transformers` extra, like so:
            pip install "langroid[transformers]"
            or equivalent.
            """
        ) from e

    return AutoTokenizer, HfApi, ModelFilter
48
+
49
+
50
def find_hf_formatter(model_name: str) -> str:
    """
    Find a HuggingFace Hub model, matching (a prefix of) `model_name`, whose
    tokenizer defines a chat template.

    The name is lower-cased, reduced to the part after the last "/", and split
    on ":", "-" and "_". Progressively shorter prefixes of the resulting parts
    (re-joined with "-") are then searched on the hub among "text-generation"
    models. For each prefix only the first search result is examined.

    Args:
        model_name: name of the model to find a chat-template formatter for.
    Returns:
        The hub id of the first examined model whose tokenizer has a chat
        template, or "" if no such model is found.
    Raises:
        ImportError: if the required HuggingFace classes cannot be imported.
    """
    AutoTokenizer, HfApi, ModelFilter = try_import_hf_modules()
    hf_api = HfApi()
    # try to find a matching model, with progressively shorter prefixes
    # of model_name
    base_name = model_name.lower().split("/")[-1]
    # base_name is already lower-case; just drop empty pieces, which arise
    # from consecutive or leading/trailing separators
    parts = [p for p in re.split(r"[:\-_]", base_name) if p]
    for i in range(len(parts), 0, -1):
        prefix = "-".join(parts[:i])
        models = hf_api.list_models(
            filter=ModelFilter(
                task="text-generation",
                model_name=prefix,
            )
        )
        # iter() makes this work whether list_models hands back an iterator
        # or a plain list; None signals "no match for this prefix"
        mdl = next(iter(models), None)
        if mdl is None:
            continue

        tokenizer = AutoTokenizer.from_pretrained(mdl.id)
        if tokenizer.chat_template is not None:
            return str(mdl.id)
    return ""
74
+
75
+
76
class HFFormatter(PromptFormatter):
    """
    Prompt formatter that renders a list of chat messages into a single
    prompt string, using the chat template of a HuggingFace tokenizer.
    """

    # which models have been used for formatting; class-level, so the
    # chat-template warning is logged only once per model id
    models: Set[str] = set()

    def __init__(self, config: HFPromptFormatterConfig):
        """
        Look up `config.model_name` among the "text-generation" models on the
        HuggingFace Hub, and load the tokenizer of the first match.

        Args:
            config: formatter config; its `model_name` is the name searched
                for on the hub.
        Raises:
            ImportError: if the required HuggingFace classes cannot be imported.
            ValueError: if no matching model is found, or if the tokenizer of
                the first match has no chat template.
        """
        super().__init__(config)
        AutoTokenizer, HfApi, ModelFilter = try_import_hf_modules()
        self.config: HFPromptFormatterConfig = config
        hf_api = HfApi()
        models = hf_api.list_models(
            filter=ModelFilter(
                task="text-generation",
                model_name=config.model_name,
            )
        )
        # iter() makes this work whether list_models hands back an iterator
        # or a plain list; None signals "no match"
        mdl = next(iter(models), None)
        if mdl is None:
            raise ValueError(f"Model {config.model_name} not found on HuggingFace Hub")

        self.tokenizer = AutoTokenizer.from_pretrained(mdl.id)
        if self.tokenizer.chat_template is None:
            raise ValueError(
                f"Model {config.model_name} does not support chat template"
            )
        elif mdl.id not in HFFormatter.models:
            # only warn if this is the first time we've used this mdl.id
            logger.warning(
                f"""
                Using HuggingFace {mdl.id} for prompt formatting:
                This is the CHAT TEMPLATE. If this is not what you intended,
                consider specifying a more complete model name for the formatter.

                {self.tokenizer.chat_template}
                """
            )
            HFFormatter.models.add(mdl.id)

    def format(self, messages: List[LLMMessage]) -> str:
        """
        Render `messages` as one prompt string via the tokenizer's chat template.

        Args:
            messages: the chat history, split into a system message, earlier
                (user, assistant) turns and the final user message by
                `LanguageModel.get_chat_history_components`.
        Returns:
            The prompt string produced by `apply_chat_template` (not tokenized).
        """
        sys_msg, chat_msgs, user_msg = LanguageModel.get_chat_history_components(
            messages
        )
        # build msg dicts expected by AutoTokenizer.apply_chat_template
        sys_msg_dict = dict(role=Role.SYSTEM.value, content=sys_msg)
        chat_dicts = []
        for user, assistant in chat_msgs:
            chat_dicts.append(dict(role=Role.USER.value, content=user))
            chat_dicts.append(dict(role=Role.ASSISTANT.value, content=assistant))
        # the final user message is always appended, so chat_dicts is never empty
        chat_dicts.append(dict(role=Role.USER.value, content=user_msg))
        all_dicts = [sys_msg_dict] + chat_dicts
        try:
            # apply chat template
            result = self.tokenizer.apply_chat_template(all_dicts, tokenize=False)
        except TemplateError:
            # this likely means the model doesn't support a system msg,
            # so combine it with the first user msg
            first_user_msg = chat_msgs[0][0] if len(chat_msgs) > 0 else user_msg
            first_user_msg = sys_msg + "\n\n" + first_user_msg
            # chat_dicts[0] is the first user msg: either the first earlier
            # turn, or the final user msg when there are no earlier turns
            chat_dicts[0] = dict(role=Role.USER.value, content=first_user_msg)
            result = self.tokenizer.apply_chat_template(chat_dicts, tokenize=False)
        return str(result)
@@ -23,11 +23,10 @@ def retry_with_exponential_backoff(
23
23
  max_retries: int = 10,
24
24
  errors: tuple = ( # type: ignore
25
25
  requests.exceptions.RequestException,
26
- openai.error.Timeout,
27
- openai.error.RateLimitError,
28
- openai.error.APIError,
29
- openai.error.ServiceUnavailableError,
30
- openai.error.TryAgain,
26
+ openai.APITimeoutError,
27
+ openai.RateLimitError,
28
+ openai.AuthenticationError,
29
+ openai.APIError,
31
30
  aiohttp.ServerTimeoutError,
32
31
  asyncio.TimeoutError,
33
32
  ),
@@ -44,11 +43,15 @@ def retry_with_exponential_backoff(
44
43
  try:
45
44
  return func(*args, **kwargs)
46
45
 
47
- except openai.error.InvalidRequestError as e:
46
+ except openai.BadRequestError as e:
48
47
  # do not retry when the request itself is invalid,
49
48
  # e.g. when context is too long
50
49
  logger.error(f"OpenAI API request failed with error: {e}.")
51
50
  raise e
51
+ except openai.AuthenticationError as e:
52
+ # do not retry when there's an auth error
53
+ logger.error(f"OpenAI API request failed with error: {e}.")
54
+ raise e
52
55
 
53
56
  # Retry on specified errors
54
57
  except errors as e:
@@ -85,11 +88,10 @@ def async_retry_with_exponential_backoff(
85
88
  jitter: bool = True,
86
89
  max_retries: int = 10,
87
90
  errors: tuple = ( # type: ignore
88
- openai.error.Timeout,
89
- openai.error.RateLimitError,
90
- openai.error.APIError,
91
- openai.error.ServiceUnavailableError,
92
- openai.error.TryAgain,
91
+ openai.APITimeoutError,
92
+ openai.RateLimitError,
93
+ openai.AuthenticationError,
94
+ openai.APIError,
93
95
  aiohttp.ServerTimeoutError,
94
96
  asyncio.TimeoutError,
95
97
  ),
@@ -107,12 +109,15 @@ def async_retry_with_exponential_backoff(
107
109
  result = await func(*args, **kwargs)
108
110
  return result
109
111
 
110
- except openai.error.InvalidRequestError as e:
112
+ except openai.BadRequestError as e:
111
113
  # do not retry when the request itself is invalid,
112
114
  # e.g. when context is too long
113
115
  logger.error(f"OpenAI API request failed with error: {e}.")
114
116
  raise e
115
-
117
+ except openai.AuthenticationError as e:
118
+ # do not retry when there's an auth error
119
+ logger.error(f"OpenAI API request failed with error: {e}.")
120
+ raise e
116
121
  # Retry on specified errors
117
122
  except errors as e:
118
123
  # Increment retries
@@ -138,11 +143,3 @@ def async_retry_with_exponential_backoff(
138
143
  raise e
139
144
 
140
145
  return wrapper
141
-
142
-
143
- # @retry_with_exponential_backoff
144
- # def completions_with_backoff(**kwargs):
145
- # return openai.Completion.create(**kwargs)
146
-
147
-
148
- # completions_with_backoff(model="text-davinci-002", prompt="Once upon a time,")
langroid/mytypes.py CHANGED
@@ -1,13 +1,15 @@
1
1
  import hashlib
2
2
  import uuid
3
3
  from enum import Enum
4
- from typing import Any, Dict, List, Union
4
+ from textwrap import dedent
5
+ from typing import Any, Callable, Dict, List, Union
5
6
 
6
7
  from pydantic import BaseModel, Extra
7
8
 
8
9
  Number = Union[int, float]
9
10
  Embedding = List[Number]
10
11
  Embeddings = List[Embedding]
12
+ EmbeddingFunction = Callable[[List[str]], Embeddings]
11
13
 
12
14
 
13
15
  class Entity(str, Enum):
@@ -26,10 +28,12 @@ class DocMetaData(BaseModel):
26
28
 
27
29
  source: str = "context"
28
30
  is_chunk: bool = False # if it is a chunk, don't split
31
+ id: str = "" # unique id for the document
32
+ window_ids: List[str] = [] # for RAG: ids of chunks around this one
29
33
 
30
- def dict(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
34
+ def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
31
35
  """
32
- Override dict method to convert bool fields to int, to appease some
36
+ Special dict method to convert bool fields to int, to appease some
33
37
  downstream libraries, e.g. Chroma which complains about bool fields in
34
38
  metadata.
35
39
  """
@@ -51,9 +55,10 @@ class Document(BaseModel):
51
55
  content: str
52
56
  metadata: DocMetaData
53
57
 
54
- def _unique_hash_id(self) -> str:
58
+ @staticmethod
59
+ def hash_id(doc: str) -> str:
55
60
  # Encode the document as UTF-8
56
- doc_utf8 = str(self).encode("utf-8")
61
+ doc_utf8 = str(doc).encode("utf-8")
57
62
 
58
63
  # Create a SHA256 hash object
59
64
  sha256_hash = hashlib.sha256()
@@ -69,8 +74,15 @@ class Document(BaseModel):
69
74
 
70
75
  return str(hash_uuid)
71
76
 
72
- def id(self) -> Any:
73
- if hasattr(self.metadata, "id"):
77
+ def _unique_hash_id(self) -> str:
78
+ return self.hash_id(str(self))
79
+
80
+ def id(self) -> str:
81
+ if (
82
+ hasattr(self.metadata, "id")
83
+ and self.metadata.id is not None
84
+ and self.metadata.id != ""
85
+ ):
74
86
  return self.metadata.id
75
87
  else:
76
88
  return self._unique_hash_id()
@@ -78,4 +90,9 @@ class Document(BaseModel):
78
90
  def __str__(self) -> str:
79
91
  # TODO: make metadata a pydantic model to enforce "source"
80
92
  self.metadata.json()
81
- return f"{self.content} {self.metadata.json()}"
93
+ return dedent(
94
+ f"""
95
+ CONTENT: {self.content}
96
+ SOURCE:{self.metadata.source}
97
+ """
98
+ )
@@ -0,0 +1,46 @@
1
+ from . import parser
2
+ from . import agent_chats
3
+ from . import code_parser
4
+ from . import document_parser
5
+ from . import parse_json
6
+ from . import para_sentence_split
7
+ from . import repo_loader
8
+ from . import url_loader
9
+ from . import table_loader
10
+ from . import urls
11
+ from . import utils
12
+ from . import search
13
+ from . import web_search
14
+ from . import spider
15
+
16
+ from .parser import (
17
+ Splitter,
18
+ PdfParsingConfig,
19
+ DocxParsingConfig,
20
+ DocParsingConfig,
21
+ ParsingConfig,
22
+ Parser,
23
+ )
24
+
25
+ __all__ = [
26
+ "parser",
27
+ "agent_chats",
28
+ "code_parser",
29
+ "document_parser",
30
+ "parse_json",
31
+ "para_sentence_split",
32
+ "repo_loader",
33
+ "url_loader",
34
+ "table_loader",
35
+ "urls",
36
+ "utils",
37
+ "search",
38
+ "web_search",
39
+ "spider",
40
+ "Splitter",
41
+ "PdfParsingConfig",
42
+ "DocxParsingConfig",
43
+ "DocParsingConfig",
44
+ "ParsingConfig",
45
+ "Parser",
46
+ ]