langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/__init__.py +70 -0
- langroid/agent/__init__.py +22 -0
- langroid/agent/base.py +120 -33
- langroid/agent/batch.py +134 -35
- langroid/agent/callbacks/__init__.py +0 -0
- langroid/agent/callbacks/chainlit.py +608 -0
- langroid/agent/chat_agent.py +164 -100
- langroid/agent/chat_document.py +19 -2
- langroid/agent/openai_assistant.py +20 -10
- langroid/agent/special/__init__.py +33 -10
- langroid/agent/special/doc_chat_agent.py +521 -108
- langroid/agent/special/lance_doc_chat_agent.py +258 -0
- langroid/agent/special/lance_rag/__init__.py +9 -0
- langroid/agent/special/lance_rag/critic_agent.py +136 -0
- langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
- langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
- langroid/agent/special/lance_tools.py +44 -0
- langroid/agent/special/neo4j/__init__.py +0 -0
- langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
- langroid/agent/special/neo4j/utils/__init__.py +0 -0
- langroid/agent/special/neo4j/utils/system_message.py +46 -0
- langroid/agent/special/relevance_extractor_agent.py +23 -7
- langroid/agent/special/retriever_agent.py +29 -174
- langroid/agent/special/sql/__init__.py +7 -0
- langroid/agent/special/sql/sql_chat_agent.py +47 -23
- langroid/agent/special/sql/utils/__init__.py +11 -0
- langroid/agent/special/sql/utils/description_extractors.py +95 -46
- langroid/agent/special/sql/utils/populate_metadata.py +28 -21
- langroid/agent/special/table_chat_agent.py +43 -9
- langroid/agent/task.py +423 -114
- langroid/agent/tool_message.py +67 -10
- langroid/agent/tools/__init__.py +8 -0
- langroid/agent/tools/duckduckgo_search_tool.py +66 -0
- langroid/agent/tools/google_search_tool.py +11 -0
- langroid/agent/tools/metaphor_search_tool.py +67 -0
- langroid/agent/tools/recipient_tool.py +6 -24
- langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
- langroid/cachedb/__init__.py +6 -0
- langroid/embedding_models/__init__.py +24 -0
- langroid/embedding_models/base.py +9 -1
- langroid/embedding_models/models.py +117 -17
- langroid/embedding_models/protoc/embeddings.proto +19 -0
- langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
- langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
- langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
- langroid/embedding_models/remote_embeds.py +153 -0
- langroid/language_models/__init__.py +22 -0
- langroid/language_models/azure_openai.py +47 -4
- langroid/language_models/base.py +26 -10
- langroid/language_models/config.py +5 -0
- langroid/language_models/openai_gpt.py +407 -121
- langroid/language_models/prompt_formatter/__init__.py +9 -0
- langroid/language_models/prompt_formatter/base.py +4 -6
- langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
- langroid/language_models/utils.py +10 -9
- langroid/mytypes.py +10 -4
- langroid/parsing/__init__.py +33 -1
- langroid/parsing/document_parser.py +259 -63
- langroid/parsing/image_text.py +32 -0
- langroid/parsing/parse_json.py +143 -0
- langroid/parsing/parser.py +20 -7
- langroid/parsing/repo_loader.py +108 -46
- langroid/parsing/search.py +8 -0
- langroid/parsing/table_loader.py +44 -0
- langroid/parsing/url_loader.py +59 -13
- langroid/parsing/urls.py +18 -9
- langroid/parsing/utils.py +130 -9
- langroid/parsing/web_search.py +73 -0
- langroid/prompts/__init__.py +7 -0
- langroid/prompts/chat-gpt4-system-prompt.md +68 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/utils/__init__.py +10 -0
- langroid/utils/algorithms/__init__.py +3 -0
- langroid/utils/configuration.py +0 -1
- langroid/utils/constants.py +4 -0
- langroid/utils/logging.py +2 -5
- langroid/utils/output/__init__.py +15 -2
- langroid/utils/output/status.py +33 -0
- langroid/utils/pandas_utils.py +30 -0
- langroid/utils/pydantic_utils.py +446 -4
- langroid/utils/system.py +36 -1
- langroid/vector_store/__init__.py +34 -2
- langroid/vector_store/base.py +33 -2
- langroid/vector_store/chromadb.py +42 -13
- langroid/vector_store/lancedb.py +226 -60
- langroid/vector_store/meilisearch.py +7 -6
- langroid/vector_store/momento.py +3 -2
- langroid/vector_store/qdrantdb.py +82 -11
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/METADATA +190 -129
- langroid-0.1.219.dist-info/RECORD +127 -0
- langroid/agent/special/recipient_validator_agent.py +0 -157
- langroid/parsing/json.py +0 -64
- langroid/utils/web/selenium_login.py +0 -36
- langroid-0.1.139.dist-info/RECORD +0 -103
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/WHEEL +0 -0
@@ -4,3 +4,12 @@ from ..config import PromptFormatterConfig, Llama2FormatterConfig
|
|
4
4
|
|
5
5
|
from . import base
|
6
6
|
from . import llama2_formatter
|
7
|
+
|
8
|
+
__all__ = [
|
9
|
+
"PromptFormatter",
|
10
|
+
"Llama2Formatter",
|
11
|
+
"PromptFormatterConfig",
|
12
|
+
"Llama2FormatterConfig",
|
13
|
+
"base",
|
14
|
+
"llama2_formatter",
|
15
|
+
]
|
@@ -17,13 +17,11 @@ class PromptFormatter(ABC):
|
|
17
17
|
self.config = config
|
18
18
|
|
19
19
|
@staticmethod
|
20
|
-
def create(
|
21
|
-
from langroid.language_models.
|
22
|
-
|
23
|
-
)
|
20
|
+
def create(formatter: str) -> "PromptFormatter":
|
21
|
+
from langroid.language_models.config import HFPromptFormatterConfig
|
22
|
+
from langroid.language_models.prompt_formatter.hf_formatter import HFFormatter
|
24
23
|
|
25
|
-
|
26
|
-
return formatter_class(config)
|
24
|
+
return HFFormatter(HFPromptFormatterConfig(model_name=formatter))
|
27
25
|
|
28
26
|
@abstractmethod
|
29
27
|
def format(self, messages: List[LLMMessage]) -> str:
|
@@ -0,0 +1,135 @@
|
|
1
|
+
"""
|
2
|
+
Prompt formatter based on HuggingFace `AutoTokenizer.apply_chat_template` method
|
3
|
+
from their Transformers library. It searches the hub for a model matching the
|
4
|
+
specified name, and uses the first one it finds. We assume that all matching
|
5
|
+
models will have the same tokenizer, so we just use the first one.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
import re
|
10
|
+
from typing import Any, List, Set, Tuple, Type
|
11
|
+
|
12
|
+
from jinja2.exceptions import TemplateError
|
13
|
+
|
14
|
+
from langroid.language_models.base import LanguageModel, LLMMessage, Role
|
15
|
+
from langroid.language_models.config import HFPromptFormatterConfig
|
16
|
+
from langroid.language_models.prompt_formatter.base import PromptFormatter
|
17
|
+
|
18
|
+
logger = logging.getLogger(__name__)
|
19
|
+
|
20
|
+
|
21
|
+
def try_import_hf_modules() -> Tuple[Type[Any], Type[Any], Type[Any]]:
|
22
|
+
"""
|
23
|
+
Attempts to import AutoTokenizer (transformers) and HfApi, ModelFilter (huggingface_hub).
|
24
|
+
Returns:
|
25
|
+
The (AutoTokenizer, HfApi, ModelFilter) classes if successful.
|
26
|
+
Raises:
|
27
|
+
ImportError: If the transformers package is not installed.
|
28
|
+
"""
|
29
|
+
try:
|
30
|
+
from huggingface_hub import HfApi, ModelFilter
|
31
|
+
from transformers import AutoTokenizer
|
32
|
+
|
33
|
+
return AutoTokenizer, HfApi, ModelFilter
|
34
|
+
except ImportError:
|
35
|
+
raise ImportError(
|
36
|
+
"""
|
37
|
+
You are trying to use some/all of:
|
38
|
+
HuggingFace transformers.AutoTokenizer,
|
39
|
+
huggingface_hub.HfApi,
|
40
|
+
huggingface_hub.ModelFilter,
|
41
|
+
but these are not installed
|
42
|
+
by default with Langroid. Please install langroid using the
|
43
|
+
`transformers` extra, like so:
|
44
|
+
pip install "langroid[transformers]"
|
45
|
+
or equivalent.
|
46
|
+
"""
|
47
|
+
)
|
48
|
+
|
49
|
+
|
50
|
+
def find_hf_formatter(model_name: str) -> str:
|
51
|
+
AutoTokenizer, HfApi, ModelFilter = try_import_hf_modules()
|
52
|
+
hf_api = HfApi()
|
53
|
+
# try to find a matching model, with progressively shorter prefixes of model_name
|
54
|
+
model_name = model_name.lower().split("/")[-1]
|
55
|
+
parts = re.split("[:\\-_]", model_name)
|
56
|
+
parts = [p.lower() for p in parts if p != ""]
|
57
|
+
for i in range(len(parts), 0, -1):
|
58
|
+
prefix = "-".join(parts[:i])
|
59
|
+
models = hf_api.list_models(
|
60
|
+
filter=ModelFilter(
|
61
|
+
task="text-generation",
|
62
|
+
model_name=prefix,
|
63
|
+
)
|
64
|
+
)
|
65
|
+
try:
|
66
|
+
mdl = next(models)
|
67
|
+
except StopIteration:
|
68
|
+
continue
|
69
|
+
|
70
|
+
tokenizer = AutoTokenizer.from_pretrained(mdl.id)
|
71
|
+
if tokenizer.chat_template is not None:
|
72
|
+
return str(mdl.id)
|
73
|
+
return ""
|
74
|
+
|
75
|
+
|
76
|
+
class HFFormatter(PromptFormatter):
|
77
|
+
models: Set[str] = set() # which models have been used for formatting
|
78
|
+
|
79
|
+
def __init__(self, config: HFPromptFormatterConfig):
|
80
|
+
super().__init__(config)
|
81
|
+
AutoTokenizer, HfApi, ModelFilter = try_import_hf_modules()
|
82
|
+
self.config: HFPromptFormatterConfig = config
|
83
|
+
hf_api = HfApi()
|
84
|
+
models = hf_api.list_models(
|
85
|
+
filter=ModelFilter(
|
86
|
+
task="text-generation",
|
87
|
+
model_name=config.model_name,
|
88
|
+
)
|
89
|
+
)
|
90
|
+
try:
|
91
|
+
mdl = next(models)
|
92
|
+
except StopIteration:
|
93
|
+
raise ValueError(f"Model {config.model_name} not found on HuggingFace Hub")
|
94
|
+
|
95
|
+
self.tokenizer = AutoTokenizer.from_pretrained(mdl.id)
|
96
|
+
if self.tokenizer.chat_template is None:
|
97
|
+
raise ValueError(
|
98
|
+
f"Model {config.model_name} does not support chat template"
|
99
|
+
)
|
100
|
+
elif mdl.id not in HFFormatter.models:
|
101
|
+
# only warn if this is the first time we've used this mdl.id
|
102
|
+
logger.warning(
|
103
|
+
f"""
|
104
|
+
Using HuggingFace {mdl.id} for prompt formatting:
|
105
|
+
This is the CHAT TEMPLATE. If this is not what you intended,
|
106
|
+
consider specifying a more complete model name for the formatter.
|
107
|
+
|
108
|
+
{self.tokenizer.chat_template}
|
109
|
+
"""
|
110
|
+
)
|
111
|
+
HFFormatter.models.add(mdl.id)
|
112
|
+
|
113
|
+
def format(self, messages: List[LLMMessage]) -> str:
|
114
|
+
sys_msg, chat_msgs, user_msg = LanguageModel.get_chat_history_components(
|
115
|
+
messages
|
116
|
+
)
|
117
|
+
# build msg dicts expected by AutoTokenizer.apply_chat_template
|
118
|
+
sys_msg_dict = dict(role=Role.SYSTEM.value, content=sys_msg)
|
119
|
+
chat_dicts = []
|
120
|
+
for user, assistant in chat_msgs:
|
121
|
+
chat_dicts.append(dict(role=Role.USER.value, content=user))
|
122
|
+
chat_dicts.append(dict(role=Role.ASSISTANT.value, content=assistant))
|
123
|
+
chat_dicts.append(dict(role=Role.USER.value, content=user_msg))
|
124
|
+
all_dicts = [sys_msg_dict] + chat_dicts
|
125
|
+
try:
|
126
|
+
# apply chat template
|
127
|
+
result = self.tokenizer.apply_chat_template(all_dicts, tokenize=False)
|
128
|
+
except TemplateError:
|
129
|
+
# this likely means the model doesn't support a system msg,
|
130
|
+
# so combine it with the first user msg
|
131
|
+
first_user_msg = chat_msgs[0][0] if len(chat_msgs) > 0 else user_msg
|
132
|
+
first_user_msg = sys_msg + "\n\n" + first_user_msg
|
133
|
+
chat_dicts[0] = dict(role=Role.USER.value, content=first_user_msg)
|
134
|
+
result = self.tokenizer.apply_chat_template(chat_dicts, tokenize=False)
|
135
|
+
return str(result)
|
@@ -25,6 +25,7 @@ def retry_with_exponential_backoff(
|
|
25
25
|
requests.exceptions.RequestException,
|
26
26
|
openai.APITimeoutError,
|
27
27
|
openai.RateLimitError,
|
28
|
+
openai.AuthenticationError,
|
28
29
|
openai.APIError,
|
29
30
|
aiohttp.ServerTimeoutError,
|
30
31
|
asyncio.TimeoutError,
|
@@ -47,6 +48,10 @@ def retry_with_exponential_backoff(
|
|
47
48
|
# e.g. when context is too long
|
48
49
|
logger.error(f"OpenAI API request failed with error: {e}.")
|
49
50
|
raise e
|
51
|
+
except openai.AuthenticationError as e:
|
52
|
+
# do not retry when there's an auth error
|
53
|
+
logger.error(f"OpenAI API request failed with error: {e}.")
|
54
|
+
raise e
|
50
55
|
|
51
56
|
# Retry on specified errors
|
52
57
|
except errors as e:
|
@@ -85,6 +90,7 @@ def async_retry_with_exponential_backoff(
|
|
85
90
|
errors: tuple = ( # type: ignore
|
86
91
|
openai.APITimeoutError,
|
87
92
|
openai.RateLimitError,
|
93
|
+
openai.AuthenticationError,
|
88
94
|
openai.APIError,
|
89
95
|
aiohttp.ServerTimeoutError,
|
90
96
|
asyncio.TimeoutError,
|
@@ -108,7 +114,10 @@ def async_retry_with_exponential_backoff(
|
|
108
114
|
# e.g. when context is too long
|
109
115
|
logger.error(f"OpenAI API request failed with error: {e}.")
|
110
116
|
raise e
|
111
|
-
|
117
|
+
except openai.AuthenticationError as e:
|
118
|
+
# do not retry when there's an auth error
|
119
|
+
logger.error(f"OpenAI API request failed with error: {e}.")
|
120
|
+
raise e
|
112
121
|
# Retry on specified errors
|
113
122
|
except errors as e:
|
114
123
|
# Increment retries
|
@@ -134,11 +143,3 @@ def async_retry_with_exponential_backoff(
|
|
134
143
|
raise e
|
135
144
|
|
136
145
|
return wrapper
|
137
|
-
|
138
|
-
|
139
|
-
# @retry_with_exponential_backoff
|
140
|
-
# def completions_with_backoff(**kwargs):
|
141
|
-
# return openai.Completion.create(**kwargs)
|
142
|
-
|
143
|
-
|
144
|
-
# completions_with_backoff(model="text-davinci-002", prompt="Once upon a time,")
|
langroid/mytypes.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import hashlib
|
2
2
|
import uuid
|
3
3
|
from enum import Enum
|
4
|
+
from textwrap import dedent
|
4
5
|
from typing import Any, Callable, Dict, List, Union
|
5
6
|
|
6
7
|
from pydantic import BaseModel, Extra
|
@@ -27,12 +28,12 @@ class DocMetaData(BaseModel):
|
|
27
28
|
|
28
29
|
source: str = "context"
|
29
30
|
is_chunk: bool = False # if it is a chunk, don't split
|
30
|
-
id: str
|
31
|
+
id: str = "" # unique id for the document
|
31
32
|
window_ids: List[str] = [] # for RAG: ids of chunks around this one
|
32
33
|
|
33
|
-
def
|
34
|
+
def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
|
34
35
|
"""
|
35
|
-
|
36
|
+
Special dict method to convert bool fields to int, to appease some
|
36
37
|
downstream libraries, e.g. Chroma which complains about bool fields in
|
37
38
|
metadata.
|
38
39
|
"""
|
@@ -89,4 +90,9 @@ class Document(BaseModel):
|
|
89
90
|
def __str__(self) -> str:
|
90
91
|
# TODO: make metadata a pydantic model to enforce "source"
|
91
92
|
self.metadata.json()
|
92
|
-
return
|
93
|
+
return dedent(
|
94
|
+
f"""
|
95
|
+
CONTENT: {self.content}
|
96
|
+
SOURCE:{self.metadata.source}
|
97
|
+
"""
|
98
|
+
)
|
langroid/parsing/__init__.py
CHANGED
@@ -2,7 +2,7 @@ from . import parser
|
|
2
2
|
from . import agent_chats
|
3
3
|
from . import code_parser
|
4
4
|
from . import document_parser
|
5
|
-
from . import
|
5
|
+
from . import parse_json
|
6
6
|
from . import para_sentence_split
|
7
7
|
from . import repo_loader
|
8
8
|
from . import url_loader
|
@@ -12,3 +12,35 @@ from . import utils
|
|
12
12
|
from . import search
|
13
13
|
from . import web_search
|
14
14
|
from . import spider
|
15
|
+
|
16
|
+
from .parser import (
|
17
|
+
Splitter,
|
18
|
+
PdfParsingConfig,
|
19
|
+
DocxParsingConfig,
|
20
|
+
DocParsingConfig,
|
21
|
+
ParsingConfig,
|
22
|
+
Parser,
|
23
|
+
)
|
24
|
+
|
25
|
+
__all__ = [
|
26
|
+
"parser",
|
27
|
+
"agent_chats",
|
28
|
+
"code_parser",
|
29
|
+
"document_parser",
|
30
|
+
"parse_json",
|
31
|
+
"para_sentence_split",
|
32
|
+
"repo_loader",
|
33
|
+
"url_loader",
|
34
|
+
"table_loader",
|
35
|
+
"urls",
|
36
|
+
"utils",
|
37
|
+
"search",
|
38
|
+
"web_search",
|
39
|
+
"spider",
|
40
|
+
"Splitter",
|
41
|
+
"PdfParsingConfig",
|
42
|
+
"DocxParsingConfig",
|
43
|
+
"DocParsingConfig",
|
44
|
+
"ParsingConfig",
|
45
|
+
"Parser",
|
46
|
+
]
|