langroid 0.1.262__py3-none-any.whl → 0.1.263__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +1 -1
- langroid/agent/callbacks/chainlit.py +1 -2
- langroid/agent/chat_document.py +2 -3
- langroid/agent/openai_assistant.py +1 -1
- langroid/agent/special/doc_chat_agent.py +1 -1
- langroid/agent/special/lance_rag/query_planner_agent.py +1 -1
- langroid/agent/special/lance_tools.py +1 -2
- langroid/agent/special/neo4j/neo4j_chat_agent.py +1 -1
- langroid/agent/special/sql/utils/tools.py +1 -1
- langroid/agent/task.py +5 -2
- langroid/agent/tool_message.py +2 -2
- langroid/cachedb/base.py +1 -1
- langroid/embedding_models/base.py +1 -1
- langroid/language_models/base.py +2 -111
- langroid/language_models/config.py +1 -1
- langroid/language_models/openai_gpt.py +1 -1
- langroid/mytypes.py +1 -1
- langroid/parsing/code_parser.py +1 -1
- langroid/parsing/parser.py +1 -1
- langroid/parsing/repo_loader.py +1 -1
- langroid/parsing/urls.py +2 -1
- langroid/prompts/__init__.py +0 -2
- langroid/prompts/prompts_config.py +1 -1
- langroid/pydantic_v1/__init__.py +10 -0
- langroid/pydantic_v1/main.py +4 -0
- langroid/utils/configuration.py +2 -1
- langroid/utils/constants.py +1 -1
- langroid/utils/globals.py +1 -1
- langroid/utils/pydantic_utils.py +1 -1
- langroid/vector_store/base.py +1 -1
- langroid/vector_store/lancedb.py +39 -9
- {langroid-0.1.262.dist-info → langroid-0.1.263.dist-info}/METADATA +19 -17
- {langroid-0.1.262.dist-info → langroid-0.1.263.dist-info}/RECORD +36 -36
- pyproject.toml +8 -5
- langroid/parsing/parser.pyi +0 -56
- langroid/prompts/transforms.py +0 -84
- {langroid-0.1.262.dist-info → langroid-0.1.263.dist-info}/LICENSE +0 -0
- {langroid-0.1.262.dist-info → langroid-0.1.263.dist-info}/WHEEL +0 -0
langroid/agent/base.py
CHANGED
@@ -20,7 +20,6 @@ from typing import (
     no_type_check,
 )
 
-from pydantic import BaseSettings, ValidationError, validator
 from rich import print
 from rich.console import Console
 from rich.markup import escape
@@ -41,6 +40,7 @@ from langroid.mytypes import Entity
 from langroid.parsing.parse_json import extract_top_level_json
 from langroid.parsing.parser import Parser, ParsingConfig
 from langroid.prompts.prompts_config import PromptsConfig
+from langroid.pydantic_v1 import BaseSettings, ValidationError, validator
 from langroid.utils.configuration import settings
 from langroid.utils.constants import NO_ANSWER
 from langroid.utils.output import status
langroid/agent/callbacks/chainlit.py
CHANGED
@@ -7,9 +7,8 @@ import logging
 import textwrap
 from typing import Any, Callable, Dict, List, Literal, Optional, no_type_check
 
-from pydantic import BaseSettings
-
 from langroid.exceptions import LangroidImportError
+from langroid.pydantic_v1 import BaseSettings
 
 try:
     import chainlit as cl
langroid/agent/chat_document.py
CHANGED
@@ -2,8 +2,6 @@ import json
 from enum import Enum
 from typing import List, Optional, Union
 
-from pydantic import BaseModel, Extra
-
 from langroid.agent.tool_message import ToolMessage
 from langroid.language_models.base import (
     LLMFunctionCall,
@@ -15,6 +13,7 @@ from langroid.language_models.base import (
 from langroid.mytypes import DocMetaData, Document, Entity
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.parse_json import extract_top_level_json, top_level_json_field
+from langroid.pydantic_v1 import BaseModel, Extra
 from langroid.utils.output.printing import shorten_text
 
 
@@ -48,7 +47,7 @@ class ChatDocMetaData(DocMetaData):
     block: None | Entity = None
     sender_name: str = ""
     recipient: str = ""
-    usage: Optional[LLMTokenUsage]
+    usage: Optional[LLMTokenUsage] = None
     cached: bool = False
     displayed: bool = False
     has_citation: bool = False
langroid/agent/openai_assistant.py
CHANGED
@@ -15,7 +15,6 @@ from openai.types.beta.assistant_update_params import (
 )
 from openai.types.beta.threads import Message, Run
 from openai.types.beta.threads.runs import RunStep
-from pydantic import BaseModel
 from rich import print
 
 from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
@@ -27,6 +26,7 @@ from langroid.language_models.openai_gpt import (
     OpenAIGPT,
     OpenAIGPTConfig,
 )
+from langroid.pydantic_v1 import BaseModel
 from langroid.utils.configuration import settings
 from langroid.utils.system import generate_user_id, update_hash
 
langroid/agent/special/doc_chat_agent.py
CHANGED
@@ -1313,7 +1313,7 @@ class DocChatAgent(ChatAgent):
             meta.update(extracts[0].metadata)
             return ChatDocument(
                 content="\n\n".join([e.content for e in extracts]),
-                metadata=ChatDocMetaData(**meta),
+                metadata=ChatDocMetaData(**meta),  # type: ignore
             )
         response = self.get_summary_answer(query, extracts)
 
langroid/agent/special/lance_rag/query_planner_agent.py
CHANGED
@@ -191,7 +191,7 @@ class LanceQueryPlanAgent(ChatAgent):
         # save result, to be used in query_plan_feedback()
         self.result = msg.content
         # assemble QueryPlanAnswerTool...
-        query_plan_answer_tool = QueryPlanAnswerTool(
+        query_plan_answer_tool = QueryPlanAnswerTool(  # type: ignore
             plan=self.curr_query_plan,
             answer=self.result,
         )
langroid/agent/special/neo4j/neo4j_chat_agent.py
CHANGED
@@ -2,11 +2,11 @@ import json
 import logging
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 
-from pydantic import BaseModel, BaseSettings
 from rich import print
 from rich.console import Console
 
 from langroid.agent import ToolMessage
+from langroid.pydantic_v1 import BaseModel, BaseSettings
 
 if TYPE_CHECKING:
     import neo4j
langroid/agent/task.py
CHANGED
@@ -20,7 +20,6 @@ from typing import (
 )
 
 import numpy as np
-from pydantic import BaseModel
 from rich import print
 from rich.markup import escape
 
@@ -37,6 +36,7 @@ from langroid.exceptions import InfiniteLoopException
 from langroid.mytypes import Entity
 from langroid.parsing.parse_json import extract_top_level_json
 from langroid.parsing.routing import parse_addressed_message
+from langroid.pydantic_v1 import BaseModel
 from langroid.utils.configuration import settings
 from langroid.utils.constants import (
     DONE,
@@ -1059,6 +1059,9 @@ class Task:
         """
         Get result of task. This is the default behavior.
         Derived classes can override this.
+
+        Note the result of a task is returned as if it is from the User entity.
+
         Returns:
             ChatDocument: result of task
         """
@@ -1071,7 +1074,7 @@ class Task:
         fun_call = result_msg.function_call if result_msg else None
         tool_messages = result_msg.tool_messages if result_msg else []
         block = result_msg.metadata.block if result_msg else None
-        recipient = result_msg.metadata.recipient if result_msg else
+        recipient = result_msg.metadata.recipient if result_msg else ""
         tool_ids = result_msg.metadata.tool_ids if result_msg else []
         status = result_msg.metadata.status if result_msg else None
 
langroid/agent/tool_message.py
CHANGED
@@ -13,9 +13,9 @@ from random import choice
 from typing import Any, Dict, List, Tuple, Type
 
 from docstring_parser import parse
-from pydantic import BaseModel
 
 from langroid.language_models.base import LLMFunctionSpec
+from langroid.pydantic_v1 import BaseModel
 from langroid.utils.pydantic_utils import (
     _recursive_purge_dict_key,
     generate_simple_schema,
@@ -73,7 +73,7 @@ class ToolMessage(ABC, BaseModel):
            - a tuple (description, ToolMessage instance), where the description is
              a natural language "thought" that leads to the tool usage,
              e.g. ("I want to find the square of 5", SquareTool(num=5))
-             In some scenarios,
+             In some scenarios, including such a description can significantly
              enhance reliability of tool use.
         Returns:
         """
langroid/cachedb/base.py
CHANGED
@@ -2,9 +2,9 @@ import logging
 from abc import ABC, abstractmethod
 
 import numpy as np
-from pydantic import BaseSettings
 
 from langroid.mytypes import EmbeddingFunction
+from langroid.pydantic_v1 import BaseSettings
 
 logging.getLogger("openai").setLevel(logging.ERROR)
 
langroid/language_models/base.py
CHANGED
@@ -1,5 +1,4 @@
 import ast
-import asyncio
 import json
 import logging
 from abc import ABC, abstractmethod
@@ -7,18 +6,11 @@ from datetime import datetime
 from enum import Enum
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 
-import aiohttp
-from pydantic import BaseModel, BaseSettings, Field
-
 from langroid.cachedb.base import CacheDBConfig
-from langroid.mytypes import Document
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.parse_json import top_level_json_field
 from langroid.prompts.dialog import collate_chat_history
-from langroid.
-    EXTRACTION_PROMPT_GPT4,
-    SUMMARY_ANSWER_PROMPT_GPT4,
-)
+from langroid.pydantic_v1 import BaseModel, BaseSettings, Field
 from langroid.utils.configuration import settings
 from langroid.utils.output.printing import show_if_debug
 
@@ -184,7 +176,7 @@ class LLMResponse(BaseModel):
     message: str
     tool_id: str = ""  # used by OpenAIAssistant
     function_call: Optional[LLMFunctionCall] = None
-    usage: Optional[LLMTokenUsage]
+    usage: Optional[LLMTokenUsage] = None
     cached: bool = False
 
     def __str__(self) -> str:
@@ -487,107 +479,6 @@ class LanguageModel(ABC):
         show_if_debug(prompt, "FOLLOWUP->STANDALONE-RESPONSE= ")
         return standalone
 
-    async def get_verbatim_extract_async(self, question: str, passage: Document) -> str:
-        """
-        Asynchronously, get verbatim extract from passage
-        that is relevant to a question.
-        Asynch allows parallel calls to the LLM API.
-        """
-        async with aiohttp.ClientSession():
-            templatized_prompt = EXTRACTION_PROMPT_GPT4
-            final_prompt = templatized_prompt.format(
-                question=question, content=passage.content
-            )
-            show_if_debug(final_prompt, "EXTRACT-PROMPT= ")
-            final_extract = await self.agenerate(prompt=final_prompt, max_tokens=1024)
-            show_if_debug(final_extract.message.strip(), "EXTRACT-RESPONSE= ")
-            return final_extract.message.strip()
-
-    async def _get_verbatim_extracts(
-        self,
-        question: str,
-        passages: List[Document],
-    ) -> List[Document]:
-        async with aiohttp.ClientSession():
-            verbatim_extracts = await asyncio.gather(
-                *(self.get_verbatim_extract_async(question, P) for P in passages)
-            )
-        metadatas = [P.metadata for P in passages]
-        # return with metadata so we can use it downstream, e.g. to cite sources
-        return [
-            Document(content=e, metadata=m)
-            for e, m in zip(verbatim_extracts, metadatas)
-        ]
-
-    def get_verbatim_extracts(
-        self, question: str, passages: List[Document]
-    ) -> List[Document]:
-        """
-        From each passage, extract verbatim text that is relevant to a question,
-        using concurrent API calls to the LLM.
-        Args:
-            question: question to be answered
-            passages: list of passages from which to extract relevant verbatim text
-            LLM: LanguageModel to use for generating the prompt and extract
-        Returns:
-            list of verbatim extracts from passages that are relevant to question
-        """
-        docs = asyncio.run(self._get_verbatim_extracts(question, passages))
-        return docs
-
-    def get_summary_answer(self, question: str, passages: List[Document]) -> Document:
-        """
-        Given a question and a list of (possibly) doc snippets,
-        generate an answer if possible
-        Args:
-            question: question to answer
-            passages: list of `Document` objects each containing a possibly relevant
-                snippet, and metadata
-        Returns:
-            a `Document` object containing the answer,
-            and metadata containing source citations
-
-        """
-
-        # Define an auxiliary function to transform the list of
-        # passages into a single string
-        def stringify_passages(passages: List[Document]) -> str:
-            return "\n".join(
-                [
-                    f"""
-                    Extract: {p.content}
-                    Source: {p.metadata.source}
-                    """
-                    for p in passages
-                ]
-            )
-
-        passages_str = stringify_passages(passages)
-        # Substitute Q and P into the templatized prompt
-
-        final_prompt = SUMMARY_ANSWER_PROMPT_GPT4.format(
-            question=f"Question:{question}", extracts=passages_str
-        )
-        show_if_debug(final_prompt, "SUMMARIZE_PROMPT= ")
-        # Generate the final verbatim extract based on the final prompt
-        llm_response = self.generate(prompt=final_prompt, max_tokens=1024)
-        final_answer = llm_response.message.strip()
-        show_if_debug(final_answer, "SUMMARIZE_RESPONSE= ")
-        parts = final_answer.split("SOURCE:", maxsplit=1)
-        if len(parts) > 1:
-            content = parts[0].strip()
-            sources = parts[1].strip()
-        else:
-            content = final_answer
-            sources = ""
-        return Document(
-            content=content,
-            metadata={
-                "source": "SOURCE: " + sources,
-                "cached": llm_response.cached,
-            },
-        )
-
 
 class StreamingIfAllowed:
     """Context to temporarily enable or disable streaming, if allowed globally via
langroid/language_models/openai_gpt.py
CHANGED
@@ -24,7 +24,6 @@ import openai
 from groq import AsyncGroq, Groq
 from httpx import Timeout
 from openai import AsyncOpenAI, OpenAI
-from pydantic import BaseModel
 from rich import print
 from rich.markup import escape
 
@@ -50,6 +49,7 @@ from langroid.language_models.utils import (
     async_retry_with_exponential_backoff,
     retry_with_exponential_backoff,
 )
+from langroid.pydantic_v1 import BaseModel
 from langroid.utils.configuration import settings
 from langroid.utils.constants import Colors
 from langroid.utils.system import friendly_error
langroid/mytypes.py
CHANGED
langroid/parsing/code_parser.py
CHANGED
@@ -2,12 +2,12 @@ from functools import reduce
 from typing import Callable, List
 
 import tiktoken
-from pydantic import BaseSettings
 from pygments import lex
 from pygments.lexers import get_lexer_by_name
 from pygments.token import Token
 
 from langroid.mytypes import Document
+from langroid.pydantic_v1 import BaseSettings
 
 
 def chunk_code(
langroid/parsing/parser.py
CHANGED
@@ -3,10 +3,10 @@ from enum import Enum
 from typing import Dict, List, Literal
 
 import tiktoken
-from pydantic import BaseSettings
 
 from langroid.mytypes import Document
 from langroid.parsing.para_sentence_split import create_chunks, remove_extra_whitespace
+from langroid.pydantic_v1 import BaseSettings
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.WARNING)
langroid/parsing/repo_loader.py
CHANGED
@@ -15,11 +15,11 @@ from github import Github
 from github.ContentFile import ContentFile
 from github.Label import Label
 from github.Repository import Repository
-from pydantic import BaseModel, BaseSettings, Field
 
 from langroid.mytypes import DocMetaData, Document
 from langroid.parsing.document_parser import DocumentParser, DocumentType
 from langroid.parsing.parser import Parser, ParsingConfig
+from langroid.pydantic_v1 import BaseModel, BaseSettings, Field
 
 logger = logging.getLogger(__name__)
 
langroid/parsing/urls.py
CHANGED
@@ -9,11 +9,12 @@ from urllib.parse import urldefrag, urljoin, urlparse
 import fire
 import requests
 from bs4 import BeautifulSoup
-from pydantic import BaseModel, HttpUrl, ValidationError, parse_obj_as
 from rich import print
 from rich.prompt import Prompt
 from trafilatura.spider import focused_crawler
 
+from langroid.pydantic_v1 import BaseModel, HttpUrl, ValidationError, parse_obj_as
+
 logger = logging.getLogger(__name__)
 
 
langroid/prompts/__init__.py
CHANGED
langroid/pydantic_v1/__init__.py
ADDED
@@ -0,0 +1,10 @@
+"""
+If we're on Pydantic v2, use the v1 namespace, else just use the main namespace.
+
+This allows compatibility with both Pydantic v1 and v2
+"""
+
+try:
+    from pydantic.v1 import *  # noqa: F403, F401
+except ImportError:
+    from pydantic import *  # type: ignore # noqa: F403, F401
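The new langroid.pydantic_v1 shim above is what the one-line import changes throughout this diff switch to: instead of importing BaseModel, BaseSettings, etc. directly from pydantic, modules import them from langroid.pydantic_v1, which resolves to pydantic.v1 under Pydantic v2 and to plain pydantic under v1. Below is a minimal usage sketch; the MyDocMetaData and MySettings classes are hypothetical examples (not part of the package), and it assumes an installed pydantic that is either 1.x or a 2.x release shipping the pydantic.v1 namespace.

```python
# Hypothetical sketch of using the compatibility shim; not part of the package.
# Models defined against the shim behave the same whether Pydantic v1 or v2 is
# installed, because the shim re-exports the v1 API in both cases.
from langroid.pydantic_v1 import BaseModel, BaseSettings, Field


class MyDocMetaData(BaseModel):  # example class, for illustration only
    source: str = Field("", description="where the document came from")


class MySettings(BaseSettings):  # example settings class, for illustration only
    debug: bool = False


meta = MyDocMetaData(source="wiki")
print(meta.dict())  # v1-style API (.dict(), .json()) keeps working under Pydantic v2
print(MySettings().debug)
```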
langroid/utils/configuration.py
CHANGED
langroid/utils/constants.py
CHANGED
langroid/utils/globals.py
CHANGED
langroid/utils/pydantic_utils.py
CHANGED
@@ -16,9 +16,9 @@ from typing import (
 
 import numpy as np
 import pandas as pd
-from pydantic import BaseModel, ValidationError, create_model
 
 from langroid.mytypes import DocMetaData, Document
+from langroid.pydantic_v1 import BaseModel, ValidationError, create_model
 
 logger = logging.getLogger(__name__)
 
langroid/vector_store/base.py
CHANGED
@@ -5,11 +5,11 @@ from typing import Dict, List, Optional, Sequence, Tuple
 
 import numpy as np
 import pandas as pd
-from pydantic import BaseSettings
 
 from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
 from langroid.embedding_models.models import OpenAIEmbeddingsConfig
 from langroid.mytypes import Document
+from langroid.pydantic_v1 import BaseSettings
 from langroid.utils.algorithms.graph import components, topological_sort
 from langroid.utils.configuration import settings
 from langroid.utils.output.printing import print_long_text
langroid/vector_store/lancedb.py
CHANGED
@@ -15,7 +15,8 @@ from typing import (
 
 import pandas as pd
 from dotenv import load_dotenv
-
+
+from langroid.pydantic_v1 import BaseModel, ValidationError, create_model
 
 if TYPE_CHECKING:
     from lancedb.query import LanceVectorQueryBuilder
@@ -111,13 +112,26 @@ class LanceDB(VectorStore):
         )
 
     def _setup_schemas(self, doc_cls: Type[Document] | None) -> None:
-
-
-
-        self.
-
-
-
+        try:
+            doc_cls = doc_cls or self.config.document_class
+            self.unflattened_schema = self._create_lance_schema(doc_cls)
+            self.schema = (
+                self._create_flat_lance_schema(doc_cls)
+                if self.config.flatten
+                else self.unflattened_schema
+            )
+        except (AttributeError, TypeError) as e:
+            raise ValueError(
+                f"""
+                {e}
+                ====
+                One reason for this error is that you may be using Pydantic v2,
+                which is not yet compatible with Langroid's LanceDB integration.
+                If so, to use Lancedb with Langroid, please install the
+                latest pydantic 1.10.15 instead of pydantic v2, e.g.
+                pip install pydantic==1.10.15
+                """
+            )
 
     def clear_empty_collections(self) -> int:
         coll_names = self.list_collections()
@@ -246,7 +260,23 @@ class LanceDB(VectorStore):
                 return
         else:
             logger.warning("Recreating fresh collection")
-
+        try:
+            self.client.create_table(
+                collection_name, schema=self.schema, mode="overwrite"
+            )
+        except TypeError as e:
+            raise TypeError(
+                f"""
+                {e}
+                ====
+                One reason for this error is that you may be using Pydantic v2,
+                which is not yet compatible with Langroid's LanceDB integration.
+                If so, to use Lancedb with Langroid, please install the
+                latest pydantic 1.10.15 instead of pydantic v2, e.g.
+                pip install pydantic==1.10.15
+                """
+            )
+
         if settings.debug:
             level = logger.getEffectiveLevel()
             logger.setLevel(logging.INFO)
{langroid-0.1.262.dist-info → langroid-0.1.263.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.1.
+Version: 0.1.263
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani
@@ -19,6 +19,7 @@ Provides-Extra: docx
 Provides-Extra: hf-embeddings
 Provides-Extra: hf-transformers
 Provides-Extra: lancedb
+Provides-Extra: langroid-pydantic-v1
 Provides-Extra: litellm
 Provides-Extra: meilisearch
 Provides-Extra: metaphor
@@ -51,7 +52,8 @@ Requires-Dist: grpcio (>=1.62.1,<2.0.0)
 Requires-Dist: halo (>=0.0.31,<0.0.32)
 Requires-Dist: huggingface-hub (>=0.21.2,<0.22.0) ; extra == "hf-transformers" or extra == "all" or extra == "transformers"
 Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
-Requires-Dist: lancedb (>=0.
+Requires-Dist: lancedb (>=0.8.2,<0.9.0) ; extra == "vecdbs" or extra == "lancedb"
+Requires-Dist: langroid_pydantic_v1 (>=0.1.0,<0.2.0) ; extra == "langroid-pydantic-v1" or extra == "lancedb"
 Requires-Dist: litellm (>=1.30.1,<2.0.0) ; extra == "all" or extra == "litellm"
 Requires-Dist: lxml (>=4.9.3,<5.0.0)
 Requires-Dist: meilisearch (>=0.28.3,<0.29.0) ; extra == "meilisearch"
@@ -68,8 +70,8 @@ Requires-Dist: pdf2image (>=1.17.0,<2.0.0) ; extra == "doc-chat" or extra == "al
 Requires-Dist: pdfplumber (>=0.10.2,<0.11.0) ; extra == "doc-chat" or extra == "all" or extra == "pdf-parsers"
 Requires-Dist: prettytable (>=3.8.0,<4.0.0)
 Requires-Dist: psycopg2 (>=2.9.7,<3.0.0) ; extra == "db" or extra == "all" or extra == "postgres" or extra == "sql"
-Requires-Dist: pyarrow (==15.0.0) ; extra == "vecdbs" or extra == "
-Requires-Dist: pydantic (
+Requires-Dist: pyarrow (==15.0.0) ; extra == "vecdbs" or extra == "lancedb"
+Requires-Dist: pydantic (>=1,<3)
 Requires-Dist: pygithub (>=1.58.1,<2.0.0)
 Requires-Dist: pygments (>=2.15.1,<3.0.0)
 Requires-Dist: pymupdf (>=1.23.3,<2.0.0) ; extra == "doc-chat" or extra == "all" or extra == "pdf-parsers"
@@ -91,7 +93,7 @@ Requires-Dist: rich (>=13.3.4,<14.0.0)
 Requires-Dist: scrapy (>=2.11.0,<3.0.0) ; extra == "scrapy"
 Requires-Dist: sentence-transformers (==2.2.2) ; extra == "hf-transformers" or extra == "all" or extra == "hf-embeddings"
 Requires-Dist: sqlalchemy (>=2.0.19,<3.0.0) ; extra == "db" or extra == "all" or extra == "sql"
-Requires-Dist: tantivy (>=0.21.0,<0.22.0) ; extra == "vecdbs" or extra == "
+Requires-Dist: tantivy (>=0.21.0,<0.22.0) ; extra == "vecdbs" or extra == "lancedb"
 Requires-Dist: thefuzz (>=0.20.0,<0.21.0)
 Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
 Requires-Dist: torch (==2.0.0) ; extra == "hf-transformers" or extra == "all" or extra == "hf-embeddings" or extra == "transformers"
@@ -515,9 +517,10 @@ with a postgres db, you will need to:
 ### Set up environment variables (API keys, etc)
 
 To get started, all you need is an OpenAI API Key.
-If you don't have one, see [this OpenAI Page](https://
-
-
+If you don't have one, see [this OpenAI Page](https://platform.openai.com/docs/quickstart).
+(Note that while this is the simplest way to get started, Langroid works with practically any LLM, not just those from OpenAI.
+See the guides to using [Open/Local LLMs](https://langroid.github.io/langroid/tutorials/local-llm-setup/),
+and other [non-OpenAI](https://langroid.github.io/langroid/tutorials/non-openai-llms/) proprietary LLMs.)
 
 In the root of the repo, copy the `.env-template` file to a new file `.env`:
 ```bash
@@ -545,9 +548,7 @@ All of the following environment variable settings are optional, and some are on
 to use specific features (as noted below).
 
 - **Qdrant** Vector Store API Key, URL. This is only required if you want to use Qdrant cloud.
-
-  and you do not need to set up any environment variables for that.
-  Alternatively [Chroma](https://docs.trychroma.com/) is also currently supported.
+  Alternatively [Chroma](https://docs.trychroma.com/) or [LanceDB](https://lancedb.com/) are also currently supported.
   We use the local-storage version of Chroma, so there is no need for an API key.
 - **Redis** Password, host, port: This is optional, and only needed to cache LLM API responses
   using Redis Cloud. Redis [offers](https://redis.com/try-free/) a free 30MB Redis account
@@ -642,11 +643,12 @@ and they are **not** complete runnable examples! For that we encourage you to
 consult the [`langroid-examples`](https://github.com/langroid/langroid-examples)
 repository.
 
-:information_source:
-
-
-(
-and may suffice for some applications, but in general you may see inferior results
+:information_source:
+The various LLM prompts and instructions in Langroid
+have been tested to work well with GPT-4 (and to some extent GPT-4o).
+Switching to other LLMs (local/open and proprietary) is easy (see guides mentioned above),
+and may suffice for some applications, but in general you may see inferior results
+unless you adjust the prompts and/or the multi-agent setup.
 
 
 :book: Also see the
@@ -921,7 +923,7 @@ config = DocChatAgentConfig(
         "https://en.wikipedia.org/wiki/N-gram_language_model",
         "/path/to/my/notes-on-language-models.txt",
     ],
-    vecdb=lr.vector_store.
+    vecdb=lr.vector_store.QdrantDBConfig(),
 )
 ```
 
{langroid-0.1.262.dist-info → langroid-0.1.263.dist-info}/RECORD
CHANGED
@@ -1,25 +1,25 @@
 langroid/__init__.py,sha256=z_fCOLQJPOw3LLRPBlFB5-2HyCjpPgQa4m4iY5Fvb8Y,1800
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
-langroid/agent/base.py,sha256=
+langroid/agent/base.py,sha256=CHFUZ4vnC1CSMMT5POnY4AWu96RQ-4uDUARUbhGAGo8,37195
 langroid/agent/batch.py,sha256=feRA_yRG768ElOQjrKEefcRv6Aefd_yY7qktuYUQDwc,10040
 langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-langroid/agent/callbacks/chainlit.py,sha256=
+langroid/agent/callbacks/chainlit.py,sha256=UKG2_v4ktfkEaGvdouVRHEqQejEYya2Rli8jrP65TmA,22055
 langroid/agent/chat_agent.py,sha256=hnmeOxdi4i5w8WaL2kPjQOEpenoRW_hG5EfeMWuuVsQ,39478
-langroid/agent/chat_document.py,sha256=
+langroid/agent/chat_document.py,sha256=tXFuUw2fs8WVIhVt10Sne0IpOzPPEaFz_1yqQgU6V4U,9218
 langroid/agent/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/junk,sha256=LxfuuW7Cijsg0szAzT81OjWWv1PMNI-6w_-DspVIO2s,339
-langroid/agent/openai_assistant.py,sha256=
+langroid/agent/openai_assistant.py,sha256=rmGJD5n0eE7_O1EkPyXgHFMNGc3vb2GKweZMhzmRWvI,33068
 langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
-langroid/agent/special/doc_chat_agent.py,sha256=
+langroid/agent/special/doc_chat_agent.py,sha256=dlXsY7brhrGOf_TA39EgkcL80L102Gkunoid7GvIZNY,54444
 langroid/agent/special/lance_doc_chat_agent.py,sha256=USp0U3eTaJzwF_3bdqE7CedSLbaqAi2tm-VzygcyLaA,10175
 langroid/agent/special/lance_rag/__init__.py,sha256=QTbs0IVE2ZgDg8JJy1zN97rUUg4uEPH7SLGctFNumk4,174
 langroid/agent/special/lance_rag/critic_agent.py,sha256=ufTdpHSeHgCzN85Q0sfWOrpBpsCjGVZdAg5yOH1ogU8,7296
 langroid/agent/special/lance_rag/lance_rag_task.py,sha256=l_HQgrYY-CX2FwIsS961aEF3bYog3GDYo98fj0C0mSk,2889
-langroid/agent/special/lance_rag/query_planner_agent.py,sha256=
-langroid/agent/special/lance_tools.py,sha256=
+langroid/agent/special/lance_rag/query_planner_agent.py,sha256=M4RC_0f98_pwVL7ygrr1VI80LgJiFcmKjJFH0M4tccI,9830
+langroid/agent/special/lance_tools.py,sha256=BksGrrNgGgyYWP0HnfAuXMc0KzXooFOzY2l5rDDMtQ8,1467
 langroid/agent/special/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/special/neo4j/csv_kg_chat.py,sha256=dRsAgMBa1H_EMI2YYgJR2Xyv1D7e4o3G9M64mTewq_c,6409
-langroid/agent/special/neo4j/neo4j_chat_agent.py,sha256=
+langroid/agent/special/neo4j/neo4j_chat_agent.py,sha256=Y4Zu-m8WKO1xjeBRarV_m4y00Y5n_NR2B-hepjZp_cY,13104
 langroid/agent/special/neo4j/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/special/neo4j/utils/system_message.py,sha256=vRpz1P-OYLLiC6OGYYoK6x77yxVzDxMTCEJSsYUIuG4,2242
 langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
@@ -30,10 +30,10 @@ langroid/agent/special/sql/utils/__init__.py,sha256=JFif6CRTrN-bc91uuAI4K9fe2ndI
 langroid/agent/special/sql/utils/description_extractors.py,sha256=cX8TIpmTPXZXQTMpIi3OUFwFsPywxFFdurpx717Kq0I,6529
 langroid/agent/special/sql/utils/populate_metadata.py,sha256=1J22UsyEPKzwK0XlJZtYn9r6kYc0FXIr8-lZrndYlhc,3131
 langroid/agent/special/sql/utils/system_message.py,sha256=qKLHkvQWRQodTtPLPxr1GSLUYUFASZU8x-ybV67cB68,1885
-langroid/agent/special/sql/utils/tools.py,sha256=
+langroid/agent/special/sql/utils/tools.py,sha256=vFYysk6Vi7HJjII8B4RitA3pt_z3gkSglDNdhNVMiFc,1332
 langroid/agent/special/table_chat_agent.py,sha256=d9v2wsblaRx7oMnKhLV7uO_ujvk9gh59pSGvBXyeyNc,9659
-langroid/agent/task.py,sha256=
-langroid/agent/tool_message.py,sha256=
+langroid/agent/task.py,sha256=YwunQpw64t0y8Sd9QgYST7bYCnhJBD1skSWg9VSO6oM,60695
+langroid/agent/tool_message.py,sha256=wIyZnUcZpxkiRPvM9O3MO3b5BBAdLEEan9kqPbvtApc,9743
 langroid/agent/tools/__init__.py,sha256=8Pc9BlGCB5FQ2IDGKS_WPpHCoWp5jblMU8EHJwwikAY,303
 langroid/agent/tools/duckduckgo_search_tool.py,sha256=NhsCaGZkdv28nja7yveAhSK_w6l_Ftym8agbrdzqgfo,1935
 langroid/agent/tools/extract_tool.py,sha256=u5lL9rKBzaLBOrRyLnTAZ97pQ1uxyLP39XsWMnpaZpw,3789
@@ -46,11 +46,11 @@ langroid/agent/tools/run_python_code.py,sha256=BvoxYzzHijU-p4703n2iVlt5BCieR1oMS
 langroid/agent/tools/segment_extract_tool.py,sha256=__srZ_VGYLVOdPrITUM8S0HpmX4q7r5FHWMDdHdEv8w,1440
 langroid/agent_config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/cachedb/__init__.py,sha256=icAT2s7Vhf-ZGUeqpDQGNU6ob6o0aFEyjwcxxUGRFjg,225
-langroid/cachedb/base.py,sha256=
+langroid/cachedb/base.py,sha256=ztVjB1DtN6pLCujCWnR6xruHxwVj3XkYniRTYAKKqk0,1354
 langroid/cachedb/momento_cachedb.py,sha256=YEOJ62hEcV6iIeMr5aGgRYgWQqFYaej9gEDEcY0sm7M,3172
 langroid/cachedb/redis_cachedb.py,sha256=NukuCWgdp1AWWNgguiZfuypbH9GHwiYe34ZZy866u54,4981
 langroid/embedding_models/__init__.py,sha256=lsu8qxCjfGujXGueJWU-VI3LMZYGjLSYgqUKDd4F3Qo,715
-langroid/embedding_models/base.py,sha256=
+langroid/embedding_models/base.py,sha256=MSjaTkFcfoMGY6SHPOqAsbZbKctj8-1N6zgaFYmOFTg,1830
 langroid/embedding_models/clustering.py,sha256=tZWElUqXl9Etqla0FAa7og96iDKgjqWjucZR_Egtp-A,6684
 langroid/embedding_models/models.py,sha256=-xeN0irBPc1tUgRFHGM1ki4NwOIHr6F3SKuEjD5nTOg,7144
 langroid/embedding_models/protoc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -62,50 +62,50 @@ langroid/embedding_models/remote_embeds.py,sha256=6_kjXByVbqhY9cGwl9R83ZcYC2km-n
 langroid/exceptions.py,sha256=w_Cr41nPAmsa6gW5nNFaO9yDcBCWdQqRspL1jYvZf5w,2209
 langroid/language_models/__init__.py,sha256=55602F5QA58MmRq-yRjoXK6xZOMRHQrR4QGaCnlX218,822
 langroid/language_models/azure_openai.py,sha256=ncRCbKooqLVOY-PWQUIo9C3yTuKEFbAwyngXT_M4P7k,5989
-langroid/language_models/base.py,sha256=
-langroid/language_models/config.py,sha256=
+langroid/language_models/base.py,sha256=UbHcXSzQ5pTc_H5VSDh45c6dsyJwfY0H5_6ofEBXH-8,16917
+langroid/language_models/config.py,sha256=9Q8wk5a7RQr8LGMT_0WkpjY8S4ywK06SalVRjXlfCiI,378
 langroid/language_models/openai_assistants.py,sha256=9K-DEAL2aSWHeXj2hwCo2RAlK9_1oCPtqX2u1wISCj8,36
-langroid/language_models/openai_gpt.py,sha256=
+langroid/language_models/openai_gpt.py,sha256=RXnLKULuCSeDeUPQvaZ4naqJgMKcMZogCtRDLycd4j8,50714
 langroid/language_models/prompt_formatter/__init__.py,sha256=2-5cdE24XoFDhifOLl8yiscohil1ogbP1ECkYdBlBsk,372
 langroid/language_models/prompt_formatter/base.py,sha256=eDS1sgRNZVnoajwV_ZIha6cba5Dt8xjgzdRbPITwx3Q,1221
 langroid/language_models/prompt_formatter/hf_formatter.py,sha256=TFL6ppmeQWnzr6CKQzRZFYY810zE1mr8DZnhw6i85ok,5217
 langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeuMENVIVvVqSYuEpvYSTndUe_jd6hVTko4,2899
 langroid/language_models/utils.py,sha256=j8xEEm__-2b9eql1oTiWQk5dHW59UwmrRKs5kMHaGGo,4803
-langroid/mytypes.py,sha256=
+langroid/mytypes.py,sha256=B5uKN4FMdHiJaAKozg8EmcPfle3sHlj11NcBnpXn7nE,3057
 langroid/parsing/__init__.py,sha256=ZgSAfgTC6VsTLFlRSWT-TwYco7SQeRMeZG-49MnKYGY,936
 langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
 langroid/parsing/code-parsing.md,sha256=--cyyNiSZSDlIwcjAV4-shKrSiRe2ytF3AdSoS_hD2g,3294
-langroid/parsing/code_parser.py,sha256=
+langroid/parsing/code_parser.py,sha256=Fwa8MWY5EGk7Ekr8II5c-o9vBf4m1HfB5_K7e_EDYzo,3739
 langroid/parsing/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/parsing/document_parser.py,sha256=bN-D1kqx6qe1Sx-AMR8a8WbPJYPdoxLYPdgGl7dfW3I,24017
 langroid/parsing/image_text.py,sha256=sbLIQ5nHe2UnYUksBaQsmZGaX-X0qgEpPd7CEzi_z5M,910
 langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
 langroid/parsing/parse_json.py,sha256=tgB_oatcrgt6L9ZplC-xBBXjLzL1gjSQf1L2_W5kwFA,4230
-langroid/parsing/parser.py,sha256=
-langroid/parsing/
-langroid/parsing/repo_loader.py,sha256=My5UIe-h1xr0I-6Icu0ZVwRHmGRRRW8SrJYMc9J1M9Q,29361
+langroid/parsing/parser.py,sha256=VU3G2GXwho5vJ3tCDgw6_1W-guqAbPtml8RcJxvp-Xs,10755
+langroid/parsing/repo_loader.py,sha256=3GjvPJS6Vf5L6gV2zOU8s-Tf1oq_fZm-IB_RL_7CTsY,29373
 langroid/parsing/routing.py,sha256=_NFPe7wLjd5B6s47w3M8-5vldL8e2Sz51Gb5bwF5ooY,1072
 langroid/parsing/search.py,sha256=plQtjarB9afGfJLB0CyPXPq3mM4m7kRsfd0_4brziEI,8846
 langroid/parsing/spider.py,sha256=Y6y7b86Y2k770LdhxgjVlImBxuuy1V9n8-XQ3QPaG5s,3199
 langroid/parsing/table_loader.py,sha256=qNM4obT_0Y4tjrxNBCNUYjKQ9oETCZ7FbolKBTcz-GM,3410
 langroid/parsing/url_loader.py,sha256=Na2TBlKuQkloZzkE2d7xl6mh9olS3CbpgCsJbJ-xhIA,4472
 langroid/parsing/url_loader_cookies.py,sha256=Lg4sNpRz9MByWq2mde6T0hKv68VZSV3mtMjNEHuFeSU,2327
-langroid/parsing/urls.py,sha256=
+langroid/parsing/urls.py,sha256=XjpaV5onG7gKQ5iQeFTzHSw5P08Aqw0g-rMUu61lR6s,7988
 langroid/parsing/utils.py,sha256=pbSAbfwA28EBNESpQRJee_Kp1b44qze-2_2b9qJOKfM,12646
 langroid/parsing/web_search.py,sha256=XSiSHB4c1Wa8RjWkC4Yh-ac8S7a2WPPYj0n-Ma716RY,4759
-langroid/prompts/__init__.py,sha256=
+langroid/prompts/__init__.py,sha256=RW11vK6jiLPuaUh4GpeFvstti73gkm8_rDMtrbo2YsU,142
 langroid/prompts/chat-gpt4-system-prompt.md,sha256=Q3uLCJTPQvmUkZN2XDnkBC7M2K3X0F3C3GIQBaFvYvw,5329
 langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
-langroid/prompts/prompts_config.py,sha256=
+langroid/prompts/prompts_config.py,sha256=p_lp9nbMuQwhhMwAZsOxveRw9C0ZFZvql7pdIfgVZYo,143
 langroid/prompts/templates.py,sha256=kz0rPiM6iLGhhpDonF3Y87OznSe9FRI6A0pHU0wgW4Q,6314
-langroid/
+langroid/pydantic_v1/__init__.py,sha256=HxPGVERapVueRUORgSpj2JX_vTZxVlVbWvhpQlpjygE,283
+langroid/pydantic_v1/main.py,sha256=p_k7kDY9eDrsA5dxNNqXusKLgx7mS_icGnS7fu4goqY,147
 langroid/utils/__init__.py,sha256=ARx5To4Hsv1K5QAzK4uUqdEoB_iq5HK797vae1AcMBI,300
 langroid/utils/algorithms/__init__.py,sha256=WylYoZymA0fnzpB4vrsH_0n7WsoLhmuZq8qxsOCjUpM,41
 langroid/utils/algorithms/graph.py,sha256=JbdpPnUOhw4-D6O7ou101JLA3xPCD0Lr3qaPoFCaRfo,2866
-langroid/utils/configuration.py,sha256=
-langroid/utils/constants.py,sha256=
+langroid/utils/configuration.py,sha256=A70LdvdMuunlLSGI1gBmBL5j6Jhz-1syNP8R4AdjqDc,3295
+langroid/utils/constants.py,sha256=eTiXfx8Nq2kmq0WChVLqV9C58UWju0NCIuW28sMgd5g,575
 langroid/utils/docker.py,sha256=kJQOLTgM0x9j9pgIIqp0dZNZCTvoUDhp6i8tYBq1Jr0,1105
-langroid/utils/globals.py,sha256=
+langroid/utils/globals.py,sha256=Az9dOFqR6n9CoTYSqa2kLikQWS0oCQ9DFQIQAnG-2q8,1355
 langroid/utils/llms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/utils/llms/strings.py,sha256=CSAX9Z6FQOLXOzbLMe_Opqtc3ruDAKTTk7cPqc6Blh0,263
 langroid/utils/logging.py,sha256=WN180zjxhlozwtyTcLWmbVkXylBs5EvQj85dBPeVUwc,3985
@@ -114,20 +114,20 @@ langroid/utils/output/citations.py,sha256=PSY2cpti8W-ZGFMAgj1lYoEIZy0lsniLpCliMs
 langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
 langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
 langroid/utils/pandas_utils.py,sha256=UctS986Jtl_MvU5rA7-GfrjEHXP7MNu8ePhepv0bTn0,755
-langroid/utils/pydantic_utils.py,sha256=
+langroid/utils/pydantic_utils.py,sha256=FKC8VKXH2uBEpFjnnMgIcEsQn6hs31ftea8zv5pMK9g,21740
 langroid/utils/system.py,sha256=RfAcQODu4tjl-pAO8zZ65yKB9-6WsvzSz2dEPkJdSdw,4909
 langroid/utils/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/utils/web/login.py,sha256=1iz9eUAHa87vpKIkzwkmFa00avwFWivDSAr7QUhK7U0,2528
 langroid/vector_store/__init__.py,sha256=6xBjb_z4QtUy4vz4RuFbcbSwmHrggHL8-q0DwCf3PMM,972
-langroid/vector_store/base.py,sha256=
+langroid/vector_store/base.py,sha256=1bzFEDJcbKIaZnTPhBjnQ260c6QYs5SpZwGMtwt0-6Y,13481
 langroid/vector_store/chromadb.py,sha256=bZ5HjwgKgfJj1PUHsatYsrHv-v0dpOfMR2l0tJ2H0_A,7890
-langroid/vector_store/lancedb.py,sha256=
+langroid/vector_store/lancedb.py,sha256=p4g2S_esbxCo_BsSZz2IJMkSSWeuSLzh77KCY2AyeFc,20147
 langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
 langroid/vector_store/momento.py,sha256=QaPzUnTwlswoawGB-paLtUPyLRvckFXLfLDfvbTzjNQ,10505
 langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
 langroid/vector_store/qdrantdb.py,sha256=wYOuu5c2vIKn9ZgvTXcAiZXMpV8AOXEWFAzI8S8UP-0,16828
-pyproject.toml,sha256=
-langroid-0.1.
-langroid-0.1.
-langroid-0.1.
-langroid-0.1.
+pyproject.toml,sha256=4J2ukt5v_190twniY2Aet4nZWx40eisdFR3SzV3hUTM,7159
+langroid-0.1.263.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.1.263.dist-info/METADATA,sha256=637o8UgB10tXSmhZ0qIPymp58_BpDdzIX3Ze0QcORBQ,52731
+langroid-0.1.263.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+langroid-0.1.263.dist-info/RECORD,,
pyproject.toml
CHANGED
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langroid"
-version = "0.1.
+version = "0.1.263"
 description = "Harness LLMs with Multi-Agent Programming"
 authors = ["Prasad Chalasani <pchalasani@gmail.com>"]
 readme = "README.md"
@@ -29,7 +29,8 @@ python-socketio = {version="^5.11.0", optional=true}
 neo4j = {version = "^5.14.1", optional = true}
 huggingface-hub = {version="^0.21.2", optional=true}
 transformers = {version="^4.40.1", optional=true}
-lancedb = {version="^0.
+lancedb = {version="^0.8.2", optional=true}
+langroid_pydantic_v1 = {version="^0.1.0", optional=true}
 tantivy = {version="^0.21.0", optional=true}
 pypdf = {version="^3.12.2", optional=true}
 pymupdf = {version="^1.23.3", optional=true}
@@ -65,7 +66,7 @@ requests = "^2.31.0"
 pyparsing = "^3.0.9"
 nltk = "^3.8.1"
 qdrant-client = "^1.8.0"
-pydantic = "1
+pydantic = ">=1,<3"
 pandas = "^2.0.3"
 prettytable = "^3.8.0"
 
@@ -107,13 +108,15 @@ all = [
   "pymupdf", "pdf2image", "pytesseract",
   "postgres", "mysql", "sqlalchemy", "psycopg2", "pymysql",
   "sentence-transformers", "torch", "transformers", "huggingface-hub",
-  "lancedb", "tantivy", "pyarrow",
+  # "lancedb", "tantivy", "pyarrow",
+  "chromadb",
   "metaphor-python", "neo4j",
   "litellm",
   "chainlit", "python-socketio",
 ]
 # more granular groupings
-
+langroid_pydantic_v1 = ["langroid_pydantic_v1"]
+lancedb = ["lancedb", "tantivy", "pyarrow", "langroid_pydantic_v1"]
 pdf-parsers = ["pdfplumber", "pypdf", "pymupdf", "pdf2image", "pytesseract"]
 docx = ["python-docx"]
 scrapy = ["scrapy"]
langroid/parsing/parser.pyi
DELETED
@@ -1,56 +0,0 @@
-from enum import Enum
-from typing import Literal
-
-from _typeshed import Incomplete
-from pydantic import BaseSettings
-
-from langroid.mytypes import Document as Document
-from langroid.parsing.para_sentence_split import (
-    create_chunks as create_chunks,
-)
-from langroid.parsing.para_sentence_split import (
-    remove_extra_whitespace as remove_extra_whitespace,
-)
-
-logger: Incomplete
-
-class Splitter(str, Enum):
-    TOKENS: str
-    PARA_SENTENCE: str
-    SIMPLE: str
-
-class PdfParsingConfig(BaseSettings):
-    library: Literal["fitz", "pdfplumber", "pypdf", "unstructured", "pdf2image"]
-
-class DocxParsingConfig(BaseSettings):
-    library: Literal["python-docx", "unstructured"]
-
-class DocParsingConfig(BaseSettings):
-    library: Literal["unstructured"]
-
-class ParsingConfig(BaseSettings):
-    splitter: str
-    chunk_size: int
-    overlap: int
-    max_chunks: int
-    min_chunk_chars: int
-    discard_chunk_chars: int
-    n_similar_docs: int
-    n_neighbor_ids: int
-    separators: list[str]
-    token_encoding_model: str
-    pdf: PdfParsingConfig
-    docx: DocxParsingConfig
-    doc: DocParsingConfig
-
-class Parser:
-    config: Incomplete
-    tokenizer: Incomplete
-    def __init__(self, config: ParsingConfig) -> None: ...
-    def num_tokens(self, text: str) -> int: ...
-    def add_window_ids(self, chunks: list[Document]) -> None: ...
-    def split_simple(self, docs: list[Document]) -> list[Document]: ...
-    def split_para_sentence(self, docs: list[Document]) -> list[Document]: ...
-    def split_chunk_tokens(self, docs: list[Document]) -> list[Document]: ...
-    def chunk_tokens(self, text: str) -> list[str]: ...
-    def split(self, docs: list[Document]) -> list[Document]: ...
langroid/prompts/transforms.py
DELETED
@@ -1,84 +0,0 @@
-import asyncio
-from typing import List, Tuple
-
-import aiohttp
-
-from langroid.language_models.base import LanguageModel
-from langroid.mytypes import Document
-from langroid.prompts.dialog import collate_chat_history
-from langroid.prompts.templates import EXTRACTION_PROMPT
-
-
-async def get_verbatim_extract_async(
-    question: str,
-    passage: Document,
-    LLM: LanguageModel,
-) -> str:
-    """
-    Asynchronously, get verbatim extract from passage that is relevant to a question.
-    """
-    async with aiohttp.ClientSession():
-        templatized_prompt = EXTRACTION_PROMPT
-        final_prompt = templatized_prompt.format(question=question, content=passage)
-        final_extract = await LLM.agenerate(prompt=final_prompt, max_tokens=1024)
-
-    return final_extract.message.strip()
-
-
-async def _get_verbatim_extracts(
-    question: str,
-    passages: List[Document],
-    LLM: LanguageModel,
-) -> List[Document]:
-    async with aiohttp.ClientSession():
-        verbatim_extracts = await asyncio.gather(
-            *(get_verbatim_extract_async(question, P, LLM) for P in passages)
-        )
-    metadatas = [P.metadata for P in passages]
-    # return with metadata so we can use it downstream, e.g. to cite sources
-    return [
-        Document(content=e, metadata=m) for e, m in zip(verbatim_extracts, metadatas)
-    ]
-
-
-def get_verbatim_extracts(
-    question: str,
-    passages: List[Document],
-    LLM: LanguageModel,
-) -> List[Document]:
-    """
-    From each passage, extract verbatim text that is relevant to a question,
-    using concurrent API calls to the LLM.
-    Args:
-        question: question to be answered
-        passages: list of passages from which to extract relevant verbatim text
-        LLM: LanguageModel to use for generating the prompt and extract
-    Returns:
-        list of verbatim extracts (Documents) from passages that are relevant to
-        question
-    """
-    return asyncio.run(_get_verbatim_extracts(question, passages, LLM))
-
-
-def followup_to_standalone(
-    LLM: LanguageModel, chat_history: List[Tuple[str, str]], question: str
-) -> str:
-    """
-    Given a chat history and a question, convert it to a standalone question.
-    Args:
-        chat_history: list of tuples of (question, answer)
-        query: follow-up question
-
-    Returns: standalone version of the question
-    """
-    history = collate_chat_history(chat_history)
-
-    prompt = f"""
-    Given the conversationn below, and a follow-up question, rephrase the follow-up
-    question as a standalone question.
-
-    Chat history: {history}
-    Follow-up question: {question}
-    """.strip()
-    standalone = LLM.generate(prompt=prompt, max_tokens=1024).message.strip()
-    return standalone
{langroid-0.1.262.dist-info → langroid-0.1.263.dist-info}/LICENSE
File without changes
{langroid-0.1.262.dist-info → langroid-0.1.263.dist-info}/WHEEL
File without changes