langroid 0.45.1__py3-none-any.whl → 0.45.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +31 -8
- langroid/agent/chat_agent.py +10 -6
- langroid/agent/special/doc_chat_agent.py +3 -2
- langroid/mytypes.py +2 -1
- langroid/parsing/document_parser.py +10 -4
- langroid/parsing/parser.py +3 -0
- langroid/utils/output/citations.py +15 -5
- {langroid-0.45.1.dist-info → langroid-0.45.3.dist-info}/METADATA +1 -1
- {langroid-0.45.1.dist-info → langroid-0.45.3.dist-info}/RECORD +11 -11
- {langroid-0.45.1.dist-info → langroid-0.45.3.dist-info}/WHEEL +0 -0
- {langroid-0.45.1.dist-info → langroid-0.45.3.dist-info}/licenses/LICENSE +0 -0
langroid/agent/base.py
CHANGED
@@ -90,6 +90,7 @@ class AgentConfig(BaseSettings):
|
|
90
90
|
parsing: Optional[ParsingConfig] = ParsingConfig()
|
91
91
|
prompts: Optional[PromptsConfig] = PromptsConfig()
|
92
92
|
show_stats: bool = True # show token usage/cost stats?
|
93
|
+
hide_agent_response: bool = True # hide agent response?
|
93
94
|
add_to_registry: bool = True # register agent in ObjectRegistry?
|
94
95
|
respond_tools_only: bool = False # respond only to tool messages (not plain text)?
|
95
96
|
# allow multiple tool messages in a single response?
|
@@ -460,6 +461,28 @@ class Agent(ABC):
|
|
460
461
|
recipient=recipient,
|
461
462
|
)
|
462
463
|
|
464
|
+
def render_agent_response(
|
465
|
+
self,
|
466
|
+
results: Optional[str | OrderedDict[str, str] | ChatDocument],
|
467
|
+
) -> None:
|
468
|
+
"""
|
469
|
+
Render the response from the agent, typically from tool-handling.
|
470
|
+
Args:
|
471
|
+
results: results from tool-handling, which may be a string,
|
472
|
+
a dict of tool results, or a ChatDocument.
|
473
|
+
"""
|
474
|
+
if self.config.hide_agent_response or results is None:
|
475
|
+
return
|
476
|
+
if isinstance(results, str):
|
477
|
+
results_str = results
|
478
|
+
elif isinstance(results, ChatDocument):
|
479
|
+
results_str = results.content
|
480
|
+
elif isinstance(results, dict):
|
481
|
+
results_str = json.dumps(results, indent=2)
|
482
|
+
if not settings.quiet:
|
483
|
+
console.print(f"[red]{self.indent}", end="")
|
484
|
+
print(f"[red]Agent: {escape(results_str)}")
|
485
|
+
|
463
486
|
def _agent_response_final(
|
464
487
|
self,
|
465
488
|
msg: Optional[str | ChatDocument],
|
@@ -477,8 +500,7 @@ class Agent(ABC):
|
|
477
500
|
elif isinstance(results, dict):
|
478
501
|
results_str = json.dumps(results, indent=2)
|
479
502
|
if not settings.quiet:
|
480
|
-
|
481
|
-
print(f"[red]Agent: {escape(results_str)}")
|
503
|
+
self.render_agent_response(results)
|
482
504
|
maybe_json = len(extract_top_level_json(results_str)) > 0
|
483
505
|
self.callbacks.show_agent_response(
|
484
506
|
content=results_str,
|
@@ -1341,8 +1363,7 @@ class Agent(ABC):
|
|
1341
1363
|
|
1342
1364
|
has_orch = any(isinstance(t, ORCHESTRATION_TOOLS) for t in tools)
|
1343
1365
|
if has_orch and len(tools) > 1:
|
1344
|
-
|
1345
|
-
return [err_str for _ in tools]
|
1366
|
+
return ["ERROR: Use ONE tool at a time!"] * len(tools)
|
1346
1367
|
|
1347
1368
|
return []
|
1348
1369
|
|
@@ -1477,8 +1498,6 @@ class Agent(ABC):
|
|
1477
1498
|
# as a response to the tool message even though the tool was not intended
|
1478
1499
|
# for this agent.
|
1479
1500
|
return None
|
1480
|
-
if len(tools) > 1 and not self.config.allow_multiple_tools:
|
1481
|
-
return self.to_ChatDocument("ERROR: Use ONE tool at a time!")
|
1482
1501
|
if len(tools) == 0:
|
1483
1502
|
fallback_result = self.handle_message_fallback(msg)
|
1484
1503
|
if fallback_result is None:
|
@@ -1487,10 +1506,14 @@ class Agent(ABC):
|
|
1487
1506
|
fallback_result,
|
1488
1507
|
chat_doc=msg if isinstance(msg, ChatDocument) else None,
|
1489
1508
|
)
|
1490
|
-
chat_doc = msg if isinstance(msg, ChatDocument) else None
|
1491
1509
|
|
1492
|
-
results =
|
1510
|
+
results: List[str | ChatDocument | None] = []
|
1511
|
+
if len(tools) > 1 and not self.config.allow_multiple_tools:
|
1512
|
+
results = ["ERROR: Use ONE tool at a time!"] * len(tools)
|
1513
|
+
if not results:
|
1514
|
+
results = self._get_multiple_orch_tool_errs(tools)
|
1493
1515
|
if not results:
|
1516
|
+
chat_doc = msg if isinstance(msg, ChatDocument) else None
|
1494
1517
|
results = [self.handle_tool_message(t, chat_doc=chat_doc) for t in tools]
|
1495
1518
|
# if there's a solitary ChatDocument|str result, return it as is
|
1496
1519
|
if len(results) == 1 and isinstance(results[0], (str, ChatDocument)):
|
langroid/agent/chat_agent.py
CHANGED
@@ -85,6 +85,8 @@ class ChatAgentConfig(AgentConfig):
|
|
85
85
|
enabled when such tool calls are not desired.
|
86
86
|
output_format_include_defaults: Whether to include fields with default arguments
|
87
87
|
in the output schema
|
88
|
+
full_citations: Whether to show source reference citation + content for each
|
89
|
+
citation, or just the main reference citation.
|
88
90
|
"""
|
89
91
|
|
90
92
|
system_message: str = "You are a helpful assistant."
|
@@ -101,6 +103,7 @@ class ChatAgentConfig(AgentConfig):
|
|
101
103
|
instructions_output_format: bool = True
|
102
104
|
output_format_include_defaults: bool = True
|
103
105
|
use_tools_on_output_format: bool = True
|
106
|
+
full_citations: bool = True # show source + content for each citation?
|
104
107
|
|
105
108
|
def _set_fn_or_tools(self, fn_available: bool) -> None:
|
106
109
|
"""
|
@@ -1854,14 +1857,15 @@ class ChatAgent(Agent):
|
|
1854
1857
|
# we won't have citations yet, so we're done
|
1855
1858
|
return
|
1856
1859
|
if response.metadata.has_citation:
|
1860
|
+
citation = (
|
1861
|
+
response.metadata.source_content
|
1862
|
+
if self.config.full_citations
|
1863
|
+
else response.metadata.source
|
1864
|
+
)
|
1857
1865
|
if not settings.quiet:
|
1858
|
-
print(
|
1859
|
-
"[grey37]SOURCES:\n"
|
1860
|
-
+ escape(response.metadata.source)
|
1861
|
-
+ "[/grey37]"
|
1862
|
-
)
|
1866
|
+
print("[grey37]SOURCES:\n" + escape(citation) + "[/grey37]")
|
1863
1867
|
self.callbacks.show_llm_response(
|
1864
|
-
content=str(
|
1868
|
+
content=str(citation),
|
1865
1869
|
is_tool=False,
|
1866
1870
|
cached=False,
|
1867
1871
|
language="text",
|
langroid/agent/special/doc_chat_agent.py
CHANGED
@@ -863,12 +863,13 @@ class DocChatAgent(ChatAgent):
|
|
863
863
|
# extract references like [^2], [^3], etc. from the final answer
|
864
864
|
citations = extract_markdown_references(final_answer)
|
865
865
|
# format the cited references as a string suitable for markdown footnote
|
866
|
-
citations_str = format_cited_references(citations, passages)
|
866
|
+
full_citations_str, citations_str = format_cited_references(citations, passages)
|
867
867
|
|
868
868
|
return ChatDocument(
|
869
869
|
content=final_answer, # does not contain citations
|
870
870
|
metadata=ChatDocMetaData(
|
871
|
-
source=citations_str, # only the
|
871
|
+
source=citations_str, # only the reference headers
|
872
|
+
source_content=full_citations_str, # reference + content
|
872
873
|
sender=Entity.LLM,
|
873
874
|
has_citation=len(citations) > 0,
|
874
875
|
cached=getattr(answer_doc.metadata, "cached", False),
|
langroid/mytypes.py
CHANGED
@@ -43,7 +43,8 @@ class Entity(str, Enum):
|
|
43
43
|
class DocMetaData(BaseModel):
|
44
44
|
"""Metadata for a document."""
|
45
45
|
|
46
|
-
source: str = "context"
|
46
|
+
source: str = "context" # just reference
|
47
|
+
source_content: str = "context" # reference and content
|
47
48
|
is_chunk: bool = False # if it is a chunk, don't split
|
48
49
|
id: str = Field(default_factory=lambda: str(uuid4()))
|
49
50
|
window_ids: List[str] = [] # for RAG: ids of chunks around this one
|
langroid/parsing/document_parser.py
CHANGED
@@ -404,8 +404,8 @@ class DocumentParser(Parser):
|
|
404
404
|
# that it needs to be combined with the next chunk.
|
405
405
|
while len(split) > self.config.chunk_size:
|
406
406
|
# pretty formatting of pages (e.g. 1-3, 4, 5-7)
|
407
|
-
p_0 = int(pages[0])
|
408
|
-
p_n = int(pages[-1])
|
407
|
+
p_0 = int(pages[0]) - self.config.page_number_offset
|
408
|
+
p_n = int(pages[-1]) - self.config.page_number_offset
|
409
409
|
page_str = f"pages {p_0}-{p_n}" if p_0 != p_n else f"page {p_0}"
|
410
410
|
text = self.tokenizer.decode(split[: self.config.chunk_size])
|
411
411
|
docs.append(
|
@@ -426,13 +426,15 @@ class DocumentParser(Parser):
|
|
426
426
|
# since it's already included in the prior chunk;
|
427
427
|
# the only exception is if there have been no chunks so far.
|
428
428
|
if len(split) > self.config.overlap or n_chunks == 0:
|
429
|
-
|
429
|
+
p_0 = int(pages[0]) - self.config.page_number_offset
|
430
|
+
p_n = int(pages[-1]) - self.config.page_number_offset
|
431
|
+
page_str = f"pages {p_0}-{p_n}" if p_0 != p_n else f"page {p_0}"
|
430
432
|
text = self.tokenizer.decode(split[: self.config.chunk_size])
|
431
433
|
docs.append(
|
432
434
|
Document(
|
433
435
|
content=text,
|
434
436
|
metadata=DocMetaData(
|
435
|
-
source=f"{self.source}
|
437
|
+
source=f"{self.source} {page_str}",
|
436
438
|
is_chunk=True,
|
437
439
|
id=common_id,
|
438
440
|
),
|
@@ -1361,6 +1363,10 @@ class GeminiPdfParser(DocumentParser):
|
|
1361
1363
|
|
1362
1364
|
|
1363
1365
|
class MarkerPdfParser(DocumentParser):
|
1366
|
+
"""
|
1367
|
+
Parse PDF files using the `marker` library: https://github.com/VikParuchuri/marker
|
1368
|
+
"""
|
1369
|
+
|
1364
1370
|
DEFAULT_CONFIG = {"paginate_output": True, "output_format": "markdown"}
|
1365
1371
|
|
1366
1372
|
def __init__(self, source: Union[str, bytes], config: ParsingConfig):
|
langroid/parsing/parser.py
CHANGED
@@ -103,6 +103,9 @@ class ParsingConfig(BaseSettings):
|
|
103
103
|
chunk_size: int = 200 # aim for this many tokens per chunk
|
104
104
|
overlap: int = 50 # overlap between chunks
|
105
105
|
max_chunks: int = 10_000
|
106
|
+
# offset to subtract from page numbers:
|
107
|
+
# e.g. if physical page 12 is displayed as page 1, set page_number_offset = 11
|
108
|
+
page_number_offset: int = 0
|
106
109
|
# aim to have at least this many chars per chunk when truncating due to punctuation
|
107
110
|
min_chunk_chars: int = 350
|
108
111
|
discard_chunk_chars: int = 5 # discard chunks with fewer than this many chars
|
langroid/utils/output/citations.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import List
|
1
|
+
from typing import List, Tuple
|
2
2
|
|
3
3
|
from langroid.mytypes import Document
|
4
4
|
|
@@ -66,7 +66,9 @@ def format_footnote_text(content: str, width: int = 0) -> str:
|
|
66
66
|
return "\n".join(output_lines)
|
67
67
|
|
68
68
|
|
69
|
-
def format_cited_references(
|
69
|
+
def format_cited_references(
|
70
|
+
citations: List[int], passages: list[Document]
|
71
|
+
) -> Tuple[str, str]:
|
70
72
|
"""
|
71
73
|
Given a list of (integer) citations, and a list of passages, return a string
|
72
74
|
that can be added as a footer to the main text, to show sources cited.
|
@@ -76,16 +78,24 @@ def format_cited_references(citations: List[int], passages: list[Document]) -> s
|
|
76
78
|
passages (list[Document]): list of passages (Document objects)
|
77
79
|
|
78
80
|
Returns:
|
79
|
-
str: formatted string of citations
|
81
|
+
str: formatted string of FULL citations (i.e. reference AND content)
|
82
|
+
for footnote in markdown;
|
83
|
+
str: formatted string of BRIEF citations (i.e. reference only)
|
84
|
+
for footnote in markdown.
|
80
85
|
"""
|
81
86
|
citations_str = ""
|
87
|
+
full_citations_str = ""
|
82
88
|
if len(citations) > 0:
|
83
89
|
# append [i] source, content for each citation
|
84
|
-
|
90
|
+
full_citations_str = "\n".join(
|
85
91
|
[
|
86
92
|
f"[^{c}] {passages[c-1].metadata.source}"
|
87
93
|
f"\n{format_footnote_text(passages[c-1].content)}"
|
88
94
|
for c in citations
|
89
95
|
]
|
90
96
|
)
|
91
|
-
|
97
|
+
# append [i] source for each citation
|
98
|
+
citations_str = "\n".join(
|
99
|
+
[f"[^{c}] {passages[c-1].metadata.source}" for c in citations]
|
100
|
+
)
|
101
|
+
return full_citations_str, citations_str
|
{langroid-0.45.1.dist-info → langroid-0.45.3.dist-info}/RECORD
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
langroid/__init__.py,sha256=z_fCOLQJPOw3LLRPBlFB5-2HyCjpPgQa4m4iY5Fvb8Y,1800
|
2
2
|
langroid/exceptions.py,sha256=OPjece_8cwg94DLPcOGA1ddzy5bGh65pxzcHMnssTz8,2995
|
3
|
-
langroid/mytypes.py,sha256=
|
3
|
+
langroid/mytypes.py,sha256=wfb320SFnZVTv_CgcLWsvoKBXxAFfY4EISeue8MFqpQ,2912
|
4
4
|
langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
|
6
|
-
langroid/agent/base.py,sha256=
|
6
|
+
langroid/agent/base.py,sha256=yM7ul2byHhwCFm6w8_4RULkPdhI8XR3n7XqNBS0hD20,79567
|
7
7
|
langroid/agent/batch.py,sha256=vi1r5i1-vN80WfqHDSwjEym_KfGsqPGUtwktmiK1nuk,20635
|
8
|
-
langroid/agent/chat_agent.py,sha256=
|
8
|
+
langroid/agent/chat_agent.py,sha256=Z53oleOUcOXVs_UL90spttGoAooe0mrx3tDtOuhKVms,85214
|
9
9
|
langroid/agent/chat_document.py,sha256=xzMtrPbaW-Y-BnF7kuhr2dorsD-D5rMWzfOqJ8HAoo8,17885
|
10
10
|
langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
|
11
11
|
langroid/agent/task.py,sha256=HB6N-Jn80HFqCf0ZYOC1v3Bn3oO7NLjShHQJJFwW0q4,90557
|
@@ -14,7 +14,7 @@ langroid/agent/xml_tool_message.py,sha256=6SshYZJKIfi4mkE-gIoSwjkEYekQ8GwcSiCv7a
|
|
14
14
|
langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
15
|
langroid/agent/callbacks/chainlit.py,sha256=UHB6P_J40vsVnssosqkpkOVWRf9NK4TOY0_G2g_Arsg,20900
|
16
16
|
langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
|
17
|
-
langroid/agent/special/doc_chat_agent.py,sha256=
|
17
|
+
langroid/agent/special/doc_chat_agent.py,sha256=nEiHzU5Ztb0Y7rPMg4kSf2M6bGS5s1Av_y5w0idAGIE,64763
|
18
18
|
langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVbS5Q3C2b2mr3V1Fd-Q,10419
|
19
19
|
langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
|
20
20
|
langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
|
@@ -81,10 +81,10 @@ langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeu
|
|
81
81
|
langroid/parsing/__init__.py,sha256=2oUWJJAxIavq9Wtw5RGlkXLq3GF3zgXeVLLW4j7yeb8,1138
|
82
82
|
langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
|
83
83
|
langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
|
84
|
-
langroid/parsing/document_parser.py,sha256=
|
84
|
+
langroid/parsing/document_parser.py,sha256=fyCx4X1192asom5tp3DNV4J5Em2u4Z7rCC0FA8dNsSQ,52954
|
85
85
|
langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
|
86
86
|
langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
|
87
|
-
langroid/parsing/parser.py,sha256=
|
87
|
+
langroid/parsing/parser.py,sha256=ZUvBhzMZQWKerbb9UECbcqkNc9wWKuUgPyC8L6baxao,14295
|
88
88
|
langroid/parsing/pdf_utils.py,sha256=rmNJ9UzuBgXTAYwj1TtRJcD8h53x7cizhgyYHKO88I4,1513
|
89
89
|
langroid/parsing/repo_loader.py,sha256=NpysuyzRHvgL3F4BB_wGo5sCUnZ3FOlVCJmZ7CaUdbs,30202
|
90
90
|
langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
|
@@ -115,7 +115,7 @@ langroid/utils/types.py,sha256=-BvyIf_LmAJ5jR9NC7S4CSVNEr3XayAaxJ5o0TiIej0,2992
|
|
115
115
|
langroid/utils/algorithms/__init__.py,sha256=WylYoZymA0fnzpB4vrsH_0n7WsoLhmuZq8qxsOCjUpM,41
|
116
116
|
langroid/utils/algorithms/graph.py,sha256=JbdpPnUOhw4-D6O7ou101JLA3xPCD0Lr3qaPoFCaRfo,2866
|
117
117
|
langroid/utils/output/__init__.py,sha256=7P0f--4IZneNsTxXY5fd6d6iW-CeVe-KSsl-87sbBPc,340
|
118
|
-
langroid/utils/output/citations.py,sha256=
|
118
|
+
langroid/utils/output/citations.py,sha256=ltdhBNRlF5qh8XnCVeeGKp1k0XZRcF22avDO4fadxH0,3547
|
119
119
|
langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
|
120
120
|
langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
|
121
121
|
langroid/vector_store/__init__.py,sha256=8ktJUVsVUoc7FMmkUFpFBZu7VMWUqQY9zpm4kEJ8yTs,1537
|
@@ -127,7 +127,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
|
|
127
127
|
langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
|
128
128
|
langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
|
129
129
|
langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
|
130
|
-
langroid-0.45.
|
131
|
-
langroid-0.45.
|
132
|
-
langroid-0.45.
|
133
|
-
langroid-0.45.
|
130
|
+
langroid-0.45.3.dist-info/METADATA,sha256=_6oOG_rHqN8JUymnv8uIbHaqpk5N7gHiUUqMqEFXvFc,63335
|
131
|
+
langroid-0.45.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
132
|
+
langroid-0.45.3.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
133
|
+
langroid-0.45.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|