langroid 0.1.249__py3-none-any.whl → 0.1.250__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,6 @@ pip install "langroid[hf-embeddings]"
14
14
  """
15
15
 
16
16
  import logging
17
- import re
18
17
  from functools import cache
19
18
  from typing import Any, Dict, List, Optional, Set, Tuple, no_type_check
20
19
 
@@ -31,6 +30,7 @@ from langroid.agent.special.relevance_extractor_agent import (
31
30
  RelevanceExtractorAgentConfig,
32
31
  )
33
32
  from langroid.agent.task import Task
33
+ from langroid.agent.tools.retrieval_tool import RetrievalTool
34
34
  from langroid.embedding_models.models import OpenAIEmbeddingsConfig
35
35
  from langroid.language_models.base import StreamingIfAllowed
36
36
  from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig
@@ -82,21 +82,47 @@ except ImportError:
82
82
  pass
83
83
 
84
84
 
85
- def extract_citations(text: str) -> List[int]:
86
- # Find all patterns that match [[<numbers>]]
87
- matches = re.findall(r"\[\[([\d,]+)\]\]", text)
85
def extract_markdown_references(md_string: str) -> list[int]:
    """
    Collects the footnote-style reference numbers (e.g., [^1], [^2]) that
    occur in a markdown string.

    Args:
        md_string (str): The markdown text to scan for references.

    Returns:
        list[int]: The distinct reference numbers, in ascending order.
    """
    import re

    # One capture group holding the digits between "[^" and "]"
    reference_pattern = re.compile(r"\[\^(\d+)\]")
    # The set comprehension discards repeated references before sorting
    unique_numbers = {
        int(found.group(1)) for found in reference_pattern.finditer(md_string)
    }
    return sorted(unique_numbers)
88
102
 
89
- # Initialize a set to hold distinct citation numbers
90
- citations: Set[int] = set()
91
103
 
92
- # Process each match
93
- for match in matches:
94
- # Split numbers by comma and convert to integers
95
- numbers = match.split(",")
96
- citations.update(int(number) for number in numbers)
104
def format_footnote_text(content: str, width: int = 80) -> str:
    """
    Formats the content part of a footnote (i.e. not the first line that
    appears right after the reference [^4]).

    The text is wrapped so that no line is longer than the specified width,
    and every resulting line is then prefixed with a four-space indent, the
    indentation conventionally used for continuation lines of a markdown
    footnote. The indent is added after wrapping, so the returned lines can be
    up to `width + 4` characters long. Wrapping is done with `textwrap.wrap`,
    which treats newlines in `content` as ordinary whitespace, so paragraph
    breaks are not preserved.

    Args:
        content (str): The text of the footnote to be formatted.
        width (int): Maximum width of the text lines (before indentation).

    Returns:
        str: Properly formatted markdown footnote text, or an empty string if
            `content` is empty or contains only whitespace.
    """
    import textwrap

    # Wrap the text to the specified width
    wrapped_lines = textwrap.wrap(content, width)
    if not wrapped_lines:
        return ""
    indent = "    "  # Four spaces: indentation for markdown footnotes
    return "\n".join(indent + line for line in wrapped_lines)
100
126
 
101
127
 
102
128
  class DocChatAgentConfig(ChatAgentConfig):
@@ -438,6 +464,13 @@ class DocChatAgent(ChatAgent):
438
464
  self.setup_documents(docs, filter=self.config.filter)
439
465
  return len(docs)
440
466
 
467
def retrieval_tool(self, msg: RetrievalTool) -> str:
    """Handle the RetrievalTool message.

    Temporarily puts the agent's config into retrieve-only mode with
    `msg.num_results` as the number of similar docs, runs
    `answer_from_docs` on `msg.query`, and returns the content of the
    resulting document.

    The previous values of `config.retrieve_only` and
    `config.parsing.n_similar_docs` are restored afterwards (even if
    `answer_from_docs` raises), so that handling this tool does not
    silently change how later calls on the same agent behave.

    Args:
        msg (RetrievalTool): the tool message carrying `query` and
            `num_results`.

    Returns:
        str: the `content` of the document returned by `answer_from_docs`.
    """
    saved_retrieve_only = self.config.retrieve_only
    saved_n_similar_docs = self.config.parsing.n_similar_docs
    self.config.retrieve_only = True
    self.config.parsing.n_similar_docs = msg.num_results
    try:
        content_doc = self.answer_from_docs(msg.query)
    finally:
        # Undo the per-call overrides so they do not leak into later calls
        self.config.retrieve_only = saved_retrieve_only
        self.config.parsing.n_similar_docs = saved_n_similar_docs
    return content_doc.content
473
+
441
474
  @staticmethod
442
475
  def document_compatible_dataframe(
443
476
  df: pd.DataFrame,
@@ -808,14 +841,15 @@ class DocChatAgent(ChatAgent):
808
841
  final_answer = answer_doc.content.strip()
809
842
  show_if_debug(final_answer, "SUMMARIZE_RESPONSE= ")
810
843
 
811
- citations = extract_citations(final_answer)
844
+ citations = extract_markdown_references(final_answer)
812
845
 
813
846
  citations_str = ""
814
847
  if len(citations) > 0:
815
848
  # append [i] source, content for each citation
816
849
  citations_str = "\n".join(
817
850
  [
818
- f"[{c}] {passages[c-1].metadata.source}\n{passages[c-1].content}"
851
+ f"[^{c}] {passages[c-1].metadata.source}"
852
+ f"\n{format_footnote_text(passages[c-1].content)}"
819
853
  for c in citations
820
854
  ]
821
855
  )
@@ -0,0 +1,29 @@
1
+ from typing import List
2
+
3
+ from langroid.agent.tool_message import ToolMessage
4
+
5
+
6
class RetrievalTool(ToolMessage):
    """
    Tool message requesting passages relevant to a query.

    Only to be used by a DocChatAgent.
    """

    # Name under which the tool is requested
    request: str = "retrieval_tool"
    # Description of what the tool is for
    purpose: str = """
    To retrieve up to <num_results> passages from a document-set, that are
    relevant to a <query>, which could be a question or simply a topic or
    search phrase.
    """
    # Question, topic or search phrase to retrieve passages for
    query: str
    # Upper bound on the number of passages to retrieve
    num_results: int

    @classmethod
    def examples(cls) -> List["ToolMessage"]:
        """Return sample instances of this tool message."""
        sample_args = [
            ("What are the eligibility criteria for the scholarship?", 3),
            ("Self-Attention mechanism in RNNs", 5),
        ]
        return [
            cls(query=sample_query, num_results=sample_count)
            for sample_query, sample_count in sample_args
        ]
@@ -55,13 +55,14 @@ SUMMARY_ANSWER_PROMPT_GPT4 = f"""
55
55
  information in these extracts, even if your answer is factually incorrect,
56
56
  and even if the answer contradicts other parts of the document. The only
57
57
  important thing is that your answer is consistent with and supported by the
58
- extracts. Compose your complete answer, inserting CITATIONS
59
- in the format [[i,j,...]] where i,j,... are the extract NUMBERS you are citing.
58
+ extracts. Compose your complete answer, inserting CITATIONS in MARKDOWN format
59
+ [^i][^j] where i,j,... are the extract NUMBERS you are
60
+ citing.
60
61
  For example your answer might look like this (NOTE HOW multiple citations
61
- are grouped as [[2,5]]):
62
+ are grouped as [^2][^5]):
62
63
 
63
- Beethoven composed the 9th symphony in 1824. [[1]] After that he became deaf
64
- and could not hear his own music. [[2,5]] He was a prolific composer and
64
+ Beethoven composed the 9th symphony in 1824.[^1] After that he became deaf
65
+ and could not hear his own music. [^2][^5]. He was a prolific composer and
65
66
  wrote many famous pieces.
66
67
 
67
68
  NUMBERED EXTRACTS:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langroid
3
- Version: 0.1.249
3
+ Version: 0.1.250
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  License: MIT
6
6
  Author: Prasad Chalasani
@@ -237,6 +237,8 @@ teacher_task.run()
237
237
  <summary> <b>Click to expand</b></summary>
238
238
 
239
239
  - **May 2024:**
240
+ - [Much-Improved Citation](https://github.com/langroid/langroid/issues/477)
241
+ generation and display when using `DocChatAgent`.
240
242
  - `gpt-4o` is now the default LLM throughout; Update tests and examples to work
241
243
  with this LLM; use tokenizer corresponding to the LLM.
242
244
  - `gemini 1.5 pro` support via `litellm`
@@ -10,7 +10,7 @@ langroid/agent/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  langroid/agent/junk,sha256=LxfuuW7Cijsg0szAzT81OjWWv1PMNI-6w_-DspVIO2s,339
11
11
  langroid/agent/openai_assistant.py,sha256=kIVDI4r-xGvplLU5s0nShPVHs6Jq-wOsfWE0kcMhAdQ,33056
12
12
  langroid/agent/special/__init__.py,sha256=NG0JkB5y4K0bgnd9Q9UIvFExun3uTfVOWEVLVymff1M,1207
13
- langroid/agent/special/doc_chat_agent.py,sha256=SBatLDoa2_Ju_Gk_El8FmlMekgHmgpBU1ihx26yFIvc,54008
13
+ langroid/agent/special/doc_chat_agent.py,sha256=MTUrUyCZ7_wksTo11AwSTHMOKZf1WX-cAJowi_sfT2o,55320
14
14
  langroid/agent/special/lance_doc_chat_agent.py,sha256=USp0U3eTaJzwF_3bdqE7CedSLbaqAi2tm-VzygcyLaA,10175
15
15
  langroid/agent/special/lance_rag/__init__.py,sha256=QTbs0IVE2ZgDg8JJy1zN97rUUg4uEPH7SLGctFNumk4,174
16
16
  langroid/agent/special/lance_rag/critic_agent.py,sha256=OsOcpcU_AmU2MagpZ5X5yxFeXyteKN9QJMzJGqIITig,6871
@@ -41,6 +41,7 @@ langroid/agent/tools/generator_tool.py,sha256=y0fB0ZObjA0b3L0uSTtrqRCKHDUR95arBf
41
41
  langroid/agent/tools/google_search_tool.py,sha256=cQxcNtb8XCNpOo_yCeYRwG_y-OATjPgkbr01kea9qWE,1421
42
42
  langroid/agent/tools/metaphor_search_tool.py,sha256=NKHss-AkI942_XhfMgUctAwHjIHpqp5NfYIebKV4UcE,2454
43
43
  langroid/agent/tools/recipient_tool.py,sha256=61vdKv06qgVdtnE3gxjzV8RvUEy8JhbC9eWa0J0BPdw,9171
44
+ langroid/agent/tools/retrieval_tool.py,sha256=6uvRNg-kG_ItPa3sF9NWkthQ5frHn8bkB1Z3GSd3Oas,836
44
45
  langroid/agent/tools/run_python_code.py,sha256=V3mHdHQYn0M0PAtyoHxjNvk6KvWWcQ4ugo0TOKc8HyI,1752
45
46
  langroid/agent/tools/segment_extract_tool.py,sha256=W39poS7Av2EuJ34tGKhLhzgj3zEyZnBplpSt2goRAp4,1285
46
47
  langroid/agent_config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -93,7 +94,7 @@ langroid/prompts/__init__.py,sha256=B0vpJzIJlMR3mFRtoQwyALsFzBHvLp9f92acD8xJA_0,
93
94
  langroid/prompts/chat-gpt4-system-prompt.md,sha256=Q3uLCJTPQvmUkZN2XDnkBC7M2K3X0F3C3GIQBaFvYvw,5329
94
95
  langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
95
96
  langroid/prompts/prompts_config.py,sha256=XRQHzod7KBnoKn3B_V878jZiqBA7rcn-CtGPkuAe_yM,131
96
- langroid/prompts/templates.py,sha256=NxMyPIhDjmL3pNXBaNLrIsebETPQHr6VG5NWO_93NeA,6303
97
+ langroid/prompts/templates.py,sha256=kz0rPiM6iLGhhpDonF3Y87OznSe9FRI6A0pHU0wgW4Q,6314
97
98
  langroid/prompts/transforms.py,sha256=GsQo1klGxUy0fACh6j0lTblk6XEl2erRnhRWlN2M4-c,2706
98
99
  langroid/utils/__init__.py,sha256=ARx5To4Hsv1K5QAzK4uUqdEoB_iq5HK797vae1AcMBI,300
99
100
  langroid/utils/algorithms/__init__.py,sha256=WylYoZymA0fnzpB4vrsH_0n7WsoLhmuZq8qxsOCjUpM,41
@@ -121,7 +122,7 @@ langroid/vector_store/meilisearch.py,sha256=d2huA9P-NoYRuAQ9ZeXJmMKr7ry8u90RUSR2
121
122
  langroid/vector_store/momento.py,sha256=9cui31TTrILid2KIzUpBkN2Ey3g_CZWOQVdaFsA4Ors,10045
122
123
  langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
123
124
  langroid/vector_store/qdrantdb.py,sha256=sk5Qb2ZNbooi0rorsMuqIMokF7WADw6PJ0D6goM2XBw,16802
124
- langroid-0.1.249.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
125
- langroid-0.1.249.dist-info/METADATA,sha256=1md_MzQhAHs9J7_OEEOfiL8C4N8GlWj5BBQGK4HrkGw,49426
126
- langroid-0.1.249.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
127
- langroid-0.1.249.dist-info/RECORD,,
125
+ langroid-0.1.250.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
126
+ langroid-0.1.250.dist-info/METADATA,sha256=FOv0qbbaZxR3TJWncDj2NvvslRp7RXOI0Xahb4yBT3I,49559
127
+ langroid-0.1.250.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
128
+ langroid-0.1.250.dist-info/RECORD,,