PyPI - langroid - Versions diffs - 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl - Mend

langroid 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

langroid/agent/special/doc_chat_agent.py CHANGED Viewed

@@ -14,6 +14,7 @@ pip install "langroid[hf-embeddings]"
 """
 import logging
+from collections import OrderedDict
 from functools import cache
 from typing import Any, Dict, List, Optional, Set, Tuple, no_type_check
@@ -130,12 +131,16 @@ class DocChatAgentConfig(ChatAgentConfig):
     n_fuzzy_neighbor_words: int = 100  # num neighbor words to retrieve for fuzzy match
     use_fuzzy_match: bool = True
     use_bm25_search: bool = True
+    use_reciprocal_rank_fusion: bool = True  # ignored if using cross-encoder reranking
     cross_encoder_reranking_model: str = (
         "cross-encoder/ms-marco-MiniLM-L-6-v2" if has_sentence_transformers else ""
     )
     rerank_diversity: bool = True  # rerank to maximize diversity?
     rerank_periphery: bool = True  # rerank to avoid Lost In the Middle effect?
     rerank_after_adding_context: bool = True  # rerank after adding context window?
+    # RRF (Reciprocal Rank Fusion) score = 1/(rank + reciprocal_rank_fusion_constant)
+    # see https://learn.microsoft.com/en-us/azure/search/hybrid-search-ranking#how-rrf-ranking-works
+    reciprocal_rank_fusion_constant: float = 60.0
     cache: bool = True  # cache results
     debug: bool = False
     stream: bool = True  # allow streaming where needed
@@ -1105,10 +1110,17 @@ class DocChatAgent(ChatAgent):
         Returns:
         """
-        # if we are using cross-encoder reranking, we can retrieve more docs
-        # during retrieval, and leave it to the cross-encoder re-ranking
-        # to whittle down to self.config.parsing.n_similar_docs
-        retrieval_multiple = 1 if self.config.cross_encoder_reranking_model == "" else 3
+        # if we are using cross-encoder reranking or reciprocal rank fusion (RRF),
+        # we can retrieve more docs during retrieval, and leave it to the cross-encoder
+        # or RRF reranking to whittle down to self.config.parsing.n_similar_docs
+        retrieval_multiple = (
+            1
+            if (
+                self.config.cross_encoder_reranking_model == ""
+                and not self.config.use_reciprocal_rank_fusion
+            )
+            else 3
+        )
         if self.vecdb is None:
             raise ValueError("VecDB not set")
@@ -1120,28 +1132,98 @@ class DocChatAgent(ChatAgent):
                     q,
                     k=self.config.parsing.n_similar_docs * retrieval_multiple,
                 )
+                # sort by score descending
+                docs_and_scores = sorted(
+                    docs_and_scores, key=lambda x: x[1], reverse=True
+                )
         # keep only docs with unique d.id()
-        id2doc_score = {d.id(): (d, s) for d, s in docs_and_scores}
-        docs_and_scores = list(id2doc_score.values())
-        passages = [d for (d, _) in docs_and_scores]
-        # passages = [
-        #     Document(content=d.content, metadata=d.metadata)
-        #     for (d, _) in docs_and_scores
-        # ]
+        id2_rank_semantic = {d.id(): i for i, (d, _) in enumerate(docs_and_scores)}
+        id2doc = {d.id(): d for d, _ in docs_and_scores}
+        # make sure we get unique docs
+        passages = [id2doc[id] for id, _ in id2_rank_semantic.items()]
+        id2_rank_bm25 = {}
         if self.config.use_bm25_search:
             # TODO: Add score threshold in config
             docs_scores = self.get_similar_chunks_bm25(query, retrieval_multiple)
-            passages += [d for (d, _) in docs_scores]
+            if self.config.cross_encoder_reranking_model == "":
+                # only if we're not re-ranking with a cross-encoder,
+                # we collect these ranks for Reciprocal Rank Fusion down below.
+                docs_scores = sorted(docs_scores, key=lambda x: x[1], reverse=True)
+                id2_rank_bm25 = {d.id(): i for i, (d, _) in enumerate(docs_scores)}
+                id2doc.update({d.id(): d for d, _ in docs_scores})
+            else:
+                passages += [d for (d, _) in docs_scores]
+        id2_rank_fuzzy = {}
         if self.config.use_fuzzy_match:
             # TODO: Add score threshold in config
             fuzzy_match_doc_scores = self.get_fuzzy_matches(query, retrieval_multiple)
-            passages += [d for (d, _) in fuzzy_match_doc_scores]
+            if self.config.cross_encoder_reranking_model == "":
+                # only if we're not re-ranking with a cross-encoder,
+                # we collect these ranks for Reciprocal Rank Fusion down below.
+                fuzzy_match_doc_scores = sorted(
+                    fuzzy_match_doc_scores, key=lambda x: x[1], reverse=True
+                )
+                id2_rank_fuzzy = {
+                    d.id(): i for i, (d, _) in enumerate(fuzzy_match_doc_scores)
+                }
+                id2doc.update({d.id(): d for d, _ in fuzzy_match_doc_scores})
+            else:
+                passages += [d for (d, _) in fuzzy_match_doc_scores]
-        # keep unique passages
-        id2passage = {p.id(): p for p in passages}
-        passages = list(id2passage.values())
+        if (
+            self.config.cross_encoder_reranking_model == ""
+            and self.config.use_reciprocal_rank_fusion
+            and (self.config.use_bm25_search or self.config.use_fuzzy_match)
+        ):
+            # Since we're not using cross-encoder re-ranking,
+            # we need to re-order the retrieved chunks from potentially three
+            # different retrieval methods (semantic, bm25, fuzzy), where the
+            # similarity scores are on different scales.
+            # We order the retrieved chunks using Reciprocal Rank Fusion (RRF) score.
+            # Combine the ranks from each id2_rank_* dict into a single dict,
+            # where the reciprocal rank score is the sum of
+            # 1/(rank + self.config.reciprocal_rank_fusion_constant).
+            # See https://learn.microsoft.com/en-us/azure/search/hybrid-search-ranking
+            #
+            # Note: diversity/periphery-reranking below may modify the final ranking.
+            id2_reciprocal_score = {}
+            for id_ in (
+                set(id2_rank_semantic.keys())
+                | set(id2_rank_bm25.keys())
+                | set(id2_rank_fuzzy.keys())
+            ):
+                rank_semantic = id2_rank_semantic.get(id_, float("inf"))
+                rank_bm25 = id2_rank_bm25.get(id_, float("inf"))
+                rank_fuzzy = id2_rank_fuzzy.get(id_, float("inf"))
+                c = self.config.reciprocal_rank_fusion_constant
+                reciprocal_fusion_score = (
+                    1 / (rank_semantic + c) + 1 / (rank_bm25 + c) + 1 / (rank_fuzzy + c)
+                )
+                id2_reciprocal_score[id_] = reciprocal_fusion_score
+            # sort the docs by the reciprocal score, in descending order
+            id2_reciprocal_score = OrderedDict(
+                sorted(
+                    id2_reciprocal_score.items(),
+                    key=lambda x: x[1],
+                    reverse=True,
+                )
+            )
+            # each method retrieved up to retrieval_multiple * n_similar_docs,
+            # so we need to take the top n_similar_docs from the combined list
+            passages = [
+                id2doc[id]
+                for i, (id, _) in enumerate(id2_reciprocal_score.items())
+                if i < self.config.parsing.n_similar_docs
+            ]
+            # passages must have distinct ids
+            assert len(passages) == len(set([d.id() for d in passages])), (
+                f"Duplicate passages in retrieved docs: {len(passages)} != "
+                f"{len(set([d.id() for d in passages]))}"
+            )
         if len(passages) == 0:
             return []
@@ -1171,7 +1253,7 @@ class DocChatAgent(ChatAgent):
             passages_scores = self.add_context_window(passages_scores)
             passages = [p for p, _ in passages_scores]
-        return passages
+        return passages[: self.config.parsing.n_similar_docs]
     @no_type_check
     def get_relevant_extracts(self, query: str) -> Tuple[str, List[Document]]:

langroid/language_models/openai_gpt.py CHANGED Viewed

@@ -21,6 +21,7 @@ from typing import (
 )
 import openai
+from cerebras.cloud.sdk import AsyncCerebras, Cerebras
 from groq import AsyncGroq, Groq
 from httpx import Timeout
 from openai import AsyncOpenAI, OpenAI
@@ -371,8 +372,8 @@ class OpenAIGPT(LanguageModel):
     Class for OpenAI LLMs
     """
-    client: OpenAI | Groq
-    async_client: AsyncOpenAI | AsyncGroq
+    client: OpenAI | Groq | Cerebras
+    async_client: AsyncOpenAI | AsyncGroq | AsyncCerebras
     def __init__(self, config: OpenAIGPTConfig = OpenAIGPTConfig()):
         """
@@ -479,6 +480,7 @@ class OpenAIGPT(LanguageModel):
             self.api_key = DUMMY_API_KEY
         self.is_groq = self.config.chat_model.startswith("groq/")
+        self.is_cerebras = self.config.chat_model.startswith("cerebras/")
         if self.is_groq:
             self.config.chat_model = self.config.chat_model.replace("groq/", "")
@@ -489,6 +491,16 @@ class OpenAIGPT(LanguageModel):
             self.async_client = AsyncGroq(
                 api_key=self.api_key,
             )
+        elif self.is_cerebras:
+            self.config.chat_model = self.config.chat_model.replace("cerebras/", "")
+            self.api_key = os.getenv("CEREBRAS_API_KEY", DUMMY_API_KEY)
+            self.client = Cerebras(
+                api_key=self.api_key,
+            )
+            # TODO there is not async client, so should we do anything here?
+            self.async_client = AsyncCerebras(
+                api_key=self.api_key,
+            )
         else:
             self.client = OpenAI(
                 api_key=self.api_key,
@@ -1096,8 +1108,8 @@ class OpenAIGPT(LanguageModel):
         if self.config.use_chat_for_completion:
             return self.chat(messages=prompt, max_tokens=max_tokens)
-        if self.is_groq:
-            raise ValueError("Groq does not support pure completions")
+        if self.is_groq or self.is_cerebras:
+            raise ValueError("Groq, Cerebras do not support pure completions")
         if settings.debug:
             print(f"[grey37]PROMPT: {escape(prompt)}[/grey37]")
@@ -1174,8 +1186,8 @@ class OpenAIGPT(LanguageModel):
         if self.config.use_chat_for_completion:
             return await self.achat(messages=prompt, max_tokens=max_tokens)
-        if self.is_groq:
-            raise ValueError("Groq does not support pure completions")
+        if self.is_groq or self.is_cerebras:
+            raise ValueError("Groq, Cerebras do not support pure completions")
         if settings.debug:
             print(f"[grey37]PROMPT: {escape(prompt)}[/grey37]")

{langroid-0.13.0.dist-info → langroid-0.15.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: langroid
-Version: 0.13.0
+Version: 0.15.0
 Summary: Harness LLMs with Multi-Agent Programming
 License: MIT
 Author: Prasad Chalasani
@@ -38,6 +38,7 @@ Provides-Extra: vecdbs
 Requires-Dist: aiohttp (>=3.9.1,<4.0.0)
 Requires-Dist: async-generator (>=1.10,<2.0)
 Requires-Dist: bs4 (>=0.0.1,<0.0.2)
+Requires-Dist: cerebras-cloud-sdk (>=1.1.0,<2.0.0)
 Requires-Dist: chainlit (==1.1.202) ; extra == "all" or extra == "chainlit"
 Requires-Dist: chromadb (>=0.4.21,<=0.4.23) ; extra == "vecdbs" or extra == "all" or extra == "chromadb"
 Requires-Dist: colorlog (>=6.7.0,<7.0.0)
@@ -153,6 +154,8 @@ This Multi-Agent paradigm is inspired by the
 `Langroid` is a fresh take on LLM app-development, where considerable thought has gone
 into simplifying the developer experience; it does not use `Langchain`.
+:fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/)
 📢 Companies are using/adapting Langroid in **production**. Here is a quote:
 >[Nullify](https://www.nullify.ai) uses AI Agents for secure software development.

{langroid-0.13.0.dist-info → langroid-0.15.0.dist-info}/RECORD RENAMED Viewed

@@ -10,7 +10,7 @@ langroid/agent/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/junk,sha256=LxfuuW7Cijsg0szAzT81OjWWv1PMNI-6w_-DspVIO2s,339
 langroid/agent/openai_assistant.py,sha256=2rjCZw45ysNBEGNzQM4uf0bTC4KkatGYAWcVcW4xcek,34337
 langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
-langroid/agent/special/doc_chat_agent.py,sha256=dqm0Gp11Mfl4hOWN4sUR1uZL-oHEmHzcB6bNN6WFgqw,54784
+langroid/agent/special/doc_chat_agent.py,sha256=r1uPunYf2lQcqYQ4fsD8Q5gB9cZyf7cn0KPcR_CLtrU,59065
 langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVbS5Q3C2b2mr3V1Fd-Q,10419
 langroid/agent/special/lance_rag/__init__.py,sha256=QTbs0IVE2ZgDg8JJy1zN97rUUg4uEPH7SLGctFNumk4,174
 langroid/agent/special/lance_rag/critic_agent.py,sha256=OtFuHthKQLkdVkvuZ2m0GNq1qOYLqHkm1pfLRFnSg5c,9548
@@ -72,7 +72,7 @@ langroid/language_models/azure_openai.py,sha256=G4le3j4YLHV7IwgB2C37hO3MKijZ1Kjy
 langroid/language_models/base.py,sha256=ytJ_0Jw5erbqrqLPp4JMCo_nIkwzUvBqoKUr8Sae9Qg,21792
 langroid/language_models/config.py,sha256=9Q8wk5a7RQr8LGMT_0WkpjY8S4ywK06SalVRjXlfCiI,378
 langroid/language_models/mock_lm.py,sha256=HuiAvjHiCfffYF5xjFJUq945HVTW0QPbeUUctOnNCzQ,3868
-langroid/language_models/openai_gpt.py,sha256=T-Gznbv8Nqrkf8rmO3L6pRVracYE1oG_LhrfenzdfNA,61386
+langroid/language_models/openai_gpt.py,sha256=1wG1nXho6bLOWyWqlR51uY45ZFkt5NWXx0hbXzKLVoQ,62050
 langroid/language_models/prompt_formatter/__init__.py,sha256=2-5cdE24XoFDhifOLl8yiscohil1ogbP1ECkYdBlBsk,372
 langroid/language_models/prompt_formatter/base.py,sha256=eDS1sgRNZVnoajwV_ZIha6cba5Dt8xjgzdRbPITwx3Q,1221
 langroid/language_models/prompt_formatter/hf_formatter.py,sha256=PVJppmjRvD-2DF-XNC6mE05vTZ9wbu37SmXwZBQhad0,5055
@@ -137,8 +137,8 @@ langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3Hmh
 langroid/vector_store/momento.py,sha256=qR-zBF1RKVHQZPZQYW_7g-XpTwr46p8HJuYPCkfJbM4,10534
 langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
 langroid/vector_store/qdrantdb.py,sha256=v88lqFkepADvlN6lByUj9I4NEKa9X9lWH16uTPPbYrE,17457
-pyproject.toml,sha256=g99bgxP-XUiTx-KsdFICVJuV2bB89areQkDRU5sIgmk,7107
-langroid-0.13.0.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.13.0.dist-info/METADATA,sha256=Znhge-Z8nn_L7Lxeh8dWs04d4ejZfj0NCCRutJJSkdg,55259
-langroid-0.13.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-langroid-0.13.0.dist-info/RECORD,,
+pyproject.toml,sha256=lazmZZ-COR5jUFIhJLYTnonrxAPmAstppjFy6GkS5UE,7137
+langroid-0.15.0.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.15.0.dist-info/METADATA,sha256=8Ffr9Et34izfKr-7AJdO7HjRalVA5rzu1ADDss089Dk,55481
+langroid-0.15.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+langroid-0.15.0.dist-info/RECORD,,

pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langroid"
-version = "0.13.0"
+version = "0.15.0"
 description = "Harness LLMs with Multi-Agent Programming"
 authors = ["Prasad Chalasani <pchalasani@gmail.com>"]
 readme = "README.md"
@@ -89,6 +89,7 @@ async-generator = "^1.10"
 python-magic = "^0.4.27"
 json-repair = "^0.27.0"
+cerebras-cloud-sdk = "^1.1.0"
 [tool.poetry.extras]

{langroid-0.13.0.dist-info → langroid-0.15.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{langroid-0.13.0.dist-info → langroid-0.15.0.dist-info}/WHEEL RENAMED Viewed

File without changes

langroid 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl

langroid 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl