langroid 0.43.1__py3-none-any.whl → 0.44.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langroid/agent/callbacks/chainlit.py CHANGED
@@ -5,7 +5,16 @@ Callbacks for Chainlit integration.
  import json
  import logging
  import textwrap
- from typing import Any, Callable, Dict, List, Literal, Optional, no_type_check
+ from typing import (
+     TYPE_CHECKING,
+     Any,
+     Callable,
+     Dict,
+     List,
+     Literal,
+     Optional,
+     no_type_check,
+ )

  from langroid.exceptions import LangroidImportError
  from langroid.pydantic_v1 import BaseSettings
@@ -18,7 +27,8 @@ except ImportError:
  from chainlit import run_sync
  from chainlit.logger import logger

- import langroid as lr
+ if TYPE_CHECKING:
+     from langroid import Agent, Task
  import langroid.language_models as lm
  from langroid.language_models import StreamEventType
  from langroid.utils.configuration import settings
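
This hunk introduces the release's recurring idiom: imports needed only for type annotations move under a `TYPE_CHECKING` guard, and the annotations themselves become strings, so the heavy dependency is never imported at runtime. A minimal self-contained sketch of the idiom (the `Callbacks` class here is illustrative, not langroid's):

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Seen only by static type checkers (mypy, pyright); never executed,
    # so importing this module no longer pays for importing langroid.
    from langroid import Agent


class Callbacks:
    # Quoted ("forward reference") annotations are not evaluated at runtime,
    # so they work even though Agent is undefined outside type checking.
    parent_agent: Optional["Agent"] = None

    def set_parent_agent(self, parent: "Agent") -> None:
        self.parent_agent = parent
```

The remaining hunks in this file apply exactly this substitution, replacing `lr.Agent` and `lr.Task` annotations with `"Agent"` and `"Task"`.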
@@ -222,11 +232,11 @@ class ChainlitAgentCallbacks:
      last_step: Optional[cl.Step] = None  # used to display sub-steps under this
      curr_step: Optional[cl.Step] = None  # used to update an initiated step
      stream: Optional[cl.Step] = None  # pushed into openai_gpt.py to stream tokens
-     parent_agent: Optional[lr.Agent] = None  # used to get parent id, for step nesting
+     parent_agent: Optional["Agent"] = None  # used to get parent id, for step nesting

      def __init__(
          self,
-         agent: lr.Agent,
+         agent: "Agent",
          config: ChainlitCallbackConfig = ChainlitCallbackConfig(),
      ):
          """Add callbacks to the agent, and save the initial message,
@@ -245,7 +255,7 @@ class ChainlitAgentCallbacks:
          agent.callbacks.show_error_message = self.show_error_message
          agent.callbacks.show_start_response = self.show_start_response
          self.config = config
-         self.agent: lr.Agent = agent
+         self.agent: "Agent" = agent
          if self.agent.llm is not None:
              # We don't want to suppress LLM output in async + streaming,
              # since we often use chainlit async callbacks to display LLM output
@@ -271,7 +281,7 @@ class ChainlitAgentCallbacks:
          )
          return last_step.id  # type: ignore

-     def set_parent_agent(self, parent: lr.Agent) -> None:
+     def set_parent_agent(self, parent: "Agent") -> None:
          self.parent_agent = parent

      def get_last_step(self) -> Optional[cl.Step]:
@@ -559,7 +569,7 @@ class ChainlitTaskCallbacks(ChainlitAgentCallbacks):

      def __init__(
          self,
-         task: lr.Task,
+         task: "Task",
          config: ChainlitCallbackConfig = ChainlitCallbackConfig(),
      ):
          """Inject callbacks recursively, ensuring msg is passed to the
@@ -573,7 +583,7 @@ class ChainlitTaskCallbacks(ChainlitAgentCallbacks):

      @classmethod
      def _inject_callbacks(
-         cls, task: lr.Task, config: ChainlitCallbackConfig = ChainlitCallbackConfig()
+         cls, task: "Task", config: ChainlitCallbackConfig = ChainlitCallbackConfig()
      ) -> None:
          # recursively apply ChainlitAgentCallbacks to agents of sub-tasks
          for t in task.sub_tasks:
@@ -581,7 +591,7 @@ class ChainlitTaskCallbacks(ChainlitAgentCallbacks):
              # ChainlitTaskCallbacks(t, config=config)

      def show_subtask_response(
-         self, task: lr.Task, content: str, is_tool: bool = False
+         self, task: "Task", content: str, is_tool: bool = False
      ) -> None:
          """Show sub-task response as a step, nested at the right level."""

langroid/agent/special/doc_chat_agent.py CHANGED
@@ -14,6 +14,7 @@ pip install "langroid[hf-embeddings]"

  """

+ import importlib.util
  import logging
  from collections import OrderedDict
  from functools import cache
@@ -82,14 +83,13 @@ about them, or summarize them into coherent answers.
  """

  CHUNK_ENRICHMENT_DELIMITER = "\n<##-##-##>\n"
-
- has_sentence_transformers = False
  try:
-     from sentence_transformers import SentenceTransformer  # noqa: F401
-
-     has_sentence_transformers = True
- except ImportError:
-     pass
+     # Check whether the module is importable, without actually importing it
+     spec = importlib.util.find_spec("sentence_transformers")
+     has_sentence_transformers = spec is not None
+ except Exception as e:
+     logger.warning(f"Error checking sentence_transformers: {e}")
+     has_sentence_transformers = False


  hf_embed_config = SentenceTransformerEmbeddingsConfig(
@@ -236,6 +236,7 @@ class DocChatAgent(ChatAgent):
          self.chunked_docs: List[Document] = []
          self.chunked_docs_clean: List[Document] = []
          self.response: None | Document = None
+
          if len(config.doc_paths) > 0:
              self.ingest()

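
`importlib.util.find_spec` locates a module without executing it, so the availability probe above is effectively free at import time, unlike the previous `from sentence_transformers import SentenceTransformer`, which loaded the whole torch-backed package at module load. A hedged sketch of the probe as a reusable helper (the helper name is mine, not langroid's):

```python
import importlib.util
import logging

logger = logging.getLogger(__name__)


def module_available(name: str) -> bool:
    """True if `name` is importable, without actually importing it."""
    try:
        return importlib.util.find_spec(name) is not None
    except Exception as e:  # e.g. a broken meta-path finder or a bad name
        logger.warning(f"Error checking {name}: {e}")
        return False


has_sentence_transformers = module_available("sentence_transformers")
```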
langroid/parsing/document_parser.py CHANGED
@@ -16,28 +16,11 @@ from dotenv import load_dotenv
  from langroid.exceptions import LangroidImportError
  from langroid.utils.object_registry import ObjectRegistry

- try:
+ if TYPE_CHECKING:
+     import docling  # noqa
      import fitz
- except ImportError:
-     if not TYPE_CHECKING:
-         fitz = None
- try:
-     import pymupdf4llm
- except ImportError:
-     if not TYPE_CHECKING:
-         pymupdf4llm = None
-
- try:
-     import docling
- except ImportError:
-     if not TYPE_CHECKING:
-         docling = None
-
- try:
+     import pymupdf4llm  # noqa
      import pypdf
- except ImportError:
-     if not TYPE_CHECKING:
-         pypdf = None


  import requests
@@ -469,8 +452,10 @@ class FitzPDFParser(DocumentParser):
          Returns:
              Generator[fitz.Page]: Generator yielding each page.
          """
-         if fitz is None:
-             raise LangroidImportError("fitz", "pdf-parsers")
+         try:
+             import fitz
+         except ImportError:
+             raise LangroidImportError("fitz", "doc-chat")
          doc = fitz.open(stream=self.doc_bytes, filetype="pdf")
          for i, page in enumerate(doc):
              yield i, page
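
The parser classes switch from module-level `fitz = None` sentinels to importing inside the method that needs the library, raising `LangroidImportError` (which names the pip extra to install) when it is missing. A condensed sketch of the pattern, with a stand-in exception class so it runs without langroid:

```python
class LangroidImportError(ImportError):
    # Stand-in for langroid.exceptions.LangroidImportError, which tells the
    # user which extra to install, e.g.  pip install "langroid[doc-chat]"
    def __init__(self, package: str, extra: str) -> None:
        super().__init__(f'{package} missing; try: pip install "langroid[{extra}]"')


def iter_pages(doc_bytes: bytes):
    try:
        import fitz  # PyMuPDF, imported only when a PDF is actually parsed
    except ImportError:
        raise LangroidImportError("fitz", "doc-chat")
    doc = fitz.open(stream=doc_bytes, filetype="pdf")
    yield from enumerate(doc)
```

The PyMuPDF4LLM, Docling, and PyPDF parsers below apply the same substitution.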
@@ -504,7 +489,10 @@ class PyMuPDF4LLMParser(DocumentParser):
          Returns:
              Generator[fitz.Page]: Generator yielding each page.
          """
-         if fitz is None:
+         try:
+             import pymupdf4llm  # noqa
+             import fitz
+         except ImportError:
              raise LangroidImportError(
                  "pymupdf4llm", ["pymupdf4llm", "all", "pdf-parsers", "doc-chat"]
              )
@@ -548,7 +536,9 @@ class DoclingParser(DocumentParser):
          Returns:
              Generator[docling.Page]: Generator yielding each page.
          """
-         if docling is None:
+         try:
+             import docling  # noqa
+         except ImportError:
              raise LangroidImportError(
                  "docling", ["docling", "pdf-parsers", "all", "doc-chat"]
              )
@@ -637,7 +627,9 @@ class PyPDFParser(DocumentParser):
          Returns:
              Generator[pypdf.pdf.PageObject]: Generator yielding each page.
          """
-         if pypdf is None:
+         try:
+             import pypdf
+         except ImportError:
              raise LangroidImportError("pypdf", "pdf-parsers")
          reader = pypdf.PdfReader(self.doc_bytes)
          for i, page in enumerate(reader.pages):
langroid/parsing/repo_loader.py CHANGED
@@ -7,14 +7,16 @@ import tempfile
  import time
  from collections import deque
  from pathlib import Path
- from typing import Any, Dict, List, Optional, Tuple, Union
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
  from urllib.parse import urlparse

  from dotenv import load_dotenv
- from github import Github
- from github.ContentFile import ContentFile
- from github.Label import Label
- from github.Repository import Repository
+
+ if TYPE_CHECKING:
+     from github import Github
+     from github.ContentFile import ContentFile
+     from github.Label import Label
+     from github.Repository import Repository

  from langroid.mytypes import DocMetaData, Document
  from langroid.parsing.document_parser import DocumentParser, DocumentType
@@ -24,7 +26,7 @@ from langroid.pydantic_v1 import BaseModel, BaseSettings, Field
  logger = logging.getLogger(__name__)


- def _get_decoded_content(content_file: ContentFile) -> str:
+ def _get_decoded_content(content_file: "ContentFile") -> str:
      if content_file.encoding == "base64":
          return content_file.decoded_content.decode("utf-8") or ""
      elif content_file.encoding == "none":
@@ -54,7 +56,7 @@ class IssueData(BaseModel):
      text: str = Field(..., description="Text of issue, i.e. description body")


- def get_issue_size(labels: List[Label]) -> str | None:
+ def get_issue_size(labels: List["Label"]) -> str | None:
      sizes = ["XS", "S", "M", "L", "XL", "XXL"]
      return next((label.name for label in labels if label.name in sizes), None)

@@ -117,6 +119,8 @@ class RepoLoader:
          self.config = config
          self.clone_path: Optional[str] = None
          self.log_file = ".logs/repo_loader/download_log.json"
+         self.repo: Optional["Repository"] = None  # Initialize repo as Optional
+
          os.makedirs(os.path.dirname(self.log_file), exist_ok=True)
          if not os.path.exists(self.log_file):
              with open(self.log_file, "w") as f:
@@ -127,20 +131,25 @@ class RepoLoader:
              logger.info(f"Repo Already downloaded in {log[self.url]}")
              self.clone_path = log[self.url]

+         # it's a core dependency, so we don't need to enclose in try/except
+         from github import Github  # Late import
+
+         load_dotenv()
+         # authenticated calls to github api have higher rate limit
+         token = os.getenv("GITHUB_ACCESS_TOKEN")
+
          if "github.com" in self.url:
              repo_name = self.url.split("github.com/")[1]
          else:
              repo_name = self.url
-         load_dotenv()
-         # authenticated calls to github api have higher rate limit
-         token = os.getenv("GITHUB_ACCESS_TOKEN")
+
          g = Github(token)
          self.repo = self._get_repo_with_retry(g, repo_name)

      @staticmethod
      def _get_repo_with_retry(
-         g: Github, repo_name: str, max_retries: int = 5
-     ) -> Repository:
+         g: "Github", repo_name: str, max_retries: int = 5
+     ) -> "Repository":
          """
          Get a repo from the GitHub API, retrying if the request fails,
          with exponential backoff.
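
`_get_repo_with_retry` itself is unchanged here apart from the quoted annotations; per its docstring it retries failed GitHub API calls with exponential backoff. A plausible reconstruction of such a helper, under the assumption that it wraps PyGithub's `Github.get_repo` (this sketch is mine, not langroid's actual body):

```python
import logging
import time
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from github import Github
    from github.Repository import Repository

logger = logging.getLogger(__name__)


def get_repo_with_retry(g: "Github", repo_name: str, max_retries: int = 5) -> "Repository":
    """Fetch a repo, waiting 1s, 2s, 4s, ... between failed attempts."""
    delay = 1.0
    for attempt in range(max_retries):
        try:
            return g.get_repo(repo_name)
        except Exception as e:
            if attempt == max_retries - 1:
                raise
            logger.warning(f"get_repo failed ({e}); retrying in {delay:.0f}s")
            time.sleep(delay)
            delay *= 2
    raise AssertionError("unreachable")
```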
@@ -173,6 +182,10 @@ class RepoLoader:

      def get_issues(self, k: int | None = 100) -> List[IssueData]:
          """Get up to k issues from the GitHub repo."""
+         if self.repo is None:
+             logger.warning("No repo found. Ensure the URL is correct.")
+             return []  # Return an empty list rather than raise an error in this case
+
          if k is None:
              issues = self.repo.get_issues(state="all")
          else:
@@ -224,7 +237,7 @@ class RepoLoader:
          """
          return file_type not in self.config.non_code_types

-     def _is_allowed(self, content: ContentFile) -> bool:
+     def _is_allowed(self, content: "ContentFile") -> bool:
          """
          Check if a file or directory content is allowed to be included.

@@ -301,6 +314,10 @@ class RepoLoader:
              Dict[str, Union[str, List[Dict]]]:
                  A dictionary containing file and directory names, with file contents.
          """
+         if self.repo is None:
+             logger.warning("No repo found. Ensure the URL is correct.")
+             return {}  # Return an empty dict rather than raise an error in this case
+
          root_contents = self.repo.get_contents("")
          if not isinstance(root_contents, list):
              root_contents = [root_contents]
@@ -519,8 +536,7 @@ class RepoLoader:
                  which includes all depths.
              lines (int, optional): Number of lines to read from each file.
                  Defaults to None, which reads all lines.
-             doc_type (str|DocumentType, optional): The type of document to parse.
-
+             doc_type (str|DocumentType|None, optional): The type of document to parse.
          Returns:
              List[Document]: List of Document objects representing files.

@@ -584,6 +600,10 @@ class RepoLoader:
              list of Document objects, each has fields `content` and `metadata`,
              and `metadata` has fields `url`, `filename`, `extension`, `language`
          """
+         if self.repo is None:
+             logger.warning("No repo found. Ensure the URL is correct.")
+             return []  # Return an empty list rather than raise an error
+
          contents = self.repo.get_contents("")
          if not isinstance(contents, list):
              contents = [contents]
langroid/parsing/search.py CHANGED
@@ -10,9 +10,6 @@ import difflib
  import re
  from typing import List, Tuple

- from nltk.corpus import stopwords
- from nltk.stem import WordNetLemmatizer
- from nltk.tokenize import RegexpTokenizer
  from rank_bm25 import BM25Okapi
  from thefuzz import fuzz, process

@@ -120,6 +117,9 @@ def preprocess_text(text: str) -> str:
      # Ensure the NLTK resources are available
      for resource in ["tokenizers/punkt", "corpora/wordnet", "corpora/stopwords"]:
          download_nltk_resource(resource)
+     from nltk.corpus import stopwords
+     from nltk.stem import WordNetLemmatizer
+     from nltk.tokenize import RegexpTokenizer

      # Lowercase the text
      text = text.lower()
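
With the `nltk.corpus` imports moved inside `preprocess_text`, merely importing `langroid.parsing.search` no longer pulls in NLTK; the resources are ensured and the corpora imported only on first use. A small sketch of that ensure-then-import sequence, reusing the `download_nltk_resource` logic shown under `langroid/parsing/utils.py` below (the `english_stopwords` wrapper is mine, for illustration):

```python
import nltk


def download_nltk_resource(resource: str) -> None:
    # Same logic as langroid.parsing.utils.download_nltk_resource:
    # download only if nltk cannot already find the resource locally.
    try:
        nltk.data.find(resource)
    except LookupError:
        nltk.download(resource.split("/")[-1], quiet=True)


def english_stopwords() -> list[str]:
    download_nltk_resource("corpora/stopwords")
    from nltk.corpus import stopwords  # local import, as in preprocess_text

    return stopwords.words("english")
```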
langroid/parsing/url_loader.py CHANGED
@@ -4,12 +4,6 @@ from tempfile import NamedTemporaryFile
  from typing import List, no_type_check

  import requests
- import trafilatura
- from trafilatura.downloads import (
-     add_to_compressed_dict,
-     buffered_downloads,
-     load_download_buffer,
- )

  from langroid.mytypes import DocMetaData, Document
  from langroid.parsing.document_parser import DocumentParser, ImagePdfParser
@@ -36,6 +30,13 @@ class URLLoader:

      @no_type_check
      def load(self) -> List[Document]:
+         import trafilatura
+         from trafilatura.downloads import (
+             add_to_compressed_dict,
+             buffered_downloads,
+             load_download_buffer,
+         )
+
          docs = []
          threads = 4
          # converted the input list to an internal format
langroid/parsing/urls.py CHANGED
@@ -11,7 +11,6 @@ import requests
  from bs4 import BeautifulSoup
  from rich import print
  from rich.prompt import Prompt
- from trafilatura.spider import focused_crawler

  from langroid.pydantic_v1 import BaseModel, HttpUrl, ValidationError, parse_obj_as

@@ -150,6 +149,8 @@ def crawl_url(url: str, max_urls: int = 1) -> List[str]:
      up to a maximum of `max_urls`.
      This has not been tested to work as intended. Ignore.
      """
+     from trafilatura.spider import focused_crawler
+
      if max_urls == 1:
          # no need to crawl, just return the original list
          return [url]
langroid/parsing/utils.py CHANGED
@@ -6,7 +6,6 @@ from functools import cache
  from itertools import islice
  from typing import Iterable, List, Sequence, TypeVar

- import nltk
  from faker import Faker

  from langroid.mytypes import Document
@@ -22,19 +21,19 @@ random.seed(43)
  logger = logging.getLogger(__name__)


- # Ensures the NLTK resource is available
- @cache
  def download_nltk_resource(resource: str) -> None:
-     try:
-         nltk.data.find(resource)
-     except LookupError:
-         model = resource.split("/")[-1]
-         nltk.download(model, quiet=True)
+     import nltk
+
+     @cache
+     def _download() -> None:
+         try:
+             nltk.data.find(resource)
+         except LookupError:
+             model = resource.split("/")[-1]
+             nltk.download(model, quiet=True)

+     _download()

- # Download punkt_tab resource at module import
- download_nltk_resource("tokenizers/punkt_tab")
- download_nltk_resource("corpora/gutenberg")

  T = TypeVar("T")

@@ -51,9 +50,12 @@ def batched(iterable: Iterable[T], n: int) -> Iterable[Sequence[T]]:

  def generate_random_sentences(k: int) -> str:
      # Load the sample text
-
+     import nltk
      from nltk.corpus import gutenberg

+     download_nltk_resource("corpora/gutenberg")
+     download_nltk_resource("tokenizers/punkt")
+
      text = gutenberg.raw("austen-emma.txt")

      # Split the text into sentences
@@ -155,6 +157,8 @@ def number_segments(s: str, granularity: int = 1) -> str:
      >>> number_segments("Hello world! How are you? Have a good day.")
      '<#1#> Hello world! <#2#> How are you? <#3#> Have a good day.'
      """
+     import nltk
+
      if granularity < 0:
          return "<#1#> " + s
      numbered_text = []
langroid/vector_store/postgres.py CHANGED
@@ -27,7 +27,6 @@ try:
      )
      from sqlalchemy.dialects.postgresql import JSONB
      from sqlalchemy.engine import Connection, Engine
-     from sqlalchemy.orm import sessionmaker
      from sqlalchemy.sql.expression import insert
  except ImportError:
      Engine = Any  # type: ignore
@@ -56,6 +55,11 @@ class PostgresDB(VectorStore):
          super().__init__(config)
          if not has_postgres:
              raise LangroidImportError("pgvector", "postgres")
+         try:
+             from sqlalchemy.orm import sessionmaker
+         except ImportError:
+             raise LangroidImportError("sqlalchemy", "postgres")
+
          self.config: PostgresDBConfig = config
          self.engine = self._create_engine()
          PostgresDB._create_vector_extension(self.engine)
langroid/vector_store/qdrantdb.py CHANGED
@@ -4,24 +4,9 @@ import logging
  import os
  import time
  import uuid
- from typing import Dict, List, Optional, Sequence, Tuple, TypeVar
+ from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, TypeVar

  from dotenv import load_dotenv
- from qdrant_client import QdrantClient
- from qdrant_client.conversions.common_types import ScoredPoint
- from qdrant_client.http.models import (
-     Batch,
-     CollectionStatus,
-     Distance,
-     Filter,
-     NamedSparseVector,
-     NamedVector,
-     SearchRequest,
-     SparseIndexParams,
-     SparseVector,
-     SparseVectorParams,
-     VectorParams,
- )

  from langroid.embedding_models.base import (
      EmbeddingModelsConfig,
@@ -32,6 +17,8 @@ from langroid.utils.configuration import settings
  from langroid.vector_store.base import VectorStore, VectorStoreConfig

  logger = logging.getLogger(__name__)
+ if TYPE_CHECKING:
+     from qdrant_client.http.models import SparseVector


  T = TypeVar("T")
@@ -67,16 +54,18 @@ class QdrantDBConfig(VectorStoreConfig):
      collection_name: str | None = "temp"
      storage_path: str = ".qdrant/data"
      embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
-     distance: str = Distance.COSINE
      use_sparse_embeddings: bool = False
      sparse_embedding_model: str = "naver/splade-v3-distilbert"
      sparse_limit: int = 3
+     distance: str = "cosine"


  class QdrantDB(VectorStore):
      def __init__(self, config: QdrantDBConfig = QdrantDBConfig()):
          super().__init__(config)
          self.config: QdrantDBConfig = config
+         from qdrant_client import QdrantClient
+
          if self.config.use_sparse_embeddings:
              try:
                  from transformers import AutoModelForMaskedLM, AutoTokenizer
@@ -166,6 +155,7 @@ class QdrantDB(VectorStore):

      def clear_all_collections(self, really: bool = False, prefix: str = "") -> int:
          """Clear all collections with the given prefix."""
+
          if not really:
              logger.warning("Not deleting all collections, set really=True to confirm")
              return 0
@@ -200,6 +190,7 @@ class QdrantDB(VectorStore):
          Args:
              empty (bool, optional): Whether to include empty collections.
          """
+
          colls = list(self.client.get_collections())[0][1]
          if empty:
              return [coll.name for coll in colls]
@@ -228,6 +219,14 @@ class QdrantDB(VectorStore):
              replace (bool): Whether to replace an existing collection
                  with the same name. Defaults to False.
          """
+         from qdrant_client.http.models import (
+             CollectionStatus,
+             Distance,
+             SparseIndexParams,
+             SparseVectorParams,
+             VectorParams,
+         )
+
          self.config.collection_name = collection_name
          if self.client.collection_exists(collection_name=collection_name):
              coll = self.client.get_collection(collection_name=collection_name)
@@ -268,7 +267,9 @@ class QdrantDB(VectorStore):
          logger.info(collection_info)
          logger.setLevel(level)

-     def get_sparse_embeddings(self, inputs: List[str]) -> List[SparseVector]:
+     def get_sparse_embeddings(self, inputs: List[str]) -> List["SparseVector"]:
+         from qdrant_client.http.models import SparseVector
+
          if not self.config.use_sparse_embeddings:
              return []
          import torch
@@ -295,6 +296,12 @@ class QdrantDB(VectorStore):
          return sparse_embeddings

      def add_documents(self, documents: Sequence[Document]) -> None:
+         from qdrant_client.http.models import (
+             Batch,
+             CollectionStatus,
+             SparseVector,
+         )
+
          # Add id to metadata if not already present
          super().maybe_add_ids(documents)
          # Fix the ids due to qdrant finickiness
@@ -383,6 +390,10 @@ class QdrantDB(VectorStore):
          return str(formatted_uuid)

      def get_all_documents(self, where: str = "") -> List[Document]:
+         from qdrant_client.http.models import (
+             Filter,
+         )
+
          if self.config.collection_name is None:
              raise ValueError("No collection name set, cannot retrieve docs")
          docs = []
@@ -431,6 +442,14 @@ class QdrantDB(VectorStore):
          where: Optional[str] = None,
          neighbors: int = 0,
      ) -> List[Tuple[Document, float]]:
+         from qdrant_client.conversions.common_types import ScoredPoint
+         from qdrant_client.http.models import (
+             Filter,
+             NamedSparseVector,
+             NamedVector,
+             SearchRequest,
+         )
+
          embedding = self.embedding_fn([text])[0]
          # TODO filter may not work yet
          if where is None or where == "":
langroid/vector_store/weaviatedb.py CHANGED
@@ -1,7 +1,7 @@
  import logging
  import os
  import re
- from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple
+ from typing import Any, List, Optional, Sequence, Tuple

  from dotenv import load_dotenv

@@ -15,29 +15,16 @@ from langroid.utils.configuration import settings
  from langroid.vector_store.base import VectorStore, VectorStoreConfig

  logger = logging.getLogger(__name__)
- has_weaviate: bool = True
- try:
-     import weaviate
-     from weaviate.classes.config import (
-         Configure,
-         VectorDistances,
-     )
-     from weaviate.classes.init import Auth
-     from weaviate.classes.query import Filter, MetadataQuery
-     from weaviate.util import generate_uuid5, get_valid_uuid
- except ImportError:
-     has_weaviate = False
-
-     if not TYPE_CHECKING:
-
-         class VectorDistances:
-             """
-             Fallback class when weaviate is not installed, to avoid import errors.
-             """

-             COSINE: str = "cosine"
-             DOTPRODUCT: str = "dot"
-             L2: str = "l2"
+
+ class VectorDistances:
+     """
+     Fallback class when weaviate is not installed, to avoid import errors.
+     """
+
+     COSINE: str = "cosine"
+     DOTPRODUCT: str = "dot"
+     L2: str = "l2"


  class WeaviateDBConfig(VectorStoreConfig):
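
With all weaviate imports now deferred, the fallback `VectorDistances` constants class is defined unconditionally, so a config object can name a distance metric on machines where weaviate is not installed. A sketch of why that works (the `distance` field is assumed here for illustration; it is not visible in this diff):

```python
class VectorDistances:
    # Plain-string stand-ins for weaviate.classes.config.VectorDistances,
    # so naming a metric requires no weaviate import.
    COSINE: str = "cosine"
    DOTPRODUCT: str = "dot"
    L2: str = "l2"


class WeaviateDBConfig:
    distance: str = VectorDistances.COSINE  # hypothetical field, illustration only


cfg = WeaviateDBConfig()
assert cfg.distance == "cosine"  # constructed without weaviate installed
```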
@@ -54,8 +41,12 @@ class WeaviateDBConfig(VectorStoreConfig):
  class WeaviateDB(VectorStore):
      def __init__(self, config: WeaviateDBConfig = WeaviateDBConfig()):
          super().__init__(config)
-         if not has_weaviate:
+         try:
+             import weaviate
+             from weaviate.classes.init import Auth
+         except ImportError:
              raise LangroidImportError("weaviate", "weaviate")
+
          self.config: WeaviateDBConfig = config
          load_dotenv()
          if self.config.docker:
@@ -141,6 +132,13 @@ class WeaviateDB(VectorStore):
          self.client.collections.delete(name=collection_name)

      def create_collection(self, collection_name: str, replace: bool = False) -> None:
+         try:
+             from weaviate.classes.config import (
+                 Configure,
+                 VectorDistances,
+             )
+         except ImportError:
+             raise LangroidImportError("weaviate", "weaviate")
          collection_name = WeaviateDB.validate_and_format_collection_name(
              collection_name
          )
@@ -207,6 +205,8 @@ class WeaviateDB(VectorStore):
          return [self.weaviate_obj_to_doc(item) for item in coll.iterator()]

      def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
+         from weaviate.classes.query import Filter
+
          if self.config.collection_name is None:
              raise ValueError("No collection name set, cannot retrieve docs")

@@ -230,6 +230,8 @@ class WeaviateDB(VectorStore):
      def similar_texts_with_scores(
          self, text: str, k: int = 1, where: Optional[str] = None
      ) -> List[Tuple[Document, float]]:
+         from weaviate.classes.query import MetadataQuery
+
          embedding = self.embedding_fn([text])[0]
          if self.config.collection_name is None:
              raise ValueError("No collections name set,cannot search")
@@ -246,6 +248,8 @@ class WeaviateDB(VectorStore):
          return list(zip(docs, similarities))

      def _create_valid_uuid_id(self, id: str) -> Any:
+         from weaviate.util import generate_uuid5, get_valid_uuid
+
          try:
              id = get_valid_uuid(id)
              return id
@@ -253,6 +257,8 @@ class WeaviateDB(VectorStore):
              return generate_uuid5(id)

      def weaviate_obj_to_doc(self, input_object: Any) -> Document:
+         from weaviate.util import get_valid_uuid
+
          content = input_object.properties.get("content", "")
          metadata_dict = input_object.properties.get("metadata", {})

{langroid-0.43.1.dist-info → langroid-0.44.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: langroid
- Version: 0.43.1
+ Version: 0.44.0
  Summary: Harness LLMs with Multi-Agent Programming
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
  License: MIT
@@ -237,9 +237,11 @@ This Multi-Agent paradigm is inspired by the

  `Langroid` is a fresh take on LLM app-development, where considerable thought has gone
  into simplifying the developer experience;
- it does not use `Langchain`, or any other LLM framework.
+ it does not use `Langchain`, or any other LLM framework,
+ and works with [practically any LLM](https://langroid.github.io/langroid/tutorials/supported-models/).

- :fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/) and a [quick tour of Langroid](https://langroid.github.io/langroid/tutorials/langroid-tour/)
+ :fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/),
+ and a [quick tour of Langroid](https://langroid.github.io/langroid/tutorials/langroid-tour/).

  📢 Companies are using/adapting Langroid in **production**. Here is a quote:
@@ -327,6 +329,18 @@ teacher_task.run()
  <details>
  <summary> <b>Click to expand</b></summary>

+ - **Feb 2025:**
+   - [0.43.0](https://github.com/langroid/langroid/releases/tag/0.43.0): `GeminiPdfParser` for parsing PDFs using
+     Gemini LLMs. Thanks @abab-dev.
+   - [0.42.0](https://github.com/langroid/langroid/releases/tag/0.42.0): `markitdown` parser for `pptx`, `xlsx`, `xls` files.
+     Thanks @abab-dev.
+   - [0.41.0](https://github.com/langroid/langroid/releases/tag/0.41.0): `pinecone` vector-db (thanks @coretado),
+     `Tavily` web-search (thanks @Sozhan308), `Exa` web-search (thanks @MuddyHope).
+   - [0.40.0](https://github.com/langroid/langroid/releases/tag/0.40.0): `pgvector` vector-db. Thanks @abab-dev.
+   - [0.39.0](https://github.com/langroid/langroid/releases/tag/0.39.0): `ChatAgentConfig.handle_llm_no_tool` for
+     handling the LLM "forgetting" to use a tool.
+   - [0.38.0](https://github.com/langroid/langroid/releases/tag/0.38.0): Gemini embeddings. Thanks @abab-dev.
+   - [0.37.0](https://github.com/langroid/langroid/releases/tag/0.37.0): New PDF parsers: `docling`, `pymupdf4llm`.
  - **Jan 2025:**
    - [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
    - [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from
@@ -591,7 +605,8 @@ section above)
  Agents with specific skills, wrap them in Tasks, and combine tasks in a flexible way.
  - **LLM Support**: Langroid supports OpenAI LLMs as well as LLMs from hundreds of
  providers ([local/open](https://langroid.github.io/langroid/tutorials/local-llm-setup/) or [remote/commercial](https://langroid.github.io/langroid/tutorials/non-openai-llms/)) via proxy libraries and local model servers
- such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui), [LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic the OpenAI API.
+ such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui),
+ [LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic the OpenAI API. See the [supported LLMs](https://langroid.github.io/langroid/tutorials/supported-models/).
  - **Caching of LLM responses:** Langroid supports [Redis](https://redis.com/try-free/) and
  [Momento](https://www.gomomento.com/) to cache LLM responses.
  - **Vector-stores**: [LanceDB](https://github.com/lancedb/lancedb), [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) are currently supported.
{langroid-0.43.1.dist-info → langroid-0.44.0.dist-info}/RECORD RENAMED
@@ -12,9 +12,9 @@ langroid/agent/task.py,sha256=HB6N-Jn80HFqCf0ZYOC1v3Bn3oO7NLjShHQJJFwW0q4,90557
  langroid/agent/tool_message.py,sha256=BhjP-_TfQ2tgxuY4Yo_JHLOwwt0mJ4BwjPnREvEY4vk,14744
  langroid/agent/xml_tool_message.py,sha256=6SshYZJKIfi4mkE-gIoSwjkEYekQ8GwcSiCv7a5uO9E,15054
  langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- langroid/agent/callbacks/chainlit.py,sha256=RH8qUXaZE5o2WQz3WJQ1SdFtASGlxWCA6_HYz_3meDQ,20822
+ langroid/agent/callbacks/chainlit.py,sha256=UHB6P_J40vsVnssosqkpkOVWRf9NK4TOY0_G2g_Arsg,20900
  langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
- langroid/agent/special/doc_chat_agent.py,sha256=Q16HOg6MXa26szTO29OOIv1kv8QfuUjxLlLOP3eqZvA,64539
+ langroid/agent/special/doc_chat_agent.py,sha256=Pnx_gb-3-QUeUoZfYYzU3l6f3PecpHTiymrB4Tu_wEg,64661
  langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVbS5Q3C2b2mr3V1Fd-Q,10419
  langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
  langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
@@ -81,19 +81,19 @@ langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeu
  langroid/parsing/__init__.py,sha256=2oUWJJAxIavq9Wtw5RGlkXLq3GF3zgXeVLLW4j7yeb8,1138
  langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
  langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
- langroid/parsing/document_parser.py,sha256=tov34uYB_2ecq7-G7P7CWSOv5alcfwkrrwfsnCCVdIk,49714
+ langroid/parsing/document_parser.py,sha256=QThgCm9iZyRZd1pmANZ3lO20p2TNH0NIU5_a5v8q8Ck,49649
  langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
  langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
  langroid/parsing/parser.py,sha256=8MDoKQO60RGXod9E5jMj-k90QNhdim4blVJB9L0rrSA,13789
  langroid/parsing/pdf_utils.py,sha256=rmNJ9UzuBgXTAYwj1TtRJcD8h53x7cizhgyYHKO88I4,1513
- langroid/parsing/repo_loader.py,sha256=3GjvPJS6Vf5L6gV2zOU8s-Tf1oq_fZm-IB_RL_7CTsY,29373
+ langroid/parsing/repo_loader.py,sha256=NpysuyzRHvgL3F4BB_wGo5sCUnZ3FOlVCJmZ7CaUdbs,30202
  langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
- langroid/parsing/search.py,sha256=YPCwezM0c4PWbNUMEmQ5RrJBtvX4aWZ1CMCJFs4sqFo,9806
+ langroid/parsing/search.py,sha256=0NJ5-Rou_BbrHAD7O9b20bKjZJnbadjObvGm4Zq8Kis,9818
  langroid/parsing/spider.py,sha256=hAVM6wxh1pQ0EN4tI5wMBtAjIk0T-xnpi-ZUzWybhos,3258
  langroid/parsing/table_loader.py,sha256=qNM4obT_0Y4tjrxNBCNUYjKQ9oETCZ7FbolKBTcz-GM,3410
- langroid/parsing/url_loader.py,sha256=MPJFhAdMl4LYVtL9f8r1BOtTkDOg2-hKkANUBUoXCG0,4846
- langroid/parsing/urls.py,sha256=86omykgxo4hg2jyF10Ef-FJa9n6MgXdSXy2mImqgo5c,8076
- langroid/parsing/utils.py,sha256=ZWMS7oG04GUY9EAIwnFN6KKo_ePCKhqk_H8jW6TDT0s,12805
+ langroid/parsing/url_loader.py,sha256=obi_kj6ehBkdh5mXNtYCXpm3KCuExoy2D1ODVlFbXbQ,4895
+ langroid/parsing/urls.py,sha256=Tjzr64YsCusiYkY0LEGB5-rSuX8T2P_4DVoOFKAeKuI,8081
+ langroid/parsing/utils.py,sha256=WwqzOhbQRlorbVvddDIZKv9b1KqZCBDm955lgIHDXRw,12828
  langroid/parsing/web_search.py,sha256=wWSmV0METFTGPhHJIs-M4tog2Aur_75Pxr4a49cKDkU,7042
  langroid/prompts/__init__.py,sha256=RW11vK6jiLPuaUh4GpeFvstti73gkm8_rDMtrbo2YsU,142
  langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
@@ -124,10 +124,10 @@ langroid/vector_store/chromadb.py,sha256=p9mEqJwO2BrL2jSSXfa23kCPlPOwWpF3xJYd5zo
  langroid/vector_store/lancedb.py,sha256=Qd20gKjWozPWfW5-D66J6U8dSrJo1yl-maj6s1lbf1c,14688
  langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
  langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZXpEY9M,14994
- langroid/vector_store/postgres.py,sha256=DQHd6dt-OcV_QVNm-ymn28rlTfhI6hqgcpLTPCsm0jI,15990
- langroid/vector_store/qdrantdb.py,sha256=v7TAsIoj_vxeKDYS9tpwJLBZA8fuTweTYxHo0X_uawM,17949
- langroid/vector_store/weaviatedb.py,sha256=tjlqEtkwrhykelt-nbr2WIuHWJBuSAGjZuG6gsAMBsc,11753
- langroid-0.43.1.dist-info/METADATA,sha256=AQaUq3J9kszROM1HO3-8s9us3eGpSt9yJy7SI8eznkU,61773
- langroid-0.43.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- langroid-0.43.1.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
- langroid-0.43.1.dist-info/RECORD,,
+ langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
+ langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
+ langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
+ langroid-0.44.0.dist-info/METADATA,sha256=mKlCCdQQhV31aMCklT9QcRpUs5iHsOeDGAd55axAevU,62973
+ langroid-0.44.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ langroid-0.44.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+ langroid-0.44.0.dist-info/RECORD,,