langroid 0.48.0__tar.gz → 0.48.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. {langroid-0.48.0 → langroid-0.48.2}/PKG-INFO +1 -1
  2. {langroid-0.48.0 → langroid-0.48.2}/langroid/mytypes.py +10 -1
  3. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/url_loader.py +37 -9
  4. {langroid-0.48.0 → langroid-0.48.2}/pyproject.toml +1 -1
  5. {langroid-0.48.0 → langroid-0.48.2}/.gitignore +0 -0
  6. {langroid-0.48.0 → langroid-0.48.2}/LICENSE +0 -0
  7. {langroid-0.48.0 → langroid-0.48.2}/README.md +0 -0
  8. {langroid-0.48.0 → langroid-0.48.2}/langroid/__init__.py +0 -0
  9. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/__init__.py +0 -0
  10. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/base.py +0 -0
  11. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/batch.py +0 -0
  12. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/callbacks/__init__.py +0 -0
  13. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/callbacks/chainlit.py +0 -0
  14. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/chat_agent.py +0 -0
  15. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/chat_document.py +0 -0
  16. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/openai_assistant.py +0 -0
  17. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/__init__.py +0 -0
  18. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/arangodb/__init__.py +0 -0
  19. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
  20. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/arangodb/system_messages.py +0 -0
  21. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/arangodb/tools.py +0 -0
  22. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/arangodb/utils.py +0 -0
  23. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/doc_chat_agent.py +0 -0
  24. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  25. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/lance_rag/__init__.py +0 -0
  26. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  27. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  28. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  29. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/lance_tools.py +0 -0
  30. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/neo4j/__init__.py +0 -0
  31. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  32. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  33. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/neo4j/system_messages.py +0 -0
  34. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/neo4j/tools.py +0 -0
  35. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  36. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/retriever_agent.py +0 -0
  37. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/sql/__init__.py +0 -0
  38. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  39. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/sql/utils/__init__.py +0 -0
  40. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  41. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  42. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/sql/utils/system_message.py +0 -0
  43. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/sql/utils/tools.py +0 -0
  44. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/special/table_chat_agent.py +0 -0
  45. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/task.py +0 -0
  46. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tool_message.py +0 -0
  47. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/__init__.py +0 -0
  48. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  49. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/exa_search_tool.py +0 -0
  50. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/file_tools.py +0 -0
  51. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/google_search_tool.py +0 -0
  52. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  53. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/orchestration.py +0 -0
  54. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/recipient_tool.py +0 -0
  55. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/retrieval_tool.py +0 -0
  56. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/rewind_tool.py +0 -0
  57. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/segment_extract_tool.py +0 -0
  58. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/tools/tavily_search_tool.py +0 -0
  59. {langroid-0.48.0 → langroid-0.48.2}/langroid/agent/xml_tool_message.py +0 -0
  60. {langroid-0.48.0 → langroid-0.48.2}/langroid/cachedb/__init__.py +0 -0
  61. {langroid-0.48.0 → langroid-0.48.2}/langroid/cachedb/base.py +0 -0
  62. {langroid-0.48.0 → langroid-0.48.2}/langroid/cachedb/momento_cachedb.py +0 -0
  63. {langroid-0.48.0 → langroid-0.48.2}/langroid/cachedb/redis_cachedb.py +0 -0
  64. {langroid-0.48.0 → langroid-0.48.2}/langroid/embedding_models/__init__.py +0 -0
  65. {langroid-0.48.0 → langroid-0.48.2}/langroid/embedding_models/base.py +0 -0
  66. {langroid-0.48.0 → langroid-0.48.2}/langroid/embedding_models/models.py +0 -0
  67. {langroid-0.48.0 → langroid-0.48.2}/langroid/embedding_models/protoc/__init__.py +0 -0
  68. {langroid-0.48.0 → langroid-0.48.2}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  69. {langroid-0.48.0 → langroid-0.48.2}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  70. {langroid-0.48.0 → langroid-0.48.2}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  71. {langroid-0.48.0 → langroid-0.48.2}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  72. {langroid-0.48.0 → langroid-0.48.2}/langroid/embedding_models/remote_embeds.py +0 -0
  73. {langroid-0.48.0 → langroid-0.48.2}/langroid/exceptions.py +0 -0
  74. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/__init__.py +0 -0
  75. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/azure_openai.py +0 -0
  76. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/base.py +0 -0
  77. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/config.py +0 -0
  78. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/mock_lm.py +0 -0
  79. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/model_info.py +0 -0
  80. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/openai_gpt.py +0 -0
  81. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  82. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/prompt_formatter/base.py +0 -0
  83. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  84. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  85. {langroid-0.48.0 → langroid-0.48.2}/langroid/language_models/utils.py +0 -0
  86. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/__init__.py +0 -0
  87. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/agent_chats.py +0 -0
  88. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/code_parser.py +0 -0
  89. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/document_parser.py +0 -0
  90. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/para_sentence_split.py +0 -0
  91. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/parse_json.py +0 -0
  92. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/parser.py +0 -0
  93. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/pdf_utils.py +0 -0
  94. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/repo_loader.py +0 -0
  95. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/routing.py +0 -0
  96. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/search.py +0 -0
  97. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/spider.py +0 -0
  98. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/table_loader.py +0 -0
  99. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/urls.py +0 -0
  100. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/utils.py +0 -0
  101. {langroid-0.48.0 → langroid-0.48.2}/langroid/parsing/web_search.py +0 -0
  102. {langroid-0.48.0 → langroid-0.48.2}/langroid/prompts/__init__.py +0 -0
  103. {langroid-0.48.0 → langroid-0.48.2}/langroid/prompts/dialog.py +0 -0
  104. {langroid-0.48.0 → langroid-0.48.2}/langroid/prompts/prompts_config.py +0 -0
  105. {langroid-0.48.0 → langroid-0.48.2}/langroid/prompts/templates.py +0 -0
  106. {langroid-0.48.0 → langroid-0.48.2}/langroid/py.typed +0 -0
  107. {langroid-0.48.0 → langroid-0.48.2}/langroid/pydantic_v1/__init__.py +0 -0
  108. {langroid-0.48.0 → langroid-0.48.2}/langroid/pydantic_v1/main.py +0 -0
  109. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/__init__.py +0 -0
  110. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/algorithms/__init__.py +0 -0
  111. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/algorithms/graph.py +0 -0
  112. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/configuration.py +0 -0
  113. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/constants.py +0 -0
  114. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/git_utils.py +0 -0
  115. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/globals.py +0 -0
  116. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/logging.py +0 -0
  117. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/object_registry.py +0 -0
  118. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/output/__init__.py +0 -0
  119. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/output/citations.py +0 -0
  120. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/output/printing.py +0 -0
  121. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/output/status.py +0 -0
  122. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/pandas_utils.py +0 -0
  123. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/pydantic_utils.py +0 -0
  124. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/system.py +0 -0
  125. {langroid-0.48.0 → langroid-0.48.2}/langroid/utils/types.py +0 -0
  126. {langroid-0.48.0 → langroid-0.48.2}/langroid/vector_store/__init__.py +0 -0
  127. {langroid-0.48.0 → langroid-0.48.2}/langroid/vector_store/base.py +0 -0
  128. {langroid-0.48.0 → langroid-0.48.2}/langroid/vector_store/chromadb.py +0 -0
  129. {langroid-0.48.0 → langroid-0.48.2}/langroid/vector_store/lancedb.py +0 -0
  130. {langroid-0.48.0 → langroid-0.48.2}/langroid/vector_store/meilisearch.py +0 -0
  131. {langroid-0.48.0 → langroid-0.48.2}/langroid/vector_store/pineconedb.py +0 -0
  132. {langroid-0.48.0 → langroid-0.48.2}/langroid/vector_store/postgres.py +0 -0
  133. {langroid-0.48.0 → langroid-0.48.2}/langroid/vector_store/qdrantdb.py +0 -0
  134. {langroid-0.48.0 → langroid-0.48.2}/langroid/vector_store/weaviatedb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.48.0
3
+ Version: 0.48.2
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -3,7 +3,7 @@ from textwrap import dedent
3
3
  from typing import Any, Callable, Dict, List, Union
4
4
  from uuid import uuid4
5
5
 
6
- from langroid.pydantic_v1 import BaseModel, Extra, Field
6
+ from langroid.pydantic_v1 import BaseModel, Extra, Field, validator
7
7
 
8
8
  Number = Union[int, float]
9
9
  Embedding = List[Number]
@@ -45,10 +45,19 @@ class DocMetaData(BaseModel):
45
45
 
46
46
  source: str = "context" # just reference
47
47
  source_content: str = "context" # reference and content
48
+ title: str = "Unknown Title"
49
+ published_date: str = "Unknown Date"
48
50
  is_chunk: bool = False # if it is a chunk, don't split
49
51
  id: str = Field(default_factory=lambda: str(uuid4()))
50
52
  window_ids: List[str] = [] # for RAG: ids of chunks around this one
51
53
 
54
+ @validator("source", "source_content", "id", "title", "published_date")
55
+ def ensure_not_empty(cls, v: str) -> str:
56
+ """Ensure required string fields are not empty."""
57
+ if not v:
58
+ raise ValueError("Field cannot be empty")
59
+ return v
60
+
52
61
  def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
53
62
  """
54
63
  Special dict method to convert bool fields to int, to appease some
@@ -258,7 +258,13 @@ class FirecrawlCrawler(BaseCrawler):
258
258
  with open(filename, "w") as f:
259
259
  f.write(content)
260
260
  docs.append(
261
- Document(content=content, metadata=DocMetaData(source=url))
261
+ Document(
262
+ content=content,
263
+ metadata=DocMetaData(
264
+ source=url,
265
+ title=page["metadata"].get("title", "Unknown Title"),
266
+ ),
267
+ )
262
268
  )
263
269
  processed_urls.add(url)
264
270
  new_pages += 1
@@ -300,7 +306,10 @@ class FirecrawlCrawler(BaseCrawler):
300
306
  docs.append(
301
307
  Document(
302
308
  content=result["markdown"],
303
- metadata=DocMetaData(source=url),
309
+ metadata=DocMetaData(
310
+ source=url,
311
+ title=metadata.get("title", "Unknown Title"),
312
+ ),
304
313
  )
305
314
  )
306
315
  except Exception as e:
@@ -336,7 +345,7 @@ class ExaCrawler(BaseCrawler):
336
345
 
337
346
  @property
338
347
  def needs_parser(self) -> bool:
339
- return False
348
+ return True
340
349
 
341
350
  def crawl(self, urls: List[str]) -> List[Document]:
342
351
  """Crawl the given URLs using Exa SDK.
@@ -363,12 +372,29 @@ class ExaCrawler(BaseCrawler):
363
372
  docs = []
364
373
 
365
374
  try:
366
- results = exa.get_contents(urls, text=True)
367
-
368
- for result in results.results:
369
- if result.text:
370
- metadata = DocMetaData(source=result.url)
371
- docs.append(Document(content=result.text, metadata=metadata))
375
+ for url in urls:
376
+ parsed_doc_chunks = self._process_document(url)
377
+ if parsed_doc_chunks:
378
+ docs.extend(parsed_doc_chunks)
379
+ continue
380
+ else:
381
+ results = exa.get_contents([url], livecrawl="always", text=True)
382
+ result = results.results[0]
383
+ if result.text:
384
+ # append a NON-chunked document
385
+ # (metadata.is_chunk = False, so will be chunked downstream)
386
+ docs.append(
387
+ Document(
388
+ content=result.text,
389
+ metadata=DocMetaData(
390
+ source=url,
391
+ title=getattr(result, "title", "Unknown Title"),
392
+ published_date=getattr(
393
+ result, "published_date", "Unknown Date"
394
+ ),
395
+ ),
396
+ )
397
+ )
372
398
 
373
399
  except Exception as e:
374
400
  logging.error(f"Error retrieving content from Exa API: {e}")
@@ -399,6 +425,8 @@ class URLLoader:
399
425
  crawler_config = TrafilaturaConfig(parser=Parser(parsing_config))
400
426
 
401
427
  self.crawler = CrawlerFactory.create_crawler(crawler_config)
428
+ if self.crawler.needs_parser:
429
+ self.crawler.parser = Parser(parsing_config)
402
430
 
403
431
  def load(self) -> List[Document]:
404
432
  """Load the URLs using the specified crawler."""
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "langroid"
3
- version = "0.48.0"
3
+ version = "0.48.2"
4
4
  authors = [
5
5
  {name = "Prasad Chalasani", email = "pchalasani@gmail.com"},
6
6
  ]
File without changes
File without changes
File without changes
File without changes