langroid 0.47.1__tar.gz → 0.48.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. {langroid-0.47.1 → langroid-0.48.0}/PKG-INFO +5 -3
  2. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/retriever_agent.py +2 -1
  3. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/openai_gpt.py +2 -5
  4. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/document_parser.py +40 -0
  5. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/parser.py +1 -1
  6. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/url_loader.py +65 -0
  7. {langroid-0.47.1 → langroid-0.48.0}/pyproject.toml +7 -3
  8. {langroid-0.47.1 → langroid-0.48.0}/.gitignore +0 -0
  9. {langroid-0.47.1 → langroid-0.48.0}/LICENSE +0 -0
  10. {langroid-0.47.1 → langroid-0.48.0}/README.md +0 -0
  11. {langroid-0.47.1 → langroid-0.48.0}/langroid/__init__.py +0 -0
  12. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/__init__.py +0 -0
  13. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/base.py +0 -0
  14. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/batch.py +0 -0
  15. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/callbacks/__init__.py +0 -0
  16. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/callbacks/chainlit.py +0 -0
  17. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/chat_agent.py +0 -0
  18. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/chat_document.py +0 -0
  19. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/openai_assistant.py +0 -0
  20. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/__init__.py +0 -0
  21. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/arangodb/__init__.py +0 -0
  22. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
  23. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/arangodb/system_messages.py +0 -0
  24. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/arangodb/tools.py +0 -0
  25. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/arangodb/utils.py +0 -0
  26. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/doc_chat_agent.py +0 -0
  27. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  28. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/lance_rag/__init__.py +0 -0
  29. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  30. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  31. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  32. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/lance_tools.py +0 -0
  33. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/neo4j/__init__.py +0 -0
  34. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  35. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  36. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/neo4j/system_messages.py +0 -0
  37. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/neo4j/tools.py +0 -0
  38. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  39. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/sql/__init__.py +0 -0
  40. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  41. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/sql/utils/__init__.py +0 -0
  42. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  43. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  44. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/sql/utils/system_message.py +0 -0
  45. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/sql/utils/tools.py +0 -0
  46. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/special/table_chat_agent.py +0 -0
  47. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/task.py +0 -0
  48. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tool_message.py +0 -0
  49. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/__init__.py +0 -0
  50. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  51. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/exa_search_tool.py +0 -0
  52. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/file_tools.py +0 -0
  53. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/google_search_tool.py +0 -0
  54. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  55. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/orchestration.py +0 -0
  56. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/recipient_tool.py +0 -0
  57. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/retrieval_tool.py +0 -0
  58. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/rewind_tool.py +0 -0
  59. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/segment_extract_tool.py +0 -0
  60. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/tools/tavily_search_tool.py +0 -0
  61. {langroid-0.47.1 → langroid-0.48.0}/langroid/agent/xml_tool_message.py +0 -0
  62. {langroid-0.47.1 → langroid-0.48.0}/langroid/cachedb/__init__.py +0 -0
  63. {langroid-0.47.1 → langroid-0.48.0}/langroid/cachedb/base.py +0 -0
  64. {langroid-0.47.1 → langroid-0.48.0}/langroid/cachedb/momento_cachedb.py +0 -0
  65. {langroid-0.47.1 → langroid-0.48.0}/langroid/cachedb/redis_cachedb.py +0 -0
  66. {langroid-0.47.1 → langroid-0.48.0}/langroid/embedding_models/__init__.py +0 -0
  67. {langroid-0.47.1 → langroid-0.48.0}/langroid/embedding_models/base.py +0 -0
  68. {langroid-0.47.1 → langroid-0.48.0}/langroid/embedding_models/models.py +0 -0
  69. {langroid-0.47.1 → langroid-0.48.0}/langroid/embedding_models/protoc/__init__.py +0 -0
  70. {langroid-0.47.1 → langroid-0.48.0}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  71. {langroid-0.47.1 → langroid-0.48.0}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  72. {langroid-0.47.1 → langroid-0.48.0}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  73. {langroid-0.47.1 → langroid-0.48.0}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  74. {langroid-0.47.1 → langroid-0.48.0}/langroid/embedding_models/remote_embeds.py +0 -0
  75. {langroid-0.47.1 → langroid-0.48.0}/langroid/exceptions.py +0 -0
  76. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/__init__.py +0 -0
  77. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/azure_openai.py +0 -0
  78. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/base.py +0 -0
  79. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/config.py +0 -0
  80. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/mock_lm.py +0 -0
  81. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/model_info.py +0 -0
  82. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  83. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/prompt_formatter/base.py +0 -0
  84. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  85. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  86. {langroid-0.47.1 → langroid-0.48.0}/langroid/language_models/utils.py +0 -0
  87. {langroid-0.47.1 → langroid-0.48.0}/langroid/mytypes.py +0 -0
  88. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/__init__.py +0 -0
  89. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/agent_chats.py +0 -0
  90. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/code_parser.py +0 -0
  91. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/para_sentence_split.py +0 -0
  92. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/parse_json.py +0 -0
  93. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/pdf_utils.py +0 -0
  94. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/repo_loader.py +0 -0
  95. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/routing.py +0 -0
  96. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/search.py +0 -0
  97. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/spider.py +0 -0
  98. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/table_loader.py +0 -0
  99. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/urls.py +0 -0
  100. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/utils.py +0 -0
  101. {langroid-0.47.1 → langroid-0.48.0}/langroid/parsing/web_search.py +0 -0
  102. {langroid-0.47.1 → langroid-0.48.0}/langroid/prompts/__init__.py +0 -0
  103. {langroid-0.47.1 → langroid-0.48.0}/langroid/prompts/dialog.py +0 -0
  104. {langroid-0.47.1 → langroid-0.48.0}/langroid/prompts/prompts_config.py +0 -0
  105. {langroid-0.47.1 → langroid-0.48.0}/langroid/prompts/templates.py +0 -0
  106. {langroid-0.47.1 → langroid-0.48.0}/langroid/py.typed +0 -0
  107. {langroid-0.47.1 → langroid-0.48.0}/langroid/pydantic_v1/__init__.py +0 -0
  108. {langroid-0.47.1 → langroid-0.48.0}/langroid/pydantic_v1/main.py +0 -0
  109. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/__init__.py +0 -0
  110. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/algorithms/__init__.py +0 -0
  111. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/algorithms/graph.py +0 -0
  112. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/configuration.py +0 -0
  113. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/constants.py +0 -0
  114. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/git_utils.py +0 -0
  115. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/globals.py +0 -0
  116. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/logging.py +0 -0
  117. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/object_registry.py +0 -0
  118. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/output/__init__.py +0 -0
  119. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/output/citations.py +0 -0
  120. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/output/printing.py +0 -0
  121. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/output/status.py +0 -0
  122. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/pandas_utils.py +0 -0
  123. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/pydantic_utils.py +0 -0
  124. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/system.py +0 -0
  125. {langroid-0.47.1 → langroid-0.48.0}/langroid/utils/types.py +0 -0
  126. {langroid-0.47.1 → langroid-0.48.0}/langroid/vector_store/__init__.py +0 -0
  127. {langroid-0.47.1 → langroid-0.48.0}/langroid/vector_store/base.py +0 -0
  128. {langroid-0.47.1 → langroid-0.48.0}/langroid/vector_store/chromadb.py +0 -0
  129. {langroid-0.47.1 → langroid-0.48.0}/langroid/vector_store/lancedb.py +0 -0
  130. {langroid-0.47.1 → langroid-0.48.0}/langroid/vector_store/meilisearch.py +0 -0
  131. {langroid-0.47.1 → langroid-0.48.0}/langroid/vector_store/pineconedb.py +0 -0
  132. {langroid-0.47.1 → langroid-0.48.0}/langroid/vector_store/postgres.py +0 -0
  133. {langroid-0.47.1 → langroid-0.48.0}/langroid/vector_store/qdrantdb.py +0 -0
  134. {langroid-0.47.1 → langroid-0.48.0}/langroid/vector_store/weaviatedb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.47.1
3
+ Version: 0.48.0
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -108,7 +108,7 @@ Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'doc-chat'
108
108
  Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'doc-chat'
109
109
  Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'doc-chat'
110
110
  Provides-Extra: doc-parsers
111
- Requires-Dist: markitdown>=0.0.1a3; extra == 'doc-parsers'
111
+ Requires-Dist: markitdown[docx,pptx,xlsx]>=0.0.1a3; extra == 'doc-parsers'
112
112
  Requires-Dist: openpyxl>=3.1.5; extra == 'doc-parsers'
113
113
  Requires-Dist: python-docx>=1.1.2; extra == 'doc-parsers'
114
114
  Requires-Dist: python-pptx>=1.0.2; extra == 'doc-parsers'
@@ -144,6 +144,8 @@ Requires-Dist: litellm<2.0.0,>=1.30.1; extra == 'litellm'
144
144
  Provides-Extra: marker-pdf
145
145
  Requires-Dist: marker-pdf[full]>=1.6.0; (sys_platform != 'darwin' or platform_machine != 'x86_64') and extra == 'marker-pdf'
146
146
  Requires-Dist: opencv-python>=4.11.0.86; extra == 'marker-pdf'
147
+ Provides-Extra: markitdown
148
+ Requires-Dist: markitdown[docx,pptx,xlsx]>=0.0.1a3; extra == 'markitdown'
147
149
  Provides-Extra: meilisearch
148
150
  Requires-Dist: meilisearch-python-sdk<3.0.0,>=2.2.3; extra == 'meilisearch'
149
151
  Provides-Extra: metaphor
@@ -157,7 +159,7 @@ Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == 'neo4j'
157
159
  Provides-Extra: pdf-parsers
158
160
  Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'pdf-parsers'
159
161
  Requires-Dist: marker-pdf; extra == 'pdf-parsers'
160
- Requires-Dist: markitdown>=0.0.1a3; extra == 'pdf-parsers'
162
+ Requires-Dist: markitdown[docx,pptx,xlsx]>=0.0.1a3; extra == 'pdf-parsers'
161
163
  Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'pdf-parsers'
162
164
  Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'pdf-parsers'
163
165
  Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'pdf-parsers'
@@ -54,4 +54,5 @@ class RetrieverAgent(DocChatAgent):
54
54
  records = self.get_records()
55
55
  if self.vecdb is None:
56
56
  logger.warning("Vector store not configured. Cannot ingest records.")
57
- self.vecdb.add_documents(records)
57
+ else:
58
+ self.vecdb.add_documents(records)
@@ -48,7 +48,6 @@ from langroid.language_models.base import (
48
48
  from langroid.language_models.config import HFPromptFormatterConfig
49
49
  from langroid.language_models.model_info import (
50
50
  DeepSeekModel,
51
- GeminiModel,
52
51
  OpenAI_API_ParamInfo,
53
52
  )
54
53
  from langroid.language_models.model_info import (
@@ -672,10 +671,8 @@ class OpenAIGPT(LanguageModel):
672
671
  return self.config.completion_model in openai_completion_models
673
672
 
674
673
  def is_gemini_model(self) -> bool:
675
- gemini_models = [e.value for e in GeminiModel]
676
- return self.chat_model_orig in gemini_models or self.chat_model_orig.startswith(
677
- "gemini/"
678
- )
674
+ """Are we using the gemini OpenAI-compatible API?"""
675
+ return self.chat_model_orig.startswith("gemini/")
679
676
 
680
677
  def is_deepseek_model(self) -> bool:
681
678
  deepseek_models = [e.value for e in DeepSeekModel]
@@ -161,6 +161,8 @@ class DocumentParser(Parser):
161
161
  return UnstructuredDocxParser(source, config)
162
162
  elif config.docx.library == "python-docx":
163
163
  return PythonDocxParser(source, config)
164
+ elif config.docx.library == "markitdown-docx":
165
+ return MarkitdownDocxParser(source, config)
164
166
  else:
165
167
  raise ValueError(
166
168
  f"Unsupported DOCX library specified: {config.docx.library}"
@@ -887,6 +889,44 @@ class PythonDocxParser(DocumentParser):
887
889
  )
888
890
 
889
891
 
892
+ class MarkitdownDocxParser(DocumentParser):
893
+ def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
894
+ try:
895
+ from markitdown import MarkItDown
896
+ except ImportError:
897
+ LangroidImportError("markitdown", ["markitdown", "doc-parsers"])
898
+ md = MarkItDown()
899
+ self.doc_bytes.seek(0) # Reset to start
900
+
901
+ # Direct conversion from stream works for DOCX (unlike XLSX)
902
+ result = md.convert_stream(self.doc_bytes, file_extension=".docx")
903
+
904
+ # Split content into logical sections (paragraphs, sections, etc.)
905
+ # This approach differs from the strict page-based approach used for PDFs
906
+ sections = re.split(r"(?=# |\n## |\n### )", result.text_content)
907
+
908
+ # Filter out empty sections
909
+ sections = [section for section in sections if section.strip()]
910
+
911
+ for i, section in enumerate(sections):
912
+ yield i, section
913
+
914
+ def get_document_from_page(self, md_content: str) -> Document:
915
+ """
916
+ Get Document object from a given markdown section.
917
+
918
+ Args:
919
+ md_content (str): The markdown content for the section.
920
+
921
+ Returns:
922
+ Document: Document object, with content and possible metadata.
923
+ """
924
+ return Document(
925
+ content=self.fix_text(md_content),
926
+ metadata=DocMetaData(source=self.source),
927
+ )
928
+
929
+
890
930
  class MarkitdownXLSXParser(DocumentParser):
891
931
  def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
892
932
  try:
@@ -78,7 +78,7 @@ class PdfParsingConfig(BaseParsingConfig):
78
78
 
79
79
 
80
80
  class DocxParsingConfig(BaseSettings):
81
- library: Literal["python-docx", "unstructured"] = "unstructured"
81
+ library: Literal["python-docx", "unstructured", "markitdown-docx"] = "unstructured"
82
82
 
83
83
 
84
84
  class DocParsingConfig(BaseSettings):
@@ -48,6 +48,15 @@ class FirecrawlConfig(BaseCrawlerConfig):
48
48
  env_prefix = "FIRECRAWL_"
49
49
 
50
50
 
51
+ class ExaCrawlerConfig(BaseCrawlerConfig):
52
+ api_key: str = ""
53
+
54
+ class Config:
55
+ # Allow setting of fields via env vars with prefix EXA_
56
+ # e.g., EXA_API_KEY=your_api_key
57
+ env_prefix = "EXA_"
58
+
59
+
51
60
  class BaseCrawler(ABC):
52
61
  """Abstract base class for web crawlers."""
53
62
 
@@ -150,6 +159,8 @@ class CrawlerFactory:
150
159
  return TrafilaturaCrawler(config)
151
160
  elif isinstance(config, FirecrawlConfig):
152
161
  return FirecrawlCrawler(config)
162
+ elif isinstance(config, ExaCrawlerConfig):
163
+ return ExaCrawler(config)
153
164
  else:
154
165
  raise ValueError(f"Unsupported crawler configuration type: {type(config)}")
155
166
 
@@ -311,6 +322,60 @@ class FirecrawlCrawler(BaseCrawler):
311
322
  return docs
312
323
 
313
324
 
325
+ class ExaCrawler(BaseCrawler):
326
+ """Crawler implementation using Exa API."""
327
+
328
+ def __init__(self, config: ExaCrawlerConfig) -> None:
329
+ """Initialize the Exa crawler.
330
+
331
+ Args:
332
+ config: Configuration for the crawler
333
+ """
334
+ super().__init__(config)
335
+ self.config: ExaCrawlerConfig = config
336
+
337
+ @property
338
+ def needs_parser(self) -> bool:
339
+ return False
340
+
341
+ def crawl(self, urls: List[str]) -> List[Document]:
342
+ """Crawl the given URLs using Exa SDK.
343
+
344
+ Args:
345
+ urls: List of URLs to crawl
346
+
347
+ Returns:
348
+ List of Documents with content extracted from the URLs
349
+
350
+ Raises:
351
+ LangroidImportError: If the exa package is not installed
352
+ ValueError: If the Exa API key is not set
353
+ """
354
+ try:
355
+ from exa_py import Exa
356
+ except ImportError:
357
+ raise LangroidImportError("exa", "exa")
358
+
359
+ if not self.config.api_key:
360
+ raise ValueError("EXA_API_KEY key is required in your env or .env")
361
+
362
+ exa = Exa(self.config.api_key)
363
+ docs = []
364
+
365
+ try:
366
+ results = exa.get_contents(urls, text=True)
367
+
368
+ for result in results.results:
369
+ if result.text:
370
+ metadata = DocMetaData(source=result.url)
371
+ docs.append(Document(content=result.text, metadata=metadata))
372
+
373
+ except Exception as e:
374
+ logging.error(f"Error retrieving content from Exa API: {e}")
375
+
376
+ return docs
377
+
378
+
314
379
  class URLLoader:
315
380
  """Loads URLs and extracts text using a specified crawler."""
316
381
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "langroid"
3
- version = "0.47.1"
3
+ version = "0.48.0"
4
4
  authors = [
5
5
  {name = "Prasad Chalasani", email = "pchalasani@gmail.com"},
6
6
  ]
@@ -147,7 +147,7 @@ pdf-parsers = [
147
147
  "pymupdf4llm<0.1.0,>=0.0.17",
148
148
  "pdf2image<2.0.0,>=1.17.0",
149
149
  "pytesseract<0.4.0,>=0.3.10",
150
- "markitdown>=0.0.1a3",
150
+ "markitdown[docx,xlsx,pptx]>=0.0.1a3",
151
151
  "marker-pdf",
152
152
  ]
153
153
 
@@ -155,6 +155,10 @@ docx = [
155
155
  "python-docx<2.0.0,>=1.1.0",
156
156
  ]
157
157
 
158
+ markitdown = [
159
+ "markitdown[docx,xlsx,pptx]>=0.0.1a3",
160
+ ]
161
+
158
162
  marker-pdf = [
159
163
  "marker-pdf[full]>=1.6.0; sys_platform != 'darwin' or platform_machine != 'x86_64'",
160
164
  "opencv-python>=4.11.0.86",
@@ -252,7 +256,7 @@ google-generativeai = [
252
256
  "google-genai>=1.0.0",
253
257
  ]
254
258
  doc-parsers = [
255
- "markitdown>=0.0.1a3",
259
+ "markitdown[docx,xlsx,pptx]>=0.0.1a3",
256
260
  "openpyxl>=3.1.5",
257
261
  "python-docx>=1.1.2",
258
262
  "python-pptx>=1.0.2",
File without changes
File without changes
File without changes
File without changes
File without changes