langroid 0.35.1__tar.gz → 0.36.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. {langroid-0.35.1 → langroid-0.36.1}/PKG-INFO +18 -6
  2. {langroid-0.35.1 → langroid-0.36.1}/README.md +13 -5
  3. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/doc_chat_agent.py +11 -7
  4. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/parser.py +5 -3
  5. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/utils.py +2 -2
  6. langroid-0.36.1/langroid/utils/output/citations.py +61 -0
  7. {langroid-0.35.1 → langroid-0.36.1}/langroid/vector_store/__init__.py +11 -0
  8. {langroid-0.35.1 → langroid-0.36.1}/langroid/vector_store/base.py +4 -1
  9. langroid-0.36.1/langroid/vector_store/weaviatedb.py +271 -0
  10. {langroid-0.35.1 → langroid-0.36.1}/pyproject.toml +7 -2
  11. langroid-0.35.1/langroid/utils/output/citations.py +0 -41
  12. {langroid-0.35.1 → langroid-0.36.1}/.gitignore +0 -0
  13. {langroid-0.35.1 → langroid-0.36.1}/LICENSE +0 -0
  14. {langroid-0.35.1 → langroid-0.36.1}/langroid/__init__.py +0 -0
  15. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/__init__.py +0 -0
  16. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/base.py +0 -0
  17. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/batch.py +0 -0
  18. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/callbacks/__init__.py +0 -0
  19. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/callbacks/chainlit.py +0 -0
  20. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/chat_agent.py +0 -0
  21. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/chat_document.py +0 -0
  22. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/openai_assistant.py +0 -0
  23. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/__init__.py +0 -0
  24. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/arangodb/__init__.py +0 -0
  25. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
  26. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/arangodb/system_messages.py +0 -0
  27. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/arangodb/tools.py +0 -0
  28. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/arangodb/utils.py +0 -0
  29. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  30. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/lance_rag/__init__.py +0 -0
  31. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  32. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  33. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  34. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/lance_tools.py +0 -0
  35. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/neo4j/__init__.py +0 -0
  36. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  37. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  38. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/neo4j/system_messages.py +0 -0
  39. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/neo4j/tools.py +0 -0
  40. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  41. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/retriever_agent.py +0 -0
  42. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/sql/__init__.py +0 -0
  43. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  44. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/sql/utils/__init__.py +0 -0
  45. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  46. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  47. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/sql/utils/system_message.py +0 -0
  48. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/sql/utils/tools.py +0 -0
  49. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/special/table_chat_agent.py +0 -0
  50. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/task.py +0 -0
  51. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tool_message.py +0 -0
  52. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/__init__.py +0 -0
  53. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  54. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/file_tools.py +0 -0
  55. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/google_search_tool.py +0 -0
  56. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  57. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/orchestration.py +0 -0
  58. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/recipient_tool.py +0 -0
  59. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/retrieval_tool.py +0 -0
  60. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/rewind_tool.py +0 -0
  61. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/tools/segment_extract_tool.py +0 -0
  62. {langroid-0.35.1 → langroid-0.36.1}/langroid/agent/xml_tool_message.py +0 -0
  63. {langroid-0.35.1 → langroid-0.36.1}/langroid/cachedb/__init__.py +0 -0
  64. {langroid-0.35.1 → langroid-0.36.1}/langroid/cachedb/base.py +0 -0
  65. {langroid-0.35.1 → langroid-0.36.1}/langroid/cachedb/momento_cachedb.py +0 -0
  66. {langroid-0.35.1 → langroid-0.36.1}/langroid/cachedb/redis_cachedb.py +0 -0
  67. {langroid-0.35.1 → langroid-0.36.1}/langroid/embedding_models/__init__.py +0 -0
  68. {langroid-0.35.1 → langroid-0.36.1}/langroid/embedding_models/base.py +0 -0
  69. {langroid-0.35.1 → langroid-0.36.1}/langroid/embedding_models/models.py +0 -0
  70. {langroid-0.35.1 → langroid-0.36.1}/langroid/embedding_models/protoc/__init__.py +0 -0
  71. {langroid-0.35.1 → langroid-0.36.1}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  72. {langroid-0.35.1 → langroid-0.36.1}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  73. {langroid-0.35.1 → langroid-0.36.1}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  74. {langroid-0.35.1 → langroid-0.36.1}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  75. {langroid-0.35.1 → langroid-0.36.1}/langroid/embedding_models/remote_embeds.py +0 -0
  76. {langroid-0.35.1 → langroid-0.36.1}/langroid/exceptions.py +0 -0
  77. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/__init__.py +0 -0
  78. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/azure_openai.py +0 -0
  79. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/base.py +0 -0
  80. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/config.py +0 -0
  81. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/mock_lm.py +0 -0
  82. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/openai_gpt.py +0 -0
  83. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  84. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/prompt_formatter/base.py +0 -0
  85. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  86. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  87. {langroid-0.35.1 → langroid-0.36.1}/langroid/language_models/utils.py +0 -0
  88. {langroid-0.35.1 → langroid-0.36.1}/langroid/mytypes.py +0 -0
  89. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/__init__.py +0 -0
  90. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/agent_chats.py +0 -0
  91. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/code_parser.py +0 -0
  92. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/document_parser.py +0 -0
  93. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/para_sentence_split.py +0 -0
  94. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/parse_json.py +0 -0
  95. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/repo_loader.py +0 -0
  96. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/routing.py +0 -0
  97. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/search.py +0 -0
  98. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/spider.py +0 -0
  99. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/table_loader.py +0 -0
  100. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/url_loader.py +0 -0
  101. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/urls.py +0 -0
  102. {langroid-0.35.1 → langroid-0.36.1}/langroid/parsing/web_search.py +0 -0
  103. {langroid-0.35.1 → langroid-0.36.1}/langroid/prompts/__init__.py +0 -0
  104. {langroid-0.35.1 → langroid-0.36.1}/langroid/prompts/dialog.py +0 -0
  105. {langroid-0.35.1 → langroid-0.36.1}/langroid/prompts/prompts_config.py +0 -0
  106. {langroid-0.35.1 → langroid-0.36.1}/langroid/prompts/templates.py +0 -0
  107. {langroid-0.35.1 → langroid-0.36.1}/langroid/py.typed +0 -0
  108. {langroid-0.35.1 → langroid-0.36.1}/langroid/pydantic_v1/__init__.py +0 -0
  109. {langroid-0.35.1 → langroid-0.36.1}/langroid/pydantic_v1/main.py +0 -0
  110. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/__init__.py +0 -0
  111. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/algorithms/__init__.py +0 -0
  112. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/algorithms/graph.py +0 -0
  113. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/configuration.py +0 -0
  114. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/constants.py +0 -0
  115. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/git_utils.py +0 -0
  116. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/globals.py +0 -0
  117. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/logging.py +0 -0
  118. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/object_registry.py +0 -0
  119. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/output/__init__.py +0 -0
  120. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/output/printing.py +0 -0
  121. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/output/status.py +0 -0
  122. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/pandas_utils.py +0 -0
  123. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/pydantic_utils.py +0 -0
  124. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/system.py +0 -0
  125. {langroid-0.35.1 → langroid-0.36.1}/langroid/utils/types.py +0 -0
  126. {langroid-0.35.1 → langroid-0.36.1}/langroid/vector_store/chromadb.py +0 -0
  127. {langroid-0.35.1 → langroid-0.36.1}/langroid/vector_store/lancedb.py +0 -0
  128. {langroid-0.35.1 → langroid-0.36.1}/langroid/vector_store/meilisearch.py +0 -0
  129. {langroid-0.35.1 → langroid-0.36.1}/langroid/vector_store/momento.py +0 -0
  130. {langroid-0.35.1 → langroid-0.36.1}/langroid/vector_store/qdrantdb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.35.1
3
+ Version: 0.36.1
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -75,6 +75,7 @@ Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'all'
75
75
  Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'all'
76
76
  Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'all'
77
77
  Requires-Dist: unstructured[docx,pdf,pptx]<0.10.18,>=0.10.16; extra == 'all'
78
+ Requires-Dist: weaviate-client>=4.9.6; extra == 'all'
78
79
  Provides-Extra: arango
79
80
  Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'arango'
80
81
  Requires-Dist: python-arango<9.0.0,>=8.1.2; extra == 'arango'
@@ -148,6 +149,9 @@ Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'vecdbs'
148
149
  Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == 'vecdbs'
149
150
  Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == 'vecdbs'
150
151
  Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == 'vecdbs'
152
+ Requires-Dist: weaviate-client>=4.9.6; extra == 'vecdbs'
153
+ Provides-Extra: weaviate
154
+ Requires-Dist: weaviate-client>=4.9.6; extra == 'weaviate'
151
155
  Description-Content-Type: text/markdown
152
156
 
153
157
  <div align="center">
@@ -288,20 +292,28 @@ teacher_task.run()
288
292
  <summary> <b>Click to expand</b></summary>
289
293
 
290
294
  - **Jan 2025:**
291
- - [0.33.0](https://github.com/langroid/langroid/releases/tag/0.33.3) Move from Poetry to uv!
295
+ - [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
296
+ - [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from
297
+ Reasoning LLMs (e.g. DeepSeek, OpenAI o1) in addition to final answer.
298
+ - [0.34.0](https://github.com/langroid/langroid/releases/tag/0.34.0): DocChatAgent
299
+ chunk enrichment to improve retrieval (collaboration with @dfm88).
300
+ - [0.33.0](https://github.com/langroid/langroid/releases/tag/0.33.3) Move from Poetry to uv! (thanks @abab-dev).
292
301
  - [0.32.0](https://github.com/langroid/langroid/releases/tag/0.32.0) DeepSeek v3 support.
293
302
  - **Dec 2024:**
294
303
  - [0.31.0](https://github.com/langroid/langroid/releases/tag/0.31.0) Azure OpenAI Embeddings
295
- - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings.
296
- - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client
304
+ - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings (thanks @Kwigg).
305
+ - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client (thanks
306
+ @johannestang).
297
307
  - [0.28.0](https://github.com/langroid/langroid/releases/tag/0.28.0) `ToolMessage`: `_handler` field to override
298
- default handler method name in `request` field.
308
+ default handler method name in `request` field (thanks @alexagr).
299
309
  - [0.27.0](https://github.com/langroid/langroid/releases/tag/0.27.0) OpenRouter Support.
300
310
  - [0.26.0](https://github.com/langroid/langroid/releases/tag/0.26.0) Update to latest Chainlit.
301
- - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and user-response.
311
+ - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and
312
+ user-response (thanks @alexagr).
302
313
  - **Nov 2024:**
303
314
  - **[0.24.0](https://langroid.github.io/langroid/notes/structured-output/)**:
304
315
  Enables support for `Agent`s with strict JSON schema output format on compatible LLMs and strict mode for the OpenAI tools API.
316
+ (thanks @nilspalumbo).
305
317
  - **[0.23.0](https://langroid.github.io/langroid/tutorials/local-llm-setup/#local-llms-hosted-on-glhfchat)**:
306
318
  support for LLMs (e.g. `Qwen2.5-Coder-32b-Instruct`) hosted on glhf.chat
307
319
  - **[0.22.0](https://langroid.github.io/langroid/notes/large-tool-results/)**:
@@ -136,20 +136,28 @@ teacher_task.run()
136
136
  <summary> <b>Click to expand</b></summary>
137
137
 
138
138
  - **Jan 2025:**
139
- - [0.33.0](https://github.com/langroid/langroid/releases/tag/0.33.3) Move from Poetry to uv!
139
+ - [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
140
+ - [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from
141
+ Reasoning LLMs (e.g. DeepSeek, OpenAI o1) in addition to final answer.
142
+ - [0.34.0](https://github.com/langroid/langroid/releases/tag/0.34.0): DocChatAgent
143
+ chunk enrichment to improve retrieval (collaboration with @dfm88).
144
+ - [0.33.0](https://github.com/langroid/langroid/releases/tag/0.33.3) Move from Poetry to uv! (thanks @abab-dev).
140
145
  - [0.32.0](https://github.com/langroid/langroid/releases/tag/0.32.0) DeepSeek v3 support.
141
146
  - **Dec 2024:**
142
147
  - [0.31.0](https://github.com/langroid/langroid/releases/tag/0.31.0) Azure OpenAI Embeddings
143
- - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings.
144
- - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client
148
+ - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings (thanks @Kwigg).
149
+ - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client (thanks
150
+ @johannestang).
145
151
  - [0.28.0](https://github.com/langroid/langroid/releases/tag/0.28.0) `ToolMessage`: `_handler` field to override
146
- default handler method name in `request` field.
152
+ default handler method name in `request` field (thanks @alexagr).
147
153
  - [0.27.0](https://github.com/langroid/langroid/releases/tag/0.27.0) OpenRouter Support.
148
154
  - [0.26.0](https://github.com/langroid/langroid/releases/tag/0.26.0) Update to latest Chainlit.
149
- - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and user-response.
155
+ - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and
156
+ user-response (thanks @alexagr).
150
157
  - **Nov 2024:**
151
158
  - **[0.24.0](https://langroid.github.io/langroid/notes/structured-output/)**:
152
159
  Enables support for `Agent`s with strict JSON schema output format on compatible LLMs and strict mode for the OpenAI tools API.
160
+ (thanks @nilspalumbo).
153
161
  - **[0.23.0](https://langroid.github.io/langroid/tutorials/local-llm-setup/#local-llms-hosted-on-glhfchat)**:
154
162
  support for LLMs (e.g. `Qwen2.5-Coder-32b-Instruct`) hosted on glhf.chat
155
163
  - **[0.22.0](https://langroid.github.io/langroid/notes/large-tool-results/)**:
@@ -15,6 +15,7 @@ pip install "langroid[hf-embeddings]"
15
15
  """
16
16
 
17
17
  import logging
18
+ import textwrap
18
19
  from collections import OrderedDict
19
20
  from functools import cache
20
21
  from typing import Any, Callable, Dict, List, Optional, Set, Tuple, no_type_check
@@ -81,7 +82,7 @@ You will be given various passages from these documents, and asked to answer que
81
82
  about them, or summarize them into coherent answers.
82
83
  """
83
84
 
84
- CHUNK_ENRICHMENT_DELIMITER = "<##-##-##>"
85
+ CHUNK_ENRICHMENT_DELIMITER = "\n<##-##-##>"
85
86
 
86
87
  has_sentence_transformers = False
87
88
  try:
@@ -810,9 +811,11 @@ class DocChatAgent(ChatAgent):
810
811
  return "\n".join(
811
812
  [
812
813
  f"""
813
- [{i+1}]
814
+ -----[EXTRACT #{i+1}]----------
814
815
  {content}
815
816
  {source}
817
+ -----END OF EXTRACT------------
818
+
816
819
  """
817
820
  for i, (content, source) in enumerate(zip(contents, sources))
818
821
  ]
@@ -949,12 +952,13 @@ class DocChatAgent(ChatAgent):
949
952
  continue
950
953
 
951
954
  # Combine original content with questions in a structured way
952
- combined_content = f"""
953
- {doc.content}
954
-
955
+ combined_content = textwrap.dedent(
956
+ f"""\
957
+ {doc.content}
955
958
  {enrichment_config.delimiter}
956
959
  {enrichment}
957
- """.strip()
960
+ """
961
+ )
958
962
 
959
963
  new_doc = doc.copy(
960
964
  update={
@@ -1440,7 +1444,7 @@ class DocChatAgent(ChatAgent):
1440
1444
  delimiter = self.config.chunk_enrichment_config.delimiter
1441
1445
  return [
1442
1446
  (
1443
- doc.copy(update={"content": doc.content.split(delimiter)[0].strip()})
1447
+ doc.copy(update={"content": doc.content.split(delimiter)[0]})
1444
1448
  if doc.content and getattr(doc.metadata, "has_enrichment", False)
1445
1449
  else doc
1446
1450
  )
@@ -267,9 +267,11 @@ class Parser:
267
267
  # Truncate the chunk text at the punctuation mark
268
268
  chunk_text = chunk_text[: last_punctuation + 1]
269
269
 
270
- # Remove any newline characters and strip any leading or
271
- # trailing whitespace
272
- chunk_text_to_append = re.sub(r"\n{2,}", "\n", chunk_text).strip()
270
+ # Replace redundant (3 or more) newlines with 2 newlines to preserve
271
+ # paragraph separation!
272
+ # But do NOT strip leading/trailing whitespace, to preserve formatting
273
+ # (e.g. code blocks, or in case we want to stitch chunks back together)
274
+ chunk_text_to_append = re.sub(r"\n{3,}", "\n\n", chunk_text)
273
275
 
274
276
  if len(chunk_text_to_append) > self.config.discard_chunk_chars:
275
277
  # Append the chunk text to the list of chunks
@@ -310,9 +310,9 @@ def extract_numbered_segments(s: str, specs: str) -> str:
310
310
  ]
311
311
 
312
312
  # If we extracted any segments from this paragraph,
313
- # join them and append to results
313
+ # join them with ellipsis (...) and append to results.
314
314
  if extracted_segments:
315
- extracted_paragraphs.append(" ".join(extracted_segments))
315
+ extracted_paragraphs.append("...".join(extracted_segments))
316
316
 
317
317
  return "\n\n".join(extracted_paragraphs)
318
318
 
@@ -0,0 +1,61 @@
1
+ def extract_markdown_references(md_string: str) -> list[int]:
2
+ """
3
+ Extracts markdown references (e.g., [^1], [^2]) from a string and returns
4
+ them as a sorted list of integers.
5
+
6
+ Args:
7
+ md_string (str): The markdown string containing references.
8
+
9
+ Returns:
10
+ list[int]: A sorted list of unique integers from the markdown references.
11
+ """
12
+ import re
13
+
14
+ # Regex to find all occurrences of [^<number>]
15
+ matches = re.findall(r"\[\^(\d+)\]", md_string)
16
+ # Convert matches to integers, remove duplicates with set, and sort
17
+ return sorted(set(int(match) for match in matches))
18
+
19
+
20
+ def format_footnote_text(content: str, width: int = 0) -> str:
21
+ """
22
+ Formats the content so that each original line is individually processed.
23
+ - If width=0, no wrapping is done (lines remain as is).
24
+ - If width>0, lines are wrapped to that width.
25
+ - Blank lines remain blank (with indentation).
26
+ - Everything is indented by 4 spaces (for markdown footnotes).
27
+
28
+ Args:
29
+ content (str): The text of the footnote to be formatted.
30
+ width (int): Maximum width of the text lines. If 0, lines are not wrapped.
31
+
32
+ Returns:
33
+ str: Properly formatted markdown footnote text.
34
+ """
35
+ import textwrap
36
+
37
+ indent = " " # 4 spaces for markdown footnotes
38
+ lines = content.split("\n") # keep original line structure
39
+
40
+ output_lines = []
41
+ for line in lines:
42
+ # If the line is empty (or just spaces), keep it blank (but indented)
43
+ if not line.strip():
44
+ output_lines.append(indent)
45
+ continue
46
+
47
+ if width > 0:
48
+ # Wrap each non-empty line to the specified width
49
+ wrapped = textwrap.wrap(line, width=width)
50
+ if not wrapped:
51
+ # If textwrap gives nothing, add a blank (indented) line
52
+ output_lines.append(indent)
53
+ else:
54
+ for subline in wrapped:
55
+ output_lines.append(indent + subline)
56
+ else:
57
+ # No wrapping: just indent the original line
58
+ output_lines.append(indent + line)
59
+
60
+ # Join them with newline so we preserve the paragraph/blank line structure
61
+ return "\n".join(output_lines)
@@ -48,3 +48,14 @@ try:
48
48
  __all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
49
49
  except ImportError:
50
50
  pass
51
+
52
+ try:
53
+ from . import weaviatedb
54
+ from .weaviatedb import WeaviateDBConfig, WeaviateDB
55
+
56
+ weaviatedb
57
+ WeaviateDB
58
+ WeaviateDBConfig
59
+ __all__.extend(["weaviatedb", "WeaviateDB", "WeaviateDBConfig"])
60
+ except ImportError:
61
+ pass
@@ -59,6 +59,7 @@ class VectorStore(ABC):
59
59
  from langroid.vector_store.meilisearch import MeiliSearch, MeiliSearchConfig
60
60
  from langroid.vector_store.momento import MomentoVI, MomentoVIConfig
61
61
  from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig
62
+ from langroid.vector_store.weaviatedb import WeaviateDB, WeaviateDBConfig
62
63
 
63
64
  if isinstance(config, QdrantDBConfig):
64
65
  return QdrantDB(config)
@@ -70,6 +71,8 @@ class VectorStore(ABC):
70
71
  return LanceDB(config)
71
72
  elif isinstance(config, MeiliSearchConfig):
72
73
  return MeiliSearch(config)
74
+ elif isinstance(config, WeaviateDBConfig):
75
+ return WeaviateDB(config)
73
76
 
74
77
  else:
75
78
  logger.warning(
@@ -261,7 +264,7 @@ class VectorStore(ABC):
261
264
  metadata = copy.deepcopy(id2metadata[w[0]])
262
265
  metadata.window_ids = w
263
266
  document = Document(
264
- content=" ".join([d.content for d in self.get_documents_by_ids(w)]),
267
+ content="".join([d.content for d in self.get_documents_by_ids(w)]),
265
268
  metadata=metadata,
266
269
  )
267
270
  # make a fresh id since content is in general different
@@ -0,0 +1,271 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ from typing import Any, List, Optional, Sequence, Tuple
5
+
6
+ from dotenv import load_dotenv
7
+
8
+ from langroid.embedding_models.base import (
9
+ EmbeddingModelsConfig,
10
+ )
11
+ from langroid.embedding_models.models import OpenAIEmbeddingsConfig
12
+ from langroid.exceptions import LangroidImportError
13
+ from langroid.mytypes import DocMetaData, Document, EmbeddingFunction
14
+ from langroid.utils.configuration import settings
15
+ from langroid.vector_store.base import VectorStore, VectorStoreConfig
16
+
17
+ logger = logging.getLogger(__name__)
18
+ try:
19
+ import weaviate
20
+ from weaviate.classes.config import (
21
+ Configure,
22
+ VectorDistances,
23
+ )
24
+ from weaviate.classes.init import Auth
25
+ from weaviate.classes.query import Filter, MetadataQuery
26
+ from weaviate.util import generate_uuid5, get_valid_uuid
27
+ except ImportError:
28
+ raise LangroidImportError("weaviate", "weaviate")
29
+
30
+
31
+ class WeaviateDBConfig(VectorStoreConfig):
32
+ collection_name: str | None = "temp"
33
+ embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
34
+ distance: str = VectorDistances.COSINE
35
+
36
+
37
+ class WeaviateDB(VectorStore):
38
+ def __init__(self, config: WeaviateDBConfig = WeaviateDBConfig()):
39
+ super().__init__(config)
40
+ self.config: WeaviateDBConfig = config
41
+ self.embedding_fn: EmbeddingFunction = self.embedding_model.embedding_fn()
42
+ self.embedding_dim = self.embedding_model.embedding_dims
43
+ load_dotenv()
44
+ key = os.getenv("WEAVIATE_API_KEY")
45
+ url = os.getenv("WEAVIATE_API_URL")
46
+ if None in [key, url]:
47
+ logger.warning(
48
+ """WEAVIATE_API_KEY, WEAVIATE_API_URL env variable must be set to use
49
+ WeaviateDB in cloud mode. Please set these values
50
+ in your .env file.
51
+ """
52
+ )
53
+ self.client = weaviate.connect_to_weaviate_cloud(
54
+ cluster_url=url,
55
+ auth_credentials=Auth.api_key(key),
56
+ )
57
+ if config.collection_name is not None:
58
+ WeaviateDB.validate_and_format_collection_name(config.collection_name)
59
+
60
+ def clear_empty_collections(self) -> int:
61
+ colls = self.client.collections.list_all()
62
+ n_deletes = 0
63
+ for coll_name, _ in colls.items():
64
+ val = self.client.collections.get(coll_name)
65
+ if len(val) == 0:
66
+ n_deletes += 1
67
+ self.client.collections.delete(coll_name)
68
+ return n_deletes
69
+
70
+ def list_collections(self, empty: bool = False) -> List[str]:
71
+ colls = self.client.collections.list_all()
72
+ if empty:
73
+ return list(colls.keys())
74
+ non_empty_colls = [
75
+ coll_name
76
+ for coll_name in colls.keys()
77
+ if len(self.client.collections.get(coll_name)) > 0
78
+ ]
79
+
80
+ return non_empty_colls
81
+
82
+ def clear_all_collections(self, really: bool = False, prefix: str = "") -> int:
83
+ if not really:
84
+ logger.warning(
85
+ "Not really deleting all collections ,set really=True to confirm"
86
+ )
87
+ return 0
88
+ coll_names = [
89
+ c for c in self.list_collections(empty=True) if c.startswith(prefix)
90
+ ]
91
+ if len(coll_names) == 0:
92
+ logger.warning(f"No collections found with prefix {prefix}")
93
+ return 0
94
+ n_empty_deletes = 0
95
+ n_non_empty_deletes = 0
96
+ for name in coll_names:
97
+ info = self.client.collections.get(name)
98
+ points_count = len(info)
99
+
100
+ n_empty_deletes += points_count == 0
101
+ n_non_empty_deletes += points_count > 0
102
+ self.client.collections.delete(name)
103
+ logger.warning(
104
+ f"""
105
+ Deleted {n_empty_deletes} empty collections and
106
+ {n_non_empty_deletes} non-empty collections.
107
+ """
108
+ )
109
+ return n_empty_deletes + n_non_empty_deletes
110
+
111
+ def delete_collection(self, collection_name: str) -> None:
112
+ self.client.collections.delete(name=collection_name)
113
+
114
+ def create_collection(self, collection_name: str, replace: bool = False) -> None:
115
+ collection_name = WeaviateDB.validate_and_format_collection_name(
116
+ collection_name
117
+ )
118
+ self.config.collection_name = collection_name
119
+ if self.client.collections.exists(name=collection_name):
120
+ coll = self.client.collections.get(name=collection_name)
121
+ if len(coll) > 0:
122
+ logger.warning(f"Non-empty Collection {collection_name} already exists")
123
+ if not replace:
124
+ logger.warning("Not replacing collection")
125
+ return
126
+ else:
127
+ logger.warning("Recreating fresh collection")
128
+ self.client.collections.delete(name=collection_name)
129
+
130
+ vector_index_config = Configure.VectorIndex.hnsw(
131
+ distance_metric=VectorDistances.COSINE,
132
+ )
133
+ if self.config.embedding == OpenAIEmbeddingsConfig:
134
+ vectorizer_config = Configure.Vectorizer.text2vec_openai(
135
+ model=self.embedding_model
136
+ )
137
+ else:
138
+ vectorizer_config = None
139
+
140
+ collection_info = self.client.collections.create(
141
+ name=collection_name,
142
+ vector_index_config=vector_index_config,
143
+ vectorizer_config=vectorizer_config,
144
+ )
145
+ collection_info = self.client.collections.get(name=collection_name)
146
+ assert len(collection_info) in [0, None]
147
+ if settings.debug:
148
+ level = logger.getEffectiveLevel()
149
+ logger.setLevel(logging.INFO)
150
+ logger.info(collection_info)
151
+ logger.setLevel(level)
152
+
153
+ def add_documents(self, documents: Sequence[Document]) -> None:
154
+ super().maybe_add_ids(documents)
155
+ colls = self.list_collections(empty=True)
156
+ for doc in documents:
157
+ doc.metadata.id = str(self._create_valid_uuid_id(doc.metadata.id))
158
+ if len(documents) == 0:
159
+ return
160
+
161
+ document_dicts = [doc.dict() for doc in documents]
162
+ embedding_vecs = self.embedding_fn([doc.content for doc in documents])
163
+ if self.config.collection_name is None:
164
+ raise ValueError("No collection name set, cannot ingest docs")
165
+ if self.config.collection_name not in colls:
166
+ self.create_collection(self.config.collection_name, replace=True)
167
+ coll_name = self.client.collections.get(self.config.collection_name)
168
+ with coll_name.batch.dynamic() as batch:
169
+ for i, doc_dict in enumerate(document_dicts):
170
+ id = doc_dict["metadata"].pop("id", None)
171
+ batch.add_object(properties=doc_dict, uuid=id, vector=embedding_vecs[i])
172
+
173
+ def get_all_documents(self, where: str = "") -> List[Document]:
174
+ if self.config.collection_name is None:
175
+ raise ValueError("No collection name set, cannot retrieve docs")
176
+ # cannot use filter as client does not support json type queries
177
+ coll = self.client.collections.get(self.config.collection_name)
178
+ return [self.weaviate_obj_to_doc(item) for item in coll.iterator()]
179
+
180
+ def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
181
+ if self.config.collection_name is None:
182
+ raise ValueError("No collection name set, cannot retrieve docs")
183
+
184
+ docs = []
185
+ coll_name = self.client.collections.get(self.config.collection_name)
186
+
187
+ result = coll_name.query.fetch_objects(
188
+ filters=Filter.by_property("_id").contains_any(ids), limit=len(coll_name)
189
+ )
190
+
191
+ id_to_doc = {}
192
+ for item in result.objects:
193
+ doc = self.weaviate_obj_to_doc(item)
194
+ id_to_doc[doc.metadata.id] = doc
195
+
196
+ # Reconstruct the list of documents in the original order of input ids
197
+ docs = [id_to_doc[id] for id in ids if id in id_to_doc]
198
+
199
+ return docs
200
+
201
+ def similar_texts_with_scores(
202
+ self, text: str, k: int = 1, where: Optional[str] = None
203
+ ) -> List[Tuple[Document, float]]:
204
+ embedding = self.embedding_fn([text])[0]
205
+ if self.config.collection_name is None:
206
+ raise ValueError("No collections name set,cannot search")
207
+ coll = self.client.collections.get(self.config.collection_name)
208
+ response = coll.query.near_vector(
209
+ near_vector=embedding,
210
+ limit=k,
211
+ return_properties=True,
212
+ return_metadata=MetadataQuery(distance=True),
213
+ )
214
+ return [
215
+ (self.weaviate_obj_to_doc(item), 1 - item.metadata.distance)
216
+ for item in response.objects
217
+ ]
218
+
219
+ def _create_valid_uuid_id(self, id: str) -> Any:
220
+ try:
221
+ id = get_valid_uuid(id)
222
+ return id
223
+ except Exception:
224
+ return generate_uuid5(id)
225
+
226
+ def weaviate_obj_to_doc(self, input_object: Any) -> Document:
227
+ content = input_object.properties.get("content", "")
228
+ metadata_dict = input_object.properties.get("metadata", {})
229
+
230
+ window_ids = metadata_dict.pop("window_ids", [])
231
+ window_ids = [str(uuid) for uuid in window_ids]
232
+
233
+ # Ensure the id is a valid UUID string
234
+ id_value = get_valid_uuid(input_object.uuid)
235
+
236
+ metadata = DocMetaData(id=id_value, window_ids=window_ids, **metadata_dict)
237
+
238
+ return Document(content=content, metadata=metadata)
239
+
240
+ @staticmethod
241
+ def validate_and_format_collection_name(name: str) -> str:
242
+ """
243
+ Formats the collection name to comply with Weaviate's naming rules:
244
+ - Name must start with a capital letter.
245
+ - Name can only contain letters, numbers, and underscores.
246
+ - Replaces invalid characters with underscores.
247
+ """
248
+ if not name:
249
+ raise ValueError("Collection name cannot be empty.")
250
+
251
+ formatted_name = re.sub(r"[^a-zA-Z0-9_]", "_", name)
252
+
253
+ # Ensure the first letter is capitalized
254
+ if not formatted_name[0].isupper():
255
+ formatted_name = formatted_name.capitalize()
256
+
257
+ # Check if the name now meets the criteria
258
+ if not re.match(r"^[A-Z][A-Za-z0-9_]*$", formatted_name):
259
+ raise ValueError(
260
+ f"Invalid collection name '{name}'."
261
+ " Names must start with a capital letter "
262
+ "and contain only letters, numbers, and underscores."
263
+ )
264
+
265
+ if formatted_name != name:
266
+ logger.warning(
267
+ f"Collection name '{name}' was reformatted to '{formatted_name}' "
268
+ "to comply with Weaviate's rules."
269
+ )
270
+
271
+ return formatted_name
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "langroid"
3
- version = "0.35.1"
3
+ version = "0.36.1"
4
4
  authors = [
5
5
  {name = "Prasad Chalasani", email = "pchalasani@gmail.com"},
6
6
  ]
@@ -40,7 +40,7 @@ dependencies = [
40
40
  "pygments<3.0.0,>=2.15.1",
41
41
  "pyparsing<4.0.0,>=3.0.9",
42
42
  "pytest-rerunfailures<16.0,>=15.0",
43
- "python-dotenv<2.0.0,>=1.0.0",
43
+ "python-dotenv>=1.0.0,<2.0.0",
44
44
  "python-magic<1.0.0,>=0.4.27",
45
45
  "pyyaml<7.0.0,>=6.0.1",
46
46
  "qdrant-client<2.0.0,>=1.8.0",
@@ -79,6 +79,7 @@ vecdbs = [
79
79
  "tantivy<0.22.0,>=0.21.0",
80
80
  "pyarrow<16.0.0,>=15.0.0",
81
81
  "chromadb<=0.4.23,>=0.4.21",
82
+ "weaviate-client>=4.9.6",
82
83
  ]
83
84
 
84
85
  db = [
@@ -103,6 +104,7 @@ all = [
103
104
  "transformers<5.0.0,>=4.40.1",
104
105
  "huggingface-hub<0.22.0,>=0.21.2",
105
106
  "chromadb<=0.4.23,>=0.4.21",
107
+ "weaviate-client>=4.9.6",
106
108
  "metaphor-python<0.2.0,>=0.1.23",
107
109
  "neo4j<6.0.0,>=5.14.1",
108
110
  "python-arango<9.0.0,>=8.1.2",
@@ -190,6 +192,9 @@ chainlit = [
190
192
  chromadb = [
191
193
  "chromadb<=0.4.23,>=0.4.21",
192
194
  ]
195
+ weaviate = [
196
+ "weaviate-client>=4.9.6",
197
+ ]
193
198
 
194
199
  meilisearch = [
195
200
  "meilisearch-python-sdk<3.0.0,>=2.2.3",
@@ -1,41 +0,0 @@
1
def extract_markdown_references(md_string: str) -> list[int]:
    """
    Collect the numbers used in markdown footnote references
    (e.g., [^1], [^2]) found in a string.

    Args:
        md_string (str): The markdown text to scan.

    Returns:
        list[int]: The distinct reference numbers, in ascending order.
    """
    import re

    # Each occurrence of [^<number>] contributes its number; the set
    # collapses repeated references.
    reference_numbers = {
        int(found.group(1)) for found in re.finditer(r"\[\^(\d+)\]", md_string)
    }
    return sorted(reference_numbers)
18
-
19
-
20
def format_footnote_text(content: str, width: int = 80) -> str:
    """
    Lay out the body of a markdown footnote (the text that follows the
    first line carrying the reference, e.g. [^4]).

    The text is wrapped with `textwrap.wrap(content, width)` and every
    resulting line is prefixed with the footnote indent.

    Args:
        content (str): The footnote text to format.
        width (int): Width passed to `textwrap.wrap`.

    Returns:
        str: The indented, wrapped text, or "" when wrapping yields no lines.
    """
    import textwrap

    pieces = textwrap.wrap(content, width)
    if not pieces:
        return ""

    prefix = " "  # Indentation for markdown footnotes
    return "\n".join(prefix + piece for piece in pieces)
File without changes
File without changes
File without changes
File without changes