langroid 0.43.0__tar.gz → 0.44.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langroid-0.43.0 → langroid-0.44.0}/PKG-INFO +19 -4
- {langroid-0.43.0 → langroid-0.44.0}/README.md +18 -3
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/callbacks/chainlit.py +19 -9
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/chat_agent.py +16 -2
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/doc_chat_agent.py +8 -7
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/document_parser.py +17 -25
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/repo_loader.py +35 -15
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/search.py +3 -3
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/url_loader.py +7 -6
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/urls.py +2 -1
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/utils.py +16 -12
- {langroid-0.43.0 → langroid-0.44.0}/langroid/vector_store/postgres.py +5 -1
- {langroid-0.43.0 → langroid-0.44.0}/langroid/vector_store/qdrantdb.py +37 -18
- {langroid-0.43.0 → langroid-0.44.0}/langroid/vector_store/weaviatedb.py +30 -24
- {langroid-0.43.0 → langroid-0.44.0}/pyproject.toml +1 -1
- {langroid-0.43.0 → langroid-0.44.0}/.gitignore +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/LICENSE +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/base.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/batch.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/chat_document.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/openai_assistant.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/arangodb/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/arangodb/system_messages.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/arangodb/tools.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/arangodb/utils.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/lance_tools.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/neo4j/system_messages.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/neo4j/tools.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/relevance_extractor_agent.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/retriever_agent.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/special/table_chat_agent.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/task.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tool_message.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/exa_search_tool.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/file_tools.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/google_search_tool.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/metaphor_search_tool.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/orchestration.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/recipient_tool.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/retrieval_tool.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/rewind_tool.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/tools/tavily_search_tool.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/agent/xml_tool_message.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/cachedb/base.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/embedding_models/base.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/embedding_models/models.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/embedding_models/protoc/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/embedding_models/protoc/embeddings.proto +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/embedding_models/remote_embeds.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/exceptions.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/azure_openai.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/base.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/config.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/mock_lm.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/model_info.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/openai_gpt.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/language_models/utils.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/mytypes.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/para_sentence_split.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/parse_json.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/parser.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/pdf_utils.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/routing.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/spider.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/parsing/web_search.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/prompts/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/prompts/dialog.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/prompts/templates.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/py.typed +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/pydantic_v1/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/pydantic_v1/main.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/configuration.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/constants.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/git_utils.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/globals.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/logging.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/object_registry.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/output/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/output/citations.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/output/printing.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/output/status.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/pandas_utils.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/pydantic_utils.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/system.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/utils/types.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/vector_store/base.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/vector_store/chromadb.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/vector_store/lancedb.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.43.0 → langroid-0.44.0}/langroid/vector_store/pineconedb.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: langroid
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.44.0
|
4
4
|
Summary: Harness LLMs with Multi-Agent Programming
|
5
5
|
Author-email: Prasad Chalasani <pchalasani@gmail.com>
|
6
6
|
License: MIT
|
@@ -237,9 +237,11 @@ This Multi-Agent paradigm is inspired by the
|
|
237
237
|
|
238
238
|
`Langroid` is a fresh take on LLM app-development, where considerable thought has gone
|
239
239
|
into simplifying the developer experience;
|
240
|
-
it does not use `Langchain`, or any other LLM framework
|
240
|
+
it does not use `Langchain`, or any other LLM framework,
|
241
|
+
and works with [practically any LLM](https://langroid.github.io/langroid/tutorials/supported-models/).
|
241
242
|
|
242
|
-
:fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/)
|
243
|
+
:fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/),
|
244
|
+
and a [quick tour of Langroid](https://langroid.github.io/langroid/tutorials/langroid-tour/).
|
243
245
|
|
244
246
|
📢 Companies are using/adapting Langroid in **production**. Here is a quote:
|
245
247
|
|
@@ -327,6 +329,18 @@ teacher_task.run()
|
|
327
329
|
<details>
|
328
330
|
<summary> <b>Click to expand</b></summary>
|
329
331
|
|
332
|
+
- **Feb 2025:**
|
333
|
+
- [0.43.0](https://github.com/langroid/langroid/releases/tag/0.43.0): `GeminiPdfParser` for parsing PDF using
|
334
|
+
Gemini LLMs - Thanks @abab-dev.
|
335
|
+
- [0.42.0](https://github.com/langroid/langroid/releases/tag/0.42.0): `markitdown` parser for `pptx,xlsx,xls` files
|
336
|
+
Thanks @abab-dev.
|
337
|
+
- [0.41.0](https://github.com/langroid/langroid/releases/tag/0.41.0): `pinecone` vector-db (Thanks @coretado),
|
338
|
+
`Tavily` web-search (Thanks @Sozhan308), `Exa` web-search (Thanks @MuddyHope).
|
339
|
+
- [0.40.0](https://github.com/langroid/langroid/releases/tag/0.40.0): `pgvector` vector-db. Thanks @abab-dev.
|
340
|
+
- [0.39.0](https://github.com/langroid/langroid/releases/tag/0.39.0): `ChatAgentConfig.handle_llm_no_tool` for
|
341
|
+
handling LLM "forgetting" to use a tool.
|
342
|
+
- [0.38.0](https://github.com/langroid/langroid/releases/tag/0.38.0): Gemini embeddings - Thanks @abab-dev)
|
343
|
+
- [0.37.0](https://github.com/langroid/langroid/releases/tag/0.37.0): New PDF Parsers: `docling`, `pymupdf4llm`
|
330
344
|
- **Jan 2025:**
|
331
345
|
- [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
|
332
346
|
- [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from
|
@@ -591,7 +605,8 @@ section above)
|
|
591
605
|
Agents with specific skills, wrap them in Tasks, and combine tasks in a flexible way.
|
592
606
|
- **LLM Support**: Langroid supports OpenAI LLMs as well as LLMs from hundreds of
|
593
607
|
providers ([local/open](https://langroid.github.io/langroid/tutorials/local-llm-setup/) or [remote/commercial](https://langroid.github.io/langroid/tutorials/non-openai-llms/)) via proxy libraries and local model servers
|
594
|
-
such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui),
|
608
|
+
such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui),
|
609
|
+
[LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic the OpenAI API. See the [supported LLMs](https://langroid.github.io/langroid/tutorials/supported-models/).
|
595
610
|
- **Caching of LLM responses:** Langroid supports [Redis](https://redis.com/try-free/) and
|
596
611
|
[Momento](https://www.gomomento.com/) to cache LLM responses.
|
597
612
|
- **Vector-stores**: [LanceDB](https://github.com/lancedb/lancedb), [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) are currently supported.
|
@@ -45,9 +45,11 @@ This Multi-Agent paradigm is inspired by the
|
|
45
45
|
|
46
46
|
`Langroid` is a fresh take on LLM app-development, where considerable thought has gone
|
47
47
|
into simplifying the developer experience;
|
48
|
-
it does not use `Langchain`, or any other LLM framework
|
48
|
+
it does not use `Langchain`, or any other LLM framework,
|
49
|
+
and works with [practically any LLM](https://langroid.github.io/langroid/tutorials/supported-models/).
|
49
50
|
|
50
|
-
:fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/)
|
51
|
+
:fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/),
|
52
|
+
and a [quick tour of Langroid](https://langroid.github.io/langroid/tutorials/langroid-tour/).
|
51
53
|
|
52
54
|
📢 Companies are using/adapting Langroid in **production**. Here is a quote:
|
53
55
|
|
@@ -135,6 +137,18 @@ teacher_task.run()
|
|
135
137
|
<details>
|
136
138
|
<summary> <b>Click to expand</b></summary>
|
137
139
|
|
140
|
+
- **Feb 2025:**
|
141
|
+
- [0.43.0](https://github.com/langroid/langroid/releases/tag/0.43.0): `GeminiPdfParser` for parsing PDF using
|
142
|
+
Gemini LLMs - Thanks @abab-dev.
|
143
|
+
- [0.42.0](https://github.com/langroid/langroid/releases/tag/0.42.0): `markitdown` parser for `pptx,xlsx,xls` files
|
144
|
+
Thanks @abab-dev.
|
145
|
+
- [0.41.0](https://github.com/langroid/langroid/releases/tag/0.41.0): `pinecone` vector-db (Thanks @coretado),
|
146
|
+
`Tavily` web-search (Thanks @Sozhan308), `Exa` web-search (Thanks @MuddyHope).
|
147
|
+
- [0.40.0](https://github.com/langroid/langroid/releases/tag/0.40.0): `pgvector` vector-db. Thanks @abab-dev.
|
148
|
+
- [0.39.0](https://github.com/langroid/langroid/releases/tag/0.39.0): `ChatAgentConfig.handle_llm_no_tool` for
|
149
|
+
handling LLM "forgetting" to use a tool.
|
150
|
+
- [0.38.0](https://github.com/langroid/langroid/releases/tag/0.38.0): Gemini embeddings - Thanks @abab-dev)
|
151
|
+
- [0.37.0](https://github.com/langroid/langroid/releases/tag/0.37.0): New PDF Parsers: `docling`, `pymupdf4llm`
|
138
152
|
- **Jan 2025:**
|
139
153
|
- [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
|
140
154
|
- [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from
|
@@ -399,7 +413,8 @@ section above)
|
|
399
413
|
Agents with specific skills, wrap them in Tasks, and combine tasks in a flexible way.
|
400
414
|
- **LLM Support**: Langroid supports OpenAI LLMs as well as LLMs from hundreds of
|
401
415
|
providers ([local/open](https://langroid.github.io/langroid/tutorials/local-llm-setup/) or [remote/commercial](https://langroid.github.io/langroid/tutorials/non-openai-llms/)) via proxy libraries and local model servers
|
402
|
-
such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui),
|
416
|
+
such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui),
|
417
|
+
[LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic the OpenAI API. See the [supported LLMs](https://langroid.github.io/langroid/tutorials/supported-models/).
|
403
418
|
- **Caching of LLM responses:** Langroid supports [Redis](https://redis.com/try-free/) and
|
404
419
|
[Momento](https://www.gomomento.com/) to cache LLM responses.
|
405
420
|
- **Vector-stores**: [LanceDB](https://github.com/lancedb/lancedb), [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) are currently supported.
|
@@ -5,7 +5,16 @@ Callbacks for Chainlit integration.
|
|
5
5
|
import json
|
6
6
|
import logging
|
7
7
|
import textwrap
|
8
|
-
from typing import
|
8
|
+
from typing import (
|
9
|
+
TYPE_CHECKING,
|
10
|
+
Any,
|
11
|
+
Callable,
|
12
|
+
Dict,
|
13
|
+
List,
|
14
|
+
Literal,
|
15
|
+
Optional,
|
16
|
+
no_type_check,
|
17
|
+
)
|
9
18
|
|
10
19
|
from langroid.exceptions import LangroidImportError
|
11
20
|
from langroid.pydantic_v1 import BaseSettings
|
@@ -18,7 +27,8 @@ except ImportError:
|
|
18
27
|
from chainlit import run_sync
|
19
28
|
from chainlit.logger import logger
|
20
29
|
|
21
|
-
|
30
|
+
if TYPE_CHECKING:
|
31
|
+
from langroid import Agent, Task
|
22
32
|
import langroid.language_models as lm
|
23
33
|
from langroid.language_models import StreamEventType
|
24
34
|
from langroid.utils.configuration import settings
|
@@ -222,11 +232,11 @@ class ChainlitAgentCallbacks:
|
|
222
232
|
last_step: Optional[cl.Step] = None # used to display sub-steps under this
|
223
233
|
curr_step: Optional[cl.Step] = None # used to update an initiated step
|
224
234
|
stream: Optional[cl.Step] = None # pushed into openai_gpt.py to stream tokens
|
225
|
-
parent_agent: Optional[
|
235
|
+
parent_agent: Optional["Agent"] = None # used to get parent id, for step nesting
|
226
236
|
|
227
237
|
def __init__(
|
228
238
|
self,
|
229
|
-
agent:
|
239
|
+
agent: "Agent",
|
230
240
|
config: ChainlitCallbackConfig = ChainlitCallbackConfig(),
|
231
241
|
):
|
232
242
|
"""Add callbacks to the agent, and save the initial message,
|
@@ -245,7 +255,7 @@ class ChainlitAgentCallbacks:
|
|
245
255
|
agent.callbacks.show_error_message = self.show_error_message
|
246
256
|
agent.callbacks.show_start_response = self.show_start_response
|
247
257
|
self.config = config
|
248
|
-
self.agent:
|
258
|
+
self.agent: "Agent" = agent
|
249
259
|
if self.agent.llm is not None:
|
250
260
|
# We don't want to suppress LLM output in async + streaming,
|
251
261
|
# since we often use chainlit async callbacks to display LLM output
|
@@ -271,7 +281,7 @@ class ChainlitAgentCallbacks:
|
|
271
281
|
)
|
272
282
|
return last_step.id # type: ignore
|
273
283
|
|
274
|
-
def set_parent_agent(self, parent:
|
284
|
+
def set_parent_agent(self, parent: "Agent") -> None:
|
275
285
|
self.parent_agent = parent
|
276
286
|
|
277
287
|
def get_last_step(self) -> Optional[cl.Step]:
|
@@ -559,7 +569,7 @@ class ChainlitTaskCallbacks(ChainlitAgentCallbacks):
|
|
559
569
|
|
560
570
|
def __init__(
|
561
571
|
self,
|
562
|
-
task:
|
572
|
+
task: "Task",
|
563
573
|
config: ChainlitCallbackConfig = ChainlitCallbackConfig(),
|
564
574
|
):
|
565
575
|
"""Inject callbacks recursively, ensuring msg is passed to the
|
@@ -573,7 +583,7 @@ class ChainlitTaskCallbacks(ChainlitAgentCallbacks):
|
|
573
583
|
|
574
584
|
@classmethod
|
575
585
|
def _inject_callbacks(
|
576
|
-
cls, task:
|
586
|
+
cls, task: "Task", config: ChainlitCallbackConfig = ChainlitCallbackConfig()
|
577
587
|
) -> None:
|
578
588
|
# recursively apply ChainlitAgentCallbacks to agents of sub-tasks
|
579
589
|
for t in task.sub_tasks:
|
@@ -581,7 +591,7 @@ class ChainlitTaskCallbacks(ChainlitAgentCallbacks):
|
|
581
591
|
# ChainlitTaskCallbacks(t, config=config)
|
582
592
|
|
583
593
|
def show_subtask_response(
|
584
|
-
self, task:
|
594
|
+
self, task: "Task", content: str, is_tool: bool = False
|
585
595
|
) -> None:
|
586
596
|
"""Show sub-task response as a step, nested at the right level."""
|
587
597
|
|
@@ -1069,6 +1069,13 @@ class ChatAgent(Agent):
|
|
1069
1069
|
was enabled, disables it for the tool, else triggers strict recovery.
|
1070
1070
|
"""
|
1071
1071
|
self.tool_error = False
|
1072
|
+
most_recent_sent_by_llm = (
|
1073
|
+
len(self.message_history) > 0
|
1074
|
+
and self.message_history[-1].role == Role.ASSISTANT
|
1075
|
+
)
|
1076
|
+
was_llm = most_recent_sent_by_llm or (
|
1077
|
+
isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM
|
1078
|
+
)
|
1072
1079
|
try:
|
1073
1080
|
tools = super().get_tool_messages(msg, all_tools)
|
1074
1081
|
except ValidationError as ve:
|
@@ -1099,9 +1106,16 @@ class ChatAgent(Agent):
|
|
1099
1106
|
if isinstance(msg, ChatDocument):
|
1100
1107
|
self.tool_error = msg.metadata.sender == Entity.LLM
|
1101
1108
|
else:
|
1102
|
-
self.tool_error =
|
1109
|
+
self.tool_error = most_recent_sent_by_llm
|
1103
1110
|
|
1104
|
-
|
1111
|
+
if was_llm:
|
1112
|
+
raise ve
|
1113
|
+
else:
|
1114
|
+
self.tool_error = False
|
1115
|
+
return []
|
1116
|
+
|
1117
|
+
if not was_llm:
|
1118
|
+
self.tool_error = False
|
1105
1119
|
|
1106
1120
|
return tools
|
1107
1121
|
|
@@ -14,6 +14,7 @@ pip install "langroid[hf-embeddings]"
|
|
14
14
|
|
15
15
|
"""
|
16
16
|
|
17
|
+
import importlib
|
17
18
|
import logging
|
18
19
|
from collections import OrderedDict
|
19
20
|
from functools import cache
|
@@ -82,14 +83,13 @@ about them, or summarize them into coherent answers.
|
|
82
83
|
"""
|
83
84
|
|
84
85
|
CHUNK_ENRICHMENT_DELIMITER = "\n<##-##-##>\n"
|
85
|
-
|
86
|
-
has_sentence_transformers = False
|
87
86
|
try:
|
88
|
-
|
89
|
-
|
90
|
-
has_sentence_transformers =
|
91
|
-
except
|
92
|
-
|
87
|
+
# Check if module exists in sys.path
|
88
|
+
spec = importlib.util.find_spec("sentence_transformers")
|
89
|
+
has_sentence_transformers = spec is not None
|
90
|
+
except Exception as e:
|
91
|
+
logger.warning(f"Error checking sentence_transformers: {e}")
|
92
|
+
has_sentence_transformers = False
|
93
93
|
|
94
94
|
|
95
95
|
hf_embed_config = SentenceTransformerEmbeddingsConfig(
|
@@ -236,6 +236,7 @@ class DocChatAgent(ChatAgent):
|
|
236
236
|
self.chunked_docs: List[Document] = []
|
237
237
|
self.chunked_docs_clean: List[Document] = []
|
238
238
|
self.response: None | Document = None
|
239
|
+
|
239
240
|
if len(config.doc_paths) > 0:
|
240
241
|
self.ingest()
|
241
242
|
|
@@ -16,28 +16,11 @@ from dotenv import load_dotenv
|
|
16
16
|
from langroid.exceptions import LangroidImportError
|
17
17
|
from langroid.utils.object_registry import ObjectRegistry
|
18
18
|
|
19
|
-
|
19
|
+
if TYPE_CHECKING:
|
20
|
+
import docling # noqa
|
20
21
|
import fitz
|
21
|
-
|
22
|
-
if not TYPE_CHECKING:
|
23
|
-
fitz = None
|
24
|
-
try:
|
25
|
-
import pymupdf4llm
|
26
|
-
except ImportError:
|
27
|
-
if not TYPE_CHECKING:
|
28
|
-
pymupdf4llm = None
|
29
|
-
|
30
|
-
try:
|
31
|
-
import docling
|
32
|
-
except ImportError:
|
33
|
-
if not TYPE_CHECKING:
|
34
|
-
docling = None
|
35
|
-
|
36
|
-
try:
|
22
|
+
import pymupdf4llm # noqa
|
37
23
|
import pypdf
|
38
|
-
except ImportError:
|
39
|
-
if not TYPE_CHECKING:
|
40
|
-
pypdf = None
|
41
24
|
|
42
25
|
|
43
26
|
import requests
|
@@ -469,8 +452,10 @@ class FitzPDFParser(DocumentParser):
|
|
469
452
|
Returns:
|
470
453
|
Generator[fitz.Page]: Generator yielding each page.
|
471
454
|
"""
|
472
|
-
|
473
|
-
|
455
|
+
try:
|
456
|
+
import fitz
|
457
|
+
except ImportError:
|
458
|
+
LangroidImportError("fitz", "doc-chat")
|
474
459
|
doc = fitz.open(stream=self.doc_bytes, filetype="pdf")
|
475
460
|
for i, page in enumerate(doc):
|
476
461
|
yield i, page
|
@@ -504,7 +489,10 @@ class PyMuPDF4LLMParser(DocumentParser):
|
|
504
489
|
Returns:
|
505
490
|
Generator[fitz.Page]: Generator yielding each page.
|
506
491
|
"""
|
507
|
-
|
492
|
+
try:
|
493
|
+
import pymupdf4llm # noqa
|
494
|
+
import fitz
|
495
|
+
except ImportError:
|
508
496
|
raise LangroidImportError(
|
509
497
|
"pymupdf4llm", ["pymupdf4llm", "all", "pdf-parsers", "doc-chat"]
|
510
498
|
)
|
@@ -548,7 +536,9 @@ class DoclingParser(DocumentParser):
|
|
548
536
|
Returns:
|
549
537
|
Generator[docling.Page]: Generator yielding each page.
|
550
538
|
"""
|
551
|
-
|
539
|
+
try:
|
540
|
+
import docling # noqa
|
541
|
+
except ImportError:
|
552
542
|
raise LangroidImportError(
|
553
543
|
"docling", ["docling", "pdf-parsers", "all", "doc-chat"]
|
554
544
|
)
|
@@ -637,7 +627,9 @@ class PyPDFParser(DocumentParser):
|
|
637
627
|
Returns:
|
638
628
|
Generator[pypdf.pdf.PageObject]: Generator yielding each page.
|
639
629
|
"""
|
640
|
-
|
630
|
+
try:
|
631
|
+
import pypdf
|
632
|
+
except ImportError:
|
641
633
|
raise LangroidImportError("pypdf", "pdf-parsers")
|
642
634
|
reader = pypdf.PdfReader(self.doc_bytes)
|
643
635
|
for i, page in enumerate(reader.pages):
|
@@ -7,14 +7,16 @@ import tempfile
|
|
7
7
|
import time
|
8
8
|
from collections import deque
|
9
9
|
from pathlib import Path
|
10
|
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
10
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
11
11
|
from urllib.parse import urlparse
|
12
12
|
|
13
13
|
from dotenv import load_dotenv
|
14
|
-
|
15
|
-
|
16
|
-
from github
|
17
|
-
from github.
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from github import Github
|
17
|
+
from github.ContentFile import ContentFile
|
18
|
+
from github.Label import Label
|
19
|
+
from github.Repository import Repository
|
18
20
|
|
19
21
|
from langroid.mytypes import DocMetaData, Document
|
20
22
|
from langroid.parsing.document_parser import DocumentParser, DocumentType
|
@@ -24,7 +26,7 @@ from langroid.pydantic_v1 import BaseModel, BaseSettings, Field
|
|
24
26
|
logger = logging.getLogger(__name__)
|
25
27
|
|
26
28
|
|
27
|
-
def _get_decoded_content(content_file: ContentFile) -> str:
|
29
|
+
def _get_decoded_content(content_file: "ContentFile") -> str:
|
28
30
|
if content_file.encoding == "base64":
|
29
31
|
return content_file.decoded_content.decode("utf-8") or ""
|
30
32
|
elif content_file.encoding == "none":
|
@@ -54,7 +56,7 @@ class IssueData(BaseModel):
|
|
54
56
|
text: str = Field(..., description="Text of issue, i.e. description body")
|
55
57
|
|
56
58
|
|
57
|
-
def get_issue_size(labels: List[Label]) -> str | None:
|
59
|
+
def get_issue_size(labels: List["Label"]) -> str | None:
|
58
60
|
sizes = ["XS", "S", "M", "L", "XL", "XXL"]
|
59
61
|
return next((label.name for label in labels if label.name in sizes), None)
|
60
62
|
|
@@ -117,6 +119,8 @@ class RepoLoader:
|
|
117
119
|
self.config = config
|
118
120
|
self.clone_path: Optional[str] = None
|
119
121
|
self.log_file = ".logs/repo_loader/download_log.json"
|
122
|
+
self.repo: Optional["Repository"] = None # Initialize repo as Optional
|
123
|
+
|
120
124
|
os.makedirs(os.path.dirname(self.log_file), exist_ok=True)
|
121
125
|
if not os.path.exists(self.log_file):
|
122
126
|
with open(self.log_file, "w") as f:
|
@@ -127,20 +131,25 @@ class RepoLoader:
|
|
127
131
|
logger.info(f"Repo Already downloaded in {log[self.url]}")
|
128
132
|
self.clone_path = log[self.url]
|
129
133
|
|
134
|
+
# it's a core dependency, so we don't need to enclose in try/except
|
135
|
+
from github import Github # Late import
|
136
|
+
|
137
|
+
load_dotenv()
|
138
|
+
# authenticated calls to github api have higher rate limit
|
139
|
+
token = os.getenv("GITHUB_ACCESS_TOKEN")
|
140
|
+
|
130
141
|
if "github.com" in self.url:
|
131
142
|
repo_name = self.url.split("github.com/")[1]
|
132
143
|
else:
|
133
144
|
repo_name = self.url
|
134
|
-
|
135
|
-
# authenticated calls to github api have higher rate limit
|
136
|
-
token = os.getenv("GITHUB_ACCESS_TOKEN")
|
145
|
+
|
137
146
|
g = Github(token)
|
138
147
|
self.repo = self._get_repo_with_retry(g, repo_name)
|
139
148
|
|
140
149
|
@staticmethod
|
141
150
|
def _get_repo_with_retry(
|
142
|
-
g: Github, repo_name: str, max_retries: int = 5
|
143
|
-
) -> Repository:
|
151
|
+
g: "Github", repo_name: str, max_retries: int = 5
|
152
|
+
) -> "Repository":
|
144
153
|
"""
|
145
154
|
Get a repo from the GitHub API, retrying if the request fails,
|
146
155
|
with exponential backoff.
|
@@ -173,6 +182,10 @@ class RepoLoader:
|
|
173
182
|
|
174
183
|
def get_issues(self, k: int | None = 100) -> List[IssueData]:
|
175
184
|
"""Get up to k issues from the GitHub repo."""
|
185
|
+
if self.repo is None:
|
186
|
+
logger.warning("No repo found. Ensure the URL is correct.")
|
187
|
+
return [] # Return an empty list rather than raise an error in this case
|
188
|
+
|
176
189
|
if k is None:
|
177
190
|
issues = self.repo.get_issues(state="all")
|
178
191
|
else:
|
@@ -224,7 +237,7 @@ class RepoLoader:
|
|
224
237
|
"""
|
225
238
|
return file_type not in self.config.non_code_types
|
226
239
|
|
227
|
-
def _is_allowed(self, content: ContentFile) -> bool:
|
240
|
+
def _is_allowed(self, content: "ContentFile") -> bool:
|
228
241
|
"""
|
229
242
|
Check if a file or directory content is allowed to be included.
|
230
243
|
|
@@ -301,6 +314,10 @@ class RepoLoader:
|
|
301
314
|
Dict[str, Union[str, List[Dict]]]:
|
302
315
|
A dictionary containing file and directory names, with file contents.
|
303
316
|
"""
|
317
|
+
if self.repo is None:
|
318
|
+
logger.warning("No repo found. Ensure the URL is correct.")
|
319
|
+
return {} # Return an empty dict rather than raise an error in this case
|
320
|
+
|
304
321
|
root_contents = self.repo.get_contents("")
|
305
322
|
if not isinstance(root_contents, list):
|
306
323
|
root_contents = [root_contents]
|
@@ -519,8 +536,7 @@ class RepoLoader:
|
|
519
536
|
which includes all depths.
|
520
537
|
lines (int, optional): Number of lines to read from each file.
|
521
538
|
Defaults to None, which reads all lines.
|
522
|
-
doc_type (str|DocumentType, optional): The type of document to parse.
|
523
|
-
|
539
|
+
doc_type (str | DocumentType | None, optional): The type of document to parse.
|
524
540
|
Returns:
|
525
541
|
List[Document]: List of Document objects representing files.
|
526
542
|
|
@@ -584,6 +600,10 @@ class RepoLoader:
|
|
584
600
|
list of Document objects, each has fields `content` and `metadata`,
|
585
601
|
and `metadata` has fields `url`, `filename`, `extension`, `language`
|
586
602
|
"""
|
603
|
+
if self.repo is None:
|
604
|
+
logger.warning("No repo found. Ensure the URL is correct.")
|
605
|
+
return [] # Return an empty list rather than raise an error
|
606
|
+
|
587
607
|
contents = self.repo.get_contents("")
|
588
608
|
if not isinstance(contents, list):
|
589
609
|
contents = [contents]
|
@@ -10,9 +10,6 @@ import difflib
|
|
10
10
|
import re
|
11
11
|
from typing import List, Tuple
|
12
12
|
|
13
|
-
from nltk.corpus import stopwords
|
14
|
-
from nltk.stem import WordNetLemmatizer
|
15
|
-
from nltk.tokenize import RegexpTokenizer
|
16
13
|
from rank_bm25 import BM25Okapi
|
17
14
|
from thefuzz import fuzz, process
|
18
15
|
|
@@ -120,6 +117,9 @@ def preprocess_text(text: str) -> str:
|
|
120
117
|
# Ensure the NLTK resources are available
|
121
118
|
for resource in ["tokenizers/punkt", "corpora/wordnet", "corpora/stopwords"]:
|
122
119
|
download_nltk_resource(resource)
|
120
|
+
from nltk.corpus import stopwords
|
121
|
+
from nltk.stem import WordNetLemmatizer
|
122
|
+
from nltk.tokenize import RegexpTokenizer
|
123
123
|
|
124
124
|
# Lowercase the text
|
125
125
|
text = text.lower()
|
@@ -4,12 +4,6 @@ from tempfile import NamedTemporaryFile
|
|
4
4
|
from typing import List, no_type_check
|
5
5
|
|
6
6
|
import requests
|
7
|
-
import trafilatura
|
8
|
-
from trafilatura.downloads import (
|
9
|
-
add_to_compressed_dict,
|
10
|
-
buffered_downloads,
|
11
|
-
load_download_buffer,
|
12
|
-
)
|
13
7
|
|
14
8
|
from langroid.mytypes import DocMetaData, Document
|
15
9
|
from langroid.parsing.document_parser import DocumentParser, ImagePdfParser
|
@@ -36,6 +30,13 @@ class URLLoader:
|
|
36
30
|
|
37
31
|
@no_type_check
|
38
32
|
def load(self) -> List[Document]:
|
33
|
+
import trafilatura
|
34
|
+
from trafilatura.downloads import (
|
35
|
+
add_to_compressed_dict,
|
36
|
+
buffered_downloads,
|
37
|
+
load_download_buffer,
|
38
|
+
)
|
39
|
+
|
39
40
|
docs = []
|
40
41
|
threads = 4
|
41
42
|
# convert the input list to an internal format
|
@@ -11,7 +11,6 @@ import requests
|
|
11
11
|
from bs4 import BeautifulSoup
|
12
12
|
from rich import print
|
13
13
|
from rich.prompt import Prompt
|
14
|
-
from trafilatura.spider import focused_crawler
|
15
14
|
|
16
15
|
from langroid.pydantic_v1 import BaseModel, HttpUrl, ValidationError, parse_obj_as
|
17
16
|
|
@@ -150,6 +149,8 @@ def crawl_url(url: str, max_urls: int = 1) -> List[str]:
|
|
150
149
|
up to a maximum of `max_urls`.
|
151
150
|
This has not been tested to work as intended. Ignore.
|
152
151
|
"""
|
152
|
+
from trafilatura.spider import focused_crawler
|
153
|
+
|
153
154
|
if max_urls == 1:
|
154
155
|
# no need to crawl, just return the original list
|
155
156
|
return [url]
|
@@ -6,7 +6,6 @@ from functools import cache
|
|
6
6
|
from itertools import islice
|
7
7
|
from typing import Iterable, List, Sequence, TypeVar
|
8
8
|
|
9
|
-
import nltk
|
10
9
|
from faker import Faker
|
11
10
|
|
12
11
|
from langroid.mytypes import Document
|
@@ -22,19 +21,19 @@ random.seed(43)
|
|
22
21
|
logger = logging.getLogger(__name__)
|
23
22
|
|
24
23
|
|
25
|
-
# Ensures the NLTK resource is available
|
26
|
-
@cache
|
27
24
|
def download_nltk_resource(resource: str) -> None:
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
25
|
+
import nltk
|
26
|
+
|
27
|
+
@cache
|
28
|
+
def _download() -> None:
|
29
|
+
try:
|
30
|
+
nltk.data.find(resource)
|
31
|
+
except LookupError:
|
32
|
+
model = resource.split("/")[-1]
|
33
|
+
nltk.download(model, quiet=True)
|
33
34
|
|
35
|
+
_download()
|
34
36
|
|
35
|
-
# Download punkt_tab resource at module import
|
36
|
-
download_nltk_resource("tokenizers/punkt_tab")
|
37
|
-
download_nltk_resource("corpora/gutenberg")
|
38
37
|
|
39
38
|
T = TypeVar("T")
|
40
39
|
|
@@ -51,9 +50,12 @@ def batched(iterable: Iterable[T], n: int) -> Iterable[Sequence[T]]:
|
|
51
50
|
|
52
51
|
def generate_random_sentences(k: int) -> str:
|
53
52
|
# Load the sample text
|
54
|
-
|
53
|
+
import nltk
|
55
54
|
from nltk.corpus import gutenberg
|
56
55
|
|
56
|
+
download_nltk_resource("corpora/gutenberg")
|
57
|
+
download_nltk_resource("tokenizers/punkt")
|
58
|
+
|
57
59
|
text = gutenberg.raw("austen-emma.txt")
|
58
60
|
|
59
61
|
# Split the text into sentences
|
@@ -155,6 +157,8 @@ def number_segments(s: str, granularity: int = 1) -> str:
|
|
155
157
|
>>> number_segments("Hello world! How are you? Have a good day.")
|
156
158
|
'<#1#> Hello world! <#2#> How are you? <#3#> Have a good day.'
|
157
159
|
"""
|
160
|
+
import nltk
|
161
|
+
|
158
162
|
if granularity < 0:
|
159
163
|
return "<#1#> " + s
|
160
164
|
numbered_text = []
|
@@ -27,7 +27,6 @@ try:
|
|
27
27
|
)
|
28
28
|
from sqlalchemy.dialects.postgresql import JSONB
|
29
29
|
from sqlalchemy.engine import Connection, Engine
|
30
|
-
from sqlalchemy.orm import sessionmaker
|
31
30
|
from sqlalchemy.sql.expression import insert
|
32
31
|
except ImportError:
|
33
32
|
Engine = Any # type: ignore
|
@@ -56,6 +55,11 @@ class PostgresDB(VectorStore):
|
|
56
55
|
super().__init__(config)
|
57
56
|
if not has_postgres:
|
58
57
|
raise LangroidImportError("pgvector", "postgres")
|
58
|
+
try:
|
59
|
+
from sqlalchemy.orm import sessionmaker
|
60
|
+
except ImportError:
|
61
|
+
raise LangroidImportError("sqlalchemy", "postgres")
|
62
|
+
|
59
63
|
self.config: PostgresDBConfig = config
|
60
64
|
self.engine = self._create_engine()
|
61
65
|
PostgresDB._create_vector_extension(self.engine)
|