langroid 0.43.1__tar.gz → 0.45.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langroid-0.43.1 → langroid-0.45.0}/PKG-INFO +27 -6
- {langroid-0.43.1 → langroid-0.45.0}/README.md +20 -5
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/base.py +1 -1
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/callbacks/chainlit.py +19 -9
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/doc_chat_agent.py +8 -7
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/document_parser.py +101 -25
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/parser.py +20 -5
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/repo_loader.py +35 -15
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/search.py +3 -3
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/url_loader.py +7 -6
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/urls.py +2 -1
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/utils.py +16 -12
- {langroid-0.43.1 → langroid-0.45.0}/langroid/vector_store/postgres.py +5 -1
- {langroid-0.43.1 → langroid-0.45.0}/langroid/vector_store/qdrantdb.py +37 -18
- {langroid-0.43.1 → langroid-0.45.0}/langroid/vector_store/weaviatedb.py +30 -24
- {langroid-0.43.1 → langroid-0.45.0}/pyproject.toml +9 -1
- {langroid-0.43.1 → langroid-0.45.0}/.gitignore +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/LICENSE +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/batch.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/chat_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/chat_document.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/openai_assistant.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/arangodb/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/arangodb/system_messages.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/arangodb/tools.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/arangodb/utils.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/lance_tools.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/neo4j/system_messages.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/neo4j/tools.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/relevance_extractor_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/retriever_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/special/table_chat_agent.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/task.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tool_message.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/exa_search_tool.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/file_tools.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/google_search_tool.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/metaphor_search_tool.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/orchestration.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/recipient_tool.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/retrieval_tool.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/rewind_tool.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/tools/tavily_search_tool.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/agent/xml_tool_message.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/cachedb/base.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/embedding_models/base.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/embedding_models/models.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/embedding_models/protoc/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/embedding_models/protoc/embeddings.proto +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/embedding_models/remote_embeds.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/exceptions.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/azure_openai.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/base.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/config.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/mock_lm.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/model_info.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/openai_gpt.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/language_models/utils.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/mytypes.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/para_sentence_split.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/parse_json.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/pdf_utils.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/routing.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/spider.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/parsing/web_search.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/prompts/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/prompts/dialog.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/prompts/templates.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/py.typed +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/pydantic_v1/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/pydantic_v1/main.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/configuration.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/constants.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/git_utils.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/globals.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/logging.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/object_registry.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/output/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/output/citations.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/output/printing.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/output/status.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/pandas_utils.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/pydantic_utils.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/system.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/utils/types.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/vector_store/base.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/vector_store/chromadb.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/vector_store/lancedb.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.43.1 → langroid-0.45.0}/langroid/vector_store/pineconedb.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: langroid
|
3
|
-
Version: 0.43.1
|
3
|
+
Version: 0.45.0
|
4
4
|
Summary: Harness LLMs with Multi-Agent Programming
|
5
5
|
Author-email: Prasad Chalasani <pchalasani@gmail.com>
|
6
6
|
License: MIT
|
@@ -63,6 +63,7 @@ Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'all'
|
|
63
63
|
Requires-Dist: fastembed<0.4.0,>=0.3.1; extra == 'all'
|
64
64
|
Requires-Dist: huggingface-hub<1.0.0,>=0.21.2; extra == 'all'
|
65
65
|
Requires-Dist: litellm<2.0.0,>=1.30.1; extra == 'all'
|
66
|
+
Requires-Dist: marker-pdf; extra == 'all'
|
66
67
|
Requires-Dist: metaphor-python<0.2.0,>=0.1.23; extra == 'all'
|
67
68
|
Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == 'all'
|
68
69
|
Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'all'
|
@@ -99,6 +100,7 @@ Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'db'
|
|
99
100
|
Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'db'
|
100
101
|
Provides-Extra: doc-chat
|
101
102
|
Requires-Dist: docling<3.0.0,>=2.20.0; extra == 'doc-chat'
|
103
|
+
Requires-Dist: marker-pdf; extra == 'doc-chat'
|
102
104
|
Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'doc-chat'
|
103
105
|
Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'doc-chat'
|
104
106
|
Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'doc-chat'
|
@@ -138,6 +140,9 @@ Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == 'lancedb'
|
|
138
140
|
Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == 'lancedb'
|
139
141
|
Provides-Extra: litellm
|
140
142
|
Requires-Dist: litellm<2.0.0,>=1.30.1; extra == 'litellm'
|
143
|
+
Provides-Extra: marker-pdf
|
144
|
+
Requires-Dist: marker-pdf[full]>=1.6.0; (sys_platform != 'darwin' or platform_machine != 'x86_64') and extra == 'marker-pdf'
|
145
|
+
Requires-Dist: opencv-python>=4.11.0.86; extra == 'marker-pdf'
|
141
146
|
Provides-Extra: meilisearch
|
142
147
|
Requires-Dist: meilisearch-python-sdk<3.0.0,>=2.2.3; extra == 'meilisearch'
|
143
148
|
Provides-Extra: metaphor
|
@@ -150,6 +155,7 @@ Provides-Extra: neo4j
|
|
150
155
|
Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == 'neo4j'
|
151
156
|
Provides-Extra: pdf-parsers
|
152
157
|
Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'pdf-parsers'
|
158
|
+
Requires-Dist: marker-pdf; extra == 'pdf-parsers'
|
153
159
|
Requires-Dist: markitdown>=0.0.1a3; extra == 'pdf-parsers'
|
154
160
|
Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'pdf-parsers'
|
155
161
|
Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'pdf-parsers'
|
@@ -237,9 +243,11 @@ This Multi-Agent paradigm is inspired by the
|
|
237
243
|
|
238
244
|
`Langroid` is a fresh take on LLM app-development, where considerable thought has gone
|
239
245
|
into simplifying the developer experience;
|
240
|
-
it does not use `Langchain`, or any other LLM framework
|
246
|
+
it does not use `Langchain`, or any other LLM framework,
|
247
|
+
and works with [practically any LLM](https://langroid.github.io/langroid/tutorials/supported-models/).
|
241
248
|
|
242
|
-
:fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/)
|
249
|
+
:fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/),
|
250
|
+
and a [quick tour of Langroid](https://langroid.github.io/langroid/tutorials/langroid-tour/).
|
243
251
|
|
244
252
|
📢 Companies are using/adapting Langroid in **production**. Here is a quote:
|
245
253
|
|
@@ -327,6 +335,18 @@ teacher_task.run()
|
|
327
335
|
<details>
|
328
336
|
<summary> <b>Click to expand</b></summary>
|
329
337
|
|
338
|
+
- **Feb 2025:**
|
339
|
+
- [0.43.0](https://github.com/langroid/langroid/releases/tag/0.43.0): `GeminiPdfParser` for parsing PDF using
|
340
|
+
Gemini LLMs - Thanks @abab-dev.
|
341
|
+
- [0.42.0](https://github.com/langroid/langroid/releases/tag/0.42.0): `markitdown` parser for `pptx,xlsx,xls` files
|
342
|
+
Thanks @abab-dev.
|
343
|
+
- [0.41.0](https://github.com/langroid/langroid/releases/tag/0.41.0): `pinecone` vector-db (Thanks @coretado),
|
344
|
+
`Tavily` web-search (Thanks @Sozhan308), `Exa` web-search (Thanks @MuddyHope).
|
345
|
+
- [0.40.0](https://github.com/langroid/langroid/releases/tag/0.40.0): `pgvector` vector-db. Thanks @abab-dev.
|
346
|
+
- [0.39.0](https://github.com/langroid/langroid/releases/tag/0.39.0): `ChatAgentConfig.handle_llm_no_tool` for
|
347
|
+
handling LLM "forgetting" to use a tool.
|
348
|
+
- [0.38.0](https://github.com/langroid/langroid/releases/tag/0.38.0): Gemini embeddings - Thanks @abab-dev)
|
349
|
+
- [0.37.0](https://github.com/langroid/langroid/releases/tag/0.37.0): New PDF Parsers: `docling`, `pymupdf4llm`
|
330
350
|
- **Jan 2025:**
|
331
351
|
- [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
|
332
352
|
- [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from
|
@@ -591,7 +611,8 @@ section above)
|
|
591
611
|
Agents with specific skills, wrap them in Tasks, and combine tasks in a flexible way.
|
592
612
|
- **LLM Support**: Langroid supports OpenAI LLMs as well as LLMs from hundreds of
|
593
613
|
providers ([local/open](https://langroid.github.io/langroid/tutorials/local-llm-setup/) or [remote/commercial](https://langroid.github.io/langroid/tutorials/non-openai-llms/)) via proxy libraries and local model servers
|
594
|
-
such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui),
|
614
|
+
such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui),
|
615
|
+
[LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic the OpenAI API. See the [supported LLMs](https://langroid.github.io/langroid/tutorials/supported-models/).
|
595
616
|
- **Caching of LLM responses:** Langroid supports [Redis](https://redis.com/try-free/) and
|
596
617
|
[Momento](https://www.gomomento.com/) to cache LLM responses.
|
597
618
|
- **Vector-stores**: [LanceDB](https://github.com/lancedb/lancedb), [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) are currently supported.
|
@@ -776,8 +797,8 @@ wget -O .env https://raw.githubusercontent.com/langroid/langroid/main/.env-templ
|
|
776
797
|
# Edit the .env file with your favorite editor (here nano), and remove any un-used settings. E.g. there are "dummy" values like "your-redis-port" etc -- if you are not using them, you MUST remove them.
|
777
798
|
nano .env
|
778
799
|
|
779
|
-
# launch the container
|
780
|
-
docker run -it --rm -v ./.env:/langroid/.env langroid/langroid
|
800
|
+
# launch the container (the appropriate image for your architecture will be pulled automatically)
|
801
|
+
docker run -it --rm -v ./.env:/langroid/.env langroid/langroid:latest
|
781
802
|
|
782
803
|
# Use this command to run any of the scripts in the `examples` directory
|
783
804
|
python examples/<Path/To/Example.py>
|
@@ -45,9 +45,11 @@ This Multi-Agent paradigm is inspired by the
|
|
45
45
|
|
46
46
|
`Langroid` is a fresh take on LLM app-development, where considerable thought has gone
|
47
47
|
into simplifying the developer experience;
|
48
|
-
it does not use `Langchain`, or any other LLM framework
|
48
|
+
it does not use `Langchain`, or any other LLM framework,
|
49
|
+
and works with [practically any LLM](https://langroid.github.io/langroid/tutorials/supported-models/).
|
49
50
|
|
50
|
-
:fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/)
|
51
|
+
:fire: Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/),
|
52
|
+
and a [quick tour of Langroid](https://langroid.github.io/langroid/tutorials/langroid-tour/).
|
51
53
|
|
52
54
|
📢 Companies are using/adapting Langroid in **production**. Here is a quote:
|
53
55
|
|
@@ -135,6 +137,18 @@ teacher_task.run()
|
|
135
137
|
<details>
|
136
138
|
<summary> <b>Click to expand</b></summary>
|
137
139
|
|
140
|
+
- **Feb 2025:**
|
141
|
+
- [0.43.0](https://github.com/langroid/langroid/releases/tag/0.43.0): `GeminiPdfParser` for parsing PDF using
|
142
|
+
Gemini LLMs - Thanks @abab-dev.
|
143
|
+
- [0.42.0](https://github.com/langroid/langroid/releases/tag/0.42.0): `markitdown` parser for `pptx,xlsx,xls` files
|
144
|
+
Thanks @abab-dev.
|
145
|
+
- [0.41.0](https://github.com/langroid/langroid/releases/tag/0.41.0): `pinecone` vector-db (Thanks @coretado),
|
146
|
+
`Tavily` web-search (Thanks @Sozhan308), `Exa` web-search (Thanks @MuddyHope).
|
147
|
+
- [0.40.0](https://github.com/langroid/langroid/releases/tag/0.40.0): `pgvector` vector-db. Thanks @abab-dev.
|
148
|
+
- [0.39.0](https://github.com/langroid/langroid/releases/tag/0.39.0): `ChatAgentConfig.handle_llm_no_tool` for
|
149
|
+
handling LLM "forgetting" to use a tool.
|
150
|
+
- [0.38.0](https://github.com/langroid/langroid/releases/tag/0.38.0): Gemini embeddings - Thanks @abab-dev)
|
151
|
+
- [0.37.0](https://github.com/langroid/langroid/releases/tag/0.37.0): New PDF Parsers: `docling`, `pymupdf4llm`
|
138
152
|
- **Jan 2025:**
|
139
153
|
- [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
|
140
154
|
- [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from
|
@@ -399,7 +413,8 @@ section above)
|
|
399
413
|
Agents with specific skills, wrap them in Tasks, and combine tasks in a flexible way.
|
400
414
|
- **LLM Support**: Langroid supports OpenAI LLMs as well as LLMs from hundreds of
|
401
415
|
providers ([local/open](https://langroid.github.io/langroid/tutorials/local-llm-setup/) or [remote/commercial](https://langroid.github.io/langroid/tutorials/non-openai-llms/)) via proxy libraries and local model servers
|
402
|
-
such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui),
|
416
|
+
such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui),
|
417
|
+
[LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic the OpenAI API. See the [supported LLMs](https://langroid.github.io/langroid/tutorials/supported-models/).
|
403
418
|
- **Caching of LLM responses:** Langroid supports [Redis](https://redis.com/try-free/) and
|
404
419
|
[Momento](https://www.gomomento.com/) to cache LLM responses.
|
405
420
|
- **Vector-stores**: [LanceDB](https://github.com/lancedb/lancedb), [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) are currently supported.
|
@@ -584,8 +599,8 @@ wget -O .env https://raw.githubusercontent.com/langroid/langroid/main/.env-templ
|
|
584
599
|
# Edit the .env file with your favorite editor (here nano), and remove any un-used settings. E.g. there are "dummy" values like "your-redis-port" etc -- if you are not using them, you MUST remove them.
|
585
600
|
nano .env
|
586
601
|
|
587
|
-
# launch the container
|
588
|
-
docker run -it --rm -v ./.env:/langroid/.env langroid/langroid
|
602
|
+
# launch the container (the appropriate image for your architecture will be pulled automatically)
|
603
|
+
docker run -it --rm -v ./.env:/langroid/.env langroid/langroid:latest
|
589
604
|
|
590
605
|
# Use this command to run any of the scripts in the `examples` directory
|
591
606
|
python examples/<Path/To/Example.py>
|
@@ -1016,7 +1016,7 @@ class Agent(ABC):
|
|
1016
1016
|
# we would have already displayed the msg "live" ONLY if
|
1017
1017
|
# streaming was enabled, AND we did not find a cached response
|
1018
1018
|
# If we are here, it means the response has not yet been displayed.
|
1019
|
-
cached =
|
1019
|
+
cached = "[red](cached)[/red]" if response.cached else ""
|
1020
1020
|
console.print(f"[green]{self.indent}", end="")
|
1021
1021
|
print(cached + "[green]" + escape(response.message))
|
1022
1022
|
self.update_token_usage(
|
@@ -5,7 +5,16 @@ Callbacks for Chainlit integration.
|
|
5
5
|
import json
|
6
6
|
import logging
|
7
7
|
import textwrap
|
8
|
-
from typing import
|
8
|
+
from typing import (
|
9
|
+
TYPE_CHECKING,
|
10
|
+
Any,
|
11
|
+
Callable,
|
12
|
+
Dict,
|
13
|
+
List,
|
14
|
+
Literal,
|
15
|
+
Optional,
|
16
|
+
no_type_check,
|
17
|
+
)
|
9
18
|
|
10
19
|
from langroid.exceptions import LangroidImportError
|
11
20
|
from langroid.pydantic_v1 import BaseSettings
|
@@ -18,7 +27,8 @@ except ImportError:
|
|
18
27
|
from chainlit import run_sync
|
19
28
|
from chainlit.logger import logger
|
20
29
|
|
21
|
-
|
30
|
+
if TYPE_CHECKING:
|
31
|
+
from langroid import Agent, Task
|
22
32
|
import langroid.language_models as lm
|
23
33
|
from langroid.language_models import StreamEventType
|
24
34
|
from langroid.utils.configuration import settings
|
@@ -222,11 +232,11 @@ class ChainlitAgentCallbacks:
|
|
222
232
|
last_step: Optional[cl.Step] = None # used to display sub-steps under this
|
223
233
|
curr_step: Optional[cl.Step] = None # used to update an initiated step
|
224
234
|
stream: Optional[cl.Step] = None # pushed into openai_gpt.py to stream tokens
|
225
|
-
parent_agent: Optional[
|
235
|
+
parent_agent: Optional["Agent"] = None # used to get parent id, for step nesting
|
226
236
|
|
227
237
|
def __init__(
|
228
238
|
self,
|
229
|
-
agent:
|
239
|
+
agent: "Agent",
|
230
240
|
config: ChainlitCallbackConfig = ChainlitCallbackConfig(),
|
231
241
|
):
|
232
242
|
"""Add callbacks to the agent, and save the initial message,
|
@@ -245,7 +255,7 @@ class ChainlitAgentCallbacks:
|
|
245
255
|
agent.callbacks.show_error_message = self.show_error_message
|
246
256
|
agent.callbacks.show_start_response = self.show_start_response
|
247
257
|
self.config = config
|
248
|
-
self.agent:
|
258
|
+
self.agent: "Agent" = agent
|
249
259
|
if self.agent.llm is not None:
|
250
260
|
# We don't want to suppress LLM output in async + streaming,
|
251
261
|
# since we often use chainlit async callbacks to display LLM output
|
@@ -271,7 +281,7 @@ class ChainlitAgentCallbacks:
|
|
271
281
|
)
|
272
282
|
return last_step.id # type: ignore
|
273
283
|
|
274
|
-
def set_parent_agent(self, parent:
|
284
|
+
def set_parent_agent(self, parent: "Agent") -> None:
|
275
285
|
self.parent_agent = parent
|
276
286
|
|
277
287
|
def get_last_step(self) -> Optional[cl.Step]:
|
@@ -559,7 +569,7 @@ class ChainlitTaskCallbacks(ChainlitAgentCallbacks):
|
|
559
569
|
|
560
570
|
def __init__(
|
561
571
|
self,
|
562
|
-
task:
|
572
|
+
task: "Task",
|
563
573
|
config: ChainlitCallbackConfig = ChainlitCallbackConfig(),
|
564
574
|
):
|
565
575
|
"""Inject callbacks recursively, ensuring msg is passed to the
|
@@ -573,7 +583,7 @@ class ChainlitTaskCallbacks(ChainlitAgentCallbacks):
|
|
573
583
|
|
574
584
|
@classmethod
|
575
585
|
def _inject_callbacks(
|
576
|
-
cls, task:
|
586
|
+
cls, task: "Task", config: ChainlitCallbackConfig = ChainlitCallbackConfig()
|
577
587
|
) -> None:
|
578
588
|
# recursively apply ChainlitAgentCallbacks to agents of sub-tasks
|
579
589
|
for t in task.sub_tasks:
|
@@ -581,7 +591,7 @@ class ChainlitTaskCallbacks(ChainlitAgentCallbacks):
|
|
581
591
|
# ChainlitTaskCallbacks(t, config=config)
|
582
592
|
|
583
593
|
def show_subtask_response(
|
584
|
-
self, task:
|
594
|
+
self, task: "Task", content: str, is_tool: bool = False
|
585
595
|
) -> None:
|
586
596
|
"""Show sub-task response as a step, nested at the right level."""
|
587
597
|
|
@@ -14,6 +14,7 @@ pip install "langroid[hf-embeddings]"
|
|
14
14
|
|
15
15
|
"""
|
16
16
|
|
17
|
+
import importlib
|
17
18
|
import logging
|
18
19
|
from collections import OrderedDict
|
19
20
|
from functools import cache
|
@@ -82,14 +83,13 @@ about them, or summarize them into coherent answers.
|
|
82
83
|
"""
|
83
84
|
|
84
85
|
CHUNK_ENRICHMENT_DELIMITER = "\n<##-##-##>\n"
|
85
|
-
|
86
|
-
has_sentence_transformers = False
|
87
86
|
try:
|
88
|
-
|
89
|
-
|
90
|
-
has_sentence_transformers =
|
91
|
-
except
|
92
|
-
|
87
|
+
# Check if module exists in sys.path
|
88
|
+
spec = importlib.util.find_spec("sentence_transformers")
|
89
|
+
has_sentence_transformers = spec is not None
|
90
|
+
except Exception as e:
|
91
|
+
logger.warning(f"Error checking sentence_transformers: {e}")
|
92
|
+
has_sentence_transformers = False
|
93
93
|
|
94
94
|
|
95
95
|
hf_embed_config = SentenceTransformerEmbeddingsConfig(
|
@@ -236,6 +236,7 @@ class DocChatAgent(ChatAgent):
|
|
236
236
|
self.chunked_docs: List[Document] = []
|
237
237
|
self.chunked_docs_clean: List[Document] = []
|
238
238
|
self.response: None | Document = None
|
239
|
+
|
239
240
|
if len(config.doc_paths) > 0:
|
240
241
|
self.ingest()
|
241
242
|
|
@@ -16,28 +16,11 @@ from dotenv import load_dotenv
|
|
16
16
|
from langroid.exceptions import LangroidImportError
|
17
17
|
from langroid.utils.object_registry import ObjectRegistry
|
18
18
|
|
19
|
-
|
19
|
+
if TYPE_CHECKING:
|
20
|
+
import docling # noqa
|
20
21
|
import fitz
|
21
|
-
|
22
|
-
if not TYPE_CHECKING:
|
23
|
-
fitz = None
|
24
|
-
try:
|
25
|
-
import pymupdf4llm
|
26
|
-
except ImportError:
|
27
|
-
if not TYPE_CHECKING:
|
28
|
-
pymupdf4llm = None
|
29
|
-
|
30
|
-
try:
|
31
|
-
import docling
|
32
|
-
except ImportError:
|
33
|
-
if not TYPE_CHECKING:
|
34
|
-
docling = None
|
35
|
-
|
36
|
-
try:
|
22
|
+
import pymupdf4llm # noqa
|
37
23
|
import pypdf
|
38
|
-
except ImportError:
|
39
|
-
if not TYPE_CHECKING:
|
40
|
-
pypdf = None
|
41
24
|
|
42
25
|
|
43
26
|
import requests
|
@@ -167,6 +150,8 @@ class DocumentParser(Parser):
|
|
167
150
|
return ImagePdfParser(source, config)
|
168
151
|
elif config.pdf.library == "gemini":
|
169
152
|
return GeminiPdfParser(source, config)
|
153
|
+
elif config.pdf.library == "marker":
|
154
|
+
return MarkerPdfParser(source, config)
|
170
155
|
else:
|
171
156
|
raise ValueError(
|
172
157
|
f"Unsupported PDF library specified: {config.pdf.library}"
|
@@ -469,8 +454,10 @@ class FitzPDFParser(DocumentParser):
|
|
469
454
|
Returns:
|
470
455
|
Generator[fitz.Page]: Generator yielding each page.
|
471
456
|
"""
|
472
|
-
|
473
|
-
|
457
|
+
try:
|
458
|
+
import fitz
|
459
|
+
except ImportError:
|
460
|
+
LangroidImportError("fitz", "doc-chat")
|
474
461
|
doc = fitz.open(stream=self.doc_bytes, filetype="pdf")
|
475
462
|
for i, page in enumerate(doc):
|
476
463
|
yield i, page
|
@@ -504,7 +491,10 @@ class PyMuPDF4LLMParser(DocumentParser):
|
|
504
491
|
Returns:
|
505
492
|
Generator[fitz.Page]: Generator yielding each page.
|
506
493
|
"""
|
507
|
-
|
494
|
+
try:
|
495
|
+
import pymupdf4llm # noqa
|
496
|
+
import fitz
|
497
|
+
except ImportError:
|
508
498
|
raise LangroidImportError(
|
509
499
|
"pymupdf4llm", ["pymupdf4llm", "all", "pdf-parsers", "doc-chat"]
|
510
500
|
)
|
@@ -548,7 +538,9 @@ class DoclingParser(DocumentParser):
|
|
548
538
|
Returns:
|
549
539
|
Generator[docling.Page]: Generator yielding each page.
|
550
540
|
"""
|
551
|
-
|
541
|
+
try:
|
542
|
+
import docling # noqa
|
543
|
+
except ImportError:
|
552
544
|
raise LangroidImportError(
|
553
545
|
"docling", ["docling", "pdf-parsers", "all", "doc-chat"]
|
554
546
|
)
|
@@ -637,7 +629,9 @@ class PyPDFParser(DocumentParser):
|
|
637
629
|
Returns:
|
638
630
|
Generator[pypdf.pdf.PageObject]: Generator yielding each page.
|
639
631
|
"""
|
640
|
-
|
632
|
+
try:
|
633
|
+
import pypdf
|
634
|
+
except ImportError:
|
641
635
|
raise LangroidImportError("pypdf", "pdf-parsers")
|
642
636
|
reader = pypdf.PdfReader(self.doc_bytes)
|
643
637
|
for i, page in enumerate(reader.pages):
|
@@ -1364,3 +1358,85 @@ class GeminiPdfParser(DocumentParser):
|
|
1364
1358
|
content=page,
|
1365
1359
|
metadata=DocMetaData(source=self.source),
|
1366
1360
|
)
|
1361
|
+
|
1362
|
+
|
1363
|
+
class MarkerPdfParser(DocumentParser):
    """
    Parser for PDF files that uses the `marker` library.

    The PDF is converted to a single paginated markdown file, which is then
    split into one markdown string per page.
    """

    # Defaults passed to marker; user-supplied settings override these.
    DEFAULT_CONFIG = {"paginate_output": True, "output_format": "markdown"}

    def __init__(self, source: Union[str, bytes], config: ParsingConfig):
        """
        Args:
            source (str|bytes): passed through to `DocumentParser.__init__`.
            config (ParsingConfig): parsing configuration; when
                `config.pdf.marker_config` is set, its `config_dict` is
                merged over `DEFAULT_CONFIG`.
        """
        super().__init__(source, config)
        user_config = (
            config.pdf.marker_config.config_dict if config.pdf.marker_config else {}
        )

        # Later keys win, so user settings take precedence over the defaults.
        self.config_dict = {**MarkerPdfParser.DEFAULT_CONFIG, **user_config}

    def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
        """
        Yield each page in the PDF using `marker`.

        The converted markdown is saved under a `<doc-path-without-suffix>-pages`
        directory, read back, and split on the `{N}----` page separators.

        Yields:
            Tuple[int, str]: index of the segment in the split output, and the
                markdown of that segment. Blank segments are skipped, but they
                still advance the index.

        Raises:
            LangroidImportError: if `marker` is not installed.
        """
        try:
            import marker  # noqa
        except ImportError:
            raise LangroidImportError(
                "marker-pdf", ["marker-pdf", "pdf-parsers", "all", "doc-chat"]
            )

        import re

        from marker.config.parser import ConfigParser
        from marker.converters.pdf import PdfConverter
        from marker.models import create_model_dict
        from marker.output import save_output

        config_parser = ConfigParser(self.config_dict)
        converter = PdfConverter(
            config=config_parser.generate_config_dict(),
            artifact_dict=create_model_dict(),
            processor_list=config_parser.get_processors(),
            renderer=config_parser.get_renderer(),
            llm_service=config_parser.get_llm_service(),
        )
        doc_path = self.source
        wrote_temp_pdf = doc_path == "bytes"
        if wrote_temp_pdf:
            # marker needs a file path: write the bytes to a tmp file and use that
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                temp_file.write(self.doc_bytes.getvalue())
                doc_path = temp_file.name

        output_dir = Path(str(Path(doc_path).with_suffix("")) + "-pages")
        os.makedirs(output_dir, exist_ok=True)
        filename = Path(doc_path).stem + "_converted"

        try:
            rendered = converter(doc_path)
        finally:
            if wrote_temp_pdf:
                # The temp PDF was created with delete=False, so remove it
                # ourselves once conversion is over (best effort).
                try:
                    os.unlink(doc_path)
                except OSError:
                    pass

        # The output directory is intentionally left in place.
        # NOTE(review): presumably the markdown's image refs point to files
        # saved there -- confirm before adding cleanup.
        save_output(rendered, output_dir=output_dir, fname_base=filename)
        file_path = output_dir / f"{filename}.md"
        full_markdown = file_path.read_text(encoding="utf-8")

        # Regex for splitting pages
        pages = re.split(r"\{\d+\}----+", full_markdown)

        for page_no, page in enumerate(pages):
            if page.strip():
                yield page_no, page

    def get_document_from_page(self, page: str) -> Document:
        """
        Get Document object from a given 1-page markdown file,
        possibly containing image refs.

        Args:
            page (str): The page we get by splitting large md file from
                marker

        Returns:
            Document: Document object, with content and possible metadata.
        """
        return Document(
            content=self.fix_text(page),
            metadata=DocMetaData(source=self.source),
        )
|
@@ -38,8 +38,13 @@ class GeminiConfig(BaseSettings):
|
|
38
38
|
requests_per_minute: Optional[int] = 5
|
39
39
|
|
40
40
|
|
41
|
-
class
|
41
|
+
class MarkerConfig(BaseSettings):
    """Configuration for Marker-based PDF parsing."""

    # Settings merged over MarkerPdfParser.DEFAULT_CONFIG and handed to
    # marker's ConfigParser.
    config_dict: Dict[str, Any] = {}
|
42
45
|
|
46
|
+
|
47
|
+
class PdfParsingConfig(BaseParsingConfig):
|
43
48
|
library: Literal[
|
44
49
|
"fitz",
|
45
50
|
"pymupdf4llm",
|
@@ -49,16 +54,26 @@ class PdfParsingConfig(BaseParsingConfig):
|
|
49
54
|
"pdf2image",
|
50
55
|
"markitdown",
|
51
56
|
"gemini",
|
57
|
+
"marker",
|
52
58
|
] = "pymupdf4llm"
|
53
59
|
gemini_config: Optional[GeminiConfig] = None
|
60
|
+
marker_config: Optional[MarkerConfig] = None
|
54
61
|
|
55
62
|
@root_validator(pre=True)
|
56
|
-
def
|
57
|
-
"""Ensure
|
58
|
-
|
59
|
-
|
63
|
+
def enable_configs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
    """
    Keep only the library-specific config that matches the chosen library.

    Args:
        values: raw field values passed to the model.

    Returns:
        The same dict, where `gemini_config` / `marker_config` is filled with
        a default instance if absent and its library is selected, and is
        reset to None if its library is not selected.
    """
    selected = values.get("library")

    if selected != "gemini":
        values["gemini_config"] = None
    else:
        # an explicitly supplied config is left untouched
        values.setdefault("gemini_config", GeminiConfig())

    if selected != "marker":
        values["marker_config"] = None
    else:
        # an explicitly supplied config is left untouched
        values.setdefault("marker_config", MarkerConfig())

    return values
|
63
78
|
|
64
79
|
|
@@ -7,14 +7,16 @@ import tempfile
|
|
7
7
|
import time
|
8
8
|
from collections import deque
|
9
9
|
from pathlib import Path
|
10
|
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
10
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
11
11
|
from urllib.parse import urlparse
|
12
12
|
|
13
13
|
from dotenv import load_dotenv
|
14
|
-
|
15
|
-
|
16
|
-
from github
|
17
|
-
from github.
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from github import Github
|
17
|
+
from github.ContentFile import ContentFile
|
18
|
+
from github.Label import Label
|
19
|
+
from github.Repository import Repository
|
18
20
|
|
19
21
|
from langroid.mytypes import DocMetaData, Document
|
20
22
|
from langroid.parsing.document_parser import DocumentParser, DocumentType
|
@@ -24,7 +26,7 @@ from langroid.pydantic_v1 import BaseModel, BaseSettings, Field
|
|
24
26
|
logger = logging.getLogger(__name__)
|
25
27
|
|
26
28
|
|
27
|
-
def _get_decoded_content(content_file: ContentFile) -> str:
|
29
|
+
def _get_decoded_content(content_file: "ContentFile") -> str:
|
28
30
|
if content_file.encoding == "base64":
|
29
31
|
return content_file.decoded_content.decode("utf-8") or ""
|
30
32
|
elif content_file.encoding == "none":
|
@@ -54,7 +56,7 @@ class IssueData(BaseModel):
|
|
54
56
|
text: str = Field(..., description="Text of issue, i.e. description body")
|
55
57
|
|
56
58
|
|
57
|
-
def get_issue_size(labels: List[Label]) -> str | None:
|
59
|
+
def get_issue_size(labels: List["Label"]) -> str | None:
|
58
60
|
sizes = ["XS", "S", "M", "L", "XL", "XXL"]
|
59
61
|
return next((label.name for label in labels if label.name in sizes), None)
|
60
62
|
|
@@ -117,6 +119,8 @@ class RepoLoader:
|
|
117
119
|
self.config = config
|
118
120
|
self.clone_path: Optional[str] = None
|
119
121
|
self.log_file = ".logs/repo_loader/download_log.json"
|
122
|
+
self.repo: Optional["Repository"] = None # Initialize repo as Optional
|
123
|
+
|
120
124
|
os.makedirs(os.path.dirname(self.log_file), exist_ok=True)
|
121
125
|
if not os.path.exists(self.log_file):
|
122
126
|
with open(self.log_file, "w") as f:
|
@@ -127,20 +131,25 @@ class RepoLoader:
|
|
127
131
|
logger.info(f"Repo Already downloaded in {log[self.url]}")
|
128
132
|
self.clone_path = log[self.url]
|
129
133
|
|
134
|
+
# it's a core dependency, so we don't need to enclose in try/except
|
135
|
+
from github import Github # Late import
|
136
|
+
|
137
|
+
load_dotenv()
|
138
|
+
# authenticated calls to github api have higher rate limit
|
139
|
+
token = os.getenv("GITHUB_ACCESS_TOKEN")
|
140
|
+
|
130
141
|
if "github.com" in self.url:
|
131
142
|
repo_name = self.url.split("github.com/")[1]
|
132
143
|
else:
|
133
144
|
repo_name = self.url
|
134
|
-
|
135
|
-
# authenticated calls to github api have higher rate limit
|
136
|
-
token = os.getenv("GITHUB_ACCESS_TOKEN")
|
145
|
+
|
137
146
|
g = Github(token)
|
138
147
|
self.repo = self._get_repo_with_retry(g, repo_name)
|
139
148
|
|
140
149
|
@staticmethod
|
141
150
|
def _get_repo_with_retry(
|
142
|
-
g: Github, repo_name: str, max_retries: int = 5
|
143
|
-
) -> Repository:
|
151
|
+
g: "Github", repo_name: str, max_retries: int = 5
|
152
|
+
) -> "Repository":
|
144
153
|
"""
|
145
154
|
Get a repo from the GitHub API, retrying if the request fails,
|
146
155
|
with exponential backoff.
|
@@ -173,6 +182,10 @@ class RepoLoader:
|
|
173
182
|
|
174
183
|
def get_issues(self, k: int | None = 100) -> List[IssueData]:
|
175
184
|
"""Get up to k issues from the GitHub repo."""
|
185
|
+
if self.repo is None:
|
186
|
+
logger.warning("No repo found. Ensure the URL is correct.")
|
187
|
+
return [] # Return an empty list rather than raise an error in this case
|
188
|
+
|
176
189
|
if k is None:
|
177
190
|
issues = self.repo.get_issues(state="all")
|
178
191
|
else:
|
@@ -224,7 +237,7 @@ class RepoLoader:
|
|
224
237
|
"""
|
225
238
|
return file_type not in self.config.non_code_types
|
226
239
|
|
227
|
-
def _is_allowed(self, content: ContentFile) -> bool:
|
240
|
+
def _is_allowed(self, content: "ContentFile") -> bool:
|
228
241
|
"""
|
229
242
|
Check if a file or directory content is allowed to be included.
|
230
243
|
|
@@ -301,6 +314,10 @@ class RepoLoader:
|
|
301
314
|
Dict[str, Union[str, List[Dict]]]:
|
302
315
|
A dictionary containing file and directory names, with file contents.
|
303
316
|
"""
|
317
|
+
if self.repo is None:
|
318
|
+
logger.warning("No repo found. Ensure the URL is correct.")
|
319
|
+
return {} # Return an empty dict rather than raise an error in this case
|
320
|
+
|
304
321
|
root_contents = self.repo.get_contents("")
|
305
322
|
if not isinstance(root_contents, list):
|
306
323
|
root_contents = [root_contents]
|
@@ -519,8 +536,7 @@ class RepoLoader:
|
|
519
536
|
which includes all depths.
|
520
537
|
lines (int, optional): Number of lines to read from each file.
|
521
538
|
Defaults to None, which reads all lines.
|
522
|
-
doc_type (str|DocumentType, optional): The type of document to parse.
|
523
|
-
|
539
|
+
doc_type (str|DocumentType | None, optional): The type of document to parse.
|
524
540
|
Returns:
|
525
541
|
List[Document]: List of Document objects representing files.
|
526
542
|
|
@@ -584,6 +600,10 @@ class RepoLoader:
|
|
584
600
|
list of Document objects, each has fields `content` and `metadata`,
|
585
601
|
and `metadata` has fields `url`, `filename`, `extension`, `language`
|
586
602
|
"""
|
603
|
+
if self.repo is None:
|
604
|
+
logger.warning("No repo found. Ensure the URL is correct.")
|
605
|
+
return [] # Return an empty list rather than raise an error
|
606
|
+
|
587
607
|
contents = self.repo.get_contents("")
|
588
608
|
if not isinstance(contents, list):
|
589
609
|
contents = [contents]
|
@@ -10,9 +10,6 @@ import difflib
|
|
10
10
|
import re
|
11
11
|
from typing import List, Tuple
|
12
12
|
|
13
|
-
from nltk.corpus import stopwords
|
14
|
-
from nltk.stem import WordNetLemmatizer
|
15
|
-
from nltk.tokenize import RegexpTokenizer
|
16
13
|
from rank_bm25 import BM25Okapi
|
17
14
|
from thefuzz import fuzz, process
|
18
15
|
|
@@ -120,6 +117,9 @@ def preprocess_text(text: str) -> str:
|
|
120
117
|
# Ensure the NLTK resources are available
|
121
118
|
for resource in ["tokenizers/punkt", "corpora/wordnet", "corpora/stopwords"]:
|
122
119
|
download_nltk_resource(resource)
|
120
|
+
from nltk.corpus import stopwords
|
121
|
+
from nltk.stem import WordNetLemmatizer
|
122
|
+
from nltk.tokenize import RegexpTokenizer
|
123
123
|
|
124
124
|
# Lowercase the text
|
125
125
|
text = text.lower()
|