local-search-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_search_agent-0.1.0/LICENSE +21 -0
- local_search_agent-0.1.0/PKG-INFO +261 -0
- local_search_agent-0.1.0/README.md +203 -0
- local_search_agent-0.1.0/local_search_agent/__init__.py +62 -0
- local_search_agent-0.1.0/local_search_agent/agent/__init__.py +7 -0
- local_search_agent-0.1.0/local_search_agent/agent/agent.py +451 -0
- local_search_agent-0.1.0/local_search_agent/agent/prompts.py +93 -0
- local_search_agent-0.1.0/local_search_agent/agent/provider_factory.py +186 -0
- local_search_agent-0.1.0/local_search_agent/agent/rate_limit_handler.py +466 -0
- local_search_agent-0.1.0/local_search_agent/agent/tools/__init__.py +7 -0
- local_search_agent-0.1.0/local_search_agent/agent/tools/fetch_tool.py +112 -0
- local_search_agent-0.1.0/local_search_agent/agent/tools/graph_tool.py +91 -0
- local_search_agent-0.1.0/local_search_agent/agent/tools/search_tool.py +142 -0
- local_search_agent-0.1.0/local_search_agent/cli/__init__.py +1 -0
- local_search_agent-0.1.0/local_search_agent/cli/__main__.py +25 -0
- local_search_agent-0.1.0/local_search_agent/cli/commands.py +885 -0
- local_search_agent-0.1.0/local_search_agent/core/__init__.py +23 -0
- local_search_agent-0.1.0/local_search_agent/core/config.py +170 -0
- local_search_agent-0.1.0/local_search_agent/core/constants.py +129 -0
- local_search_agent-0.1.0/local_search_agent/core/document_node.py +162 -0
- local_search_agent-0.1.0/local_search_agent/core/framework.py +523 -0
- local_search_agent-0.1.0/local_search_agent/core/key_manager.py +396 -0
- local_search_agent-0.1.0/local_search_agent/core/meilisearch_manager.py +645 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/__init__.py +25 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/chunker.py +347 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/cleaner.py +164 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parser.py +72 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/__init__.py +23 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/csv_parser.py +106 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/docx_parser.py +362 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/eml_parser.py +181 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/html_parser.py +264 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/json_parser.py +137 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/pdf_parser.py +365 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/text_parser.py +53 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/xlsx_parser.py +155 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/parsers/xml_parser.py +155 -0
- local_search_agent-0.1.0/local_search_agent/ingestion/pipeline.py +269 -0
- local_search_agent-0.1.0/local_search_agent/scheduler/__init__.py +11 -0
- local_search_agent-0.1.0/local_search_agent/scheduler/incremental_sync.py +340 -0
- local_search_agent-0.1.0/local_search_agent/scheduler/monitor.py +183 -0
- local_search_agent-0.1.0/local_search_agent/search/__init__.py +6 -0
- local_search_agent-0.1.0/local_search_agent/search/meilisearch_client.py +339 -0
- local_search_agent-0.1.0/local_search_agent/search/query_builder.py +106 -0
- local_search_agent-0.1.0/local_search_agent/semantic/__init__.py +17 -0
- local_search_agent-0.1.0/local_search_agent/semantic/concept_compiler.py +143 -0
- local_search_agent-0.1.0/local_search_agent/semantic/enricher.py +154 -0
- local_search_agent-0.1.0/local_search_agent/semantic/link_graph.py +227 -0
- local_search_agent-0.1.0/local_search_agent/semantic/query_expander.py +199 -0
- local_search_agent-0.1.0/local_search_agent/semantic/structural_parser.py +170 -0
- local_search_agent-0.1.0/local_search_agent/server/__init__.py +1 -0
- local_search_agent-0.1.0/local_search_agent/server/fastapi_app.py +455 -0
- local_search_agent-0.1.0/local_search_agent/server/middleware/__init__.py +1 -0
- local_search_agent-0.1.0/local_search_agent/server/middleware/access_control.py +258 -0
- local_search_agent-0.1.0/local_search_agent/server/static_mounts.py +42 -0
- local_search_agent-0.1.0/local_search_agent/ui/__init__.py +9 -0
- local_search_agent-0.1.0/local_search_agent/ui/api_routes.py +948 -0
- local_search_agent-0.1.0/local_search_agent/ui/assets/icon1.ico +0 -0
- local_search_agent-0.1.0/local_search_agent/ui/assets/icon2.ico +0 -0
- local_search_agent-0.1.0/local_search_agent/ui/assets/logo1.webp +0 -0
- local_search_agent-0.1.0/local_search_agent/ui/assets/logo2.webp +0 -0
- local_search_agent-0.1.0/local_search_agent/ui/dashboard.py +496 -0
- local_search_agent-0.1.0/local_search_agent/ui/store.py +301 -0
- local_search_agent-0.1.0/local_search_agent/ui/templates/_chat.html +13 -0
- local_search_agent-0.1.0/local_search_agent/ui/templates/_drawer.html +12 -0
- local_search_agent-0.1.0/local_search_agent/ui/templates/_modals.html +243 -0
- local_search_agent-0.1.0/local_search_agent/ui/templates/_script.html +1336 -0
- local_search_agent-0.1.0/local_search_agent/ui/templates/_sidebar.html +73 -0
- local_search_agent-0.1.0/local_search_agent/ui/templates/_statusbar.html +14 -0
- local_search_agent-0.1.0/local_search_agent/ui/templates/_styles.html +606 -0
- local_search_agent-0.1.0/local_search_agent/ui/templates/_topbar.html +27 -0
- local_search_agent-0.1.0/local_search_agent/ui/templates/index.html +25 -0
- local_search_agent-0.1.0/local_search_agent/workspace/__init__.py +6 -0
- local_search_agent-0.1.0/local_search_agent/workspace/metadata_db.py +254 -0
- local_search_agent-0.1.0/local_search_agent/workspace/workspace_manager.py +253 -0
- local_search_agent-0.1.0/local_search_agent.egg-info/PKG-INFO +261 -0
- local_search_agent-0.1.0/local_search_agent.egg-info/SOURCES.txt +97 -0
- local_search_agent-0.1.0/local_search_agent.egg-info/dependency_links.txt +1 -0
- local_search_agent-0.1.0/local_search_agent.egg-info/entry_points.txt +2 -0
- local_search_agent-0.1.0/local_search_agent.egg-info/requires.txt +34 -0
- local_search_agent-0.1.0/local_search_agent.egg-info/top_level.txt +1 -0
- local_search_agent-0.1.0/pyproject.toml +172 -0
- local_search_agent-0.1.0/setup.cfg +4 -0
- local_search_agent-0.1.0/tests/test_agent.py +353 -0
- local_search_agent-0.1.0/tests/test_chunker.py +417 -0
- local_search_agent-0.1.0/tests/test_cleaner.py +174 -0
- local_search_agent-0.1.0/tests/test_cli.py +644 -0
- local_search_agent-0.1.0/tests/test_config.py +229 -0
- local_search_agent-0.1.0/tests/test_document_node.py +188 -0
- local_search_agent-0.1.0/tests/test_framework_semantic.py +252 -0
- local_search_agent-0.1.0/tests/test_heavy_parsers.py +670 -0
- local_search_agent-0.1.0/tests/test_ingestion.py +363 -0
- local_search_agent-0.1.0/tests/test_key_manager.py +558 -0
- local_search_agent-0.1.0/tests/test_parsers.py +381 -0
- local_search_agent-0.1.0/tests/test_scheduler.py +413 -0
- local_search_agent-0.1.0/tests/test_search.py +238 -0
- local_search_agent-0.1.0/tests/test_semantic.py +433 -0
- local_search_agent-0.1.0/tests/test_server.py +317 -0
- local_search_agent-0.1.0/tests/test_workspace_manager.py +132 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Wissam Metawee
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: local-search-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A framework that replaces traditional RAG pipelines. Ingest any number of documents in multiple workspaces (channels, departments, etc.), index them with BM25, and let the agent search, fetch, and reason over them, exactly like searching the web, but entirely on your machine. No vector store, no embeddings needed.
|
|
5
|
+
Author-email: Wissam Metawee <wissammetawee84@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/wiss84/local-search-agent
|
|
8
|
+
Project-URL: Repository, https://github.com/wiss84/local-search-agent
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/wiss84/local-search-agent/issues
|
|
10
|
+
Keywords: rag,search,meilisearch,bm25,langchain,langgraph,agent,local,documents,llm
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Information Technology
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
19
|
+
Classifier: Topic :: Text Processing :: Indexing
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: fastapi>=0.136.1
|
|
25
|
+
Requires-Dist: uvicorn[standard]>=0.34.0
|
|
26
|
+
Requires-Dist: python-multipart>=0.0.20
|
|
27
|
+
Requires-Dist: meilisearch-python-sdk>=7.1.5
|
|
28
|
+
Requires-Dist: apscheduler<4.0,>=3.11.2
|
|
29
|
+
Requires-Dist: httpx>=0.28.1
|
|
30
|
+
Requires-Dist: filelock>=3.13.0
|
|
31
|
+
Requires-Dist: platformdirs>=4.2.0
|
|
32
|
+
Requires-Dist: rich>=13.7.0
|
|
33
|
+
Requires-Dist: pyfiglet>=1.0.2
|
|
34
|
+
Requires-Dist: docling>=2.0.0
|
|
35
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
|
36
|
+
Requires-Dist: lxml>=5.2.0
|
|
37
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
38
|
+
Requires-Dist: python-pptx>=0.6.23
|
|
39
|
+
Requires-Dist: PyMuPDF>=1.25.0
|
|
40
|
+
Requires-Dist: pypdf>=5.0.0
|
|
41
|
+
Requires-Dist: langgraph>=1.1.10
|
|
42
|
+
Requires-Dist: langchain-core>=1.3.3
|
|
43
|
+
Requires-Dist: langchain-google-genai>=4.2.2
|
|
44
|
+
Requires-Dist: langchain-ollama>=1.1.0
|
|
45
|
+
Requires-Dist: langchain-openai>=1.1.10
|
|
46
|
+
Requires-Dist: langchain-anthropic>=1.4.2
|
|
47
|
+
Requires-Dist: google-api-core>=2.30.3
|
|
48
|
+
Requires-Dist: pywebview>=5.3.3
|
|
49
|
+
Requires-Dist: jinja2>=3.1.4
|
|
50
|
+
Provides-Extra: dev
|
|
51
|
+
Requires-Dist: pytest>=9.0.3; extra == "dev"
|
|
52
|
+
Requires-Dist: pytest-asyncio>=1.3.0; extra == "dev"
|
|
53
|
+
Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
|
|
54
|
+
Requires-Dist: ruff>=0.11.0; extra == "dev"
|
|
55
|
+
Requires-Dist: mypy>=1.15.0; extra == "dev"
|
|
56
|
+
Requires-Dist: python-docx>=1.1.0; extra == "dev"
|
|
57
|
+
Dynamic: license-file
|
|
58
|
+
|
|
59
|
+
# Local Search Agent
|
|
60
|
+
|
|
61
|
+
**Give your AI agent a search engine for your local files.**
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## What is this?
|
|
66
|
+
|
|
67
|
+
Local Search Agent is a Python framework that gives your AI agent a search engine for your local files and lets it search, fetch, and reason over your local documents — the same way a researcher searches the web, but entirely on your machine.
|
|
68
|
+
|
|
69
|
+
Point it at a folder. Ask a question. The agent searches your documents, reads the relevant ones, and gives you an answer with citations — no cloud upload, no API calls to external search services, no embeddings, no vector stores.
|
|
70
|
+
|
|
71
|
+
```
|
|
72
|
+
"What was the AWS spend in Q3?" → agent searches index → fetches relevant docs → answers with sources
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
## Why not RAG?
|
|
79
|
+
|
|
80
|
+
Traditional RAG (Retrieval-Augmented Generation) has a fundamental problem: it converts your documents into embeddings and stores them in a vector database. That means:
|
|
81
|
+
|
|
82
|
+
- **Stale indexes** — embeddings go out of date silently. You never know if the agent is reading your latest documents or a six-month-old snapshot
|
|
83
|
+
- **Black-box retrieval** — you can't see why a document was retrieved or not. Debugging poor answers is guesswork
|
|
84
|
+
- **Chunking anxiety** — split too small and you lose context. Split too large and retrieval quality degrades. There's no right answer
|
|
85
|
+
- **Infrastructure overhead** — a vector database is another service to run, maintain, and pay for
|
|
86
|
+
- **Semantic drift** — embeddings are sensitive to how questions are phrased. A question about "cloud expenditure" may never match a document that says "AWS spend"
|
|
87
|
+
|
|
88
|
+
Local Search Agent takes a different approach: **BM25 keyword search via Meilisearch, structured metadata, and a LangGraph agent loop with tools**. The agent searches your document index the same way a developer searches Stack Overflow — with real queries, real results, and full transparency into what was retrieved and why.
|
|
89
|
+
|
|
90
|
+
The result is deterministic, auditable, and fast. You can see exactly what the agent fetched for every answer.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## How it works
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
1. INGEST Your documents → parsed, cleaned, chunked, indexed into Meilisearch
|
|
98
|
+
2. SERVE FastAPI file server makes documents available to the agent via HTTP
|
|
99
|
+
3. SEARCH LangGraph agent loop: search_local_index → fetch_local_url → reason
|
|
100
|
+
4. ANSWER Agent returns an answer with inline source citations
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Everything runs locally. Meilisearch downloads automatically on first use — no manual setup required.
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Screenshots
|
|
108
|
+
|
|
109
|
+
### Desktop UI
|
|
110
|
+

|
|
111
|
+
|
|
112
|
+
### CLI Interactive Mode
|
|
113
|
+

|
|
114
|
+
|
|
115
|
+
### Python API
|
|
116
|
+

|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Install
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
pip install local-search-agent
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Set your API key
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# Google AI Studio (free tier — recommended), or a paid key from OpenAI or Anthropic
|
|
130
|
+
local-search config set-key --provider google --key YOUR_KEY
|
|
131
|
+
|
|
132
|
+
# Or use Ollama for a fully local, zero-cost setup (no key needed)
|
|
133
|
+
# Install from https://ollama.com
|
|
134
|
+
# Download any model that supports function calling and system instructions:
|
|
135
|
+
ollama pull gemma4:e2b  # 7.2GB; or: ollama pull gemma4:e4b (9.6GB)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Quick Start
|
|
141
|
+
|
|
142
|
+
### Desktop UI
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
local-search ui
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
The desktop window opens. Create a workspace, point it at a folder, ingest, and start asking questions.
|
|
149
|
+
|
|
150
|
+
### CLI
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
# Create a workspace and ingest documents
|
|
154
|
+
local-search workspace create finance "C:\my_docs"
|
|
155
|
+
local-search ingest --workspace finance --dirs "C:\my_docs"
|
|
156
|
+
|
|
157
|
+
# Start the file server (keep this running)
|
|
158
|
+
local-search serve --workspace finance
|
|
159
|
+
|
|
160
|
+
# Ask a question
|
|
161
|
+
local-search query "What was the AWS spend in Q3?" --workspace finance --provider google
|
|
162
|
+
|
|
163
|
+
# Use interactive mode
|
|
164
|
+
local-search --workspace finance --provider google
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Python API
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from local_search_agent import SearchAgentFramework, SearchAgentConfig
|
|
171
|
+
|
|
172
|
+
config = SearchAgentConfig(
|
|
173
|
+
document_dirs=["C:/my_docs"],
|
|
174
|
+
workspace_name="finance",
|
|
175
|
+
provider="google",
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
framework = SearchAgentFramework(config)
|
|
179
|
+
framework.ingest_and_index()
|
|
180
|
+
framework.start_file_server()
|
|
181
|
+
|
|
182
|
+
response = framework.query("What was the AWS spend in Q3?")
|
|
183
|
+
print(response["answer"])
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Supported File Types
|
|
189
|
+
|
|
190
|
+
| Format | Extension |
|
|
191
|
+
|--------|-----------|
|
|
192
|
+
| PDF | `.pdf` |
|
|
193
|
+
| Word | `.docx` |
|
|
194
|
+
| Excel | `.xlsx` |
|
|
195
|
+
| PowerPoint | `.pptx` |
|
|
196
|
+
| HTML | `.html`, `.htm` |
|
|
197
|
+
| Plain text | `.txt`, `.md` |
|
|
198
|
+
| CSV | `.csv` |
|
|
199
|
+
| JSON | `.json` |
|
|
200
|
+
| XML | `.xml` |
|
|
201
|
+
| Email | `.eml` |
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## Key Features
|
|
206
|
+
|
|
207
|
+
- **One command install** — `pip install local-search-agent`. Meilisearch downloads automatically
|
|
208
|
+
- **No embeddings, no vector stores** — BM25 search with structured metadata. Fast, deterministic, auditable
|
|
209
|
+
- **Native desktop UI** — pywebview window with live streaming agent responses, workspace management, and chat history
|
|
210
|
+
- **Multi-provider LLM** — Google, Ollama (local), OpenAI, Anthropic
|
|
211
|
+
- **Multi-workspace** — isolate document collections by department, project, channel, or topic. Each workspace is its own search index
|
|
212
|
+
- **Incremental sync** — background scheduler re-indexes only changed files. A 10,000-document corpus with 50 changes re-indexes only the 50
|
|
213
|
+
- **Full CLI parity** — everything you can do in the UI you can do from the terminal
|
|
214
|
+
- **Python API** — embed the framework directly in your own application
|
|
215
|
+
- **Cross-platform** — Windows, macOS, Linux
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## Documentation
|
|
220
|
+
|
|
221
|
+
| Guide | Description |
|
|
222
|
+
|-------|-------------|
|
|
223
|
+
| [Getting Started](docs/getting-started.md) | First steps, quick start for UI, CLI, and Python API |
|
|
224
|
+
| [Installation](docs/installation.md) | Full install guide, API keys, Ollama setup, platform notes |
|
|
225
|
+
| [Architecture](docs/architecture.md) | Full architecture and design guide |
|
|
226
|
+
| [CLI Reference](docs/cli-reference.md) | All commands and flags |
|
|
227
|
+
| [Python API Reference](docs/api-reference.md) | Full API documentation |
|
|
228
|
+
| [Configuration](docs/configuration.md) | All config options and patterns |
|
|
229
|
+
| [Ingestion](docs/ingestion.md) | How ingestion works, supported formats, chunking, scheduler |
|
|
230
|
+
| [Multi-Workspace](docs/multi-workspace.md) | Managing multiple document collections |
|
|
231
|
+
| [Semantic Search](docs/semantic-search.md) | Experimental: concept extraction, query expansion, link graph |
|
|
232
|
+
| [Troubleshooting](docs/troubleshooting.md) | Common issues and fixes |
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
## Contributing
|
|
237
|
+
|
|
238
|
+
Contributions are welcome. Clone the repo and install in editable mode with dev dependencies:
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
git clone https://github.com/wiss84/local-search-agent
|
|
242
|
+
cd local-search-agent
|
|
243
|
+
pip install -e ".[dev]"
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Run tests before submitting a PR:
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
pytest tests/
|
|
250
|
+
ruff check .
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## License
|
|
256
|
+
|
|
257
|
+
MIT — see [LICENSE](LICENSE) for details.
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
Built by [Wissam Metawee](https://github.com/wiss84)
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# Local Search Agent
|
|
2
|
+
|
|
3
|
+
**Give your AI agent a search engine for your local files.**
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## What is this?
|
|
8
|
+
|
|
9
|
+
Local Search Agent is a Python framework that gives your AI agent a search engine for your local files and lets it search, fetch, and reason over your local documents — the same way a researcher searches the web, but entirely on your machine.
|
|
10
|
+
|
|
11
|
+
Point it at a folder. Ask a question. The agent searches your documents, reads the relevant ones, and gives you an answer with citations — no cloud upload, no API calls to external search services, no embeddings, no vector stores.
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
"What was the AWS spend in Q3?" → agent searches index → fetches relevant docs → answers with sources
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
## Why not RAG?
|
|
21
|
+
|
|
22
|
+
Traditional RAG (Retrieval-Augmented Generation) has a fundamental problem: it converts your documents into embeddings and stores them in a vector database. That means:
|
|
23
|
+
|
|
24
|
+
- **Stale indexes** — embeddings go out of date silently. You never know if the agent is reading your latest documents or a six-month-old snapshot
|
|
25
|
+
- **Black-box retrieval** — you can't see why a document was retrieved or not. Debugging poor answers is guesswork
|
|
26
|
+
- **Chunking anxiety** — split too small and you lose context. Split too large and retrieval quality degrades. There's no right answer
|
|
27
|
+
- **Infrastructure overhead** — a vector database is another service to run, maintain, and pay for
|
|
28
|
+
- **Semantic drift** — embeddings are sensitive to how questions are phrased. A question about "cloud expenditure" may never match a document that says "AWS spend"
|
|
29
|
+
|
|
30
|
+
Local Search Agent takes a different approach: **BM25 keyword search via Meilisearch, structured metadata, and a LangGraph agent loop with tools**. The agent searches your document index the same way a developer searches Stack Overflow — with real queries, real results, and full transparency into what was retrieved and why.
|
|
31
|
+
|
|
32
|
+
The result is deterministic, auditable, and fast. You can see exactly what the agent fetched for every answer.
|
|
33
|
+
|
|
34
|
+
---
|
|
35
|
+
|
|
36
|
+
## How it works
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
1. INGEST Your documents → parsed, cleaned, chunked, indexed into Meilisearch
|
|
40
|
+
2. SERVE FastAPI file server makes documents available to the agent via HTTP
|
|
41
|
+
3. SEARCH LangGraph agent loop: search_local_index → fetch_local_url → reason
|
|
42
|
+
4. ANSWER Agent returns an answer with inline source citations
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Everything runs locally. Meilisearch downloads automatically on first use — no manual setup required.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Screenshots
|
|
50
|
+
|
|
51
|
+
### Desktop UI
|
|
52
|
+

|
|
53
|
+
|
|
54
|
+
### CLI Interactive Mode
|
|
55
|
+

|
|
56
|
+
|
|
57
|
+
### Python API
|
|
58
|
+

|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Install
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install local-search-agent
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Set your API key
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
# Google AI Studio (free tier — recommended), or a paid key from OpenAI or Anthropic
|
|
72
|
+
local-search config set-key --provider google --key YOUR_KEY
|
|
73
|
+
|
|
74
|
+
# Or use Ollama for a fully local, zero-cost setup (no key needed)
|
|
75
|
+
# Install from https://ollama.com
|
|
76
|
+
# Download any model that supports function calling and system instructions:
|
|
77
|
+
ollama pull gemma4:e2b  # 7.2GB; or: ollama pull gemma4:e4b (9.6GB)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Quick Start
|
|
83
|
+
|
|
84
|
+
### Desktop UI
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
local-search ui
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
The desktop window opens. Create a workspace, point it at a folder, ingest, and start asking questions.
|
|
91
|
+
|
|
92
|
+
### CLI
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# Create a workspace and ingest documents
|
|
96
|
+
local-search workspace create finance "C:\my_docs"
|
|
97
|
+
local-search ingest --workspace finance --dirs "C:\my_docs"
|
|
98
|
+
|
|
99
|
+
# Start the file server (keep this running)
|
|
100
|
+
local-search serve --workspace finance
|
|
101
|
+
|
|
102
|
+
# Ask a question
|
|
103
|
+
local-search query "What was the AWS spend in Q3?" --workspace finance --provider google
|
|
104
|
+
|
|
105
|
+
# Use interactive mode
|
|
106
|
+
local-search --workspace finance --provider google
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Python API
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from local_search_agent import SearchAgentFramework, SearchAgentConfig
|
|
113
|
+
|
|
114
|
+
config = SearchAgentConfig(
|
|
115
|
+
document_dirs=["C:/my_docs"],
|
|
116
|
+
workspace_name="finance",
|
|
117
|
+
provider="google",
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
framework = SearchAgentFramework(config)
|
|
121
|
+
framework.ingest_and_index()
|
|
122
|
+
framework.start_file_server()
|
|
123
|
+
|
|
124
|
+
response = framework.query("What was the AWS spend in Q3?")
|
|
125
|
+
print(response["answer"])
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Supported File Types
|
|
131
|
+
|
|
132
|
+
| Format | Extension |
|
|
133
|
+
|--------|-----------|
|
|
134
|
+
| PDF | `.pdf` |
|
|
135
|
+
| Word | `.docx` |
|
|
136
|
+
| Excel | `.xlsx` |
|
|
137
|
+
| PowerPoint | `.pptx` |
|
|
138
|
+
| HTML | `.html`, `.htm` |
|
|
139
|
+
| Plain text | `.txt`, `.md` |
|
|
140
|
+
| CSV | `.csv` |
|
|
141
|
+
| JSON | `.json` |
|
|
142
|
+
| XML | `.xml` |
|
|
143
|
+
| Email | `.eml` |
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## Key Features
|
|
148
|
+
|
|
149
|
+
- **One command install** — `pip install local-search-agent`. Meilisearch downloads automatically
|
|
150
|
+
- **No embeddings, no vector stores** — BM25 search with structured metadata. Fast, deterministic, auditable
|
|
151
|
+
- **Native desktop UI** — pywebview window with live streaming agent responses, workspace management, and chat history
|
|
152
|
+
- **Multi-provider LLM** — Google, Ollama (local), OpenAI, Anthropic
|
|
153
|
+
- **Multi-workspace** — isolate document collections by department, project, channel, or topic. Each workspace is its own search index
|
|
154
|
+
- **Incremental sync** — background scheduler re-indexes only changed files. A 10,000-document corpus with 50 changes re-indexes only the 50
|
|
155
|
+
- **Full CLI parity** — everything you can do in the UI you can do from the terminal
|
|
156
|
+
- **Python API** — embed the framework directly in your own application
|
|
157
|
+
- **Cross-platform** — Windows, macOS, Linux
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Documentation
|
|
162
|
+
|
|
163
|
+
| Guide | Description |
|
|
164
|
+
|-------|-------------|
|
|
165
|
+
| [Getting Started](docs/getting-started.md) | First steps, quick start for UI, CLI, and Python API |
|
|
166
|
+
| [Installation](docs/installation.md) | Full install guide, API keys, Ollama setup, platform notes |
|
|
167
|
+
| [Architecture](docs/architecture.md) | Full architecture and design guide |
|
|
168
|
+
| [CLI Reference](docs/cli-reference.md) | All commands and flags |
|
|
169
|
+
| [Python API Reference](docs/api-reference.md) | Full API documentation |
|
|
170
|
+
| [Configuration](docs/configuration.md) | All config options and patterns |
|
|
171
|
+
| [Ingestion](docs/ingestion.md) | How ingestion works, supported formats, chunking, scheduler |
|
|
172
|
+
| [Multi-Workspace](docs/multi-workspace.md) | Managing multiple document collections |
|
|
173
|
+
| [Semantic Search](docs/semantic-search.md) | Experimental: concept extraction, query expansion, link graph |
|
|
174
|
+
| [Troubleshooting](docs/troubleshooting.md) | Common issues and fixes |
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Contributing
|
|
179
|
+
|
|
180
|
+
Contributions are welcome. Clone the repo and install in editable mode with dev dependencies:
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
git clone https://github.com/wiss84/local-search-agent
|
|
184
|
+
cd local-search-agent
|
|
185
|
+
pip install -e ".[dev]"
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Run tests before submitting a PR:
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
pytest tests/
|
|
192
|
+
ruff check .
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
---
|
|
196
|
+
|
|
197
|
+
## License
|
|
198
|
+
|
|
199
|
+
MIT — see [LICENSE](LICENSE) for details.
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
Built by [Wissam Metawee](https://github.com/wiss84)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""
|
|
2
|
+
local_search_agent
|
|
3
|
+
==================
|
|
4
|
+
An open-source, pip-installable Python framework that replaces vector-based RAG
|
|
5
|
+
with a deterministic, auditable local search system.
|
|
6
|
+
|
|
7
|
+
Phases
|
|
8
|
+
------
|
|
9
|
+
Phase 1: File server, DocumentNode, WorkspaceManager
|
|
10
|
+
Phase 2: Ingestion pipeline (PDF/DOCX/HTML/XLSX), text cleaner, Meilisearch indexing
|
|
11
|
+
Phase 3: LangGraph agent loop, multi-provider LLM, search + fetch tools
|
|
12
|
+
Phase 4: Multi-workspace isolation, APScheduler incremental sync, IndexMonitor
|
|
13
|
+
Phase 5: Semantic search (ConceptCompiler + StructuralParser + QueryExpander),
|
|
14
|
+
LinkGraph cross-document relationships, Windows/LDAP access control
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from local_search_agent.agent.agent import LocalSearchAgent
|
|
18
|
+
from local_search_agent.core.config import SearchAgentConfig
|
|
19
|
+
from local_search_agent.core.document_node import DocumentNode
|
|
20
|
+
from local_search_agent.core.framework import SearchAgentFramework
|
|
21
|
+
from local_search_agent.ingestion.pipeline import IngestionPipeline, IngestStats
|
|
22
|
+
from local_search_agent.scheduler.incremental_sync import IncrementalSyncScheduler
|
|
23
|
+
from local_search_agent.scheduler.monitor import IndexHealthSummary, IndexMonitor
|
|
24
|
+
from local_search_agent.search.meilisearch_client import MeilisearchClient
|
|
25
|
+
from local_search_agent.search.query_builder import QueryBuilder
|
|
26
|
+
from local_search_agent.semantic.concept_compiler import ConceptCompiler, ConceptMetadata
|
|
27
|
+
from local_search_agent.semantic.enricher import SemanticEnricher
|
|
28
|
+
from local_search_agent.semantic.link_graph import LinkGraph
|
|
29
|
+
from local_search_agent.semantic.query_expander import QueryExpander
|
|
30
|
+
from local_search_agent.semantic.structural_parser import StructuralMetadata, StructuralParser
|
|
31
|
+
from local_search_agent.workspace.metadata_db import MetadataDB
|
|
32
|
+
from local_search_agent.workspace.workspace_manager import WorkspaceManager
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
# Core
|
|
36
|
+
"SearchAgentFramework",
|
|
37
|
+
"SearchAgentConfig",
|
|
38
|
+
"DocumentNode",
|
|
39
|
+
# Ingestion
|
|
40
|
+
"IngestionPipeline",
|
|
41
|
+
"IngestStats",
|
|
42
|
+
# Search
|
|
43
|
+
"MeilisearchClient",
|
|
44
|
+
"QueryBuilder",
|
|
45
|
+
# Agent
|
|
46
|
+
"LocalSearchAgent",
|
|
47
|
+
# Scheduler
|
|
48
|
+
"IncrementalSyncScheduler",
|
|
49
|
+
"IndexMonitor",
|
|
50
|
+
"IndexHealthSummary",
|
|
51
|
+
# Workspace
|
|
52
|
+
"WorkspaceManager",
|
|
53
|
+
"MetadataDB",
|
|
54
|
+
# Semantic (Phase 5)
|
|
55
|
+
"ConceptCompiler",
|
|
56
|
+
"ConceptMetadata",
|
|
57
|
+
"StructuralParser",
|
|
58
|
+
"StructuralMetadata",
|
|
59
|
+
"QueryExpander",
|
|
60
|
+
"LinkGraph",
|
|
61
|
+
"SemanticEnricher",
|
|
62
|
+
]
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""local_search_agent.agent — public re-exports."""
|
|
2
|
+
|
|
3
|
+
from local_search_agent.agent.agent import AgentState, LocalSearchAgent
|
|
4
|
+
from local_search_agent.agent.prompts import build_system_prompt
|
|
5
|
+
from local_search_agent.agent.provider_factory import build_llm
|
|
6
|
+
|
|
7
|
+
__all__ = ["LocalSearchAgent", "AgentState", "build_llm", "build_system_prompt"]
|