local-search-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. local_search_agent-0.1.0/LICENSE +21 -0
  2. local_search_agent-0.1.0/PKG-INFO +261 -0
  3. local_search_agent-0.1.0/README.md +203 -0
  4. local_search_agent-0.1.0/local_search_agent/__init__.py +62 -0
  5. local_search_agent-0.1.0/local_search_agent/agent/__init__.py +7 -0
  6. local_search_agent-0.1.0/local_search_agent/agent/agent.py +451 -0
  7. local_search_agent-0.1.0/local_search_agent/agent/prompts.py +93 -0
  8. local_search_agent-0.1.0/local_search_agent/agent/provider_factory.py +186 -0
  9. local_search_agent-0.1.0/local_search_agent/agent/rate_limit_handler.py +466 -0
  10. local_search_agent-0.1.0/local_search_agent/agent/tools/__init__.py +7 -0
  11. local_search_agent-0.1.0/local_search_agent/agent/tools/fetch_tool.py +112 -0
  12. local_search_agent-0.1.0/local_search_agent/agent/tools/graph_tool.py +91 -0
  13. local_search_agent-0.1.0/local_search_agent/agent/tools/search_tool.py +142 -0
  14. local_search_agent-0.1.0/local_search_agent/cli/__init__.py +1 -0
  15. local_search_agent-0.1.0/local_search_agent/cli/__main__.py +25 -0
  16. local_search_agent-0.1.0/local_search_agent/cli/commands.py +885 -0
  17. local_search_agent-0.1.0/local_search_agent/core/__init__.py +23 -0
  18. local_search_agent-0.1.0/local_search_agent/core/config.py +170 -0
  19. local_search_agent-0.1.0/local_search_agent/core/constants.py +129 -0
  20. local_search_agent-0.1.0/local_search_agent/core/document_node.py +162 -0
  21. local_search_agent-0.1.0/local_search_agent/core/framework.py +523 -0
  22. local_search_agent-0.1.0/local_search_agent/core/key_manager.py +396 -0
  23. local_search_agent-0.1.0/local_search_agent/core/meilisearch_manager.py +645 -0
  24. local_search_agent-0.1.0/local_search_agent/ingestion/__init__.py +25 -0
  25. local_search_agent-0.1.0/local_search_agent/ingestion/chunker.py +347 -0
  26. local_search_agent-0.1.0/local_search_agent/ingestion/cleaner.py +164 -0
  27. local_search_agent-0.1.0/local_search_agent/ingestion/parser.py +72 -0
  28. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/__init__.py +23 -0
  29. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/csv_parser.py +106 -0
  30. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/docx_parser.py +362 -0
  31. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/eml_parser.py +181 -0
  32. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/html_parser.py +264 -0
  33. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/json_parser.py +137 -0
  34. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/pdf_parser.py +365 -0
  35. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/text_parser.py +53 -0
  36. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/xlsx_parser.py +155 -0
  37. local_search_agent-0.1.0/local_search_agent/ingestion/parsers/xml_parser.py +155 -0
  38. local_search_agent-0.1.0/local_search_agent/ingestion/pipeline.py +269 -0
  39. local_search_agent-0.1.0/local_search_agent/scheduler/__init__.py +11 -0
  40. local_search_agent-0.1.0/local_search_agent/scheduler/incremental_sync.py +340 -0
  41. local_search_agent-0.1.0/local_search_agent/scheduler/monitor.py +183 -0
  42. local_search_agent-0.1.0/local_search_agent/search/__init__.py +6 -0
  43. local_search_agent-0.1.0/local_search_agent/search/meilisearch_client.py +339 -0
  44. local_search_agent-0.1.0/local_search_agent/search/query_builder.py +106 -0
  45. local_search_agent-0.1.0/local_search_agent/semantic/__init__.py +17 -0
  46. local_search_agent-0.1.0/local_search_agent/semantic/concept_compiler.py +143 -0
  47. local_search_agent-0.1.0/local_search_agent/semantic/enricher.py +154 -0
  48. local_search_agent-0.1.0/local_search_agent/semantic/link_graph.py +227 -0
  49. local_search_agent-0.1.0/local_search_agent/semantic/query_expander.py +199 -0
  50. local_search_agent-0.1.0/local_search_agent/semantic/structural_parser.py +170 -0
  51. local_search_agent-0.1.0/local_search_agent/server/__init__.py +1 -0
  52. local_search_agent-0.1.0/local_search_agent/server/fastapi_app.py +455 -0
  53. local_search_agent-0.1.0/local_search_agent/server/middleware/__init__.py +1 -0
  54. local_search_agent-0.1.0/local_search_agent/server/middleware/access_control.py +258 -0
  55. local_search_agent-0.1.0/local_search_agent/server/static_mounts.py +42 -0
  56. local_search_agent-0.1.0/local_search_agent/ui/__init__.py +9 -0
  57. local_search_agent-0.1.0/local_search_agent/ui/api_routes.py +948 -0
  58. local_search_agent-0.1.0/local_search_agent/ui/assets/icon1.ico +0 -0
  59. local_search_agent-0.1.0/local_search_agent/ui/assets/icon2.ico +0 -0
  60. local_search_agent-0.1.0/local_search_agent/ui/assets/logo1.webp +0 -0
  61. local_search_agent-0.1.0/local_search_agent/ui/assets/logo2.webp +0 -0
  62. local_search_agent-0.1.0/local_search_agent/ui/dashboard.py +496 -0
  63. local_search_agent-0.1.0/local_search_agent/ui/store.py +301 -0
  64. local_search_agent-0.1.0/local_search_agent/ui/templates/_chat.html +13 -0
  65. local_search_agent-0.1.0/local_search_agent/ui/templates/_drawer.html +12 -0
  66. local_search_agent-0.1.0/local_search_agent/ui/templates/_modals.html +243 -0
  67. local_search_agent-0.1.0/local_search_agent/ui/templates/_script.html +1336 -0
  68. local_search_agent-0.1.0/local_search_agent/ui/templates/_sidebar.html +73 -0
  69. local_search_agent-0.1.0/local_search_agent/ui/templates/_statusbar.html +14 -0
  70. local_search_agent-0.1.0/local_search_agent/ui/templates/_styles.html +606 -0
  71. local_search_agent-0.1.0/local_search_agent/ui/templates/_topbar.html +27 -0
  72. local_search_agent-0.1.0/local_search_agent/ui/templates/index.html +25 -0
  73. local_search_agent-0.1.0/local_search_agent/workspace/__init__.py +6 -0
  74. local_search_agent-0.1.0/local_search_agent/workspace/metadata_db.py +254 -0
  75. local_search_agent-0.1.0/local_search_agent/workspace/workspace_manager.py +253 -0
  76. local_search_agent-0.1.0/local_search_agent.egg-info/PKG-INFO +261 -0
  77. local_search_agent-0.1.0/local_search_agent.egg-info/SOURCES.txt +97 -0
  78. local_search_agent-0.1.0/local_search_agent.egg-info/dependency_links.txt +1 -0
  79. local_search_agent-0.1.0/local_search_agent.egg-info/entry_points.txt +2 -0
  80. local_search_agent-0.1.0/local_search_agent.egg-info/requires.txt +34 -0
  81. local_search_agent-0.1.0/local_search_agent.egg-info/top_level.txt +1 -0
  82. local_search_agent-0.1.0/pyproject.toml +172 -0
  83. local_search_agent-0.1.0/setup.cfg +4 -0
  84. local_search_agent-0.1.0/tests/test_agent.py +353 -0
  85. local_search_agent-0.1.0/tests/test_chunker.py +417 -0
  86. local_search_agent-0.1.0/tests/test_cleaner.py +174 -0
  87. local_search_agent-0.1.0/tests/test_cli.py +644 -0
  88. local_search_agent-0.1.0/tests/test_config.py +229 -0
  89. local_search_agent-0.1.0/tests/test_document_node.py +188 -0
  90. local_search_agent-0.1.0/tests/test_framework_semantic.py +252 -0
  91. local_search_agent-0.1.0/tests/test_heavy_parsers.py +670 -0
  92. local_search_agent-0.1.0/tests/test_ingestion.py +363 -0
  93. local_search_agent-0.1.0/tests/test_key_manager.py +558 -0
  94. local_search_agent-0.1.0/tests/test_parsers.py +381 -0
  95. local_search_agent-0.1.0/tests/test_scheduler.py +413 -0
  96. local_search_agent-0.1.0/tests/test_search.py +238 -0
  97. local_search_agent-0.1.0/tests/test_semantic.py +433 -0
  98. local_search_agent-0.1.0/tests/test_server.py +317 -0
  99. local_search_agent-0.1.0/tests/test_workspace_manager.py +132 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Wissam Metawee
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,261 @@
1
+ Metadata-Version: 2.4
2
+ Name: local-search-agent
3
+ Version: 0.1.0
4
+ Summary: A framework that replaces traditional RAG pipelines. Ingest any number of documents into multiple workspaces (channels, departments, etc.), index them with BM25, and let the agent search, fetch, and reason over them, exactly like searching the web, but entirely on your machine. No vector store, no embeddings needed.
5
+ Author-email: Wissam Metawee <wissammetawee84@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/wiss84/local-search-agent
8
+ Project-URL: Repository, https://github.com/wiss84/local-search-agent
9
+ Project-URL: Bug Tracker, https://github.com/wiss84/local-search-agent/issues
10
+ Keywords: rag,search,meilisearch,bm25,langchain,langgraph,agent,local,documents,llm
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Information Technology
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Topic :: Text Processing :: Indexing
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.11
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: fastapi>=0.136.1
25
+ Requires-Dist: uvicorn[standard]>=0.34.0
26
+ Requires-Dist: python-multipart>=0.0.20
27
+ Requires-Dist: meilisearch-python-sdk>=7.1.5
28
+ Requires-Dist: apscheduler<4.0,>=3.11.2
29
+ Requires-Dist: httpx>=0.28.1
30
+ Requires-Dist: filelock>=3.13.0
31
+ Requires-Dist: platformdirs>=4.2.0
32
+ Requires-Dist: rich>=13.7.0
33
+ Requires-Dist: pyfiglet>=1.0.2
34
+ Requires-Dist: docling>=2.0.0
35
+ Requires-Dist: beautifulsoup4>=4.12.0
36
+ Requires-Dist: lxml>=5.2.0
37
+ Requires-Dist: openpyxl>=3.1.0
38
+ Requires-Dist: python-pptx>=0.6.23
39
+ Requires-Dist: PyMuPDF>=1.25.0
40
+ Requires-Dist: pypdf>=5.0.0
41
+ Requires-Dist: langgraph>=1.1.10
42
+ Requires-Dist: langchain-core>=1.3.3
43
+ Requires-Dist: langchain-google-genai>=4.2.2
44
+ Requires-Dist: langchain-ollama>=1.1.0
45
+ Requires-Dist: langchain-openai>=1.1.10
46
+ Requires-Dist: langchain-anthropic>=1.4.2
47
+ Requires-Dist: google-api-core>=2.30.3
48
+ Requires-Dist: pywebview>=5.3.3
49
+ Requires-Dist: jinja2>=3.1.4
50
+ Provides-Extra: dev
51
+ Requires-Dist: pytest>=9.0.3; extra == "dev"
52
+ Requires-Dist: pytest-asyncio>=1.3.0; extra == "dev"
53
+ Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
54
+ Requires-Dist: ruff>=0.11.0; extra == "dev"
55
+ Requires-Dist: mypy>=1.15.0; extra == "dev"
56
+ Requires-Dist: python-docx>=1.1.0; extra == "dev"
57
+ Dynamic: license-file
58
+
59
+ # Local Search Agent
60
+
61
+ **Give your AI agent a search engine for your local files.**
62
+
63
+ ---
64
+
65
+ ## What is this?
66
+
67
+ Local Search Agent is a Python framework that gives your AI agent a search engine for your local files and lets it search, fetch, and reason over your local documents — the same way a researcher searches the web, but entirely on your machine.
68
+
69
+ Point it at a folder. Ask a question. The agent searches your documents, reads the relevant ones, and gives you an answer with citations — no cloud upload, no API calls to external search services, no embeddings, no vector stores.
70
+
71
+ ```
72
+ "What was the AWS spend in Q3?" → agent searches index → fetches relevant docs → answers with sources
73
+ ```
74
+
75
+ ---
76
+
77
+
78
+ ## Why not RAG?
79
+
80
+ Traditional RAG (Retrieval-Augmented Generation) has a fundamental problem: it converts your documents into embeddings and stores them in a vector database. That means:
81
+
82
+ - **Stale indexes** — embeddings go out of date silently. You never know if the agent is reading your latest documents or a six-month-old snapshot
83
+ - **Black-box retrieval** — you can't see why a document was retrieved or not. Debugging poor answers is guesswork
84
+ - **Chunking anxiety** — split too small and you lose context. Split too large and retrieval quality degrades. There's no right answer
85
+ - **Infrastructure overhead** — a vector database is another service to run, maintain, and pay for
86
+ - **Semantic drift** — embeddings are sensitive to how questions are phrased. A question about "cloud expenditure" may never match a document that says "AWS spend"
87
+
88
+ Local Search Agent takes a different approach: **BM25 keyword search via Meilisearch, structured metadata, and a LangGraph agent loop with tools**. The agent searches your document index the same way a developer searches Stack Overflow — with real queries, real results, and full transparency into what was retrieved and why.
89
+
90
+ The result is deterministic, auditable, and fast. You can see exactly what the agent fetched for every answer.
91
+
92
+ ---
93
+
94
+ ## How it works
95
+
96
+ ```
97
+ 1. INGEST Your documents → parsed, cleaned, chunked, indexed into Meilisearch
98
+ 2. SERVE FastAPI file server makes documents available to the agent via HTTP
99
+ 3. SEARCH LangGraph agent loop: search_local_index → fetch_local_url → reason
100
+ 4. ANSWER Agent returns an answer with inline source citations
101
+ ```
102
+
103
+ Everything runs locally. Meilisearch downloads automatically on first use, no manual setup.
104
+
105
+ ---
106
+
107
+ ## Screenshots
108
+
109
+ ### Desktop UI
110
+ ![Local Search Agent UI](https://raw.githubusercontent.com/wiss84/local-search-agent/main/docs/assets/local_search_agent_ui.webp)
111
+
112
+ ### CLI Interactive Mode
113
+ ![Local Search Agent CLI](https://raw.githubusercontent.com/wiss84/local-search-agent/main/docs/assets/local_search_agent_cli.webp)
114
+
115
+ ### Python API
116
+ ![Local Search Agent Python API](https://raw.githubusercontent.com/wiss84/local-search-agent/main/docs/assets/local_search_agent_api.webp)
117
+
118
+ ---
119
+
120
+ ## Install
121
+
122
+ ```bash
123
+ pip install local-search-agent
124
+ ```
125
+
126
+ ## Set your API key
127
+
128
+ ```bash
129
+ # Google AI Studio (free tier — recommended); paid OpenAI or Anthropic keys also work
130
+ local-search config set-key --provider google --key YOUR_KEY
131
+
132
+ # Or use Ollama for a fully local, zero-cost setup (no key needed)
133
+ # Install from https://ollama.com
134
+ # Download any model that supports function calling and system instructions:
135
+ ollama pull gemma4:e2b  # 7.2GB (or: ollama pull gemma4:e4b, 9.6GB)
136
+ ```
137
+
138
+ ---
139
+
140
+ ## Quick Start
141
+
142
+ ### Desktop UI
143
+
144
+ ```bash
145
+ local-search ui
146
+ ```
147
+
148
+ The desktop window opens. Create a workspace, point it at a folder, ingest, and start asking questions.
149
+
150
+ ### CLI
151
+
152
+ ```bash
153
+ # Create a workspace and ingest documents
154
+ local-search workspace create finance "C:\my_docs"
155
+ local-search ingest --workspace finance --dirs "C:\my_docs"
156
+
157
+ # Start the file server (keep this running)
158
+ local-search serve --workspace finance
159
+
160
+ # Ask a question
161
+ local-search query "What was the AWS spend in Q3?" --workspace finance --provider google
162
+
163
+ # Use interactive mode
164
+ local-search --workspace finance --provider google
165
+ ```
166
+
167
+ ### Python API
168
+
169
+ ```python
170
+ from local_search_agent import SearchAgentFramework, SearchAgentConfig
171
+
172
+ config = SearchAgentConfig(
173
+ document_dirs=["C:/my_docs"],
174
+ workspace_name="finance",
175
+ provider="google",
176
+ )
177
+
178
+ framework = SearchAgentFramework(config)
179
+ framework.ingest_and_index()
180
+ framework.start_file_server()
181
+
182
+ response = framework.query("What was the AWS spend in Q3?")
183
+ print(response["answer"])
184
+ ```
185
+
186
+ ---
187
+
188
+ ## Supported File Types
189
+
190
+ | Format | Extension |
191
+ |--------|-----------|
192
+ | PDF | `.pdf` |
193
+ | Word | `.docx` |
194
+ | Excel | `.xlsx` |
195
+ | PowerPoint | `.pptx` |
196
+ | HTML | `.html`, `.htm` |
197
+ | Plain text | `.txt`, `.md` |
198
+ | CSV | `.csv` |
199
+ | JSON | `.json` |
200
+ | XML | `.xml` |
201
+ | Email | `.eml` |
202
+
203
+ ---
204
+
205
+ ## Key Features
206
+
207
+ - **One command install** — `pip install local-search-agent`. Meilisearch downloads automatically
208
+ - **No embeddings, no vector stores** — BM25 search with structured metadata. Fast, deterministic, auditable
209
+ - **Native desktop UI** — pywebview window with live streaming agent responses, workspace management, and chat history
210
+ - **Multi-provider LLM** — Google, Ollama (local), OpenAI, Anthropic
211
+ - **Multi-workspace** — isolate document collections by department, project, channel, or topic. Each workspace is its own search index
212
+ - **Incremental sync** — background scheduler re-indexes only changed files. A 10,000-document corpus with 50 changes re-indexes only the 50
213
+ - **Full CLI parity** — everything you can do in the UI you can do from the terminal
214
+ - **Python API** — embed the framework directly in your own application
215
+ - **Cross-platform** — Windows, macOS, Linux
216
+
217
+ ---
218
+
219
+ ## Documentation
220
+
221
+ | Guide | Description |
222
+ |-------|-------------|
223
+ | [Getting Started](docs/getting-started.md) | First steps, quick start for UI, CLI, and Python API |
224
+ | [Installation](docs/installation.md) | Full install guide, API keys, Ollama setup, platform notes |
225
+ | [Architecture](docs/architecture.md) | Full architecture and design guide |
226
+ | [CLI Reference](docs/cli-reference.md) | All commands and flags |
227
+ | [Python API Reference](docs/api-reference.md) | Full API documentation |
228
+ | [Configuration](docs/configuration.md) | All config options and patterns |
229
+ | [Ingestion](docs/ingestion.md) | How ingestion works, supported formats, chunking, scheduler |
230
+ | [Multi-Workspace](docs/multi-workspace.md) | Managing multiple document collections |
231
+ | [Semantic Search](docs/semantic-search.md) | Experimental: concept extraction, query expansion, link graph |
232
+ | [Troubleshooting](docs/troubleshooting.md) | Common issues and fixes |
233
+
234
+ ---
235
+
236
+ ## Contributing
237
+
238
+ Contributions are welcome. Clone the repo and install in editable mode with dev dependencies:
239
+
240
+ ```bash
241
+ git clone https://github.com/wiss84/local-search-agent
242
+ cd local-search-agent
243
+ pip install -e ".[dev]"
244
+ ```
245
+
246
+ Run tests before submitting a PR:
247
+
248
+ ```bash
249
+ pytest tests/
250
+ ruff check .
251
+ ```
252
+
253
+ ---
254
+
255
+ ## License
256
+
257
+ MIT — see [LICENSE](LICENSE) for details.
258
+
259
+ ---
260
+
261
+ Built by [Wissam Metawee](https://github.com/wiss84)
@@ -0,0 +1,203 @@
1
+ # Local Search Agent
2
+
3
+ **Give your AI agent a search engine for your local files.**
4
+
5
+ ---
6
+
7
+ ## What is this?
8
+
9
+ Local Search Agent is a Python framework that gives your AI agent a search engine for your local files and lets it search, fetch, and reason over your local documents — the same way a researcher searches the web, but entirely on your machine.
10
+
11
+ Point it at a folder. Ask a question. The agent searches your documents, reads the relevant ones, and gives you an answer with citations — no cloud upload, no API calls to external search services, no embeddings, no vector stores.
12
+
13
+ ```
14
+ "What was the AWS spend in Q3?" → agent searches index → fetches relevant docs → answers with sources
15
+ ```
16
+
17
+ ---
18
+
19
+
20
+ ## Why not RAG?
21
+
22
+ Traditional RAG (Retrieval-Augmented Generation) has a fundamental problem: it converts your documents into embeddings and stores them in a vector database. That means:
23
+
24
+ - **Stale indexes** — embeddings go out of date silently. You never know if the agent is reading your latest documents or a six-month-old snapshot
25
+ - **Black-box retrieval** — you can't see why a document was retrieved or not. Debugging poor answers is guesswork
26
+ - **Chunking anxiety** — split too small and you lose context. Split too large and retrieval quality degrades. There's no right answer
27
+ - **Infrastructure overhead** — a vector database is another service to run, maintain, and pay for
28
+ - **Semantic drift** — embeddings are sensitive to how questions are phrased. A question about "cloud expenditure" may never match a document that says "AWS spend"
29
+
30
+ Local Search Agent takes a different approach: **BM25 keyword search via Meilisearch, structured metadata, and a LangGraph agent loop with tools**. The agent searches your document index the same way a developer searches Stack Overflow — with real queries, real results, and full transparency into what was retrieved and why.
31
+
32
+ The result is deterministic, auditable, and fast. You can see exactly what the agent fetched for every answer.
33
+
34
+ ---
35
+
36
+ ## How it works
37
+
38
+ ```
39
+ 1. INGEST Your documents → parsed, cleaned, chunked, indexed into Meilisearch
40
+ 2. SERVE FastAPI file server makes documents available to the agent via HTTP
41
+ 3. SEARCH LangGraph agent loop: search_local_index → fetch_local_url → reason
42
+ 4. ANSWER Agent returns an answer with inline source citations
43
+ ```
44
+
45
+ Everything runs locally. Meilisearch downloads automatically on first use, no manual setup.
46
+
47
+ ---
48
+
49
+ ## Screenshots
50
+
51
+ ### Desktop UI
52
+ ![Local Search Agent UI](https://raw.githubusercontent.com/wiss84/local-search-agent/main/docs/assets/local_search_agent_ui.webp)
53
+
54
+ ### CLI Interactive Mode
55
+ ![Local Search Agent CLI](https://raw.githubusercontent.com/wiss84/local-search-agent/main/docs/assets/local_search_agent_cli.webp)
56
+
57
+ ### Python API
58
+ ![Local Search Agent Python API](https://raw.githubusercontent.com/wiss84/local-search-agent/main/docs/assets/local_search_agent_api.webp)
59
+
60
+ ---
61
+
62
+ ## Install
63
+
64
+ ```bash
65
+ pip install local-search-agent
66
+ ```
67
+
68
+ ## Set your API key
69
+
70
+ ```bash
71
+ # Google AI Studio (free tier — recommended); paid OpenAI or Anthropic keys also work
72
+ local-search config set-key --provider google --key YOUR_KEY
73
+
74
+ # Or use Ollama for a fully local, zero-cost setup (no key needed)
75
+ # Install from https://ollama.com
76
+ # Download any model that supports function calling and system instructions:
77
+ ollama pull gemma4:e2b  # 7.2GB (or: ollama pull gemma4:e4b, 9.6GB)
78
+ ```
79
+
80
+ ---
81
+
82
+ ## Quick Start
83
+
84
+ ### Desktop UI
85
+
86
+ ```bash
87
+ local-search ui
88
+ ```
89
+
90
+ The desktop window opens. Create a workspace, point it at a folder, ingest, and start asking questions.
91
+
92
+ ### CLI
93
+
94
+ ```bash
95
+ # Create a workspace and ingest documents
96
+ local-search workspace create finance "C:\my_docs"
97
+ local-search ingest --workspace finance --dirs "C:\my_docs"
98
+
99
+ # Start the file server (keep this running)
100
+ local-search serve --workspace finance
101
+
102
+ # Ask a question
103
+ local-search query "What was the AWS spend in Q3?" --workspace finance --provider google
104
+
105
+ # Use interactive mode
106
+ local-search --workspace finance --provider google
107
+ ```
108
+
109
+ ### Python API
110
+
111
+ ```python
112
+ from local_search_agent import SearchAgentFramework, SearchAgentConfig
113
+
114
+ config = SearchAgentConfig(
115
+ document_dirs=["C:/my_docs"],
116
+ workspace_name="finance",
117
+ provider="google",
118
+ )
119
+
120
+ framework = SearchAgentFramework(config)
121
+ framework.ingest_and_index()
122
+ framework.start_file_server()
123
+
124
+ response = framework.query("What was the AWS spend in Q3?")
125
+ print(response["answer"])
126
+ ```
127
+
128
+ ---
129
+
130
+ ## Supported File Types
131
+
132
+ | Format | Extension |
133
+ |--------|-----------|
134
+ | PDF | `.pdf` |
135
+ | Word | `.docx` |
136
+ | Excel | `.xlsx` |
137
+ | PowerPoint | `.pptx` |
138
+ | HTML | `.html`, `.htm` |
139
+ | Plain text | `.txt`, `.md` |
140
+ | CSV | `.csv` |
141
+ | JSON | `.json` |
142
+ | XML | `.xml` |
143
+ | Email | `.eml` |
144
+
145
+ ---
146
+
147
+ ## Key Features
148
+
149
+ - **One command install** — `pip install local-search-agent`. Meilisearch downloads automatically
150
+ - **No embeddings, no vector stores** — BM25 search with structured metadata. Fast, deterministic, auditable
151
+ - **Native desktop UI** — pywebview window with live streaming agent responses, workspace management, and chat history
152
+ - **Multi-provider LLM** — Google, Ollama (local), OpenAI, Anthropic
153
+ - **Multi-workspace** — isolate document collections by department, project, channel, or topic. Each workspace is its own search index
154
+ - **Incremental sync** — background scheduler re-indexes only changed files. A 10,000-document corpus with 50 changes re-indexes only the 50
155
+ - **Full CLI parity** — everything you can do in the UI you can do from the terminal
156
+ - **Python API** — embed the framework directly in your own application
157
+ - **Cross-platform** — Windows, macOS, Linux
158
+
159
+ ---
160
+
161
+ ## Documentation
162
+
163
+ | Guide | Description |
164
+ |-------|-------------|
165
+ | [Getting Started](docs/getting-started.md) | First steps, quick start for UI, CLI, and Python API |
166
+ | [Installation](docs/installation.md) | Full install guide, API keys, Ollama setup, platform notes |
167
+ | [Architecture](docs/architecture.md) | Full architecture and design guide |
168
+ | [CLI Reference](docs/cli-reference.md) | All commands and flags |
169
+ | [Python API Reference](docs/api-reference.md) | Full API documentation |
170
+ | [Configuration](docs/configuration.md) | All config options and patterns |
171
+ | [Ingestion](docs/ingestion.md) | How ingestion works, supported formats, chunking, scheduler |
172
+ | [Multi-Workspace](docs/multi-workspace.md) | Managing multiple document collections |
173
+ | [Semantic Search](docs/semantic-search.md) | Experimental: concept extraction, query expansion, link graph |
174
+ | [Troubleshooting](docs/troubleshooting.md) | Common issues and fixes |
175
+
176
+ ---
177
+
178
+ ## Contributing
179
+
180
+ Contributions are welcome. Clone the repo and install in editable mode with dev dependencies:
181
+
182
+ ```bash
183
+ git clone https://github.com/wiss84/local-search-agent
184
+ cd local-search-agent
185
+ pip install -e ".[dev]"
186
+ ```
187
+
188
+ Run tests before submitting a PR:
189
+
190
+ ```bash
191
+ pytest tests/
192
+ ruff check .
193
+ ```
194
+
195
+ ---
196
+
197
+ ## License
198
+
199
+ MIT — see [LICENSE](LICENSE) for details.
200
+
201
+ ---
202
+
203
+ Built by [Wissam Metawee](https://github.com/wiss84)
@@ -0,0 +1,62 @@
1
+ """
2
+ local_search_agent
3
+ ==================
4
+ An open-source, pip-installable Python framework that replaces vector-based RAG
5
+ with a deterministic, auditable local search system.
6
+
7
+ Phases
8
+ ------
9
+ Phase 1: File server, DocumentNode, WorkspaceManager
10
+ Phase 2: Ingestion pipeline (PDF/DOCX/HTML/XLSX), text cleaner, Meilisearch indexing
11
+ Phase 3: LangGraph agent loop, multi-provider LLM, search + fetch tools
12
+ Phase 4: Multi-workspace isolation, APScheduler incremental sync, IndexMonitor
13
+ Phase 5: Semantic search (ConceptCompiler + StructuralParser + QueryExpander),
14
+ LinkGraph cross-document relationships, Windows/LDAP access control
15
+ """
16
+
17
+ from local_search_agent.agent.agent import LocalSearchAgent
18
+ from local_search_agent.core.config import SearchAgentConfig
19
+ from local_search_agent.core.document_node import DocumentNode
20
+ from local_search_agent.core.framework import SearchAgentFramework
21
+ from local_search_agent.ingestion.pipeline import IngestionPipeline, IngestStats
22
+ from local_search_agent.scheduler.incremental_sync import IncrementalSyncScheduler
23
+ from local_search_agent.scheduler.monitor import IndexHealthSummary, IndexMonitor
24
+ from local_search_agent.search.meilisearch_client import MeilisearchClient
25
+ from local_search_agent.search.query_builder import QueryBuilder
26
+ from local_search_agent.semantic.concept_compiler import ConceptCompiler, ConceptMetadata
27
+ from local_search_agent.semantic.enricher import SemanticEnricher
28
+ from local_search_agent.semantic.link_graph import LinkGraph
29
+ from local_search_agent.semantic.query_expander import QueryExpander
30
+ from local_search_agent.semantic.structural_parser import StructuralMetadata, StructuralParser
31
+ from local_search_agent.workspace.metadata_db import MetadataDB
32
+ from local_search_agent.workspace.workspace_manager import WorkspaceManager
33
+
34
+ __all__ = [
35
+ # Core
36
+ "SearchAgentFramework",
37
+ "SearchAgentConfig",
38
+ "DocumentNode",
39
+ # Ingestion
40
+ "IngestionPipeline",
41
+ "IngestStats",
42
+ # Search
43
+ "MeilisearchClient",
44
+ "QueryBuilder",
45
+ # Agent
46
+ "LocalSearchAgent",
47
+ # Scheduler
48
+ "IncrementalSyncScheduler",
49
+ "IndexMonitor",
50
+ "IndexHealthSummary",
51
+ # Workspace
52
+ "WorkspaceManager",
53
+ "MetadataDB",
54
+ # Semantic (Phase 5)
55
+ "ConceptCompiler",
56
+ "ConceptMetadata",
57
+ "StructuralParser",
58
+ "StructuralMetadata",
59
+ "QueryExpander",
60
+ "LinkGraph",
61
+ "SemanticEnricher",
62
+ ]
@@ -0,0 +1,7 @@
1
+ """local_search_agent.agent — public re-exports."""
2
+
3
+ from local_search_agent.agent.agent import AgentState, LocalSearchAgent
4
+ from local_search_agent.agent.prompts import build_system_prompt
5
+ from local_search_agent.agent.provider_factory import build_llm
6
+
7
+ __all__ = ["LocalSearchAgent", "AgentState", "build_llm", "build_system_prompt"]