sirchmunk 0.0.0__tar.gz → 0.0.1.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. sirchmunk-0.0.1.post1/PKG-INFO +483 -0
  2. sirchmunk-0.0.1.post1/README.md +393 -0
  3. {sirchmunk-0.0.0 → sirchmunk-0.0.1.post1}/pyproject.toml +18 -38
  4. sirchmunk-0.0.1.post1/requirements/core.txt +15 -0
  5. sirchmunk-0.0.1.post1/requirements/docs.txt +10 -0
  6. sirchmunk-0.0.1.post1/requirements/tests.txt +2 -0
  7. sirchmunk-0.0.1.post1/requirements/web.txt +19 -0
  8. sirchmunk-0.0.1.post1/setup.py +5 -0
  9. sirchmunk-0.0.1.post1/src/sirchmunk/__init__.py +8 -0
  10. sirchmunk-0.0.1.post1/src/sirchmunk/base.py +17 -0
  11. sirchmunk-0.0.1.post1/src/sirchmunk/insight/__init__.py +4 -0
  12. sirchmunk-0.0.1.post1/src/sirchmunk/insight/text_insights.py +292 -0
  13. sirchmunk-0.0.1.post1/src/sirchmunk/learnings/__init__.py +1 -0
  14. sirchmunk-0.0.1.post1/src/sirchmunk/learnings/evidence_processor.py +525 -0
  15. sirchmunk-0.0.1.post1/src/sirchmunk/learnings/knowledge_base.py +232 -0
  16. sirchmunk-0.0.1.post1/src/sirchmunk/llm/__init__.py +2 -0
  17. sirchmunk-0.0.1.post1/src/sirchmunk/llm/openai_chat.py +247 -0
  18. sirchmunk-0.0.1.post1/src/sirchmunk/llm/prompts.py +216 -0
  19. sirchmunk-0.0.1.post1/src/sirchmunk/retrieve/__init__.py +1 -0
  20. sirchmunk-0.0.1.post1/src/sirchmunk/retrieve/base.py +25 -0
  21. sirchmunk-0.0.1.post1/src/sirchmunk/retrieve/text_retriever.py +1026 -0
  22. sirchmunk-0.0.1.post1/src/sirchmunk/scan/__init__.py +1 -0
  23. sirchmunk-0.0.1.post1/src/sirchmunk/scan/base.py +18 -0
  24. sirchmunk-0.0.1.post1/src/sirchmunk/scan/file_scanner.py +373 -0
  25. sirchmunk-0.0.1.post1/src/sirchmunk/scan/web_scanner.py +18 -0
  26. sirchmunk-0.0.1.post1/src/sirchmunk/schema/__init__.py +2 -0
  27. sirchmunk-0.0.1.post1/src/sirchmunk/schema/cognition.py +106 -0
  28. sirchmunk-0.0.1.post1/src/sirchmunk/schema/context.py +25 -0
  29. sirchmunk-0.0.1.post1/src/sirchmunk/schema/knowledge.py +318 -0
  30. sirchmunk-0.0.1.post1/src/sirchmunk/schema/metadata.py +658 -0
  31. sirchmunk-0.0.1.post1/src/sirchmunk/schema/request.py +221 -0
  32. sirchmunk-0.0.1.post1/src/sirchmunk/schema/response.py +20 -0
  33. sirchmunk-0.0.1.post1/src/sirchmunk/schema/snapshot.py +346 -0
  34. sirchmunk-0.0.1.post1/src/sirchmunk/search.py +475 -0
  35. sirchmunk-0.0.1.post1/src/sirchmunk/storage/__init__.py +7 -0
  36. sirchmunk-0.0.1.post1/src/sirchmunk/storage/duckdb.py +676 -0
  37. sirchmunk-0.0.1.post1/src/sirchmunk/storage/knowledge_manager.py +720 -0
  38. sirchmunk-0.0.1.post1/src/sirchmunk/utils/__init__.py +15 -0
  39. sirchmunk-0.0.1.post1/src/sirchmunk/utils/constants.py +15 -0
  40. sirchmunk-0.0.1.post1/src/sirchmunk/utils/deps.py +23 -0
  41. sirchmunk-0.0.1.post1/src/sirchmunk/utils/file_utils.py +70 -0
  42. sirchmunk-0.0.1.post1/src/sirchmunk/utils/install_rga.py +124 -0
  43. sirchmunk-0.0.1.post1/src/sirchmunk/utils/log_utils.py +360 -0
  44. sirchmunk-0.0.1.post1/src/sirchmunk/utils/tokenizer_util.py +55 -0
  45. sirchmunk-0.0.1.post1/src/sirchmunk/utils/utils.py +108 -0
  46. sirchmunk-0.0.1.post1/src/sirchmunk/version.py +1 -0
  47. sirchmunk-0.0.1.post1/src/sirchmunk.egg-info/PKG-INFO +483 -0
  48. sirchmunk-0.0.1.post1/src/sirchmunk.egg-info/SOURCES.txt +53 -0
  49. sirchmunk-0.0.1.post1/src/sirchmunk.egg-info/requires.txt +71 -0
  50. sirchmunk-0.0.0/PKG-INFO +0 -26
  51. sirchmunk-0.0.0/README.md +0 -1
  52. sirchmunk-0.0.0/requirements/docs.txt +0 -0
  53. sirchmunk-0.0.0/requirements/framework.txt +0 -0
  54. sirchmunk-0.0.0/requirements/tests.txt +0 -0
  55. sirchmunk-0.0.0/sirchmunk/version.py +0 -1
  56. sirchmunk-0.0.0/sirchmunk.egg-info/PKG-INFO +0 -26
  57. sirchmunk-0.0.0/sirchmunk.egg-info/SOURCES.txt +0 -14
  58. sirchmunk-0.0.0/sirchmunk.egg-info/requires.txt +0 -4
  59. {sirchmunk-0.0.0 → sirchmunk-0.0.1.post1}/LICENSE +0 -0
  60. {sirchmunk-0.0.0 → sirchmunk-0.0.1.post1}/setup.cfg +0 -0
  61. {sirchmunk-0.0.0/sirchmunk → sirchmunk-0.0.1.post1/src/sirchmunk/scheduler}/__init__.py +0 -0
  62. {sirchmunk-0.0.0 → sirchmunk-0.0.1.post1/src}/sirchmunk.egg-info/dependency_links.txt +0 -0
  63. {sirchmunk-0.0.0 → sirchmunk-0.0.1.post1/src}/sirchmunk.egg-info/entry_points.txt +0 -0
  64. {sirchmunk-0.0.0 → sirchmunk-0.0.1.post1/src}/sirchmunk.egg-info/top_level.txt +0 -0
@@ -0,0 +1,483 @@
1
+ Metadata-Version: 2.4
2
+ Name: sirchmunk
3
+ Version: 0.0.1.post1
4
+ Summary: Sirchmunk: From raw data to self-evolving real-time intelligence.
5
+ Author: ModelScope Team
6
+ Author-email: contact@modelscope.cn
7
+ License: Apache License 2.0
8
+ Project-URL: Homepage, https://github.com/modelscope/sirchmunk
9
+ Keywords: LLM,Agentic Search,Embedding-Free,RAG,Indexless,Self-evolving,Real-time Intelligence,Multi-modal
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Classifier: License :: OSI Approved :: Apache Software License
19
+ Requires-Python: >=3.10
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: loguru
23
+ Requires-Dist: fastapi
24
+ Requires-Dist: openai
25
+ Requires-Dist: genson
26
+ Requires-Dist: pillow
27
+ Requires-Dist: pypdf
28
+ Requires-Dist: pandas
29
+ Requires-Dist: parquet
30
+ Requires-Dist: numpy
31
+ Requires-Dist: msgpack
32
+ Requires-Dist: sentencepiece
33
+ Requires-Dist: tqdm
34
+ Requires-Dist: rapidfuzz
35
+ Requires-Dist: duckdb
36
+ Requires-Dist: kreuzberg>=4.0.0rc1
37
+ Provides-Extra: web
38
+ Requires-Dist: fastapi>=0.100.0; extra == "web"
39
+ Requires-Dist: uvicorn[standard]>=0.24.0; extra == "web"
40
+ Requires-Dist: websockets>=12.0; extra == "web"
41
+ Requires-Dist: python-multipart>=0.0.6; extra == "web"
42
+ Requires-Dist: pydantic>=2.0.0; extra == "web"
43
+ Requires-Dist: requests>=2.32.2; extra == "web"
44
+ Requires-Dist: aiohttp>=3.9.4; extra == "web"
45
+ Requires-Dist: httpx>=0.27.0; extra == "web"
46
+ Requires-Dist: urllib3>=2.2.1; extra == "web"
47
+ Requires-Dist: pydantic>=2.0; extra == "web"
48
+ Requires-Dist: python-dotenv>=1.0.0; extra == "web"
49
+ Requires-Dist: psutil; extra == "web"
50
+ Provides-Extra: docs
51
+ Requires-Dist: docutils>=0.16.0; extra == "docs"
52
+ Requires-Dist: myst_parser; extra == "docs"
53
+ Requires-Dist: recommonmark; extra == "docs"
54
+ Requires-Dist: sphinx>=5.3.0; extra == "docs"
55
+ Requires-Dist: sphinx-book-theme; extra == "docs"
56
+ Requires-Dist: sphinx-copybutton; extra == "docs"
57
+ Requires-Dist: sphinx-design; extra == "docs"
58
+ Requires-Dist: sphinx_markdown_tables; extra == "docs"
59
+ Requires-Dist: sphinxawesome-theme; extra == "docs"
60
+ Requires-Dist: sphinxcontrib-mermaid; extra == "docs"
61
+ Provides-Extra: tests
62
+ Requires-Dist: pytest; extra == "tests"
63
+ Requires-Dist: pytest-asyncio; extra == "tests"
64
+ Provides-Extra: all
65
+ Requires-Dist: fastapi>=0.100.0; extra == "all"
66
+ Requires-Dist: uvicorn[standard]>=0.24.0; extra == "all"
67
+ Requires-Dist: websockets>=12.0; extra == "all"
68
+ Requires-Dist: python-multipart>=0.0.6; extra == "all"
69
+ Requires-Dist: pydantic>=2.0.0; extra == "all"
70
+ Requires-Dist: requests>=2.32.2; extra == "all"
71
+ Requires-Dist: aiohttp>=3.9.4; extra == "all"
72
+ Requires-Dist: httpx>=0.27.0; extra == "all"
73
+ Requires-Dist: urllib3>=2.2.1; extra == "all"
74
+ Requires-Dist: pydantic>=2.0; extra == "all"
75
+ Requires-Dist: python-dotenv>=1.0.0; extra == "all"
76
+ Requires-Dist: psutil; extra == "all"
77
+ Requires-Dist: docutils>=0.16.0; extra == "all"
78
+ Requires-Dist: myst_parser; extra == "all"
79
+ Requires-Dist: recommonmark; extra == "all"
80
+ Requires-Dist: sphinx>=5.3.0; extra == "all"
81
+ Requires-Dist: sphinx-book-theme; extra == "all"
82
+ Requires-Dist: sphinx-copybutton; extra == "all"
83
+ Requires-Dist: sphinx-design; extra == "all"
84
+ Requires-Dist: sphinx_markdown_tables; extra == "all"
85
+ Requires-Dist: sphinxawesome-theme; extra == "all"
86
+ Requires-Dist: sphinxcontrib-mermaid; extra == "all"
87
+ Requires-Dist: pytest; extra == "all"
88
+ Requires-Dist: pytest-asyncio; extra == "all"
89
+ Dynamic: license-file
90
+
91
+ <div align="center">
92
+
93
+ <img src="web/public/logo-v2.png" alt="Sirchmunk Logo" width="250" style="border-radius: 15px;">
94
+
95
+ # Sirchmunk: Raw data to self-evolving intelligence, real-time.
96
+
97
+ [![Python](https://img.shields.io/badge/Python-3.10%2B-3776AB?style=flat-square&logo=python&logoColor=white)](https://www.python.org/downloads/)
98
+ [![FastAPI](https://img.shields.io/badge/FastAPI-0.100%2B-009688?style=flat-square&logo=fastapi&logoColor=white)](https://fastapi.tiangolo.com/)
99
+ [![Next.js](https://img.shields.io/badge/Next.js-14-000000?style=flat-square&logo=next.js&logoColor=white)](https://nextjs.org/)
100
+ [![TailwindCSS](https://img.shields.io/badge/Tailwind-3.4-06B6D4?style=flat-square&logo=tailwindcss&logoColor=white)](https://tailwindcss.com/)
101
+ [![DuckDB](https://img.shields.io/badge/DuckDB-OLAP-FFF000?style=flat-square&logo=duckdb&logoColor=black)](https://duckdb.org/)
102
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue?style=flat-square)](LICENSE)
103
+ [![ripgrep-all](https://img.shields.io/badge/ripgrep--all-Search-E67E22?style=flat-square&logo=rust&logoColor=white)](https://github.com/phiresky/ripgrep-all)
104
+ [![OpenAI](https://img.shields.io/badge/OpenAI-API-412991?style=flat-square&logo=openai&logoColor=white)](https://github.com/openai/openai-python)
105
+ [![Kreuzberg](https://img.shields.io/badge/Kreuzberg-Text_Extraction-4CAF50?style=flat-square)](https://github.com/kreuzberg-dev/kreuzberg)
106
+
107
+
108
+ [**Quick Start**](#-quick-start) · [**Key Features**](#-key-features) · [**Web UI**](#-web-ui) · [**How it Works**](#-how-it-works) · [**FAQ**](#-faq)
109
+
110
+ [🇨🇳 中文](README_zh.md)
111
+
112
+ </div>
113
+
114
+ <div align="center">
115
+
116
+ 🔍 **Agentic Search** &nbsp;•&nbsp; 🧠 **Knowledge Clustering** &nbsp;•&nbsp; 📊 **Monte Carlo Evidence Sampling**<br>
117
+ ⚡ **Indexless Retrieval** &nbsp;•&nbsp; 🔄 **Self-Evolving Knowledge Base** &nbsp;•&nbsp; 💬 **Real-time Chat**
118
+
119
+ </div>
120
+
121
+ ---
122
+
123
+ ## 🌰 Why “Sirchmunk”?
124
+
125
+ Intelligence pipelines built upon vector-based retrieval can be _rigid and brittle_. They rely on static vector embeddings that are **expensive to compute, blind to real-time changes, and detached from the raw context**. We introduce **Sirchmunk** to usher in a more agile paradigm, where data is no longer treated as a snapshot, and insights can evolve together with the data.
126
+
127
+ ---
128
+
129
+ ## ✨ Key Features
130
+
131
+ ### 1. Embedding-Free: Data in its Purest Form
132
+
133
+ **Sirchmunk** works directly with **raw data** -- bypassing the heavy overhead of squeezing your rich files into fixed-dimensional vectors.
134
+
135
+ * **Instant Search:** Eliminates complex pre-processing pipelines and hours-long indexing; just drop your files and search immediately.
136
+ * **Full Fidelity:** Zero information loss — stay true to your data without vector approximation.
137
+
138
+ ### 2. Self-Evolving: A Living Index
139
+
140
+ Data is a stream, not a snapshot. **Sirchmunk** is **dynamic by design**, whereas a vector DB can become obsolete the moment your data changes.
141
+
142
+ * **Context-Aware:** Evolves in real-time with your data context.
143
+ * **LLM-Powered Autonomy:** Designed for Agents that perceive data as it lives, utilizing **token-efficient** reasoning that triggers LLM inference only when necessary to maximize intelligence while minimizing cost.
144
+
145
+ ### 3. Intelligence at Scale: Real-Time & Massive
146
+
147
+ **Sirchmunk** bridges massive local repositories and the web with **high-scale throughput** and **real-time awareness**. <br/>
148
+ It serves as a unified intelligent hub for AI agents, delivering deep insights across vast datasets at the speed of thought.
149
+
150
+ ---
151
+
152
+ ### Traditional RAG vs. Sirchmunk
153
+
154
+ <div style="display: flex; justify-content: center; width: 100%;">
155
+ <table style="width: 100%; max-width: 900px; border-collapse: separate; border-spacing: 0; overflow: hidden; border-radius: 12px; font-family: sans-serif; border: 1px solid rgba(128, 128, 128, 0.2); margin: 0 auto;">
156
+ <colgroup>
157
+ <col style="width: 25%;">
158
+ <col style="width: 30%;">
159
+ <col style="width: 45%;">
160
+ </colgroup>
161
+ <thead>
162
+ <tr style="background-color: rgba(128, 128, 128, 0.05);">
163
+ <th style="text-align: left; padding: 16px; border-bottom: 2px solid rgba(128, 128, 128, 0.2); font-size: 1.3em;">Dimension</th>
164
+ <th style="text-align: left; padding: 16px; border-bottom: 2px solid rgba(128, 128, 128, 0.2); font-size: 1.3em; opacity: 0.7;">Traditional RAG</th>
165
+ <th style="text-align: left; padding: 16px; border-bottom: 2px solid rgba(58, 134, 255, 0.5); color: #3a86ff; font-weight: 800; font-size: 1.3em;">✨Sirchmunk</th>
166
+ </tr>
167
+ </thead>
168
+ <tbody>
169
+ <tr>
170
+ <td style="padding: 16px; font-weight: 600; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">💰 Setup Cost</td>
171
+ <td style="padding: 16px; opacity: 0.6; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">High Overhead <br/> (VectorDB, GraphDB, Complex Document Parser...)</td>
172
+ <td style="padding: 16px; background-color: rgba(58, 134, 255, 0.08); color: #4895ef; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">
173
+ ✅ Zero Infrastructure <br/>
174
+ <small style="opacity: 0.8; font-size: 0.85em;">Direct-to-data retrieval without vector silos</small>
175
+ </td>
176
+ </tr>
177
+ <tr>
178
+ <td style="padding: 16px; font-weight: 600; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">🕒 Data Freshness</td>
179
+ <td style="padding: 16px; opacity: 0.6; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">Stale (Batch Re-indexing)</td>
180
+ <td style="padding: 16px; background-color: rgba(58, 134, 255, 0.08); color: #4895ef; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">
181
+ ✅ Instant &amp; Dynamic <br/>
182
+ <small style="opacity: 0.8; font-size: 0.85em;">Self-evolving index that reflects live changes</small>
183
+ </td>
184
+ </tr>
185
+ <tr>
186
+ <td style="padding: 16px; font-weight: 600; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">📈 Scalability</td>
187
+ <td style="padding: 16px; opacity: 0.6; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">Linear Cost Growth</td>
188
+ <td style="padding: 16px; background-color: rgba(58, 134, 255, 0.08); color: #4895ef; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">
189
+ ✅ Extremely low RAM/CPU consumption <br/>
190
+ <small style="opacity: 0.8; font-size: 0.85em;">Native Elastic Support, efficiently handles large-scale datasets</small>
191
+ </td>
192
+ </tr>
193
+ <tr>
194
+ <td style="padding: 16px; font-weight: 600; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">🎯 Accuracy</td>
195
+ <td style="padding: 16px; opacity: 0.6; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">Approximate Vector Matches</td>
196
+ <td style="padding: 16px; background-color: rgba(58, 134, 255, 0.08); color: #4895ef; border-bottom: 1px solid rgba(128, 128, 128, 0.1);">
197
+ ✅ Deterministic &amp; Contextual <br/>
198
+ <small style="opacity: 0.8; font-size: 0.85em;">Hybrid logic ensuring semantic precision</small>
199
+ </td>
200
+ </tr>
201
+ <tr>
202
+ <td style="padding: 16px; font-weight: 600;">⚙️ Workflow</td>
203
+ <td style="padding: 16px; opacity: 0.6;">Complex ETL Pipelines</td>
204
+ <td style="padding: 16px; background-color: rgba(58, 134, 255, 0.08); color: #4895ef;">
205
+ ✅ Drop-and-Search <br/>
206
+ <small style="opacity: 0.8; font-size: 0.85em;">Zero-config integration for rapid deployment</small>
207
+ </td>
208
+ </tr>
209
+ </tbody>
210
+ </table>
211
+ </div>
212
+
213
+ ---
214
+
215
+
216
+ ## Demonstration
217
+
218
+
219
+ <div align="center">
220
+ <img src="assets/gif/Sirchmunk_Web.gif" alt="Sirchmunk WebUI" width="100%">
221
+ <p style="font-size: 1.1em; font-weight: 600; margin-top: 8px; color: #00bcd4;">
222
+ Access files directly to start chatting
223
+ </p>
224
+ </div>
225
+
226
+ ---
227
+
228
+
229
+ ## 🎉 News
230
+
231
+ * 🎉🎉 Jan 22, 2026: Introducing **Sirchmunk**: Initial Release v0.0.1 Now Available!
232
+
233
+
234
+ ---
235
+
236
+ ## 🚀 Quick Start
237
+
238
+ ### Prerequisites
239
+
240
+ - **Python** 3.10+
241
+ - **LLM API Key** (OpenAI-compatible endpoint, local or remote)
242
+ - **Node.js** 18+ (Optional, for web interface)
243
+
244
+ ### Installation
245
+
246
+ ```bash
247
+ # Create virtual environment (recommended)
248
+ conda create -n sirchmunk python=3.13 -y && conda activate sirchmunk
249
+
250
+ pip install sirchmunk
251
+
252
+ # Or via UV:
253
+ uv pip install sirchmunk
254
+
255
+ # Alternatively, install from source:
256
+ git clone https://github.com/modelscope/sirchmunk.git && cd sirchmunk
257
+ pip install -e .
258
+ ```
259
+
260
+ ### Python SDK Usage
261
+
262
+ ```python
263
+ import asyncio
264
+
265
+ from sirchmunk import AgenticSearch
266
+ from sirchmunk.llm import OpenAIChat
267
+
268
+ llm = OpenAIChat(
269
+ api_key="your-api-key",
270
+ base_url="your-base-url", # e.g., https://api.openai.com/v1
271
+ model="your-model-name" # e.g., gpt-4o
272
+ )
273
+
274
+ async def main():
275
+
276
+ agent_search = AgenticSearch(llm=llm)
277
+
278
+ result: str = await agent_search.search(
279
+ query="How does transformer attention work?",
280
+ search_paths=["/path/to/documents"],
281
+ )
282
+
283
+ print(result)
284
+
285
+ asyncio.run(main())
286
+ ```
287
+
288
+ **⚠️ Notes:**
289
+ - Upon initialization, AgenticSearch automatically checks if ripgrep-all and ripgrep are installed. If they are missing, it will attempt to install them automatically. If the automatic installation fails, please install them manually.
290
+ - References: https://github.com/BurntSushi/ripgrep | https://github.com/phiresky/ripgrep-all
291
+ - Replace `"your-api-key"`, `"your-base-url"`, `"your-model-name"` and `/path/to/documents` with your actual values.
292
+
293
+
294
+ ---
295
+
296
+ ## 🖥️ Web UI
297
+
298
+ The web UI is built for fast, transparent workflows: chat, knowledge analytics, and system monitoring in one place.
299
+
300
+ <div align="center">
301
+ <img src="assets/pic/Sirchmunk_Home.png" alt="Sirchmunk Home" width="85%">
302
+ <p><sub>Home — Chat with streaming logs, file-based RAG, and session management.</sub></p>
303
+ </div>
304
+
305
+ <div align="center">
306
+ <img src="assets/pic/Sirchmunk_Monitor.png" alt="Sirchmunk Monitor" width="85%">
307
+ <p><sub>Monitor — System health, chat activity, knowledge analytics, and LLM usage.</sub></p>
308
+ </div>
309
+
310
+ ### Installation
311
+
312
+ ```bash
313
+ git clone https://github.com/modelscope/sirchmunk.git && cd sirchmunk
314
+
315
+ pip install ".[web]"
316
+
317
+ npm install --prefix web
318
+ ```
319
+ - Note: Node.js 18+ is required for the web interface.
320
+
321
+
322
+ ### Running the Web UI
323
+
324
+ ```bash
325
+ # Start frontend and backend
326
+ python scripts/start_web.py
327
+
328
+ # Stop frontend and backend
329
+ python scripts/stop_web.py
330
+ ```
331
+
332
+ **Access the web UI at (By default):**
333
+ - Backend APIs: http://localhost:8584/docs
334
+ - Frontend: http://localhost:8585
335
+
336
+ **Configuration:**
337
+
338
+ - Access `Settings` → `Environment Variables` to configure the LLM API and other parameters.
339
+
340
+
341
+ ---
342
+
343
+ ## 🏗️ How it Works
344
+
345
+ ### Sirchmunk Framework
346
+
347
+ <div align="center">
348
+ <img src="assets/pic/Sirchmunk_Architecture.png" alt="Sirchmunk Architecture" width="85%">
349
+ </div>
350
+
351
+ ### Core Components
352
+
353
+ | Component | Description |
354
+ |:----------------------|:-------------------------------------------------------------------------|
355
+ | **AgenticSearch** | Search orchestrator with LLM-enhanced retrieval capabilities |
356
+ | **KnowledgeBase**     | Transforms raw results into structured knowledge clusters with evidence  |
357
+ | **EvidenceProcessor** | Evidence processing based on Monte Carlo importance sampling             |
358
+ | **GrepRetriever** | High-performance _indexless_ file search with parallel processing |
359
+ | **OpenAIChat** | Unified LLM interface supporting streaming and usage tracking |
360
+ | **MonitorTracker** | Real-time system and application metrics collection |
361
+
362
+ ---
363
+
364
+
365
+ ### Data Storage
366
+
367
+ All persistent data is stored in the configured `WORK_PATH` (default: `~/.sirchmunk/`):
368
+
369
+ ```
370
+ {WORK_PATH}/
371
+ ├── .cache/
372
+ ├── history/ # Chat session history (DuckDB)
373
+ │ └── chat_history.db
374
+ ├── knowledge/ # Knowledge clusters (Parquet)
375
+ │ └── knowledge_clusters.parquet
376
+ └── settings/ # User settings (DuckDB)
377
+ └── settings.db
378
+
379
+ ```
380
+
381
+ ---
382
+
383
+ ## ❓ FAQ
384
+
385
+ <details>
386
+ <summary><b>How is this different from traditional RAG systems?</b></summary>
387
+
388
+ Sirchmunk takes an **indexless approach**:
389
+
390
+ 1. **No pre-indexing**: Direct file search without vector database setup
391
+ 2. **Self-evolving**: Knowledge clusters evolve based on search patterns
392
+ 3. **Multi-level retrieval**: Adaptive keyword granularity for better recall
393
+ 4. **Evidence-based**: Monte Carlo sampling for precise content extraction
394
+
395
+ </details>
396
+
397
+ <details>
398
+ <summary><b>What LLM providers are supported?</b></summary>
399
+
400
+ Any OpenAI-compatible API endpoint, including (but not limited to):
401
+ - OpenAI (GPT-4, GPT-4o, GPT-3.5)
402
+ - Local models served via Ollama, llama.cpp, vLLM, SGLang etc.
403
+ - Claude via API proxy
404
+
405
+ </details>
406
+
407
+ <details>
408
+ <summary><b>How do I add documents to search?</b></summary>
409
+
410
+ Simply specify the path in your search query:
411
+
412
+ ```python
413
+ result = await search.search(
414
+ query="Your question",
415
+ search_paths=["/path/to/folder", "/path/to/file.pdf"]
416
+ )
417
+ ```
418
+
419
+ No pre-processing or indexing required!
420
+
421
+ </details>
422
+
423
+ <details>
424
+ <summary><b>Where are knowledge clusters stored?</b></summary>
425
+
426
+ Knowledge clusters are persisted in Parquet format at:
427
+ ```
428
+ {WORK_PATH}/.cache/knowledge/knowledge_clusters.parquet
429
+ ```
430
+
431
+ You can query them using DuckDB or the `KnowledgeManager` API.
432
+
433
+ </details>
434
+
435
+ <details>
436
+ <summary><b>How do I monitor LLM token usage?</b></summary>
437
+
438
+ 1. **Web Dashboard**: Visit the Monitor page for real-time statistics
439
+ 2. **API**: `GET /api/v1/monitor/llm` returns usage metrics
440
+ 3. **Code**: Access `search.llm_usages` after search completion
441
+
442
+ </details>
443
+
444
+ ---
445
+
446
+ ## 📋 Roadmap
447
+
448
+ - [x] Text-retrieval from raw files
449
+ - [x] Knowledge structuring & persistence
450
+ - [x] Real-time chat with RAG
451
+ - [x] Web UI support
452
+ - [ ] Web search integration
453
+ - [ ] Multi-modal support (images, videos)
454
+ - [ ] Distributed search across nodes
455
+ - [ ] Knowledge visualization and deep analytics
456
+ - [ ] More file type support
457
+
458
+ ---
459
+
460
+ ## 🤝 Contributing
461
+
462
+ We welcome [contributions](https://github.com/modelscope/sirchmunk/pulls)!
463
+
464
+ ---
465
+
466
+ ## 📄 License
467
+
468
+ This project is licensed under the [Apache License 2.0](LICENSE).
469
+
470
+ ---
471
+
472
+ <div align="center">
473
+
474
+ **[ModelScope](https://github.com/modelscope)** · [⭐ Star us](https://github.com/modelscope/sirchmunk/stargazers) · [🐛 Report a bug](https://github.com/modelscope/sirchmunk/issues) · [💬 Discussions](https://github.com/modelscope/sirchmunk/discussions)
475
+
476
+ *✨ Sirchmunk: Raw data to self-evolving intelligence, real-time.*
477
+
478
+ </div>
479
+
480
+ <p align="center">
481
+ <em> ❤️ Thanks for Visiting ✨ Sirchmunk !</em><br><br>
482
+ <img src="https://visitor-badge.laobi.icu/badge?page_id=modelscope.sirchmunk&style=for-the-badge&color=00d4ff" alt="Views">
483
+ </p>