sirchmunk 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/PKG-INFO +157 -52
  2. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/README.md +128 -8
  3. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/pyproject.toml +14 -29
  4. sirchmunk-0.0.2/requirements/core.txt +17 -0
  5. sirchmunk-0.0.2/requirements/docs.txt +10 -0
  6. sirchmunk-0.0.2/requirements/tests.txt +2 -0
  7. sirchmunk-0.0.2/requirements/web.txt +19 -0
  8. sirchmunk-0.0.2/src/sirchmunk/api/chat.py +1123 -0
  9. sirchmunk-0.0.2/src/sirchmunk/api/components/history_storage.py +402 -0
  10. sirchmunk-0.0.2/src/sirchmunk/api/components/monitor_tracker.py +518 -0
  11. sirchmunk-0.0.2/src/sirchmunk/api/components/settings_storage.py +353 -0
  12. sirchmunk-0.0.2/src/sirchmunk/api/history.py +254 -0
  13. sirchmunk-0.0.2/src/sirchmunk/api/knowledge.py +411 -0
  14. sirchmunk-0.0.2/src/sirchmunk/api/main.py +120 -0
  15. sirchmunk-0.0.2/src/sirchmunk/api/monitor.py +219 -0
  16. sirchmunk-0.0.2/src/sirchmunk/api/run_server.py +54 -0
  17. sirchmunk-0.0.2/src/sirchmunk/api/search.py +230 -0
  18. sirchmunk-0.0.2/src/sirchmunk/api/settings.py +309 -0
  19. sirchmunk-0.0.2/src/sirchmunk/api/tools.py +315 -0
  20. sirchmunk-0.0.2/src/sirchmunk/cli/__init__.py +11 -0
  21. sirchmunk-0.0.2/src/sirchmunk/cli/cli.py +789 -0
  22. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/learnings/knowledge_base.py +5 -2
  23. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/llm/prompts.py +12 -1
  24. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/retrieve/text_retriever.py +186 -2
  25. sirchmunk-0.0.2/src/sirchmunk/scan/__init__.py +1 -0
  26. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/scan/file_scanner.py +2 -2
  27. sirchmunk-0.0.2/src/sirchmunk/scheduler/__init__.py +0 -0
  28. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/schema/knowledge.py +119 -35
  29. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/search.py +384 -26
  30. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/storage/__init__.py +2 -2
  31. sirchmunk-0.0.1/src/sirchmunk/storage/knowledge_manager.py → sirchmunk-0.0.2/src/sirchmunk/storage/knowledge_storage.py +265 -60
  32. sirchmunk-0.0.2/src/sirchmunk/utils/constants.py +17 -0
  33. sirchmunk-0.0.2/src/sirchmunk/utils/embedding_util.py +217 -0
  34. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/utils/tokenizer_util.py +36 -1
  35. sirchmunk-0.0.2/src/sirchmunk/version.py +1 -0
  36. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk.egg-info/PKG-INFO +157 -52
  37. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk.egg-info/SOURCES.txt +30 -4
  38. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk.egg-info/requires.txt +15 -26
  39. sirchmunk-0.0.2/src/sirchmunk.egg-info/top_level.txt +2 -0
  40. sirchmunk-0.0.2/src/sirchmunk_mcp/__init__.py +25 -0
  41. sirchmunk-0.0.2/src/sirchmunk_mcp/cli.py +478 -0
  42. sirchmunk-0.0.2/src/sirchmunk_mcp/config.py +276 -0
  43. sirchmunk-0.0.2/src/sirchmunk_mcp/server.py +355 -0
  44. sirchmunk-0.0.2/src/sirchmunk_mcp/service.py +327 -0
  45. sirchmunk-0.0.2/src/sirchmunk_mcp/setup.py +15 -0
  46. sirchmunk-0.0.2/src/sirchmunk_mcp/tools.py +410 -0
  47. sirchmunk-0.0.1/setup.py +0 -179
  48. sirchmunk-0.0.1/src/sirchmunk/utils/constants.py +0 -15
  49. sirchmunk-0.0.1/src/sirchmunk/version.py +0 -1
  50. sirchmunk-0.0.1/src/sirchmunk.egg-info/not-zip-safe +0 -1
  51. sirchmunk-0.0.1/src/sirchmunk.egg-info/top_level.txt +0 -1
  52. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/LICENSE +0 -0
  53. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/setup.cfg +0 -0
  54. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/__init__.py +0 -0
  55. {sirchmunk-0.0.1/src/sirchmunk/learnings → sirchmunk-0.0.2/src/sirchmunk/api}/__init__.py +0 -0
  56. {sirchmunk-0.0.1/src/sirchmunk/scheduler → sirchmunk-0.0.2/src/sirchmunk/api/components}/__init__.py +0 -0
  57. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/base.py +0 -0
  58. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/insight/__init__.py +0 -0
  59. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/insight/text_insights.py +0 -0
  60. {sirchmunk-0.0.1/src/sirchmunk/retrieve → sirchmunk-0.0.2/src/sirchmunk/learnings}/__init__.py +0 -0
  61. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/learnings/evidence_processor.py +0 -0
  62. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/llm/__init__.py +0 -0
  63. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/llm/openai_chat.py +0 -0
  64. {sirchmunk-0.0.1/src/sirchmunk/scan → sirchmunk-0.0.2/src/sirchmunk/retrieve}/__init__.py +0 -0
  65. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/retrieve/base.py +0 -0
  66. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/scan/base.py +0 -0
  67. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/scan/web_scanner.py +0 -0
  68. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/schema/__init__.py +0 -0
  69. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/schema/cognition.py +0 -0
  70. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/schema/context.py +0 -0
  71. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/schema/metadata.py +0 -0
  72. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/schema/request.py +0 -0
  73. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/schema/response.py +0 -0
  74. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/schema/snapshot.py +0 -0
  75. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/storage/duckdb.py +0 -0
  76. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/utils/__init__.py +0 -0
  77. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/utils/deps.py +0 -0
  78. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/utils/file_utils.py +0 -0
  79. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/utils/install_rga.py +0 -0
  80. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/utils/log_utils.py +0 -0
  81. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk/utils/utils.py +0 -0
  82. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk.egg-info/dependency_links.txt +0 -0
  83. {sirchmunk-0.0.1 → sirchmunk-0.0.2}/src/sirchmunk.egg-info/entry_points.txt +0 -0
@@ -1,13 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sirchmunk
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: Sirchmunk: From raw data to self-evolving real-time intelligence.
5
- Home-page: https://github.com/modelscope/sirchmunk
6
5
  Author: ModelScope Team
7
6
  Author-email: contact@modelscope.cn
8
7
  License: Apache License 2.0
9
8
  Project-URL: Homepage, https://github.com/modelscope/sirchmunk
10
- Keywords: LLM,Agentic,Search,RAG,Indexless,Self-evolving,Real-time Intelligence,Multi-modal
9
+ Keywords: LLM,Agentic Search,Embedding-Free,RAG,Indexless,Self-evolving,Real-time Intelligence,Multi-modal
11
10
  Classifier: Development Status :: 4 - Beta
12
11
  Classifier: Operating System :: OS Independent
13
12
  Classifier: Programming Language :: Python :: 3
@@ -35,6 +34,21 @@ Requires-Dist: tqdm
35
34
  Requires-Dist: rapidfuzz
36
35
  Requires-Dist: duckdb
37
36
  Requires-Dist: kreuzberg>=4.0.0rc1
37
+ Requires-Dist: sentence-transformers
38
+ Requires-Dist: modelscope
39
+ Provides-Extra: web
40
+ Requires-Dist: fastapi>=0.100.0; extra == "web"
41
+ Requires-Dist: uvicorn[standard]>=0.24.0; extra == "web"
42
+ Requires-Dist: websockets>=12.0; extra == "web"
43
+ Requires-Dist: python-multipart>=0.0.6; extra == "web"
44
+ Requires-Dist: pydantic>=2.0.0; extra == "web"
45
+ Requires-Dist: requests>=2.32.2; extra == "web"
46
+ Requires-Dist: aiohttp>=3.9.4; extra == "web"
47
+ Requires-Dist: httpx>=0.27.0; extra == "web"
48
+ Requires-Dist: urllib3>=2.2.1; extra == "web"
49
+ Requires-Dist: pydantic>=2.0; extra == "web"
50
+ Requires-Dist: python-dotenv>=1.0.0; extra == "web"
51
+ Requires-Dist: psutil; extra == "web"
38
52
  Provides-Extra: docs
39
53
  Requires-Dist: docutils>=0.16.0; extra == "docs"
40
54
  Requires-Dist: myst_parser; extra == "docs"
@@ -49,34 +63,19 @@ Requires-Dist: sphinxcontrib-mermaid; extra == "docs"
49
63
  Provides-Extra: tests
50
64
  Requires-Dist: pytest; extra == "tests"
51
65
  Requires-Dist: pytest-asyncio; extra == "tests"
52
- Provides-Extra: web
53
- Requires-Dist: fastapi>=0.100.0; extra == "web"
54
- Requires-Dist: uvicorn[standard]>=0.24.0; extra == "web"
55
- Requires-Dist: websockets>=12.0; extra == "web"
56
- Requires-Dist: python-multipart>=0.0.6; extra == "web"
57
- Requires-Dist: pydantic>=2.0.0; extra == "web"
58
- Requires-Dist: requests>=2.32.2; extra == "web"
59
- Requires-Dist: aiohttp>=3.9.4; extra == "web"
60
- Requires-Dist: urllib3>=2.2.1; extra == "web"
61
- Requires-Dist: pydantic>=2.0; extra == "web"
62
- Requires-Dist: python-dotenv>=1.0.0; extra == "web"
63
- Requires-Dist: psutil; extra == "web"
64
66
  Provides-Extra: all
65
- Requires-Dist: loguru; extra == "all"
66
- Requires-Dist: fastapi; extra == "all"
67
- Requires-Dist: openai; extra == "all"
68
- Requires-Dist: genson; extra == "all"
69
- Requires-Dist: pillow; extra == "all"
70
- Requires-Dist: pypdf; extra == "all"
71
- Requires-Dist: pandas; extra == "all"
72
- Requires-Dist: parquet; extra == "all"
73
- Requires-Dist: numpy; extra == "all"
74
- Requires-Dist: msgpack; extra == "all"
75
- Requires-Dist: sentencepiece; extra == "all"
76
- Requires-Dist: tqdm; extra == "all"
77
- Requires-Dist: rapidfuzz; extra == "all"
78
- Requires-Dist: duckdb; extra == "all"
79
- Requires-Dist: kreuzberg>=4.0.0rc1; extra == "all"
67
+ Requires-Dist: fastapi>=0.100.0; extra == "all"
68
+ Requires-Dist: uvicorn[standard]>=0.24.0; extra == "all"
69
+ Requires-Dist: websockets>=12.0; extra == "all"
70
+ Requires-Dist: python-multipart>=0.0.6; extra == "all"
71
+ Requires-Dist: pydantic>=2.0.0; extra == "all"
72
+ Requires-Dist: requests>=2.32.2; extra == "all"
73
+ Requires-Dist: aiohttp>=3.9.4; extra == "all"
74
+ Requires-Dist: httpx>=0.27.0; extra == "all"
75
+ Requires-Dist: urllib3>=2.2.1; extra == "all"
76
+ Requires-Dist: pydantic>=2.0; extra == "all"
77
+ Requires-Dist: python-dotenv>=1.0.0; extra == "all"
78
+ Requires-Dist: psutil; extra == "all"
80
79
  Requires-Dist: docutils>=0.16.0; extra == "all"
81
80
  Requires-Dist: myst_parser; extra == "all"
82
81
  Requires-Dist: recommonmark; extra == "all"
@@ -89,21 +88,7 @@ Requires-Dist: sphinxawesome-theme; extra == "all"
89
88
  Requires-Dist: sphinxcontrib-mermaid; extra == "all"
90
89
  Requires-Dist: pytest; extra == "all"
91
90
  Requires-Dist: pytest-asyncio; extra == "all"
92
- Requires-Dist: fastapi>=0.100.0; extra == "all"
93
- Requires-Dist: uvicorn[standard]>=0.24.0; extra == "all"
94
- Requires-Dist: websockets>=12.0; extra == "all"
95
- Requires-Dist: python-multipart>=0.0.6; extra == "all"
96
- Requires-Dist: pydantic>=2.0.0; extra == "all"
97
- Requires-Dist: requests>=2.32.2; extra == "all"
98
- Requires-Dist: aiohttp>=3.9.4; extra == "all"
99
- Requires-Dist: urllib3>=2.2.1; extra == "all"
100
- Requires-Dist: pydantic>=2.0; extra == "all"
101
- Requires-Dist: python-dotenv>=1.0.0; extra == "all"
102
- Requires-Dist: psutil; extra == "all"
103
- Dynamic: home-page
104
91
  Dynamic: license-file
105
- Dynamic: provides-extra
106
- Dynamic: requires-dist
107
92
 
108
93
  <div align="center">
109
94
 
@@ -120,6 +105,7 @@ Dynamic: requires-dist
120
105
  [![ripgrep-all](https://img.shields.io/badge/ripgrep--all-Search-E67E22?style=flat-square&logo=rust&logoColor=white)](https://github.com/phiresky/ripgrep-all)
121
106
  [![OpenAI](https://img.shields.io/badge/OpenAI-API-412991?style=flat-square&logo=openai&logoColor=white)](https://github.com/openai/openai-python)
122
107
  [![Kreuzberg](https://img.shields.io/badge/Kreuzberg-Text_Extraction-4CAF50?style=flat-square)](https://github.com/kreuzberg-dev/kreuzberg)
108
+ [![MCP](https://img.shields.io/badge/MCP-Python_SDK-8B5CF6?style=flat-square&logo=python&logoColor=white)](https://github.com/modelcontextprotocol/python-sdk)
123
109
 
124
110
 
125
111
  [**Quick Start**](#-quick-start) · [**Key Features**](#-key-features) · [**Web UI**](#-web-ui) · [**How it Works**](#-how-it-works) · [**FAQ**](#-faq)
@@ -243,6 +229,19 @@ It serves as a unified intelligent hub for AI agents, delivering deep insights a
243
229
  ---
244
230
 
245
231
 
232
+ ## 🎉 News
233
+
234
+ * 🚀 **Feb 5, 2026**: Release **v0.0.2** — MCP Support, CLI Commands & Knowledge Persistence!
235
+ - **MCP Integration**: Full [Model Context Protocol](https://modelcontextprotocol.io) support, works seamlessly with Claude Desktop and Cursor IDE.
236
+ - **CLI Commands**: New `sirchmunk` CLI with `init`, `config`, `serve`, and `search` commands.
237
+ - **KnowledgeCluster Persistence**: DuckDB-powered storage with Parquet export for efficient knowledge management.
238
+ - **Knowledge Reuse**: Semantic similarity-based cluster retrieval for faster searches via embedding vectors.
239
+
240
+ * 🎉 **Jan 22, 2026**: Introducing **Sirchmunk** — Initial Release **v0.0.1** Now Available!
241
+
242
+
243
+ ---
244
+
246
245
  ## 🚀 Quick Start
247
246
 
248
247
  ### Prerequisites
@@ -283,9 +282,9 @@ llm = OpenAIChat(
283
282
 
284
283
  async def main():
285
284
 
286
- agent_search = AgenticSearch(llm=llm)
285
+ searcher = AgenticSearch(llm=llm)
287
286
 
288
- result: str = await agent_search.search(
287
+ result: str = await searcher.search(
289
288
  query="How does transformer attention work?",
290
289
  search_paths=["/path/to/documents"],
291
290
  )
@@ -296,11 +295,117 @@ asyncio.run(main())
296
295
  ```
297
296
 
298
297
  **⚠️ Notes:**
299
- - Upon initialization, AgenticSearch automatically checks if ripgrep-all and ripgrep are installed. If they are missing, it will attempt to install them automatically. If the automatic installation fails, please install them manually.
298
+ - Upon initialization, `AgenticSearch` automatically checks if `ripgrep-all` and `ripgrep` are installed. If they are missing, it will attempt to install them automatically. If the automatic installation fails, please install them manually.
300
299
  - References: https://github.com/BurntSushi/ripgrep | https://github.com/phiresky/ripgrep-all
301
300
  - Replace `"your-api-key"`, `"your-base-url"`, `"your-model-name"` and `/path/to/documents` with your actual values.
302
301
 
303
302
 
303
+ ### Command Line Interface
304
+
305
+ Sirchmunk provides a powerful CLI for server management and search operations.
306
+
307
+ #### Installation
308
+
309
+ ```bash
310
+ pip install "sirchmunk[web]"
311
+
312
+ # or install via UV
313
+ uv pip install "sirchmunk[web]"
314
+ ```
315
+
316
+
317
+ #### Initialize
318
+
319
+ ```bash
320
+ # Initialize Sirchmunk with default settings (Default work path: `~/.sirchmunk/`)
321
+ sirchmunk init
322
+
323
+ # Alternatively, initialize with custom work path
324
+ sirchmunk init --work-path /path/to/workspace
325
+ ```
326
+
327
+ #### Configure
328
+
329
+ ```bash
330
+ # Show current configuration
331
+ sirchmunk config
332
+
333
+ # Regenerate configuration file if needed (Default config file: ~/.sirchmunk/.env)
334
+ sirchmunk config --generate
335
+ ```
336
+
337
+ #### Start API Server
338
+
339
+ ```bash
340
+ # Start server with default settings
341
+ sirchmunk serve
342
+
343
+ # Custom host and port
344
+ sirchmunk serve --host 0.0.0.0 --port 8000
345
+
346
+ # Development mode with auto-reload
347
+ sirchmunk serve --reload
348
+ ```
349
+
350
+ #### Search
351
+
352
+ ```bash
353
+ # Search in current directory
354
+ sirchmunk search "How does authentication work?"
355
+
356
+ # Search in specific paths
357
+ sirchmunk search "find all API endpoints" ./src ./docs
358
+
359
+ # Quick filename search
360
+ sirchmunk search "config" --mode FILENAME_ONLY
361
+
362
+ # Output as JSON
363
+ sirchmunk search "database schema" --output json
364
+
365
+ # Use API server (requires running server)
366
+ sirchmunk search "query" --api --api-url http://localhost:8584
367
+ ```
368
+
369
+ #### Available Commands
370
+
371
+ | Command | Description |
372
+ |---------|-------------|
373
+ | `sirchmunk init` | Initialize working directory and configuration |
374
+ | `sirchmunk config` | Show or generate configuration |
375
+ | `sirchmunk serve` | Start the API server |
376
+ | `sirchmunk search` | Perform search queries |
377
+ | `sirchmunk version` | Show version information |
378
+
379
+ ---
380
+
381
+ ## 🔌 MCP Server
382
+
383
+ Sirchmunk provides a [Model Context Protocol (MCP)](https://modelcontextprotocol.io) server that exposes its intelligent search capabilities as MCP tools. This enables seamless integration with AI assistants like **Claude Desktop** and **Cursor IDE**.
384
+
385
+ ### Quick Start
386
+
387
+ ```bash
388
+ # Install MCP package
389
+ pip install sirchmunk-mcp
390
+
391
+ # Initialize and configure
392
+ sirchmunk-mcp init
393
+ sirchmunk-mcp config --generate
394
+
395
+ # Edit ~/.sirchmunk/.mcp_env with your LLM API key
396
+
397
+ # Test with MCP Inspector
398
+ npx @modelcontextprotocol/inspector sirchmunk-mcp serve
399
+ ```
400
+
401
+ ### Features
402
+
403
+ - **Multi-Mode Search**: DEEP mode for comprehensive analysis, FILENAME_ONLY for fast file discovery
404
+ - **Knowledge Cluster Management**: Automatic extraction, storage, and reuse of knowledge
405
+ - **Standard MCP Protocol**: Works with stdio and Streamable HTTP transports
406
+
407
+ 📖 **For detailed documentation, see [Sirchmunk MCP README](src/sirchmunk_mcp/README.md)**.
408
+
304
409
  ---
305
410
 
306
411
  ## 🖥️ Web UI
@@ -374,10 +479,10 @@ python scripts/stop_web.py
374
479
 
375
480
  ### Data Storage
376
481
 
377
- All persistent data is stored in the configured `WORK_PATH` (default: `~/.sirchmunk/`):
482
+ All persistent data is stored in the configured `SIRCHMUNK_WORK_PATH` (default: `~/.sirchmunk/`):
378
483
 
379
484
  ```
380
- {WORK_PATH}/
485
+ {SIRCHMUNK_WORK_PATH}/
381
486
  ├── .cache/
382
487
  ├── history/ # Chat session history (DuckDB)
383
488
  │ └── chat_history.db
@@ -420,7 +525,7 @@ Any OpenAI-compatible API endpoint, including (but not limited to):
420
525
  Simply specify the path in your search query:
421
526
 
422
527
  ```python
423
- result = await search.search(
528
+ result = await searcher.search(
424
529
  query="Your question",
425
530
  search_paths=["/path/to/folder", "/path/to/file.pdf"]
426
531
  )
@@ -435,7 +540,7 @@ No pre-processing or indexing required!
435
540
 
436
541
  Knowledge clusters are persisted in Parquet format at:
437
542
  ```
438
- {WORK_PATH}/.cache/knowledge/knowledge_clusters.parquet
543
+ {SIRCHMUNK_WORK_PATH}/.cache/knowledge/knowledge_clusters.parquet
439
544
  ```
440
545
 
441
546
  You can query them using DuckDB or the `KnowledgeManager` API.
@@ -447,7 +552,7 @@ You can query them using DuckDB or the `KnowledgeManager` API.
447
552
 
448
553
  1. **Web Dashboard**: Visit the Monitor page for real-time statistics
449
554
  2. **API**: `GET /api/v1/monitor/llm` returns usage metrics
450
- 3. **Code**: Access `search.llm_usages` after search completion
555
+ 3. **Code**: Access `searcher.llm_usages` after search completion
451
556
 
452
557
  </details>
453
558
 
@@ -13,6 +13,7 @@
13
13
  [![ripgrep-all](https://img.shields.io/badge/ripgrep--all-Search-E67E22?style=flat-square&logo=rust&logoColor=white)](https://github.com/phiresky/ripgrep-all)
14
14
  [![OpenAI](https://img.shields.io/badge/OpenAI-API-412991?style=flat-square&logo=openai&logoColor=white)](https://github.com/openai/openai-python)
15
15
  [![Kreuzberg](https://img.shields.io/badge/Kreuzberg-Text_Extraction-4CAF50?style=flat-square)](https://github.com/kreuzberg-dev/kreuzberg)
16
+ [![MCP](https://img.shields.io/badge/MCP-Python_SDK-8B5CF6?style=flat-square&logo=python&logoColor=white)](https://github.com/modelcontextprotocol/python-sdk)
16
17
 
17
18
 
18
19
  [**Quick Start**](#-quick-start) · [**Key Features**](#-key-features) · [**Web UI**](#-web-ui) · [**How it Works**](#-how-it-works) · [**FAQ**](#-faq)
@@ -136,6 +137,19 @@ It serves as a unified intelligent hub for AI agents, delivering deep insights a
136
137
  ---
137
138
 
138
139
 
140
+ ## 🎉 News
141
+
142
+ * 🚀 **Feb 5, 2026**: Release **v0.0.2** — MCP Support, CLI Commands & Knowledge Persistence!
143
+ - **MCP Integration**: Full [Model Context Protocol](https://modelcontextprotocol.io) support, works seamlessly with Claude Desktop and Cursor IDE.
144
+ - **CLI Commands**: New `sirchmunk` CLI with `init`, `config`, `serve`, and `search` commands.
145
+ - **KnowledgeCluster Persistence**: DuckDB-powered storage with Parquet export for efficient knowledge management.
146
+ - **Knowledge Reuse**: Semantic similarity-based cluster retrieval for faster searches via embedding vectors.
147
+
148
+ * 🎉 **Jan 22, 2026**: Introducing **Sirchmunk** — Initial Release **v0.0.1** Now Available!
149
+
150
+
151
+ ---
152
+
139
153
  ## 🚀 Quick Start
140
154
 
141
155
  ### Prerequisites
@@ -176,9 +190,9 @@ llm = OpenAIChat(
176
190
 
177
191
  async def main():
178
192
 
179
- agent_search = AgenticSearch(llm=llm)
193
+ searcher = AgenticSearch(llm=llm)
180
194
 
181
- result: str = await agent_search.search(
195
+ result: str = await searcher.search(
182
196
  query="How does transformer attention work?",
183
197
  search_paths=["/path/to/documents"],
184
198
  )
@@ -189,11 +203,117 @@ asyncio.run(main())
189
203
  ```
190
204
 
191
205
  **⚠️ Notes:**
192
- - Upon initialization, AgenticSearch automatically checks if ripgrep-all and ripgrep are installed. If they are missing, it will attempt to install them automatically. If the automatic installation fails, please install them manually.
206
+ - Upon initialization, `AgenticSearch` automatically checks if `ripgrep-all` and `ripgrep` are installed. If they are missing, it will attempt to install them automatically. If the automatic installation fails, please install them manually.
193
207
  - References: https://github.com/BurntSushi/ripgrep | https://github.com/phiresky/ripgrep-all
194
208
  - Replace `"your-api-key"`, `"your-base-url"`, `"your-model-name"` and `/path/to/documents` with your actual values.
195
209
 
196
210
 
211
+ ### Command Line Interface
212
+
213
+ Sirchmunk provides a powerful CLI for server management and search operations.
214
+
215
+ #### Installation
216
+
217
+ ```bash
218
+ pip install "sirchmunk[web]"
219
+
220
+ # or install via UV
221
+ uv pip install "sirchmunk[web]"
222
+ ```
223
+
224
+
225
+ #### Initialize
226
+
227
+ ```bash
228
+ # Initialize Sirchmunk with default settings (Default work path: `~/.sirchmunk/`)
229
+ sirchmunk init
230
+
231
+ # Alternatively, initialize with custom work path
232
+ sirchmunk init --work-path /path/to/workspace
233
+ ```
234
+
235
+ #### Configure
236
+
237
+ ```bash
238
+ # Show current configuration
239
+ sirchmunk config
240
+
241
+ # Regenerate configuration file if needed (Default config file: ~/.sirchmunk/.env)
242
+ sirchmunk config --generate
243
+ ```
244
+
245
+ #### Start API Server
246
+
247
+ ```bash
248
+ # Start server with default settings
249
+ sirchmunk serve
250
+
251
+ # Custom host and port
252
+ sirchmunk serve --host 0.0.0.0 --port 8000
253
+
254
+ # Development mode with auto-reload
255
+ sirchmunk serve --reload
256
+ ```
257
+
258
+ #### Search
259
+
260
+ ```bash
261
+ # Search in current directory
262
+ sirchmunk search "How does authentication work?"
263
+
264
+ # Search in specific paths
265
+ sirchmunk search "find all API endpoints" ./src ./docs
266
+
267
+ # Quick filename search
268
+ sirchmunk search "config" --mode FILENAME_ONLY
269
+
270
+ # Output as JSON
271
+ sirchmunk search "database schema" --output json
272
+
273
+ # Use API server (requires running server)
274
+ sirchmunk search "query" --api --api-url http://localhost:8584
275
+ ```
276
+
277
+ #### Available Commands
278
+
279
+ | Command | Description |
280
+ |---------|-------------|
281
+ | `sirchmunk init` | Initialize working directory and configuration |
282
+ | `sirchmunk config` | Show or generate configuration |
283
+ | `sirchmunk serve` | Start the API server |
284
+ | `sirchmunk search` | Perform search queries |
285
+ | `sirchmunk version` | Show version information |
286
+
287
+ ---
288
+
289
+ ## 🔌 MCP Server
290
+
291
+ Sirchmunk provides a [Model Context Protocol (MCP)](https://modelcontextprotocol.io) server that exposes its intelligent search capabilities as MCP tools. This enables seamless integration with AI assistants like **Claude Desktop** and **Cursor IDE**.
292
+
293
+ ### Quick Start
294
+
295
+ ```bash
296
+ # Install MCP package
297
+ pip install sirchmunk-mcp
298
+
299
+ # Initialize and configure
300
+ sirchmunk-mcp init
301
+ sirchmunk-mcp config --generate
302
+
303
+ # Edit ~/.sirchmunk/.mcp_env with your LLM API key
304
+
305
+ # Test with MCP Inspector
306
+ npx @modelcontextprotocol/inspector sirchmunk-mcp serve
307
+ ```
308
+
309
+ ### Features
310
+
311
+ - **Multi-Mode Search**: DEEP mode for comprehensive analysis, FILENAME_ONLY for fast file discovery
312
+ - **Knowledge Cluster Management**: Automatic extraction, storage, and reuse of knowledge
313
+ - **Standard MCP Protocol**: Works with stdio and Streamable HTTP transports
314
+
315
+ 📖 **For detailed documentation, see [Sirchmunk MCP README](src/sirchmunk_mcp/README.md)**.
316
+
197
317
  ---
198
318
 
199
319
  ## 🖥️ Web UI
@@ -267,10 +387,10 @@ python scripts/stop_web.py
267
387
 
268
388
  ### Data Storage
269
389
 
270
- All persistent data is stored in the configured `WORK_PATH` (default: `~/.sirchmunk/`):
390
+ All persistent data is stored in the configured `SIRCHMUNK_WORK_PATH` (default: `~/.sirchmunk/`):
271
391
 
272
392
  ```
273
- {WORK_PATH}/
393
+ {SIRCHMUNK_WORK_PATH}/
274
394
  ├── .cache/
275
395
  ├── history/ # Chat session history (DuckDB)
276
396
  │ └── chat_history.db
@@ -313,7 +433,7 @@ Any OpenAI-compatible API endpoint, including (but not limited to):
313
433
  Simply specify the path in your search query:
314
434
 
315
435
  ```python
316
- result = await search.search(
436
+ result = await searcher.search(
317
437
  query="Your question",
318
438
  search_paths=["/path/to/folder", "/path/to/file.pdf"]
319
439
  )
@@ -328,7 +448,7 @@ No pre-processing or indexing required!
328
448
 
329
449
  Knowledge clusters are persisted in Parquet format at:
330
450
  ```
331
- {WORK_PATH}/.cache/knowledge/knowledge_clusters.parquet
451
+ {SIRCHMUNK_WORK_PATH}/.cache/knowledge/knowledge_clusters.parquet
332
452
  ```
333
453
 
334
454
  You can query them using DuckDB or the `KnowledgeManager` API.
@@ -340,7 +460,7 @@ You can query them using DuckDB or the `KnowledgeManager` API.
340
460
 
341
461
  1. **Web Dashboard**: Visit the Monitor page for real-time statistics
342
462
  2. **API**: `GET /api/v1/monitor/llm` returns usage metrics
343
- 3. **Code**: Access `search.llm_usages` after search completion
463
+ 3. **Code**: Access `searcher.llm_usages` after search completion
344
464
 
345
465
  </details>
346
466
 
@@ -8,7 +8,7 @@ authors = [
8
8
  {name = "ModelScope Team"},
9
9
  {email = "contact@modelscope.cn"}
10
10
  ]
11
- keywords = ["LLM", "Agentic", "Search", "RAG", "Indexless", "Self-evolving", "Real-time Intelligence", "Multi-modal"]
11
+ keywords = ["LLM", "Agentic Search", "Embedding-Free", "RAG", "Indexless", "Self-evolving", "Real-time Intelligence", "Multi-modal"]
12
12
  requires-python = ">=3.10"
13
13
  classifiers = [
14
14
  "Development Status :: 4 - Beta",
@@ -35,35 +35,20 @@ build-backend = "setuptools.build_meta"
35
35
  [tool.setuptools]
36
36
  include-package-data = true
37
37
 
38
- [tool.setuptools.dynamic]
39
- version = {attr = "sirchmunk.version.__version__"}
40
-
41
38
  [tool.setuptools.packages.find]
42
39
  where = ["src"]
43
40
  include = ["sirchmunk*"]
44
41
 
45
- [tool.black]
46
- line-length = 88
47
- target-version = ["py313"]
48
- include = "\\.pyi?$"
49
- exclude = '''
50
- /(
51
- \.eggs
52
- | \.git
53
- | \.hg
54
- | \.mypy_cache
55
- | \.tox
56
- | \.venv
57
- | _build
58
- | buck-out
59
- | build
60
- | dist
61
- | src/sirchmunk/.*/__pycache__
62
- )/
63
- '''
64
-
65
- [tool.ruff]
66
- select = ["E", "W", "F", "I"]
67
- ignore = ["E501"]
68
- target-version = "py313"
69
- line-length = 88
42
+ [tool.setuptools.dynamic]
43
+ version = {attr = "sirchmunk.version.__version__"}
44
+ dependencies = {file = ["requirements/core.txt"]}
45
+
46
+ [tool.setuptools.dynamic.optional-dependencies]
47
+ web = {file = ["requirements/web.txt"]}
48
+ docs = {file = ["requirements/docs.txt"]}
49
+ tests = {file = ["requirements/tests.txt"]}
50
+ all = {file = [
51
+ "requirements/web.txt",
52
+ "requirements/docs.txt",
53
+ "requirements/tests.txt",
54
+ ]}
@@ -0,0 +1,17 @@
1
+ loguru
2
+ fastapi
3
+ openai
4
+ genson
5
+ pillow
6
+ pypdf
7
+ pandas
8
+ parquet
9
+ numpy
10
+ msgpack
11
+ sentencepiece
12
+ tqdm
13
+ rapidfuzz
14
+ duckdb
15
+ kreuzberg>=4.0.0rc1
16
+ sentence-transformers
17
+ modelscope
@@ -0,0 +1,10 @@
1
+ docutils>=0.16.0
2
+ myst_parser
3
+ recommonmark
4
+ sphinx>=5.3.0
5
+ sphinx-book-theme
6
+ sphinx-copybutton
7
+ sphinx-design
8
+ sphinx_markdown_tables
9
+ sphinxawesome-theme
10
+ sphinxcontrib-mermaid
@@ -0,0 +1,2 @@
1
+ pytest
2
+ pytest-asyncio
@@ -0,0 +1,19 @@
1
+ # --- Web Framework & Server ---
2
+ fastapi>=0.100.0
3
+ uvicorn[standard]>=0.24.0
4
+ websockets>=12.0
5
+ python-multipart>=0.0.6
6
+ pydantic>=2.0.0
7
+
8
+ # --- HTTP Clients ---
9
+ requests>=2.32.2
10
+ aiohttp>=3.9.4
11
+ httpx>=0.27.0
12
+ urllib3>=2.2.1
13
+
14
+ # --- Data Validation & Environment ---
15
+ pydantic>=2.0
16
+ python-dotenv>=1.0.0
17
+
18
+ # --- System & Utilities ---
19
+ psutil