sirchmunk 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sirchmunk/api/__init__.py +1 -0
- sirchmunk/api/chat.py +1123 -0
- sirchmunk/api/components/__init__.py +0 -0
- sirchmunk/api/components/history_storage.py +402 -0
- sirchmunk/api/components/monitor_tracker.py +518 -0
- sirchmunk/api/components/settings_storage.py +353 -0
- sirchmunk/api/history.py +254 -0
- sirchmunk/api/knowledge.py +411 -0
- sirchmunk/api/main.py +120 -0
- sirchmunk/api/monitor.py +219 -0
- sirchmunk/api/run_server.py +54 -0
- sirchmunk/api/search.py +230 -0
- sirchmunk/api/settings.py +309 -0
- sirchmunk/api/tools.py +315 -0
- sirchmunk/cli/__init__.py +11 -0
- sirchmunk/cli/cli.py +789 -0
- sirchmunk/learnings/knowledge_base.py +5 -2
- sirchmunk/llm/prompts.py +12 -1
- sirchmunk/retrieve/text_retriever.py +186 -2
- sirchmunk/scan/file_scanner.py +2 -2
- sirchmunk/schema/knowledge.py +119 -35
- sirchmunk/search.py +384 -26
- sirchmunk/storage/__init__.py +2 -2
- sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
- sirchmunk/utils/constants.py +7 -5
- sirchmunk/utils/embedding_util.py +217 -0
- sirchmunk/utils/tokenizer_util.py +36 -1
- sirchmunk/version.py +1 -1
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +196 -14
- sirchmunk-0.0.2.dist-info/RECORD +69 -0
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
- sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
- sirchmunk_mcp/__init__.py +25 -0
- sirchmunk_mcp/cli.py +478 -0
- sirchmunk_mcp/config.py +276 -0
- sirchmunk_mcp/server.py +355 -0
- sirchmunk_mcp/service.py +327 -0
- sirchmunk_mcp/setup.py +15 -0
- sirchmunk_mcp/tools.py +410 -0
- sirchmunk-0.0.1.dist-info/RECORD +0 -45
- sirchmunk-0.0.1.dist-info/top_level.txt +0 -1
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
sirchmunk_mcp/service.py
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
# Copyright (c) ModelScope Contributors. All rights reserved.
|
|
2
|
+
"""
|
|
3
|
+
Sirchmunk Service Wrapper for MCP Server.
|
|
4
|
+
|
|
5
|
+
Provides a high-level interface to Sirchmunk's AgenticSearch functionality,
|
|
6
|
+
managing initialization, configuration, and session state.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import contextlib
import io
import logging
import os
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from .config import Config

if TYPE_CHECKING:
    from sirchmunk.schema.knowledge import KnowledgeCluster
    from sirchmunk.search import AgenticSearch
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@contextlib.contextmanager
def suppress_stdout():
    """Temporarily discard anything written to ``sys.stdout``.

    Suppression is only active when the ``MCP_TRANSPORT`` environment
    variable equals ``"stdio"``; otherwise the managed body runs with stdout
    untouched. It is used around third-party imports and model loading
    (ModelScope, transformers, etc.) whose prints would otherwise corrupt the
    MCP stdio protocol stream.

    While active, ``sys.stdout`` is replaced by an in-memory buffer that is
    thrown away on exit. The previous ``sys.stdout`` is restored even if the
    body raises.
    """
    # Any transport other than stdio does not need stdout protected.
    if os.environ.get("MCP_TRANSPORT") != "stdio":
        yield
        return

    # redirect_stdout swaps sys.stdout on entry and restores it on exit.
    with contextlib.redirect_stdout(io.StringIO()):
        yield
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class SirchmunkService:
    """Service wrapper for AgenticSearch with lifecycle management.

    This class manages the AgenticSearch instance and provides a clean interface
    for MCP tool implementations.

    Attributes:
        config: Configuration object
        searcher: AgenticSearch instance (None until initialization succeeds)
        initialized: Whether the service is initialized
    """

    def __init__(self, config: Config):
        """Initialize Sirchmunk service.

        Builds the underlying AgenticSearch instance immediately, so a
        successfully constructed service is ready to use.

        Args:
            config: Configuration object

        Raises:
            RuntimeError: If initialization fails (the original exception is
                chained as the cause)
        """
        self.config = config
        self.searcher: Optional[AgenticSearch] = None
        self.initialized = False

        logger.info(f"Initializing Sirchmunk service with config: {config.sirchmunk.work_path}")

        try:
            self._initialize_search()
            self.initialized = True
            logger.info("Sirchmunk service initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Sirchmunk service: {e}")
            raise RuntimeError(f"Sirchmunk service initialization failed: {e}") from e

    def _initialize_search(self) -> None:
        """Initialize AgenticSearch instance with configuration.

        Creates the LLM client from ``config.llm`` and stores the resulting
        AgenticSearch instance on ``self.searcher``.

        Raises:
            Exception: If AgenticSearch initialization fails
        """
        # Import sirchmunk modules inside function to allow stdout suppression
        # These imports may trigger model downloads that print to stdout
        with suppress_stdout():
            from sirchmunk.search import AgenticSearch
            from sirchmunk.llm.openai_chat import OpenAIChat

        # Create LLM client
        llm = OpenAIChat(
            base_url=self.config.llm.base_url,
            api_key=self.config.llm.api_key,
            model=self.config.llm.model_name,
        )

        # Create AgenticSearch instance with stdout suppression
        # AgenticSearch may load embedding models which print progress
        with suppress_stdout():
            self.searcher = AgenticSearch(
                llm=llm,
                work_path=self.config.sirchmunk.work_path,
                verbose=False,  # Disable verbose in stdio mode to prevent stdout pollution
                reuse_knowledge=self.config.sirchmunk.enable_cluster_reuse,
                cluster_sim_threshold=self.config.sirchmunk.cluster_similarity.threshold,
                cluster_sim_top_k=self.config.sirchmunk.cluster_similarity.top_k,
            )

        logger.info("AgenticSearch instance created")

    @staticmethod
    def _last_modified_sort_key(cluster: Any) -> tuple:
        """Return a sort key for ``last_modified`` that tolerates None.

        The leading boolean decides the order whenever a timestamp is missing,
        so None is never compared against a real timestamp. With
        ``reverse=True`` clusters without a timestamp end up last.
        """
        stamp = cluster.last_modified
        return (stamp is not None, stamp)

    async def search(
        self,
        query: str,
        search_paths: Union[str, List[str]],
        mode: str = "DEEP",
        max_depth: Optional[int] = None,
        top_k_files: Optional[int] = None,
        keyword_levels: Optional[int] = None,
        include: Optional[List[str]] = None,
        exclude: Optional[List[str]] = None,
        return_cluster: bool = False,
    ) -> Union[str, List[Dict[str, Any]], KnowledgeCluster, None]:
        """Search and retrieve various types of raw documents using AgenticSearch.

        This method performs intelligent search across code, documentation, and
        other text-based documents. It directly retrieves and analyzes raw content
        from multiple file types including source code, markdown files, PDFs,
        text files, and other document formats supported by ripgrep-all.

        Args:
            query: Search query or question to find relevant documents
            search_paths: Paths to search in (files or directories); a single
                string is treated as a one-element list
            mode: Search mode (DEEP, FAST, FILENAME_ONLY)
            max_depth: Maximum directory depth to search; None uses the
                configured default
            top_k_files: Number of top files to return; None uses the
                configured default
            keyword_levels: Number of keyword granularity levels (DEEP mode);
                None uses the configured default
            include: File patterns to include (glob)
            exclude: File patterns to exclude (glob)
            return_cluster: Whether to return full KnowledgeCluster object

        Returns:
            Search results: str (summary), List[Dict] (FILENAME_ONLY),
            KnowledgeCluster (if return_cluster=True), or None (if no results)

        Raises:
            RuntimeError: If service is not initialized
            ValueError: If parameters are invalid
        """
        if not self.initialized or self.searcher is None:
            raise RuntimeError("Sirchmunk service is not initialized")

        # Validate mode
        if mode not in ("DEEP", "FAST", "FILENAME_ONLY"):
            raise ValueError(f"Invalid mode: {mode}. Must be DEEP, FAST, or FILENAME_ONLY")

        # Normalize search_paths
        if isinstance(search_paths, str):
            search_paths = [search_paths]

        # Missing paths are only reported; the search itself still runs.
        for path in search_paths:
            if not Path(path).exists():
                logger.warning(f"Search path does not exist: {path}")

        # Apply defaults from configuration. Compare against None explicitly so
        # that an explicit 0 from the caller is not replaced by the default.
        defaults = self.config.sirchmunk.search_defaults
        if max_depth is None:
            max_depth = defaults.max_depth
        if top_k_files is None:
            top_k_files = defaults.top_k_files
        if keyword_levels is None:
            keyword_levels = defaults.keyword_levels

        logger.info(
            f"Starting search: mode={mode}, query='{query[:50]}...', "
            f"paths={len(search_paths)}, max_depth={max_depth}"
        )

        try:
            # NOTE(review): verbose is forwarded from config here, while the
            # constructor forces verbose=False to protect stdout - confirm this
            # cannot pollute stdout when running over the stdio transport.
            result = await self.searcher.search(
                query=query,
                search_paths=search_paths,
                mode=mode,
                max_depth=max_depth,
                top_k_files=top_k_files,
                keyword_levels=keyword_levels,
                include=include,
                exclude=exclude,
                verbose=self.config.sirchmunk.verbose,
                grep_timeout=defaults.grep_timeout,
                return_cluster=return_cluster,
            )

            logger.info(f"Search completed: mode={mode}, result_type={type(result).__name__}")
            return result

        except Exception as e:
            logger.error(f"Search failed: {e}", exc_info=True)
            raise

    async def get_cluster(self, cluster_id: str) -> Optional[KnowledgeCluster]:
        """Retrieve a knowledge cluster by ID.

        Args:
            cluster_id: Cluster ID (e.g., 'C1007')

        Returns:
            KnowledgeCluster if found, None otherwise

        Raises:
            RuntimeError: If service is not initialized
        """
        if not self.initialized or self.searcher is None:
            raise RuntimeError("Sirchmunk service is not initialized")

        try:
            cluster = await self.searcher.knowledge_manager.get(cluster_id)
            if cluster:
                logger.info(f"Retrieved cluster: {cluster_id}")
            else:
                logger.warning(f"Cluster not found: {cluster_id}")
            return cluster
        except Exception as e:
            logger.error(f"Failed to get cluster {cluster_id}: {e}")
            raise

    async def list_clusters(
        self,
        limit: int = 10,
        sort_by: str = "last_modified",
    ) -> List[Dict[str, Any]]:
        """List saved knowledge clusters, sorted and truncated.

        Args:
            limit: Maximum number of clusters to return; values below zero
                are treated as zero
            sort_by: Sort field (hotness, confidence, last_modified); any
                other value falls back to last_modified. Sorting is always
                descending, and missing values sort last.

        Returns:
            List of cluster metadata dictionaries

        Raises:
            RuntimeError: If service is not initialized
        """
        if not self.initialized or self.searcher is None:
            raise RuntimeError("Sirchmunk service is not initialized")

        try:
            all_clusters = await self.searcher.knowledge_manager.list_all()

            sort_keys = {
                "hotness": lambda c: c.hotness or 0.0,
                "confidence": lambda c: c.confidence or 0.0,
            }
            key_fn = sort_keys.get(sort_by, self._last_modified_sort_key)

            # sorted() builds a new list, leaving the manager's own list
            # untouched.
            ordered = sorted(all_clusters, key=key_fn, reverse=True)

            # A negative limit would otherwise slice off the tail instead of
            # returning nothing.
            result_clusters = ordered[: max(limit, 0)]

            # Convert to dictionaries
            results = [
                {
                    "id": cluster.id,
                    "name": cluster.name,
                    "confidence": cluster.confidence,
                    "hotness": cluster.hotness,
                    "lifecycle": cluster.lifecycle.value,
                    "version": cluster.version,
                    "last_modified": cluster.last_modified.isoformat() if cluster.last_modified else None,
                    "queries": cluster.queries,
                    "evidences_count": len(cluster.evidences),
                }
                for cluster in result_clusters
            ]

            logger.info(f"Listed {len(results)} clusters (limit={limit}, sort_by={sort_by})")
            return results

        except Exception as e:
            logger.error(f"Failed to list clusters: {e}")
            raise

    def get_stats(self) -> Dict[str, Any]:
        """Get service statistics.

        Returns:
            The knowledge manager's statistics plus a ``"service"`` entry
            holding service-level values. If collecting the statistics fails,
            ``{"error": <message>}`` is returned instead of raising.

        Raises:
            RuntimeError: If service is not initialized
        """
        if not self.initialized or self.searcher is None:
            raise RuntimeError("Sirchmunk service is not initialized")

        try:
            # Shallow-copy so the "service" entry is not written into the dict
            # object handed out by the knowledge manager.
            stats = dict(self.searcher.knowledge_manager.get_stats())

            # Add service-level stats
            stats["service"] = {
                "initialized": self.initialized,
                "work_path": str(self.config.sirchmunk.work_path),
                "cluster_reuse_enabled": self.config.sirchmunk.enable_cluster_reuse,
            }

            return stats
        except Exception as e:
            # Best effort: report the failure in the payload.
            logger.error(f"Failed to get stats: {e}")
            return {"error": str(e)}

    async def shutdown(self) -> None:
        """Gracefully shutdown the service.

        Marks the service as uninitialized. Errors are logged, not raised.
        """
        logger.info("Shutting down Sirchmunk service")

        try:
            # Currently no cleanup needed, but this provides extension point
            self.initialized = False
            logger.info("Sirchmunk service shutdown complete")
        except Exception as e:
            logger.error(f"Error during shutdown: {e}")
|
sirchmunk_mcp/setup.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright (c) ModelScope Contributors. All rights reserved.
|
|
2
|
+
"""
|
|
3
|
+
Setup script for Sirchmunk MCP Server.
|
|
4
|
+
|
|
5
|
+
For backwards compatibility with older pip versions.
|
|
6
|
+
Modern installations should use pyproject.toml.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from setuptools import setup, find_packages
|
|
10
|
+
|
|
11
|
+
if __name__ == "__main__":
    # Legacy entry point: runs setuptools only when this file is executed
    # directly, never on import.
    setup_options = {
        "packages": find_packages(),
        "include_package_data": True,
    }
    setup(**setup_options)
|