sirchmunk 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. sirchmunk/api/__init__.py +1 -0
  2. sirchmunk/api/chat.py +1123 -0
  3. sirchmunk/api/components/__init__.py +0 -0
  4. sirchmunk/api/components/history_storage.py +402 -0
  5. sirchmunk/api/components/monitor_tracker.py +518 -0
  6. sirchmunk/api/components/settings_storage.py +353 -0
  7. sirchmunk/api/history.py +254 -0
  8. sirchmunk/api/knowledge.py +411 -0
  9. sirchmunk/api/main.py +120 -0
  10. sirchmunk/api/monitor.py +219 -0
  11. sirchmunk/api/run_server.py +54 -0
  12. sirchmunk/api/search.py +230 -0
  13. sirchmunk/api/settings.py +309 -0
  14. sirchmunk/api/tools.py +315 -0
  15. sirchmunk/cli/__init__.py +11 -0
  16. sirchmunk/cli/cli.py +789 -0
  17. sirchmunk/learnings/knowledge_base.py +5 -2
  18. sirchmunk/llm/prompts.py +12 -1
  19. sirchmunk/retrieve/text_retriever.py +186 -2
  20. sirchmunk/scan/file_scanner.py +2 -2
  21. sirchmunk/schema/knowledge.py +119 -35
  22. sirchmunk/search.py +384 -26
  23. sirchmunk/storage/__init__.py +2 -2
  24. sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
  25. sirchmunk/utils/constants.py +7 -5
  26. sirchmunk/utils/embedding_util.py +217 -0
  27. sirchmunk/utils/tokenizer_util.py +36 -1
  28. sirchmunk/version.py +1 -1
  29. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +124 -9
  30. sirchmunk-0.0.2.dist-info/RECORD +69 -0
  31. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
  32. sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
  33. sirchmunk_mcp/__init__.py +25 -0
  34. sirchmunk_mcp/cli.py +478 -0
  35. sirchmunk_mcp/config.py +276 -0
  36. sirchmunk_mcp/server.py +355 -0
  37. sirchmunk_mcp/service.py +327 -0
  38. sirchmunk_mcp/setup.py +15 -0
  39. sirchmunk_mcp/tools.py +410 -0
  40. sirchmunk-0.0.1.post1.dist-info/RECORD +0 -45
  41. sirchmunk-0.0.1.post1.dist-info/top_level.txt +0 -1
  42. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
  43. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,327 @@
1
+ # Copyright (c) ModelScope Contributors. All rights reserved.
2
+ """
3
+ Sirchmunk Service Wrapper for MCP Server.
4
+
5
+ Provides a high-level interface to Sirchmunk's AgenticSearch functionality,
6
+ managing initialization, configuration, and session state.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import contextlib
12
+ import io
13
+ import logging
14
+ import os
15
+ import sys
16
+ from pathlib import Path
17
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
18
+
19
+ from .config import Config
20
+
21
+ if TYPE_CHECKING:
22
+ from sirchmunk.schema.knowledge import KnowledgeCluster
23
+
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
@contextlib.contextmanager
def suppress_stdout():
    """Context manager that discards stdout output in stdio MCP mode.

    Used during initialization to prevent third-party libraries
    (ModelScope, transformers, etc.) from printing to stdout,
    which would break MCP stdio protocol.

    Suppression is active only when the ``MCP_TRANSPORT`` environment
    variable equals ``"stdio"``; otherwise the body runs with stdout
    untouched. Only writes that go through ``sys.stdout`` are captured,
    and the captured text is thrown away.
    """
    # Outside stdio MCP mode stdout does not need protecting.
    if os.environ.get("MCP_TRANSPORT") != "stdio":
        yield
        return

    # redirect_stdout swaps sys.stdout for the buffer and restores the
    # previous stream on exit, including when the body raises.
    with contextlib.redirect_stdout(io.StringIO()):
        yield
46
+
47
+
48
class SirchmunkService:
    """Service wrapper for AgenticSearch with lifecycle management.

    This class manages the AgenticSearch instance and provides a clean interface
    for MCP tool implementations.

    Attributes:
        config: Configuration object
        searcher: AgenticSearch instance (None until _initialize_search assigns it)
        initialized: Whether the service is initialized
    """

    def __init__(self, config: Config):
        """Initialize Sirchmunk service.

        Args:
            config: Configuration object

        Raises:
            RuntimeError: If initialization fails
        """
        self.config = config
        # AgenticSearch is imported lazily inside _initialize_search(), so the
        # class is not a module-level name and cannot be used in an annotation.
        self.searcher: Optional[Any] = None
        self.initialized = False

        logger.info(f"Initializing Sirchmunk service with config: {config.sirchmunk.work_path}")

        try:
            self._initialize_search()
            self.initialized = True
            logger.info("Sirchmunk service initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Sirchmunk service: {e}")
            raise RuntimeError(f"Sirchmunk service initialization failed: {e}") from e

    def _initialize_search(self) -> None:
        """Initialize AgenticSearch instance with configuration.

        Builds an OpenAIChat client from ``config.llm`` and an AgenticSearch
        instance from ``config.sirchmunk``, then stores the latter on
        ``self.searcher``.

        Raises:
            Exception: If AgenticSearch initialization fails
        """
        # Import sirchmunk modules inside function to allow stdout suppression
        # These imports may trigger model downloads that print to stdout
        with suppress_stdout():
            from sirchmunk.search import AgenticSearch
            from sirchmunk.llm.openai_chat import OpenAIChat

        # Create LLM client
        llm = OpenAIChat(
            base_url=self.config.llm.base_url,
            api_key=self.config.llm.api_key,
            model=self.config.llm.model_name,
        )

        # Create AgenticSearch instance with stdout suppression
        # AgenticSearch may load embedding models which print progress
        with suppress_stdout():
            self.searcher = AgenticSearch(
                llm=llm,
                work_path=self.config.sirchmunk.work_path,
                verbose=False,  # Disable verbose in stdio mode to prevent stdout pollution
                reuse_knowledge=self.config.sirchmunk.enable_cluster_reuse,
                cluster_sim_threshold=self.config.sirchmunk.cluster_similarity.threshold,
                cluster_sim_top_k=self.config.sirchmunk.cluster_similarity.top_k,
            )

        logger.info("AgenticSearch instance created")

    async def search(
        self,
        query: str,
        search_paths: Union[str, List[str]],
        mode: str = "DEEP",
        max_depth: Optional[int] = None,
        top_k_files: Optional[int] = None,
        keyword_levels: Optional[int] = None,
        include: Optional[List[str]] = None,
        exclude: Optional[List[str]] = None,
        return_cluster: bool = False,
    ) -> Union[str, List[Dict[str, Any]], KnowledgeCluster, None]:
        """Search and retrieve various types of raw documents using AgenticSearch.

        This method performs intelligent search across code, documentation, and
        other text-based documents. It directly retrieves and analyzes raw content
        from multiple file types including source code, markdown files, PDFs,
        text files, and other document formats supported by ripgrep-all.

        Args:
            query: Search query or question to find relevant documents
            search_paths: Paths to search in (files or directories); a single
                string is wrapped into a one-element list
            mode: Search mode (DEEP, FAST, FILENAME_ONLY)
            max_depth: Maximum directory depth to search
            top_k_files: Number of top files to return
            keyword_levels: Number of keyword granularity levels (DEEP mode)
            include: File patterns to include (glob)
            exclude: File patterns to exclude (glob)
            return_cluster: Whether to return full KnowledgeCluster object

        Returns:
            Search results: str (summary), List[Dict] (FILENAME_ONLY),
            KnowledgeCluster (if return_cluster=True), or None (if no results)

        Raises:
            RuntimeError: If service is not initialized
            ValueError: If parameters are invalid
        """
        if not self.initialized or self.searcher is None:
            raise RuntimeError("Sirchmunk service is not initialized")

        # Validate mode
        if mode not in ("DEEP", "FAST", "FILENAME_ONLY"):
            raise ValueError(f"Invalid mode: {mode}. Must be DEEP, FAST, or FILENAME_ONLY")

        # Normalize search_paths
        if isinstance(search_paths, str):
            search_paths = [search_paths]

        # Validate search paths: a missing path is only logged, the search
        # still proceeds with the full list.
        for path in search_paths:
            if not Path(path).exists():
                logger.warning(f"Search path does not exist: {path}")

        # Apply defaults from configuration. Any falsy value (None or 0)
        # falls back to the configured default.
        defaults = self.config.sirchmunk.search_defaults
        max_depth = max_depth or defaults.max_depth
        top_k_files = top_k_files or defaults.top_k_files
        keyword_levels = keyword_levels or defaults.keyword_levels

        logger.info(
            f"Starting search: mode={mode}, query='{query[:50]}...', "
            f"paths={len(search_paths)}, max_depth={max_depth}"
        )

        try:
            # Perform search
            result = await self.searcher.search(
                query=query,
                search_paths=search_paths,
                mode=mode,
                max_depth=max_depth,
                top_k_files=top_k_files,
                keyword_levels=keyword_levels,
                include=include,
                exclude=exclude,
                verbose=self.config.sirchmunk.verbose,
                grep_timeout=self.config.sirchmunk.search_defaults.grep_timeout,
                return_cluster=return_cluster,
            )

            logger.info(f"Search completed: mode={mode}, result_type={type(result).__name__}")
            return result

        except Exception as e:
            # Log with traceback, then let the caller decide how to report it.
            logger.error(f"Search failed: {e}", exc_info=True)
            raise

    async def get_cluster(self, cluster_id: str) -> Optional[KnowledgeCluster]:
        """Retrieve a knowledge cluster by ID.

        Args:
            cluster_id: Cluster ID (e.g., 'C1007')

        Returns:
            KnowledgeCluster if found, None otherwise

        Raises:
            RuntimeError: If service is not initialized
        """
        if not self.initialized or self.searcher is None:
            raise RuntimeError("Sirchmunk service is not initialized")

        try:
            # NOTE(review): assumes AgenticSearch exposes an awaitable
            # `knowledge_manager.get`; confirm the attribute name against
            # sirchmunk.search.
            cluster = await self.searcher.knowledge_manager.get(cluster_id)
            if cluster:
                logger.info(f"Retrieved cluster: {cluster_id}")
            else:
                logger.warning(f"Cluster not found: {cluster_id}")
            return cluster
        except Exception as e:
            logger.error(f"Failed to get cluster {cluster_id}: {e}")
            raise

    async def list_clusters(
        self,
        limit: int = 10,
        sort_by: str = "last_modified",
    ) -> List[Dict[str, Any]]:
        """List saved knowledge clusters, sorted and truncated.

        Args:
            limit: Maximum number of clusters to return; values below zero
                are treated as zero
            sort_by: Sort field (hotness, confidence, last_modified). Any
                other value sorts by last_modified.

        Returns:
            List of cluster metadata dictionaries, highest/most recent first.
            When sorting by last_modified, clusters without a timestamp are
            placed after all dated clusters.

        Raises:
            RuntimeError: If service is not initialized
        """
        if not self.initialized or self.searcher is None:
            raise RuntimeError("Sirchmunk service is not initialized")

        try:
            # Get all clusters
            all_clusters = await self.searcher.knowledge_manager.list_all()

            # Sort into a new list so the collection handed back by the
            # storage layer is never reordered in place.
            if sort_by == "hotness":
                ordered = sorted(all_clusters, key=lambda c: c.hotness or 0.0, reverse=True)
            elif sort_by == "confidence":
                ordered = sorted(all_clusters, key=lambda c: c.confidence or 0.0, reverse=True)
            else:  # last_modified
                # A missing timestamp cannot be compared with a datetime, so
                # undated clusters are split off and appended at the end.
                dated = [c for c in all_clusters if c.last_modified is not None]
                undated = [c for c in all_clusters if c.last_modified is None]
                dated.sort(key=lambda c: c.last_modified, reverse=True)
                ordered = dated + undated

            # Limit results; a negative limit must not slice from the end.
            result_clusters = ordered[: max(limit, 0)]

            # Convert to dictionaries
            results = [
                {
                    "id": cluster.id,
                    "name": cluster.name,
                    "confidence": cluster.confidence,
                    "hotness": cluster.hotness,
                    "lifecycle": cluster.lifecycle.value,
                    "version": cluster.version,
                    "last_modified": cluster.last_modified.isoformat() if cluster.last_modified else None,
                    "queries": cluster.queries,
                    "evidences_count": len(cluster.evidences),
                }
                for cluster in result_clusters
            ]

            logger.info(f"Listed {len(results)} clusters (limit={limit}, sort_by={sort_by})")
            return results

        except Exception as e:
            logger.error(f"Failed to list clusters: {e}")
            raise

    def get_stats(self) -> Dict[str, Any]:
        """Get service statistics.

        Returns:
            Dictionary with the knowledge manager's statistics plus a
            "service" entry. If collecting statistics fails, the error is
            logged and ``{"error": <message>}`` is returned instead.

        Raises:
            RuntimeError: If service is not initialized
        """
        if not self.initialized or self.searcher is None:
            raise RuntimeError("Sirchmunk service is not initialized")

        try:
            # Get knowledge manager stats
            stats = self.searcher.knowledge_manager.get_stats()

            # Add service-level stats
            stats["service"] = {
                "initialized": self.initialized,
                "work_path": str(self.config.sirchmunk.work_path),
                "cluster_reuse_enabled": self.config.sirchmunk.enable_cluster_reuse,
            }

            return stats
        except Exception as e:
            # Best effort: statistics failures are reported, not raised.
            logger.error(f"Failed to get stats: {e}")
            return {"error": str(e)}

    async def shutdown(self) -> None:
        """Gracefully shutdown the service.

        Marks the service as uninitialized, so later calls to the public
        methods raise RuntimeError. Errors during shutdown are logged and
        not propagated.
        """
        logger.info("Shutting down Sirchmunk service")

        try:
            # Currently no cleanup needed, but this provides extension point
            self.initialized = False
            logger.info("Sirchmunk service shutdown complete")
        except Exception as e:
            logger.error(f"Error during shutdown: {e}")
sirchmunk_mcp/setup.py ADDED
@@ -0,0 +1,15 @@
1
+ # Copyright (c) ModelScope Contributors. All rights reserved.
2
+ """
3
+ Setup script for Sirchmunk MCP Server.
4
+
5
+ For backwards compatibility with older pip versions.
6
+ Modern installations should use pyproject.toml.
7
+ """
8
+
9
+ from setuptools import setup, find_packages
10
+
11
if __name__ == "__main__":
    # Only package discovery and the package-data flag are passed here;
    # no other setup() arguments are supplied by this script.
    setup_options = {
        "packages": find_packages(),
        "include_package_data": True,
    }
    setup(**setup_options)