sirchmunk 0.0.1.post1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. sirchmunk/api/__init__.py +1 -0
  2. sirchmunk/api/chat.py +1123 -0
  3. sirchmunk/api/components/__init__.py +0 -0
  4. sirchmunk/api/components/history_storage.py +402 -0
  5. sirchmunk/api/components/monitor_tracker.py +518 -0
  6. sirchmunk/api/components/settings_storage.py +353 -0
  7. sirchmunk/api/history.py +254 -0
  8. sirchmunk/api/knowledge.py +411 -0
  9. sirchmunk/api/main.py +120 -0
  10. sirchmunk/api/monitor.py +219 -0
  11. sirchmunk/api/run_server.py +54 -0
  12. sirchmunk/api/search.py +230 -0
  13. sirchmunk/api/settings.py +309 -0
  14. sirchmunk/api/tools.py +315 -0
  15. sirchmunk/cli/__init__.py +11 -0
  16. sirchmunk/cli/cli.py +789 -0
  17. sirchmunk/learnings/knowledge_base.py +5 -2
  18. sirchmunk/llm/prompts.py +12 -1
  19. sirchmunk/retrieve/text_retriever.py +186 -2
  20. sirchmunk/scan/file_scanner.py +2 -2
  21. sirchmunk/schema/knowledge.py +119 -35
  22. sirchmunk/search.py +384 -26
  23. sirchmunk/storage/__init__.py +2 -2
  24. sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
  25. sirchmunk/utils/constants.py +7 -5
  26. sirchmunk/utils/embedding_util.py +217 -0
  27. sirchmunk/utils/tokenizer_util.py +36 -1
  28. sirchmunk/version.py +1 -1
  29. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +124 -9
  30. sirchmunk-0.0.2.dist-info/RECORD +69 -0
  31. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
  32. sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
  33. sirchmunk_mcp/__init__.py +25 -0
  34. sirchmunk_mcp/cli.py +478 -0
  35. sirchmunk_mcp/config.py +276 -0
  36. sirchmunk_mcp/server.py +355 -0
  37. sirchmunk_mcp/service.py +327 -0
  38. sirchmunk_mcp/setup.py +15 -0
  39. sirchmunk_mcp/tools.py +410 -0
  40. sirchmunk-0.0.1.post1.dist-info/RECORD +0 -45
  41. sirchmunk-0.0.1.post1.dist-info/top_level.txt +0 -1
  42. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
  43. {sirchmunk-0.0.1.post1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,411 @@
1
+ # Copyright (c) ModelScope Contributors. All rights reserved.
2
+ """
3
+ Knowledge Base API endpoints
4
+ Provides CRUD and analytics for KnowledgeCluster objects
5
+ """
6
+
7
+ from fastapi import APIRouter, HTTPException
8
+ from typing import Optional
9
+ from pydantic import BaseModel
10
+
11
+ from sirchmunk.storage.knowledge_storage import KnowledgeStorage
12
+ from sirchmunk.schema.knowledge import AbstractionLevel
13
+
14
# Router that groups every knowledge-base endpoint under /api/v1/knowledge.
router = APIRouter(
    prefix="/api/v1/knowledge",
    tags=["knowledge"],
)

# Module-level storage handle shared by all endpoints in this file.
km = KnowledgeStorage()
18
+
19
+ # === Request/Response Models ===
20
+
21
# Request body accepted by POST /search.
class SearchRequest(BaseModel):
    # Free-text query passed to the storage layer's find().
    query: str

    # Maximum number of matching clusters to return.
    limit: int = 10
24
+
25
+ # === API Endpoints ===
26
+
27
@router.get("/list")
async def list_knowledge_bases_alias():
    """Backward-compatible alias for the /clusters endpoint.

    Delegates to get_all_clusters with a fixed limit of 100 and no filters,
    returning its payload unchanged.
    """
    payload = await get_all_clusters(limit=100)
    return payload
31
+
32
+
33
@router.get("/clusters")
async def get_all_clusters(
    limit: int = 100,
    lifecycle: Optional[str] = None,
    abstraction_level: Optional[str] = None
):
    """
    Get knowledge clusters, most recently modified first, with optional filtering

    Query params:
        limit: Maximum number of clusters to return (negative values are treated as 0)
        lifecycle: Filter by lifecycle (STABLE, EMERGING, CONTESTED, DEPRECATED)
        abstraction_level: Filter by abstraction level

    Returns:
        Dict with "success", "count" (clusters in this response), "total"
        (total_clusters taken from the storage stats) and "data" (the
        clusters serialized with to_dict()).

    Raises:
        HTTPException: 500 when anything outside the tolerated fetch/row
            failures goes wrong.
    """
    import logging

    logger = logging.getLogger(__name__)

    try:
        stats = km.get_stats()

        # Build the WHERE clause from whichever filters were supplied; the
        # filter values themselves are always bound as parameters.
        where_clauses = []
        params = []

        if lifecycle:
            where_clauses.append("lifecycle = ?")
            params.append(lifecycle.upper())

        if abstraction_level:
            where_clauses.append("abstraction_level = ?")
            params.append(abstraction_level.upper())

        sql = "SELECT * FROM knowledge_clusters"
        if where_clauses:
            sql += " WHERE " + " AND ".join(where_clauses)

        # Coerce to a non-negative int before interpolating, so nothing but
        # a plain number can ever reach the SQL text.
        safe_limit = max(0, int(limit))
        sql += f" ORDER BY last_modified DESC LIMIT {safe_limit}"

        try:
            rows = km.db.fetch_all(sql, params if params else None)
        except Exception:
            # Best-effort recovery: if the table is missing or the schema is
            # out of date, (re)create it and answer with an empty list.
            # Log the failure so it is not silently lost.
            logger.warning(
                "Fetching knowledge clusters failed; recreating table",
                exc_info=True,
            )
            km._create_table()
            rows = []

        clusters = []
        for row in rows:
            try:
                clusters.append(km._row_to_cluster(row))
            except Exception:
                # Skip malformed rows to avoid failing the whole request.
                logger.warning(
                    "Skipping malformed knowledge cluster row", exc_info=True
                )
                continue

        return {
            "success": True,
            "count": len(clusters),
            "total": stats.get('custom_stats', {}).get('total_clusters', 0),
            "data": [c.to_dict() for c in clusters]
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
93
+
94
@router.get("/clusters/{cluster_id}")
async def get_cluster(cluster_id: str):
    """Look up a single knowledge cluster by ID.

    Responds with 404 when storage returns nothing for the ID, and with 500
    for any other failure.
    """
    try:
        found = await km.get(cluster_id)
        if found:
            return {"success": True, "data": found.to_dict()}
        raise HTTPException(status_code=404, detail="Cluster not found")
    except HTTPException:
        # Let the deliberate 404 through instead of wrapping it as a 500.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
110
+
111
@router.post("/search")
async def search_clusters(request: SearchRequest):
    """
    Run a query against the stored knowledge clusters

    Per the original description, the search covers: id, name, description,
    content, patterns. Returns the query, the number of matches and the
    matches serialized with to_dict().
    """
    try:
        matches = await km.find(request.query, limit=request.limit)
        payload = {
            "success": True,
            "query": request.query,
            "count": len(matches),
        }
        payload["data"] = [match.to_dict() for match in matches]
        return payload
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
129
+
130
def _aggregate_summary(row):
    """Convert a (min, max, avg) aggregate row into the API stats dict.

    A missing row, or a NULL or zero aggregate, is reported as 0; the
    average is rounded to 4 decimal places.
    """
    if not row:
        return {"min": 0, "max": 0, "avg": 0}
    return {
        "min": row[0] if row[0] else 0,
        "max": row[1] if row[1] else 0,
        "avg": round(row[2], 4) if row[2] else 0,
    }


def _ranked_entries(rows, value_key):
    """Label (id, name, value) rows as dicts, storing the value under value_key."""
    return [{"id": row[0], "name": row[1], value_key: row[2]} for row in rows]


@router.get("/stats")
async def get_knowledge_stats():
    """
    Get comprehensive knowledge base statistics

    Returns:
        - Overview (total clusters, average confidence from storage stats)
        - Lifecycle distribution
        - Abstraction level distribution
        - Confidence min/max/avg
        - Hotness min/max/avg
        - 10 most recently modified clusters
        - 10 highest-confidence and 10 hottest clusters
        - Clusters created per day over the last 30 days

    Raises:
        HTTPException: 500 if any of the underlying queries fails.
    """
    try:
        stats = km.get_stats()
        custom_stats = stats.get('custom_stats', {})

        # Lifecycle distribution comes precomputed from the storage stats.
        lifecycle_dist = custom_stats.get('lifecycle_distribution', {})

        # One grouped query instead of one COUNT per level; levels with no
        # rows still appear with a count of 0.
        level_rows = km.db.fetch_all(
            """
            SELECT abstraction_level, COUNT(*)
            FROM knowledge_clusters
            GROUP BY abstraction_level
            """
        )
        level_counts = {row[0]: row[1] for row in level_rows}
        abstraction_dist = {
            level.name: level_counts.get(level.name, 0)
            for level in AbstractionLevel
        }

        confidence_stats = _aggregate_summary(km.db.fetch_one(
            """
            SELECT
                MIN(confidence) as min_confidence,
                MAX(confidence) as max_confidence,
                AVG(confidence) as avg_confidence
            FROM knowledge_clusters
            WHERE confidence IS NOT NULL
            """
        ))

        hotness_stats = _aggregate_summary(km.db.fetch_one(
            """
            SELECT
                MIN(hotness) as min_hotness,
                MAX(hotness) as max_hotness,
                AVG(hotness) as avg_hotness
            FROM knowledge_clusters
            WHERE hotness IS NOT NULL
            """
        ))

        # Top 10 most recently modified clusters.
        recent_clusters = _ranked_entries(km.db.fetch_all(
            """
            SELECT id, name, last_modified
            FROM knowledge_clusters
            ORDER BY last_modified DESC
            LIMIT 10
            """
        ), "last_modified")

        # Top 10 highest-confidence clusters.
        top_confidence = _ranked_entries(km.db.fetch_all(
            """
            SELECT id, name, confidence
            FROM knowledge_clusters
            WHERE confidence IS NOT NULL
            ORDER BY confidence DESC
            LIMIT 10
            """
        ), "confidence")

        # Top 10 hottest clusters.
        top_hotness = _ranked_entries(km.db.fetch_all(
            """
            SELECT id, name, hotness
            FROM knowledge_clusters
            WHERE hotness IS NOT NULL
            ORDER BY hotness DESC
            LIMIT 10
            """
        ), "hotness")

        # Timeline data (clusters created per day for the last 30 days).
        timeline_rows = km.db.fetch_all(
            """
            SELECT
                CAST(create_time AS DATE) as date,
                COUNT(*) as count
            FROM knowledge_clusters
            WHERE create_time >= current_date - INTERVAL '30 days'
            GROUP BY CAST(create_time AS DATE)
            ORDER BY date ASC
            """
        )
        timeline = [
            {"date": str(row[0]), "count": row[1]}
            for row in timeline_rows
        ]

        return {
            "success": True,
            "data": {
                "overview": {
                    "total_clusters": custom_stats.get('total_clusters', 0),
                    "avg_confidence": custom_stats.get('average_confidence', 0),
                },
                "lifecycle_distribution": lifecycle_dist,
                "abstraction_level_distribution": abstraction_dist,
                "confidence_stats": confidence_stats,
                "hotness_stats": hotness_stats,
                "recent_clusters": recent_clusters,
                "top_confidence_clusters": top_confidence,
                "top_hotness_clusters": top_hotness,
                "timeline": timeline,
            }
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
295
+
296
@router.get("/patterns")
async def get_top_patterns(limit: int = 20):
    """
    Get most common patterns across all clusters

    Query params:
        limit: Number of top patterns to return

    Returns:
        Dict with "success", "count" and "data", where "data" is a list of
        {"pattern", "count"} entries ordered from most to least frequent.

    Raises:
        HTTPException: 500 if the patterns cannot be fetched.
    """
    import json
    from collections import Counter

    try:
        # Fetch every stored patterns value and count occurrences in Python.
        rows = km.db.fetch_all("SELECT patterns FROM knowledge_clusters WHERE patterns IS NOT NULL")

        pattern_counter = Counter()
        for row in rows:
            raw_patterns = row[0]
            if not raw_patterns:
                continue

            # Values are normally JSON text; pass through anything the
            # driver has already decoded.
            if isinstance(raw_patterns, (str, bytes, bytearray)):
                try:
                    patterns = json.loads(raw_patterns)
                except ValueError:
                    # One corrupt row must not fail the whole request.
                    continue
            else:
                patterns = raw_patterns

            # Only a list/tuple of strings is a valid pattern collection.
            # A bare JSON string would otherwise be counted per character.
            if not isinstance(patterns, (list, tuple)):
                continue
            pattern_counter.update(p for p in patterns if isinstance(p, str))

        top_patterns = [
            {"pattern": pattern, "count": count}
            for pattern, count in pattern_counter.most_common(limit)
        ]

        return {
            "success": True,
            "count": len(top_patterns),
            "data": top_patterns
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
330
+
331
@router.get("/graph")
async def get_knowledge_graph():
    """
    Get knowledge graph data (nodes and edges)

    Returns clusters as nodes and their related_clusters entries as edges.
    Each edge carries "source" (the owning cluster id), "target", "weight"
    and "type" (the entry's own "source" field). Related-cluster data that
    cannot be decoded, or entries without a target, are skipped so that one
    bad row does not fail the whole graph.

    Raises:
        HTTPException: 500 if the clusters cannot be fetched.
    """
    import json

    try:
        rows = km.db.fetch_all(
            "SELECT id, name, confidence, hotness, lifecycle, abstraction_level, related_clusters FROM knowledge_clusters"
        )

        nodes = []
        edges = []

        for row in rows:
            cluster_id, name, confidence, hotness, lifecycle, abstraction_level, related_clusters_json = row

            nodes.append({
                "id": cluster_id,
                "name": name,
                "confidence": confidence,
                "hotness": hotness,
                "lifecycle": lifecycle,
                "abstraction_level": abstraction_level,
            })

            if not related_clusters_json:
                continue

            # Values are normally JSON text; pass through anything the
            # driver has already decoded.
            if isinstance(related_clusters_json, (str, bytes, bytearray)):
                try:
                    related_clusters = json.loads(related_clusters_json)
                except ValueError:
                    # Keep the node, drop its undecodable edges.
                    continue
            else:
                related_clusters = related_clusters_json

            if not isinstance(related_clusters, (list, tuple)):
                continue

            for rc in related_clusters:
                # An edge is meaningless without a target; skip such entries.
                if not isinstance(rc, dict) or "target_cluster_id" not in rc:
                    continue
                edges.append({
                    "source": cluster_id,
                    "target": rc["target_cluster_id"],
                    "weight": rc.get("weight"),
                    "type": rc.get("source")
                })

        return {
            "success": True,
            "data": {
                "nodes": nodes,
                "edges": edges
            }
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
382
+
383
@router.delete("/clusters/{cluster_id}")
async def delete_cluster(cluster_id: str):
    """Remove one knowledge cluster by ID.

    Responds with 404 when storage reports nothing was removed, and with 500
    for any other failure.
    """
    try:
        removed = await km.remove(cluster_id)
        if removed:
            return {
                "success": True,
                "message": f"Cluster {cluster_id} deleted successfully"
            }
        raise HTTPException(status_code=404, detail="Cluster not found")
    except HTTPException:
        # Let the deliberate 404 through instead of wrapping it as a 500.
        raise
    except Exception as err:
        raise HTTPException(status_code=500, detail=str(err))
399
+
400
@router.delete("/clusters")
async def clear_all_clusters():
    """Clear all knowledge clusters (use with caution!)

    Returns:
        Dict with "success" (the result of the storage clear call) and a
        "message" that matches that outcome.

    Raises:
        HTTPException: 500 if the clear operation raises.
    """
    try:
        success = await km.clear()

        # Do not claim success when storage reported that nothing was cleared.
        if success:
            message = "All clusters cleared successfully"
        else:
            message = "Failed to clear clusters"

        return {
            "success": success,
            "message": message
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
sirchmunk/api/main.py ADDED
@@ -0,0 +1,120 @@
1
+ # Copyright (c) ModelScope Contributors. All rights reserved.
2
+ """
3
+ Main FastAPI application for Sirchmunk API
4
+ Combines all API modules and provides centralized configuration
5
+ """
6
+
7
+ from fastapi import FastAPI, HTTPException
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.responses import JSONResponse
10
+ import uvicorn
11
+
12
+ # Import all API routers
13
+ from .knowledge import router as knowledge_router
14
+ from .settings import router as settings_router
15
+ from .history import router as history_router, dashboard_router
16
+ from .chat import router as chat_router
17
+ from .monitor import router as monitor_router
18
+ from .search import router as search_router
19
+
20
# Application object; Swagger UI is served at /docs and ReDoc at /redoc.
app = FastAPI(
    title="Sirchmunk API",
    description="APIs for Sirchmunk",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# CORS: every origin, method and header is currently allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# very permissive — confirm this is intended before production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Register the routers in the same order as before.
for _api_router in (
    knowledge_router,
    settings_router,
    history_router,
    dashboard_router,
    chat_router,
    monitor_router,
    search_router,
):
    app.include_router(_api_router)
del _api_router  # keep the loop variable out of the module namespace
46
+
47
@app.get("/")
async def root():
    """Describe the API: name, version, status, endpoint prefixes and docs URLs."""
    endpoint_prefixes = {
        "search": "/api/v1/search",
        "knowledge": "/api/v1/knowledge",
        "settings": "/api/v1/settings",
        "history": "/api/v1/history",
        "chat": "/api/v1/chat",
        "monitor": "/api/v1/monitor",
    }
    documentation_urls = {
        "swagger": "/docs",
        "redoc": "/redoc",
    }
    return {
        "name": "Sirchmunk API",
        "version": "1.0.0",
        "description": "APIs for Sirchmunk",
        "status": "running",
        "endpoints": endpoint_prefixes,
        "documentation": documentation_urls,
    }
68
+
69
@app.get("/health")
async def health_check():
    """Health check endpoint

    Returns:
        Dict with "status", the current UTC time as an ISO-8601 "timestamp"
        (e.g. "2024-01-13T17:30:00Z") and a "services" map.
    """
    from datetime import datetime, timezone

    # Report the time of this call, not a fixed placeholder.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    return {
        "status": "healthy",
        "timestamp": timestamp,
        # NOTE(review): these service states are static values; nothing here
        # actually probes the database, LLM or embedding backends.
        "services": {
            "api": "running",
            "database": "connected",
            "llm": "available",
            "embedding": "available"
        }
    }
82
+
83
@app.exception_handler(404)
async def not_found_handler(request, exc):
    """Render 404 responses as a JSON error body that includes the requested path."""
    error_body = {
        "code": "NOT_FOUND",
        "message": "The requested resource was not found",
        "path": str(request.url.path),
    }
    return JSONResponse(
        status_code=404,
        content={"success": False, "error": error_body},
    )
97
+
98
@app.exception_handler(500)
async def internal_error_handler(request, exc):
    """Render 500 responses as a generic JSON error body with no exception details."""
    error_body = {
        "code": "INTERNAL_ERROR",
        "message": "An internal server error occurred",
        "details": "Please try again later or contact support",
    }
    return JSONResponse(
        status_code=500,
        content={"success": False, "error": error_body},
    )
112
+
113
if __name__ == "__main__":
    # Development entry point (run as `python -m sirchmunk.api.main`).
    # The app must be referenced by its full package path: this module uses
    # package-relative imports, so it cannot be imported as top-level "main",
    # and reload=True requires an import string, not the app object.
    uvicorn.run(
        "sirchmunk.api.main:app",
        host="0.0.0.0",
        port=8584,
        reload=True,
        log_level="info"
    )